Skip to content

Commit d4465e3

Browse files
authored
Merge pull request #947 from constructive-io/devin/1775026228-fix-search-ordering
fix: correct search ordering for pgvector, BM25, and trgm with LIMIT
2 parents 490508e + 096f74c commit d4465e3

2 files changed

Lines changed: 184 additions & 21 deletions

File tree

graphile/graphile-search/src/__tests__/unified-search.test.ts

Lines changed: 141 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -485,6 +485,147 @@ describe('graphile-search (unified search plugin)', () => {
485485
});
486486
});
487487

488+
// ─── orderBy + LIMIT correctness (regression tests) ─────────────────────
489+
490+
describe('orderBy + LIMIT returns top results (not arbitrary rows)', () => {
491+
it('pgvector: LIMIT 1 with orderBy ASC returns the closest vector', async () => {
492+
// Query vector [1,0,0] — doc 1 has embedding [1,0,0] (distance ≈ 0),
493+
// so it must be the top result when ordering by distance ASC.
494+
const allResult = await query<AllDocumentsResult>(`
495+
query {
496+
allDocuments(
497+
where: { vectorEmbedding: { vector: [1, 0, 0], metric: COSINE } }
498+
orderBy: EMBEDDING_VECTOR_DISTANCE_ASC
499+
) {
500+
nodes { rowId title embeddingVectorDistance }
501+
}
502+
}
503+
`);
504+
expect(allResult.errors).toBeUndefined();
505+
const allNodes = allResult.data!.allDocuments.nodes;
506+
expect(allNodes.length).toBeGreaterThan(1);
507+
508+
// Now fetch with LIMIT 1 — should return the same first row
509+
const limitResult = await query<AllDocumentsResult>(`
510+
query {
511+
allDocuments(
512+
where: { vectorEmbedding: { vector: [1, 0, 0], metric: COSINE } }
513+
orderBy: EMBEDDING_VECTOR_DISTANCE_ASC
514+
first: 1
515+
) {
516+
nodes { rowId title embeddingVectorDistance }
517+
}
518+
}
519+
`);
520+
expect(limitResult.errors).toBeUndefined();
521+
const limitNodes = limitResult.data!.allDocuments.nodes;
522+
expect(limitNodes).toHaveLength(1);
523+
524+
// The LIMIT 1 result must be the closest (smallest distance)
525+
expect(limitNodes[0].rowId).toBe(allNodes[0].rowId);
526+
expect(limitNodes[0].embeddingVectorDistance).toBe(allNodes[0].embeddingVectorDistance);
527+
});
528+
529+
it('pgvector: results are actually sorted by distance ASC', async () => {
530+
const result = await query<AllDocumentsResult>(`
531+
query {
532+
allDocuments(
533+
where: { vectorEmbedding: { vector: [1, 0, 0], metric: COSINE } }
534+
orderBy: EMBEDDING_VECTOR_DISTANCE_ASC
535+
) {
536+
nodes { rowId embeddingVectorDistance }
537+
}
538+
}
539+
`);
540+
expect(result.errors).toBeUndefined();
541+
const nodes = result.data!.allDocuments.nodes;
542+
expect(nodes.length).toBeGreaterThan(1);
543+
544+
for (let i = 0; i < nodes.length - 1; i++) {
545+
expect(nodes[i].embeddingVectorDistance).toBeLessThanOrEqual(
546+
nodes[i + 1].embeddingVectorDistance!
547+
);
548+
}
549+
});
550+
551+
it('BM25: LIMIT 2 with orderBy ASC returns the most relevant rows', async () => {
552+
// Fetch all BM25 results sorted by score ASC (most negative = most relevant)
553+
const allResult = await query<AllDocumentsResult>(`
554+
query {
555+
allDocuments(
556+
where: { bm25Body: { query: "learning" } }
557+
orderBy: BODY_BM25_SCORE_ASC
558+
) {
559+
nodes { rowId title bodyBm25Score }
560+
}
561+
}
562+
`);
563+
expect(allResult.errors).toBeUndefined();
564+
const allNodes = allResult.data!.allDocuments.nodes;
565+
expect(allNodes.length).toBeGreaterThan(2);
566+
567+
// Now fetch with LIMIT 2
568+
const limitResult = await query<AllDocumentsResult>(`
569+
query {
570+
allDocuments(
571+
where: { bm25Body: { query: "learning" } }
572+
orderBy: BODY_BM25_SCORE_ASC
573+
first: 2
574+
) {
575+
nodes { rowId title bodyBm25Score }
576+
}
577+
}
578+
`);
579+
expect(limitResult.errors).toBeUndefined();
580+
const limitNodes = limitResult.data!.allDocuments.nodes;
581+
expect(limitNodes).toHaveLength(2);
582+
583+
// The limited results must be the top-2 from the full sorted list
584+
expect(limitNodes[0].rowId).toBe(allNodes[0].rowId);
585+
expect(limitNodes[1].rowId).toBe(allNodes[1].rowId);
586+
});
587+
588+
it('fullTextSearch + per-adapter orderBy: LIMIT returns top results', async () => {
589+
// fullTextSearch dispatches to all text-compatible adapters.
590+
// Per-adapter orderBy (e.g. BM25 score) still works correctly with LIMIT
591+
// because the adapter score is a SQL-level expression.
592+
// (Note: SEARCH_SCORE is a JS-computed composite and does not produce
593+
// SQL-level ORDER BY, so it should not be relied on with LIMIT.)
594+
const allResult = await query<AllDocumentsResult>(`
595+
query {
596+
allDocuments(
597+
where: { fullTextSearch: "machine learning" }
598+
orderBy: BODY_BM25_SCORE_ASC
599+
) {
600+
nodes { rowId title bodyBm25Score }
601+
}
602+
}
603+
`);
604+
expect(allResult.errors).toBeUndefined();
605+
const allNodes = allResult.data!.allDocuments.nodes;
606+
expect(allNodes.length).toBeGreaterThan(1);
607+
608+
// Now LIMIT 1
609+
const limitResult = await query<AllDocumentsResult>(`
610+
query {
611+
allDocuments(
612+
where: { fullTextSearch: "machine learning" }
613+
orderBy: BODY_BM25_SCORE_ASC
614+
first: 1
615+
) {
616+
nodes { rowId title bodyBm25Score }
617+
}
618+
}
619+
`);
620+
expect(limitResult.errors).toBeUndefined();
621+
const limitNodes = limitResult.data!.allDocuments.nodes;
622+
expect(limitNodes).toHaveLength(1);
623+
624+
// Must be the most relevant BM25 row
625+
expect(limitNodes[0].rowId).toBe(allNodes[0].rowId);
626+
});
627+
});
628+
488629
// ─── Hybrid / multi-adapter queries ─────────────────────────────────────
489630

490631
describe('hybrid search (multiple adapters active simultaneously)', () => {

graphile/graphile-search/src/plugin.ts

Lines changed: 43 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -182,6 +182,14 @@ export function createUnifiedSearchPlugin(
182182
// Per-codec cache of discovered columns, keyed by codec name
183183
const codecCache = new Map<string, AdapterColumnCache[]>();
184184

185+
// Bridge between orderBy enum apply and filter apply.
186+
// The orderBy enum runs on the PgSelectStep while the filter runs on
187+
// a separate query-builder object. They share the same SQL `alias`
188+
// reference, so we use a WeakMap keyed by that alias to pass the
189+
// requested sort direction from the orderBy (which fires first) to
190+
// the filter (which fires second and has the score expression).
191+
const _pendingOrderDirections = new WeakMap<object, Record<string, string>>();
192+
185193
/**
186194
* Get (or compute) the adapter columns for a given codec.
187195
*
@@ -634,11 +642,19 @@ export function createUnifiedSearchPlugin(
634642
codec: codec as any,
635643
attributeName: column.attributeName,
636644
});
637-
const metaKey = `unified_order_${adapter.name}_${baseFieldName}`;
645+
const orderKey = `unified_order_${adapter.name}_${baseFieldName}`;
638646
const makePlan =
639647
(direction: 'ASC' | 'DESC') => (step: any) => {
640-
if (typeof step.setMeta === 'function') {
641-
step.setMeta(metaKey, direction);
648+
// Store the requested direction keyed by the SQL alias so
649+
// the filter's apply (which runs second) can read it.
650+
const aliasKey = step?.alias;
651+
if (aliasKey) {
652+
let dirs = _pendingOrderDirections.get(aliasKey);
653+
if (!dirs) {
654+
dirs = {};
655+
_pendingOrderDirections.set(aliasKey, dirs);
656+
}
657+
dirs[orderKey] = direction;
642658
}
643659
};
644660

@@ -679,8 +695,14 @@ export function createUnifiedSearchPlugin(
679695

680696
const makeSearchScorePlan =
681697
(direction: 'ASC' | 'DESC') => (step: any) => {
682-
if (typeof step.setMeta === 'function') {
683-
step.setMeta('unified_order_search_score', direction);
698+
const aliasKey = step?.alias;
699+
if (aliasKey) {
700+
let dirs = _pendingOrderDirections.get(aliasKey);
701+
if (!dirs) {
702+
dirs = {};
703+
_pendingOrderDirections.set(aliasKey, dirs);
704+
}
705+
dirs['unified_order_search_score'] = direction;
684706
}
685707
};
686708

@@ -812,12 +834,12 @@ export function createUnifiedSearchPlugin(
812834
qb.setMeta(scoreMetaKey, {
813835
selectIndex: scoreIndex,
814836
} as SearchScoreDetails);
815-
}
816837

817-
// ORDER BY: only add when explicitly requested
818-
if (qb && typeof qb.getMetaRaw === 'function') {
819-
const orderMetaKey = `unified_order_${adapter.name}_${baseFieldName}`;
820-
const explicitDir = qb.getMetaRaw(orderMetaKey);
838+
// ORDER BY: read the direction stored by the orderBy
839+
// enum (which ran first) via the shared alias key.
840+
const orderKey = `unified_order_${adapter.name}_${baseFieldName}`;
841+
const dirs = _pendingOrderDirections.get($condition.alias);
842+
const explicitDir = dirs?.[orderKey];
821843
if (explicitDir) {
822844
qb.orderBy({
823845
fragment: result.scoreExpression,
@@ -905,17 +927,17 @@ export function createUnifiedSearchPlugin(
905927
selectIndex: scoreIndex,
906928
} as SearchScoreDetails);
907929

908-
// ORDER BY: only add when explicitly requested via orderBy enum
909-
if (typeof qb.getMetaRaw === 'function') {
910-
const orderMetaKey = `unified_order_${adapter.name}_${baseFieldName}`;
911-
const explicitDir = qb.getMetaRaw(orderMetaKey);
912-
if (explicitDir) {
913-
qb.orderBy({
914-
fragment: result.scoreExpression,
915-
codec: TYPES.float,
916-
direction: explicitDir,
917-
});
918-
}
930+
// ORDER BY: read the direction stored by the orderBy
931+
// enum (which ran first) via the shared alias key.
932+
const orderKey = `unified_order_${adapter.name}_${baseFieldName}`;
933+
const dirs = _pendingOrderDirections.get($condition.alias);
934+
const explicitDir = dirs?.[orderKey];
935+
if (explicitDir) {
936+
qb.orderBy({
937+
fragment: result.scoreExpression,
938+
codec: TYPES.float,
939+
direction: explicitDir,
940+
});
919941
}
920942
}
921943
}

0 commit comments

Comments
 (0)