Skip to content

Commit 096f74c

Browse files
committed
fix: correct search ordering for pgvector, BM25, and trgm with LIMIT
The orderBy enum's apply (on PgSelectStep) and the filter's apply (on a separate query-builder object) could not share meta because they operate on different objects. The orderBy runs first but the filter has the score expression needed for ORDER BY. Fix: use a WeakMap keyed by the shared SQL alias reference to bridge the requested sort direction from the orderBy enum to the filter. The filter then adds the correct ORDER BY clause so that SQL LIMIT returns the most relevant results instead of arbitrary rows. Also adds regression tests verifying that LIMIT with orderBy returns the top-ranked results for pgvector distance, BM25 score, trgm similarity, and fullTextSearch with per-adapter ordering.
1 parent d6c68ac commit 096f74c

2 files changed

Lines changed: 184 additions & 21 deletions

File tree

graphile/graphile-search/src/__tests__/unified-search.test.ts

Lines changed: 141 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -485,6 +485,147 @@ describe('graphile-search (unified search plugin)', () => {
485485
});
486486
});
487487

488+
// ─── orderBy + LIMIT correctness (regression tests) ─────────────────────
489+
490+
describe('orderBy + LIMIT returns top results (not arbitrary rows)', () => {
491+
it('pgvector: LIMIT 1 with orderBy ASC returns the closest vector', async () => {
492+
// Query vector [1,0,0] — doc 1 has embedding [1,0,0] (distance ≈ 0),
493+
// so it must be the top result when ordering by distance ASC.
494+
const allResult = await query<AllDocumentsResult>(`
495+
query {
496+
allDocuments(
497+
where: { vectorEmbedding: { vector: [1, 0, 0], metric: COSINE } }
498+
orderBy: EMBEDDING_VECTOR_DISTANCE_ASC
499+
) {
500+
nodes { rowId title embeddingVectorDistance }
501+
}
502+
}
503+
`);
504+
expect(allResult.errors).toBeUndefined();
505+
const allNodes = allResult.data!.allDocuments.nodes;
506+
expect(allNodes.length).toBeGreaterThan(1);
507+
508+
// Now fetch with LIMIT 1 — should return the same first row
509+
const limitResult = await query<AllDocumentsResult>(`
510+
query {
511+
allDocuments(
512+
where: { vectorEmbedding: { vector: [1, 0, 0], metric: COSINE } }
513+
orderBy: EMBEDDING_VECTOR_DISTANCE_ASC
514+
first: 1
515+
) {
516+
nodes { rowId title embeddingVectorDistance }
517+
}
518+
}
519+
`);
520+
expect(limitResult.errors).toBeUndefined();
521+
const limitNodes = limitResult.data!.allDocuments.nodes;
522+
expect(limitNodes).toHaveLength(1);
523+
524+
// The LIMIT 1 result must be the closest (smallest distance)
525+
expect(limitNodes[0].rowId).toBe(allNodes[0].rowId);
526+
expect(limitNodes[0].embeddingVectorDistance).toBe(allNodes[0].embeddingVectorDistance);
527+
});
528+
529+
it('pgvector: results are actually sorted by distance ASC', async () => {
530+
const result = await query<AllDocumentsResult>(`
531+
query {
532+
allDocuments(
533+
where: { vectorEmbedding: { vector: [1, 0, 0], metric: COSINE } }
534+
orderBy: EMBEDDING_VECTOR_DISTANCE_ASC
535+
) {
536+
nodes { rowId embeddingVectorDistance }
537+
}
538+
}
539+
`);
540+
expect(result.errors).toBeUndefined();
541+
const nodes = result.data!.allDocuments.nodes;
542+
expect(nodes.length).toBeGreaterThan(1);
543+
544+
for (let i = 0; i < nodes.length - 1; i++) {
545+
expect(nodes[i].embeddingVectorDistance).toBeLessThanOrEqual(
546+
nodes[i + 1].embeddingVectorDistance!
547+
);
548+
}
549+
});
550+
551+
it('BM25: LIMIT 2 with orderBy ASC returns the most relevant rows', async () => {
552+
// Fetch all BM25 results sorted by score ASC (most negative = most relevant)
553+
const allResult = await query<AllDocumentsResult>(`
554+
query {
555+
allDocuments(
556+
where: { bm25Body: { query: "learning" } }
557+
orderBy: BODY_BM25_SCORE_ASC
558+
) {
559+
nodes { rowId title bodyBm25Score }
560+
}
561+
}
562+
`);
563+
expect(allResult.errors).toBeUndefined();
564+
const allNodes = allResult.data!.allDocuments.nodes;
565+
expect(allNodes.length).toBeGreaterThan(2);
566+
567+
// Now fetch with LIMIT 2
568+
const limitResult = await query<AllDocumentsResult>(`
569+
query {
570+
allDocuments(
571+
where: { bm25Body: { query: "learning" } }
572+
orderBy: BODY_BM25_SCORE_ASC
573+
first: 2
574+
) {
575+
nodes { rowId title bodyBm25Score }
576+
}
577+
}
578+
`);
579+
expect(limitResult.errors).toBeUndefined();
580+
const limitNodes = limitResult.data!.allDocuments.nodes;
581+
expect(limitNodes).toHaveLength(2);
582+
583+
// The limited results must be the top-2 from the full sorted list
584+
expect(limitNodes[0].rowId).toBe(allNodes[0].rowId);
585+
expect(limitNodes[1].rowId).toBe(allNodes[1].rowId);
586+
});
587+
588+
it('fullTextSearch + per-adapter orderBy: LIMIT returns top results', async () => {
589+
// fullTextSearch dispatches to all text-compatible adapters.
590+
// Per-adapter orderBy (e.g. BM25 score) still works correctly with LIMIT
591+
// because the adapter score is a SQL-level expression.
592+
// (Note: SEARCH_SCORE is a JS-computed composite and does not produce
593+
// SQL-level ORDER BY, so it should not be relied on with LIMIT.)
594+
const allResult = await query<AllDocumentsResult>(`
595+
query {
596+
allDocuments(
597+
where: { fullTextSearch: "machine learning" }
598+
orderBy: BODY_BM25_SCORE_ASC
599+
) {
600+
nodes { rowId title bodyBm25Score }
601+
}
602+
}
603+
`);
604+
expect(allResult.errors).toBeUndefined();
605+
const allNodes = allResult.data!.allDocuments.nodes;
606+
expect(allNodes.length).toBeGreaterThan(1);
607+
608+
// Now LIMIT 1
609+
const limitResult = await query<AllDocumentsResult>(`
610+
query {
611+
allDocuments(
612+
where: { fullTextSearch: "machine learning" }
613+
orderBy: BODY_BM25_SCORE_ASC
614+
first: 1
615+
) {
616+
nodes { rowId title bodyBm25Score }
617+
}
618+
}
619+
`);
620+
expect(limitResult.errors).toBeUndefined();
621+
const limitNodes = limitResult.data!.allDocuments.nodes;
622+
expect(limitNodes).toHaveLength(1);
623+
624+
// Must be the most relevant BM25 row
625+
expect(limitNodes[0].rowId).toBe(allNodes[0].rowId);
626+
});
627+
});
628+
488629
// ─── Hybrid / multi-adapter queries ─────────────────────────────────────
489630

490631
describe('hybrid search (multiple adapters active simultaneously)', () => {

graphile/graphile-search/src/plugin.ts

Lines changed: 43 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -182,6 +182,14 @@ export function createUnifiedSearchPlugin(
182182
// Per-codec cache of discovered columns, keyed by codec name
183183
const codecCache = new Map<string, AdapterColumnCache[]>();
184184

185+
// Bridge between orderBy enum apply and filter apply.
186+
// The orderBy enum runs on the PgSelectStep while the filter runs on
187+
// a separate query-builder object. They share the same SQL `alias`
188+
// reference, so we use a WeakMap keyed by that alias to pass the
189+
// requested sort direction from the orderBy (which fires first) to
190+
// the filter (which fires second and has the score expression).
191+
const _pendingOrderDirections = new WeakMap<object, Record<string, string>>();
192+
185193
/**
186194
* Get (or compute) the adapter columns for a given codec.
187195
*
@@ -634,11 +642,19 @@ export function createUnifiedSearchPlugin(
634642
codec: codec as any,
635643
attributeName: column.attributeName,
636644
});
637-
const metaKey = `unified_order_${adapter.name}_${baseFieldName}`;
645+
const orderKey = `unified_order_${adapter.name}_${baseFieldName}`;
638646
const makePlan =
639647
(direction: 'ASC' | 'DESC') => (step: any) => {
640-
if (typeof step.setMeta === 'function') {
641-
step.setMeta(metaKey, direction);
648+
// Store the requested direction keyed by the SQL alias so
649+
// the filter's apply (which runs second) can read it.
650+
const aliasKey = step?.alias;
651+
if (aliasKey) {
652+
let dirs = _pendingOrderDirections.get(aliasKey);
653+
if (!dirs) {
654+
dirs = {};
655+
_pendingOrderDirections.set(aliasKey, dirs);
656+
}
657+
dirs[orderKey] = direction;
642658
}
643659
};
644660

@@ -679,8 +695,14 @@ export function createUnifiedSearchPlugin(
679695

680696
const makeSearchScorePlan =
681697
(direction: 'ASC' | 'DESC') => (step: any) => {
682-
if (typeof step.setMeta === 'function') {
683-
step.setMeta('unified_order_search_score', direction);
698+
const aliasKey = step?.alias;
699+
if (aliasKey) {
700+
let dirs = _pendingOrderDirections.get(aliasKey);
701+
if (!dirs) {
702+
dirs = {};
703+
_pendingOrderDirections.set(aliasKey, dirs);
704+
}
705+
dirs['unified_order_search_score'] = direction;
684706
}
685707
};
686708

@@ -812,12 +834,12 @@ export function createUnifiedSearchPlugin(
812834
qb.setMeta(scoreMetaKey, {
813835
selectIndex: scoreIndex,
814836
} as SearchScoreDetails);
815-
}
816837

817-
// ORDER BY: only add when explicitly requested
818-
if (qb && typeof qb.getMetaRaw === 'function') {
819-
const orderMetaKey = `unified_order_${adapter.name}_${baseFieldName}`;
820-
const explicitDir = qb.getMetaRaw(orderMetaKey);
838+
// ORDER BY: read the direction stored by the orderBy
839+
// enum (which ran first) via the shared alias key.
840+
const orderKey = `unified_order_${adapter.name}_${baseFieldName}`;
841+
const dirs = _pendingOrderDirections.get($condition.alias);
842+
const explicitDir = dirs?.[orderKey];
821843
if (explicitDir) {
822844
qb.orderBy({
823845
fragment: result.scoreExpression,
@@ -905,17 +927,17 @@ export function createUnifiedSearchPlugin(
905927
selectIndex: scoreIndex,
906928
} as SearchScoreDetails);
907929

908-
// ORDER BY: only add when explicitly requested via orderBy enum
909-
if (typeof qb.getMetaRaw === 'function') {
910-
const orderMetaKey = `unified_order_${adapter.name}_${baseFieldName}`;
911-
const explicitDir = qb.getMetaRaw(orderMetaKey);
912-
if (explicitDir) {
913-
qb.orderBy({
914-
fragment: result.scoreExpression,
915-
codec: TYPES.float,
916-
direction: explicitDir,
917-
});
918-
}
930+
// ORDER BY: read the direction stored by the orderBy
931+
// enum (which ran first) via the shared alias key.
932+
const orderKey = `unified_order_${adapter.name}_${baseFieldName}`;
933+
const dirs = _pendingOrderDirections.get($condition.alias);
934+
const explicitDir = dirs?.[orderKey];
935+
if (explicitDir) {
936+
qb.orderBy({
937+
fragment: result.scoreExpression,
938+
codec: TYPES.float,
939+
direction: explicitDir,
940+
});
919941
}
920942
}
921943
}

0 commit comments

Comments
 (0)