Skip to content

Commit 4665bb8

Browse files
committed
feat(codegen): pluggable embedding system with --auto-embed flag
Adds a pluggable text-to-vector embedding system for CLI search commands:
- New embedder.ts template using @agentic-kit/ollama for the Ollama provider
- resolveEmbedder() resolves the embedder from env vars or appstash config
- autoEmbedWhere() converts text values to vector embeddings in where clauses
- --auto-embed flag on list and search commands for tables with vector fields
- Embedder module is conditionally generated when tables have embedding fields
- Updated docs-generator with --auto-embed examples for vector fields
- CLI e2e tests for the embedder: error without provider, real Ollama integration
1 parent 8c1724d commit 4665bb8

8 files changed

Lines changed: 845 additions & 30 deletions

File tree

graphql/codegen/src/core/codegen/cli/index.ts

Lines changed: 20 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ import {
1414
generateMultiTargetContextCommand,
1515
} from './infra-generator';
1616
import { generateTableCommand } from './table-command-generator';
17-
import { generateUtilsFile, generateNodeFetchFile, generateEntryPointFile } from './utils-generator';
17+
import { generateUtilsFile, generateNodeFetchFile, generateEntryPointFile, generateEmbedderFile } from './utils-generator';
1818

1919
export interface GenerateCliOptions {
2020
tables: Table[];
@@ -59,6 +59,14 @@ export function generateCli(options: GenerateCliOptions): GenerateCliResult {
5959
const utilsFile = generateUtilsFile();
6060
files.push(utilsFile);
6161

62+
// Generate embedder module if any table has vector embedding fields
63+
const hasAnyEmbeddings = tables.some((t) =>
64+
t.fields.some((f) => f.type.gqlType === 'Vector' || f.type.gqlType === '[Float]'),
65+
);
66+
if (hasAnyEmbeddings) {
67+
files.push(generateEmbedderFile());
68+
}
69+
6270
// Generate node HTTP adapter if configured (for *.localhost subdomain routing)
6371
if (useNodeHttpAdapter) {
6472
files.push(generateNodeFetchFile());
@@ -180,6 +188,16 @@ export function generateMultiTargetCli(
180188
const utilsFile = generateUtilsFile();
181189
files.push(utilsFile);
182190

191+
// Generate embedder module if any target has tables with vector embedding fields
192+
const hasAnyMtEmbeddings = targets.some((tgt) =>
193+
tgt.tables.some((t) =>
194+
t.fields.some((f) => f.type.gqlType === 'Vector' || f.type.gqlType === '[Float]'),
195+
),
196+
);
197+
if (hasAnyMtEmbeddings) {
198+
files.push(generateEmbedderFile());
199+
}
200+
183201
// Generate node HTTP adapter if configured (for *.localhost subdomain routing)
184202
if (options.nodeHttpAdapter) {
185203
files.push(generateNodeFetchFile());
@@ -299,5 +317,5 @@ export {
299317
export type { MultiTargetDocsInput } from './docs-generator';
300318
export { resolveDocsConfig } from '../docs-utils';
301319
export type { GeneratedDocFile } from '../docs-utils';
302-
export { generateUtilsFile, generateEntryPointFile } from './utils-generator';
320+
export { generateUtilsFile, generateEntryPointFile, generateEmbedderFile } from './utils-generator';
303321
export type { GeneratedFile, MultiTargetExecutorInput } from './executor-generator';

graphql/codegen/src/core/codegen/cli/table-command-generator.ts

Lines changed: 159 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -311,7 +311,103 @@ function buildSubcommandSwitch(
311311
return t.switchStatement(t.identifier('subcommand'), cases);
312312
}
313313

314-
function buildListHandler(table: Table, targetName?: string, typeRegistry?: TypeRegistry): t.FunctionDeclaration {
314+
/**
315+
* Build an auto-embed block that resolves the embedder and converts text
316+
* values in the where clause to vector embeddings.
317+
*
318+
* Generates code equivalent to:
319+
* if (argv['auto-embed']) {
320+
* const embedder = resolveEmbedder();
321+
* if (!embedder) {
322+
* console.error('--auto-embed requires an embedder. Set EMBEDDER_PROVIDER=ollama');
323+
* process.exit(1);
324+
* }
325+
* findManyArgs.where = await autoEmbedWhere(findManyArgs.where ?? {}, ['fieldA', 'fieldB'], embedder); // the assignment is emitted only when assignBack is true
326+
* }
327+
*
328+
* @param whereExpr - The expression to access the where clause (e.g. findManyArgs.where, searchWhere)
329+
* @param vectorFieldNames - Names of vector embedding fields detected at codegen time
330+
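* @param assignBack - If true, assigns the awaited autoEmbedWhere result back to findManyArgs.where; if false (the default), the call is emitted as a bare awaited statement and its return value is discarded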
331+
*/
332+
function buildAutoEmbedBlock(
333+
whereExpr: t.Expression,
334+
vectorFieldNames: string[],
335+
assignBack: boolean = false,
336+
): t.IfStatement {
337+
const fieldNamesArray = t.arrayExpression(
338+
vectorFieldNames.map((n) => t.stringLiteral(n)),
339+
);
340+
341+
const embedderDecl = t.variableDeclaration('const', [
342+
t.variableDeclarator(
343+
t.identifier('embedder'),
344+
t.callExpression(t.identifier('resolveEmbedder'), []),
345+
),
346+
]);
347+
348+
const noEmbedderCheck = t.ifStatement(
349+
t.unaryExpression('!', t.identifier('embedder')),
350+
t.blockStatement([
351+
t.expressionStatement(
352+
t.callExpression(
353+
t.memberExpression(t.identifier('console'), t.identifier('error')),
354+
[
355+
t.stringLiteral(
356+
'--auto-embed requires an embedder. Set EMBEDDER_PROVIDER=ollama (and optionally EMBEDDER_MODEL, EMBEDDER_BASE_URL).',
357+
),
358+
],
359+
),
360+
),
361+
t.expressionStatement(
362+
t.callExpression(
363+
t.memberExpression(t.identifier('process'), t.identifier('exit')),
364+
[t.numericLiteral(1)],
365+
),
366+
),
367+
]),
368+
);
369+
370+
const autoEmbedCall = t.awaitExpression(
371+
t.callExpression(t.identifier('autoEmbedWhere'), [
372+
t.logicalExpression(
373+
'??',
374+
whereExpr,
375+
t.objectExpression([]),
376+
),
377+
fieldNamesArray,
378+
t.identifier('embedder'),
379+
]),
380+
);
381+
382+
const bodyStatements: t.Statement[] = [embedderDecl, noEmbedderCheck];
383+
384+
if (assignBack) {
385+
// findManyArgs.where = await autoEmbedWhere(findManyArgs.where ?? {}, [...], embedder);
386+
bodyStatements.push(
387+
t.expressionStatement(
388+
t.assignmentExpression(
389+
'=',
390+
t.memberExpression(t.identifier('findManyArgs'), t.identifier('where')),
391+
autoEmbedCall,
392+
),
393+
),
394+
);
395+
} else {
396+
// await autoEmbedWhere(<whereExpr> ?? {}, [...], embedder); (return value is not assigned)
397+
bodyStatements.push(t.expressionStatement(autoEmbedCall));
398+
}
399+
400+
return t.ifStatement(
401+
t.memberExpression(
402+
t.identifier('argv'),
403+
t.stringLiteral('auto-embed'),
404+
true,
405+
),
406+
t.blockStatement(bodyStatements),
407+
);
408+
}
409+
410+
function buildListHandler(table: Table, vectorFieldNames: string[], targetName?: string, typeRegistry?: TypeRegistry): t.FunctionDeclaration {
315411
const { singularName } = getTableNames(table);
316412
const defaultSelectObj = buildSelectObject(table, typeRegistry);
317413

@@ -338,6 +434,17 @@ function buildListHandler(table: Table, targetName?: string, typeRegistry?: Type
338434
]),
339435
);
340436

437+
// Auto-embed vector fields in the where clause when --auto-embed is passed
438+
if (vectorFieldNames.length > 0) {
439+
tryBody.push(
440+
buildAutoEmbedBlock(
441+
t.memberExpression(t.identifier('findManyArgs'), t.identifier('where')),
442+
vectorFieldNames,
443+
true,
444+
),
445+
);
446+
}
447+
341448
tryBody.push(buildGetClientStatement(targetName));
342449

343450
// const result = await client.<singular>.findMany(findManyArgs).execute();
@@ -479,6 +586,7 @@ function buildFindFirstHandler(table: Table, targetName?: string, typeRegistry?:
479586
function buildSearchHandler(
480587
table: Table,
481588
specialGroups: SpecialFieldGroup[],
589+
vectorFieldNames: string[],
482590
targetName?: string,
483591
typeRegistry?: TypeRegistry,
484592
): t.FunctionDeclaration {
@@ -543,7 +651,7 @@ function buildSearchHandler(
543651
);
544652

545653
// Build the where clause properties from detected search fields
546-
// e.g. { tsvContent: { query }, bm25Body: { query }, trgmTitle: { value: query, threshold: 0.3 }, vectorEmbedding: { value: query } }
654+
// e.g. { tsvContent: { query }, bm25Body: { query }, trgmTitle: { value: query, threshold: 0.3 }, vectorEmbedding: { vector: query } }
547655
const whereProps: t.ObjectProperty[] = [];
548656
for (const group of specialGroups) {
549657
for (const field of group.fields) {
@@ -585,12 +693,13 @@ function buildSearchHandler(
585693
),
586694
);
587695
} else if (group.category === 'embedding') {
588-
// Vector embedding field: { value: query }
696+
// Vector embedding field: { vector: query }
697+
// When --auto-embed is used, autoEmbedWhere will convert the text to a vector
589698
whereProps.push(
590699
t.objectProperty(
591700
t.identifier(field.name),
592701
t.objectExpression([
593-
t.objectProperty(t.identifier('value'), t.identifier('query')),
702+
t.objectProperty(t.identifier('vector'), t.identifier('query')),
594703
]),
595704
),
596705
);
@@ -608,6 +717,17 @@ function buildSearchHandler(
608717
]),
609718
);
610719

720+
// Auto-embed vector fields in the where clause when --auto-embed is passed
721+
if (vectorFieldNames.length > 0) {
722+
tryBody.push(
723+
buildAutoEmbedBlock(
724+
t.identifier('searchWhere'),
725+
vectorFieldNames,
726+
false,
727+
),
728+
);
729+
}
730+
611731
// const defaultSelect = { ... };
612732
tryBody.push(
613733
t.variableDeclaration('const', [
@@ -1122,6 +1242,25 @@ export function generateTableCommand(table: Table, options?: TableCommandOptions
11221242
(g) => g.category === 'search' || g.category === 'embedding',
11231243
);
11241244

1245+
// Collect vector embedding field names for --auto-embed support
1246+
const vectorFieldNames: string[] = [];
1247+
for (const group of specialGroups) {
1248+
if (group.category === 'embedding') {
1249+
for (const field of group.fields) {
1250+
vectorFieldNames.push(field.name);
1251+
}
1252+
}
1253+
}
1254+
const hasEmbeddings = vectorFieldNames.length > 0;
1255+
1256+
// Import embedder functions when table has vector embedding fields
1257+
if (hasEmbeddings) {
1258+
const embedderPath = options?.targetName ? '../../embedder' : '../embedder';
1259+
statements.push(
1260+
createImportDeclaration(embedderPath, ['resolveEmbedder', 'autoEmbedWhere']),
1261+
);
1262+
}
1263+
11251264
const subcommands: string[] = ['list', 'find-first'];
11261265
if (hasSearchFields) subcommands.push('search');
11271266
if (hasGet) subcommands.push('get');
@@ -1169,6 +1308,20 @@ export function generateTableCommand(table: Table, options?: TableCommandOptions
11691308
' --offset <n> Number of records to skip',
11701309
' --fields <fields> Comma-separated list of fields to return',
11711310
' --orderBy <values> Comma-separated list of ordering values',
1311+
);
1312+
if (hasEmbeddings) {
1313+
usageLines.push(
1314+
' --auto-embed Convert text queries to vectors via configured embedder',
1315+
);
1316+
}
1317+
usageLines.push('');
1318+
}
1319+
if (hasEmbeddings) {
1320+
usageLines.push(
1321+
'Embedding Options (for --auto-embed):',
1322+
' Set EMBEDDER_PROVIDER=ollama to enable text-to-vector embedding.',
1323+
' Optional: EMBEDDER_MODEL (default: nomic-embed-text)',
1324+
' Optional: EMBEDDER_BASE_URL (default: http://localhost:11434)',
11721325
'',
11731326
);
11741327
}
@@ -1339,9 +1492,9 @@ export function generateTableCommand(table: Table, options?: TableCommandOptions
13391492

13401493
const tn = options?.targetName;
13411494
const ormTypes = { createInputTypeName, patchTypeName, innerFieldName };
1342-
statements.push(buildListHandler(table, tn, options?.typeRegistry));
1495+
statements.push(buildListHandler(table, vectorFieldNames, tn, options?.typeRegistry));
13431496
statements.push(buildFindFirstHandler(table, tn, options?.typeRegistry));
1344-
if (hasSearchFields) statements.push(buildSearchHandler(table, specialGroups, tn, options?.typeRegistry));
1497+
if (hasSearchFields) statements.push(buildSearchHandler(table, specialGroups, vectorFieldNames, tn, options?.typeRegistry));
13451498
if (hasGet) statements.push(buildGetHandler(table, tn, options?.typeRegistry));
13461499
statements.push(buildMutationHandler(table, 'create', tn, options?.typeRegistry, ormTypes));
13471500
if (hasUpdate) statements.push(buildMutationHandler(table, 'update', tn, options?.typeRegistry, ormTypes));

graphql/codegen/src/core/codegen/cli/utils-generator.ts

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -94,3 +94,20 @@ export function generateEntryPointFile(): GeneratedFile {
9494
),
9595
};
9696
}
97+
98+
/**
99+
* Generate an embedder.ts file with pluggable text-to-vector embedding.
100+
*
101+
* Provides a runtime embedder registry using @agentic-kit/ollama so that
102+
* CLI search and list commands can convert text queries into vector arrays
103+
* for pgvector similarity search when --auto-embed is passed.
104+
*/
105+
export function generateEmbedderFile(): GeneratedFile {
106+
return {
107+
fileName: 'embedder.ts',
108+
content: readTemplateFile(
109+
'embedder.ts',
110+
'CLI embedder — pluggable text-to-vector embedding for search commands',
111+
),
112+
};
113+
}

graphql/codegen/src/core/codegen/docs-utils.ts

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -359,15 +359,23 @@ export function buildSearchExamples(
359359
scoreFields.push(field.name);
360360
}
361361

362-
// pgvector embedding — uses column name, note about CLI limitation
362+
// pgvector embedding — uses column name, with --auto-embed for text-to-vector
363363
if (group.category === 'embedding') {
364364
examples.push({
365-
description: `Vector similarity search via \`${field.name}\` (requires JSON array)`,
365+
description: `Vector similarity search via \`${field.name}\` (manual vector)`,
366366
code: [
367-
`# Note: vector arrays must be passed as JSON strings via dot-notation`,
367+
`# Pass a pre-computed vector array via dot-notation`,
368368
`${toolName} ${cmd} list --where.${field.name}.vector '[0.1,0.2,0.3]' --where.${field.name}.distance 1.0 --fields title,${field.name}VectorDistance`,
369369
],
370370
});
371+
examples.push({
372+
description: `Vector semantic search via \`${field.name}\` with --auto-embed`,
373+
code: [
374+
`# --auto-embed converts text to vectors using the configured embedder (e.g. Ollama nomic-embed-text)`,
375+
`EMBEDDER_PROVIDER=ollama ${toolName} ${cmd} search "semantic query" --auto-embed --fields title,${field.name}VectorDistance`,
376+
`EMBEDDER_PROVIDER=ollama ${toolName} ${cmd} list --where.${field.name}.vector "semantic query" --auto-embed --fields title,${field.name}VectorDistance`,
377+
],
378+
});
371379
}
372380

373381
// searchScore — composite blend field, useful for ordering

0 commit comments

Comments
 (0)