Skip to content

Commit ceba37c

Browse files
committed
feat(codegen): add --auto-embed support for create/update mutations
1 parent 4665bb8 commit ceba37c

3 files changed

Lines changed: 138 additions & 7 deletions

File tree

graphql/codegen/src/core/codegen/cli/table-command-generator.ts

Lines changed: 91 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -407,6 +407,79 @@ function buildAutoEmbedBlock(
407407
);
408408
}
409409

410+
/**
 * Builds the `--auto-embed` guard that create/update mutation handlers run
 * on their input data before saving.
 *
 * The returned AST corresponds to this generated code:
 *
 *   if (argv['auto-embed']) {
 *     const embedder = resolveEmbedder();
 *     if (!embedder) {
 *       console.error('--auto-embed requires an embedder. Set EMBEDDER_PROVIDER=ollama (and optionally EMBEDDER_MODEL, EMBEDDER_BASE_URL).');
 *       process.exit(1);
 *     }
 *     await autoEmbedInput(cleanedData, ['embedding'], embedder);
 *   }
 *
 * The emitted code refers to `argv`, `cleanedData`, `resolveEmbedder` and
 * `autoEmbedInput` by bare identifier, so all four must be in scope at the
 * point where the statement is spliced into a handler.
 *
 * @param vectorFieldNames - Names of vector embedding fields detected at
 *   codegen time; emitted verbatim as a string-array literal
 * @returns The `if (argv['auto-embed']) { ... }` statement node
 */
function buildAutoEmbedInputBlock(
  vectorFieldNames: string[],
): t.IfStatement {
  const id = (name: string) => t.identifier(name);

  // console.error(<configuration hint>);
  const reportMissingEmbedder = t.expressionStatement(
    t.callExpression(t.memberExpression(id('console'), id('error')), [
      t.stringLiteral(
        '--auto-embed requires an embedder. Set EMBEDDER_PROVIDER=ollama (and optionally EMBEDDER_MODEL, EMBEDDER_BASE_URL).',
      ),
    ]),
  );

  // process.exit(1);
  const exitWithFailure = t.expressionStatement(
    t.callExpression(t.memberExpression(id('process'), id('exit')), [
      t.numericLiteral(1),
    ]),
  );

  // if (!embedder) { <report>; <exit>; }
  const bailWithoutEmbedder = t.ifStatement(
    t.unaryExpression('!', id('embedder')),
    t.blockStatement([reportMissingEmbedder, exitWithFailure]),
  );

  // const embedder = resolveEmbedder();
  const resolveStep = t.variableDeclaration('const', [
    t.variableDeclarator(
      id('embedder'),
      t.callExpression(id('resolveEmbedder'), []),
    ),
  ]);

  // await autoEmbedInput(cleanedData, [<field names>], embedder);
  const fieldNameLiterals = vectorFieldNames.map((fieldName) =>
    t.stringLiteral(fieldName),
  );
  const embedStep = t.expressionStatement(
    t.awaitExpression(
      t.callExpression(id('autoEmbedInput'), [
        id('cleanedData'),
        t.arrayExpression(fieldNameLiterals),
        id('embedder'),
      ]),
    ),
  );

  // argv['auto-embed'] — computed access because the flag name contains a dash.
  const flagIsSet = t.memberExpression(
    id('argv'),
    t.stringLiteral('auto-embed'),
    true,
  );

  return t.ifStatement(
    flagIsSet,
    t.blockStatement([resolveStep, bailWithoutEmbedder, embedStep]),
  );
}
482+
410483
function buildListHandler(table: Table, vectorFieldNames: string[], targetName?: string, typeRegistry?: TypeRegistry): t.FunctionDeclaration {
411484
const { singularName } = getTableNames(table);
412485
const defaultSelectObj = buildSelectObject(table, typeRegistry);
@@ -904,6 +977,7 @@ export function getFieldsWithDefaults(
904977
function buildMutationHandler(
905978
table: Table,
906979
operation: 'create' | 'update' | 'delete',
980+
vectorFieldNames: string[],
907981
targetName?: string,
908982
typeRegistry?: TypeRegistry,
909983
ormTypes?: { createInputTypeName: string; innerFieldName: string; patchTypeName: string },
@@ -1124,6 +1198,12 @@ function buildMutationHandler(
11241198
),
11251199
]),
11261200
);
1201+
1202+
// Inject --auto-embed block for create/update when table has vector fields.
1203+
// Converts text strings in vector fields to embeddings before the ORM call.
1204+
if (vectorFieldNames.length > 0) {
1205+
tryBody.push(buildAutoEmbedInputBlock(vectorFieldNames));
1206+
}
11271207
}
11281208

11291209
tryBody.push(
@@ -1257,7 +1337,7 @@ export function generateTableCommand(table: Table, options?: TableCommandOptions
12571337
if (hasEmbeddings) {
12581338
const embedderPath = options?.targetName ? '../../embedder' : '../embedder';
12591339
statements.push(
1260-
createImportDeclaration(embedderPath, ['resolveEmbedder', 'autoEmbedWhere']),
1340+
createImportDeclaration(embedderPath, ['resolveEmbedder', 'autoEmbedWhere', 'autoEmbedInput']),
12611341
);
12621342
}
12631343

@@ -1280,6 +1360,13 @@ export function generateTableCommand(table: Table, options?: TableCommandOptions
12801360
if (hasGet) usageLines.push(` get Get a ${singularName} by ID`);
12811361
usageLines.push(` create Create a new ${singularName}`);
12821362
if (hasUpdate) usageLines.push(` update Update an existing ${singularName}`);
1363+
if (hasEmbeddings) {
1364+
usageLines.push(
1365+
'',
1366+
'Create/Update Options:',
1367+
' --auto-embed Convert text values in vector fields to embeddings before saving',
1368+
);
1369+
}
12831370
if (hasDelete) usageLines.push(` delete Delete a ${singularName}`);
12841371
usageLines.push(
12851372
'',
@@ -1496,9 +1583,9 @@ export function generateTableCommand(table: Table, options?: TableCommandOptions
14961583
statements.push(buildFindFirstHandler(table, tn, options?.typeRegistry));
14971584
if (hasSearchFields) statements.push(buildSearchHandler(table, specialGroups, vectorFieldNames, tn, options?.typeRegistry));
14981585
if (hasGet) statements.push(buildGetHandler(table, tn, options?.typeRegistry));
1499-
statements.push(buildMutationHandler(table, 'create', tn, options?.typeRegistry, ormTypes));
1500-
if (hasUpdate) statements.push(buildMutationHandler(table, 'update', tn, options?.typeRegistry, ormTypes));
1501-
if (hasDelete) statements.push(buildMutationHandler(table, 'delete', tn, options?.typeRegistry, ormTypes));
1586+
statements.push(buildMutationHandler(table, 'create', vectorFieldNames, tn, options?.typeRegistry, ormTypes));
1587+
if (hasUpdate) statements.push(buildMutationHandler(table, 'update', vectorFieldNames, tn, options?.typeRegistry, ormTypes));
1588+
if (hasDelete) statements.push(buildMutationHandler(table, 'delete', vectorFieldNames, tn, options?.typeRegistry, ormTypes));
15021589

15031590
const header = getGeneratedFileHeader(`CLI commands for ${table.name}`);
15041591
const code = generateCode(statements);

graphql/codegen/src/core/codegen/docs-utils.ts

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -376,6 +376,14 @@ export function buildSearchExamples(
376376
`EMBEDDER_PROVIDER=ollama ${toolName} ${cmd} list --where.${field.name}.vector "semantic query" --auto-embed --fields title,${field.name}VectorDistance`,
377377
],
378378
});
379+
examples.push({
380+
description: `Create/update with auto-embedded \`${field.name}\` via --auto-embed`,
381+
code: [
382+
`# --auto-embed on create/update converts text strings in vector fields to embeddings before saving`,
383+
`EMBEDDER_PROVIDER=ollama ${toolName} ${cmd} create --${field.name} "text to embed" --auto-embed`,
384+
`EMBEDDER_PROVIDER=ollama ${toolName} ${cmd} update --${field.name} "new text to embed" --auto-embed`,
385+
],
386+
});
379387
}
380388

381389
// searchScore — composite blend field, useful for ordering

graphql/codegen/src/core/codegen/templates/embedder.ts

Lines changed: 39 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,10 @@
11
/**
2-
* CLI Embedder — pluggable text-to-vector embedding for CLI search commands
2+
* CLI Embedder — pluggable text-to-vector embedding for CLI commands
33
*
44
* This is RUNTIME code that gets copied to generated output.
55
* Provides a pluggable system for registering embedding functions so that
6-
* CLI search and list commands can convert text queries into vector arrays
7-
* for pgvector similarity search.
6+
* CLI commands can convert text queries into vector arrays for pgvector
7+
* similarity search (list/search) and create/update vector fields inline.
88
*
99
* Configuration via appstash config or environment variables:
1010
* embedder.provider = 'ollama' | 'custom'
@@ -137,3 +137,39 @@ export async function autoEmbedWhere(
137137
}
138138
return where;
139139
}
140+
141+
/**
 * Replaces plain-text values in the vector fields of a create/update input
 * with embedding vectors.
 *
 * Each name in `vectorFieldNames` is looked up directly on `data`. A value
 * that is a string is passed to `embedder`, and the awaited result is stored
 * back under the same key. Any other value (for example an already computed
 * vector array, or a field that is absent) is left untouched. Fields are
 * processed one at a time, in the order given.
 *
 * Example usage when --auto-embed is passed on create or update
 * (flag names presumably mirror the vector field names — confirm against
 * the generated CLI):
 *   csdk article create --embedding "Machine learning concepts" --auto-embed
 *   csdk article update --id xxx --embedding "Updated description" --auto-embed
 *
 * This is a CLI-only convenience — in production, database triggers or
 * a job queue should handle embedding generation.
 *
 * @param data - The mutation input data object (mutated in place)
 * @param vectorFieldNames - Names of vector embedding fields (e.g. ['embedding'])
 * @param embedder - The resolved embedder function
 * @returns The same `data` object, with text values replaced by vectors
 */
export async function autoEmbedInput(
  data: Record<string, unknown>,
  vectorFieldNames: string[],
  embedder: EmbedderFunction,
): Promise<Record<string, unknown>> {
  for (const name of vectorFieldNames) {
    const current = data[name];
    // Only text needs embedding; anything else passes through unchanged.
    if (typeof current !== 'string') continue;
    data[name] = await embedder(current);
  }
  return data;
}

0 commit comments

Comments
 (0)