Skip to content

Commit 840acbc

Browse files
christso and Copilot authored
feat(cli): add *.eval.ts auto-discovery (#1120)
* feat(cli): add *.eval.ts auto-discovery (#1116)

  Add TypeScript eval file support to `agentv run`. TS eval files export an EvalConfig (default, `config`, or `evalConfig` named export) and are discovered alongside YAML files via the same glob/path resolution.

  Changes:
  - shared.ts: Include .ts in file extension regex and directory auto-glob
  - config-loader.ts: Add **/evals/**/*.eval.ts to DEFAULT_EVAL_PATTERNS
  - jsonl-parser.ts: Add typescript format detection in detectFormat()
  - ts-eval-loader.ts: New loader that imports TS modules and extracts EvalConfig
  - run-eval.ts: Integrate TS files through evaluate() with CLI overrides, feeding results through the same artifact/reporting pipeline
  - run.ts: Update CLI description to mention .ts files
  - index.ts: Export loadTsEvalFile and TsEvalResult from @agentv/core

  Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>

* refactor(cli): unify TypeScript eval loading with suite pipeline (#1116)

  Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>

---------

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
1 parent 0ee2e93 commit 840acbc

17 files changed

Lines changed: 577 additions & 117 deletions

File tree

apps/cli/src/commands/eval/commands/run.ts

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -20,7 +20,7 @@ export const evalRunCommand = command({
2020
evalPaths: restPositionals({
2121
type: string,
2222
displayName: 'eval-paths',
23-
description: 'Path(s) or glob(s) to evaluation .yaml file(s)',
23+
description: 'Path(s) or glob(s) to evaluation files (.yaml, .eval.ts)',
2424
}),
2525
target: multioption({
2626
type: array(string),

apps/cli/src/commands/eval/run-eval.ts

Lines changed: 58 additions & 26 deletions
Original file line number | Diff line number | Diff line change
@@ -20,6 +20,7 @@ import {
2020
loadConfig,
2121
loadTestSuite,
2222
loadTsConfig,
23+
resolveTargetDefinition,
2324
shouldEnableCache,
2425
shouldSkipCacheForTemperature,
2526
subscribeToCodexLogEntries,
@@ -531,6 +532,9 @@ async function prepareFileMetadata(params: {
531532
readonly failOnError?: FailOnError;
532533
readonly threshold?: number;
533534
readonly tags?: readonly string[];
535+
readonly providerFactory?: (
536+
target: import('@agentv/core').ResolvedTarget,
537+
) => import('@agentv/core').Provider;
534538
}> {
535539
const { testFilePath, repoRoot, cwd, options } = params;
536540

@@ -574,6 +578,54 @@ async function prepareFileMetadata(params: {
574578
inlineTargetLabel: `transcript (${path.basename(options.transcript)})`,
575579
},
576580
];
581+
} else if (suite.inlineTarget && options.cliTargets.length === 0) {
582+
const targetDefinition = suite.inlineTarget;
583+
const resolvedTarget = options.dryRun
584+
? ({
585+
kind: 'mock',
586+
name: `${targetDefinition.name}-dry-run`,
587+
graderTarget: undefined,
588+
config: {
589+
response: '{"answer":"Mock dry-run response"}',
590+
delayMs: options.dryRunDelay,
591+
delayMinMs: options.dryRunDelayMin,
592+
delayMaxMs: options.dryRunDelayMax,
593+
},
594+
} satisfies ResolvedTarget)
595+
: resolveTargetDefinition(targetDefinition, process.env, testFilePath, {
596+
emitDeprecationWarnings: false,
597+
});
598+
selections = [
599+
{
600+
selection: {
601+
definitions: [targetDefinition],
602+
resolvedTarget,
603+
targetName: targetDefinition.name,
604+
targetSource: 'test-file',
605+
targetsFilePath: testFilePath,
606+
},
607+
inlineTargetLabel: resolveTargetLabel(targetDefinition.name, resolvedTarget.name),
608+
},
609+
];
610+
} else if (suite.providerFactory && options.cliTargets.length === 0) {
611+
const taskTarget: ResolvedTarget = {
612+
kind: 'mock',
613+
name: 'custom-task',
614+
graderTarget: undefined,
615+
config: {},
616+
};
617+
selections = [
618+
{
619+
selection: {
620+
definitions: [],
621+
resolvedTarget: taskTarget,
622+
targetName: 'custom-task',
623+
targetSource: 'test-file',
624+
targetsFilePath: testFilePath,
625+
},
626+
inlineTargetLabel: 'custom-task',
627+
},
628+
];
577629
} else {
578630
// Determine target names: CLI --target flags override YAML
579631
const cliTargets = options.cliTargets;
@@ -658,6 +710,7 @@ async function prepareFileMetadata(params: {
658710
failOnError: suite.failOnError,
659711
threshold: suite.threshold,
660712
tags: suite.metadata?.tags,
713+
providerFactory: suite.providerFactory,
661714
};
662715
}
663716

@@ -1170,33 +1223,12 @@ export async function runEvalCommand(
11701223
readonly failOnError?: FailOnError;
11711224
readonly threshold?: number;
11721225
readonly tags?: readonly string[];
1226+
readonly providerFactory?: (
1227+
target: import('@agentv/core').ResolvedTarget,
1228+
) => import('@agentv/core').Provider;
11731229
}
11741230
>();
1175-
// Separate TypeScript/JS eval files from YAML files.
1176-
// TS files are self-contained scripts that call evaluate() directly.
1177-
const tsFiles: string[] = [];
1178-
const yamlFiles: string[] = [];
11791231
for (const testFilePath of resolvedTestFiles) {
1180-
if (/\.(ts|js|mts|mjs)$/.test(testFilePath)) {
1181-
tsFiles.push(testFilePath);
1182-
} else {
1183-
yamlFiles.push(testFilePath);
1184-
}
1185-
}
1186-
1187-
// Run TypeScript eval files by importing them.
1188-
// evaluate() runs during import via top-level await and handles its own output.
1189-
for (const tsFile of tsFiles) {
1190-
await ensureFileExists(tsFile, 'TypeScript eval file');
1191-
await import(pathToFileURL(tsFile).href);
1192-
}
1193-
1194-
// If only TS files were provided, we're done — evaluate() handled everything.
1195-
if (yamlFiles.length === 0 && tsFiles.length > 0) {
1196-
return;
1197-
}
1198-
1199-
for (const testFilePath of yamlFiles) {
12001232
const meta = await prepareFileMetadata({
12011233
testFilePath,
12021234
repoRoot,
@@ -1355,7 +1387,7 @@ export async function runEvalCommand(
13551387
}
13561388
}
13571389

1358-
// Use only files that survived tag filtering (fileMetadata keys)
1390+
// Use only files that survived tag filtering.
13591391
const activeTestFiles = resolvedTestFiles.filter((f) => fileMetadata.has(f));
13601392

13611393
// --transcript: create a shared TranscriptProvider and validate entry count
@@ -1442,7 +1474,7 @@ export async function runEvalCommand(
14421474
budgetUsd: targetPrep.budgetUsd,
14431475
failOnError: targetPrep.failOnError,
14441476
threshold: resolvedThreshold,
1445-
providerFactory: transcriptProviderFactory,
1477+
providerFactory: transcriptProviderFactory ?? targetPrep.providerFactory,
14461478
});
14471479
const evalFile = path.relative(cwd, testFilePath);
14481480
const existingSummary = remoteEvalSummaries.find(

apps/cli/src/commands/eval/shared.ts

Lines changed: 9 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -34,13 +34,16 @@ export async function resolveEvalPaths(evalPaths: string[], cwd: string): Promis
3434
: path.resolve(cwd, pattern);
3535
try {
3636
const stats = await stat(candidatePath);
37-
if (stats.isFile() && /\.(ya?ml|jsonl|json)$/i.test(candidatePath)) {
37+
if (stats.isFile() && /\.(ya?ml|jsonl|json|[cm]?ts)$/i.test(candidatePath)) {
3838
results.add(candidatePath);
3939
continue;
4040
}
4141
if (stats.isDirectory()) {
4242
// Auto-expand directory to recursive eval file glob
43-
const dirGlob = path.posix.join(candidatePath.replace(/\\/g, '/'), '**/*.eval.{yaml,yml}');
43+
const dirGlob = path.posix.join(
44+
candidatePath.replace(/\\/g, '/'),
45+
'**/{*.eval.yaml,*.eval.yml,eval.yaml,eval.yml,*.eval.ts,*.eval.mts}',
46+
);
4447
const dirMatches = await fg(dirGlob, {
4548
absolute: true,
4649
onlyFiles: true,
@@ -69,7 +72,9 @@ export async function resolveEvalPaths(evalPaths: string[], cwd: string): Promis
6972
ignore: ignorePatterns,
7073
});
7174

72-
const yamlMatches = matches.filter((filePath) => /\.(ya?ml|jsonl|json)$/i.test(filePath));
75+
const yamlMatches = matches.filter((filePath) =>
76+
/\.(ya?ml|jsonl|json|[cm]?ts)$/i.test(filePath),
77+
);
7378
for (const filePath of yamlMatches) {
7479
results.add(path.normalize(filePath));
7580
}
@@ -94,7 +99,7 @@ export async function resolveEvalPaths(evalPaths: string[], cwd: string): Promis
9499
throw new Error(
95100
`No eval files matched any provided paths or globs: ${includePatterns.join(
96101
', ',
97-
)}. Provide YAML, JSONL, or JSON paths or globs (e.g., "evals/**/*.yaml", "evals/**/*.jsonl", "evals.json").`,
102+
)}. Provide YAML, JSONL, JSON, or TypeScript paths or globs (e.g., "evals/**/eval.yaml", "evals/**/*.eval.ts").`,
98103
);
99104
}
100105

apps/cli/test/commands/eval/shared.test.ts

Lines changed: 49 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -64,4 +64,53 @@ describe('resolveEvalPaths', () => {
6464
resolveEvalPaths(['evals/**/*.eval.yaml', 'evals/**/eval.yaml'], tempDir),
6565
).rejects.toThrow('No eval files matched any provided paths or globs');
6666
});
67+
68+
it('discovers *.eval.ts files from directory auto-expansion', async () => {
69+
const evalDir = path.join(tempDir, 'evals');
70+
mkdirSync(evalDir, { recursive: true });
71+
72+
const tsFile = path.join(evalDir, 'greeting.eval.ts');
73+
writeFileSync(tsFile, 'export default { tests: [] }');
74+
75+
const resolved = await resolveEvalPaths([tempDir], tempDir);
76+
77+
expect(resolved).toEqual([path.normalize(tsFile)]);
78+
});
79+
80+
it('accepts a direct .mts file path', async () => {
81+
const tsFile = path.join(tempDir, 'custom.eval.mts');
82+
writeFileSync(tsFile, 'export default { tests: [] }');
83+
84+
const resolved = await resolveEvalPaths([tsFile], tempDir);
85+
86+
expect(resolved).toEqual([path.normalize(tsFile)]);
87+
});
88+
89+
it('accepts a direct .ts file path', async () => {
90+
const tsFile = path.join(tempDir, 'custom.eval.ts');
91+
writeFileSync(tsFile, 'export default { tests: [] }');
92+
93+
const resolved = await resolveEvalPaths([tsFile], tempDir);
94+
95+
expect(resolved).toEqual([path.normalize(tsFile)]);
96+
});
97+
98+
it('discovers both .yaml and .ts files from directory', async () => {
99+
const evalDir = path.join(tempDir, 'evals');
100+
mkdirSync(evalDir, { recursive: true });
101+
102+
const yamlFile = path.join(evalDir, 'suite.eval.yaml');
103+
const evalYamlFile = path.join(evalDir, 'eval.yaml');
104+
const tsFile = path.join(evalDir, 'suite.eval.ts');
105+
writeFileSync(yamlFile, 'tests:\n - id: sample\n input: test\n');
106+
writeFileSync(evalYamlFile, 'tests:\n - id: sample2\n input: test\n');
107+
writeFileSync(tsFile, 'export default { tests: [] }');
108+
109+
const resolved = await resolveEvalPaths([tempDir], tempDir);
110+
111+
expect(resolved).toContain(path.normalize(yamlFile));
112+
expect(resolved).toContain(path.normalize(evalYamlFile));
113+
expect(resolved).toContain(path.normalize(tsFile));
114+
expect(resolved).toHaveLength(3);
115+
});
67116
});

0 commit comments

Comments
 (0)