@@ -88,6 +88,8 @@ interface NormalizedOptions {
8888 readonly model ?: string ;
8989 readonly outputMessages : number | 'all' ;
9090 readonly threshold ?: number ;
91+ readonly tags : readonly string [ ] ;
92+ readonly excludeTags : readonly string [ ] ;
9193}
9294
9395function normalizeBoolean ( value : unknown ) : boolean {
@@ -140,6 +142,43 @@ function normalizeWorkspaceMode(value: unknown): 'pooled' | 'temp' | 'static' |
140142 return value === 'pooled' || value === 'temp' || value === 'static' ? value : undefined ;
141143}
142144
/**
 * Normalize a raw option value into a list of non-blank, trimmed strings.
 *
 * - An array keeps only its string entries; each is trimmed and blank entries are dropped.
 * - A single string is treated as a one-element list, so a filter supplied once is not
 *   silently discarded when the value arrives as a plain string instead of an array.
 * - Any other value (undefined, null, numbers, objects, ...) yields an empty list.
 *
 * @param value - Raw, unvalidated option value.
 * @returns The cleaned strings in their original order; never `undefined`.
 */
function normalizeStringArray(value: unknown): readonly string[] {
  const candidates: readonly unknown[] = Array.isArray(value) ? value : [value];
  const result: string[] = [];
  for (const candidate of candidates) {
    if (typeof candidate !== 'string') continue;
    // Return the trimmed form: the blank check already ignores surrounding whitespace,
    // and an untrimmed value such as " smoke " could never equal the tag "smoke".
    const trimmed = candidate.trim();
    if (trimmed.length > 0) result.push(trimmed);
  }
  return result;
}
151+
/**
 * Decide whether an eval file passes the --tag / --exclude-tag filters.
 *
 * Rules:
 * - Every tag in `includeTags` must appear in the file's tags.
 * - No tag in `excludeTags` may appear in the file's tags.
 * - When both lists are non-empty, both rules must hold.
 * - A file with no tags (undefined or empty) fails as soon as any include tag is
 *   requested, but passes when only exclude tags are given.
 *
 * @param fileTags - Tags declared by the eval file, if any.
 * @param includeTags - Tags that must all be present.
 * @param excludeTags - Tags that must all be absent.
 * @returns `true` when the file satisfies both filters.
 */
export function matchesTagFilters(
  fileTags: readonly string[] | undefined,
  includeTags: readonly string[],
  excludeTags: readonly string[],
): boolean {
  const present = new Set(fileTags ?? []);

  // `every` is vacuously true for an empty include list, so no length guard is needed.
  const hasEveryRequiredTag = includeTags.every((tag) => present.has(tag));
  const hasNoExcludedTag = !excludeTags.some((tag) => present.has(tag));

  return hasEveryRequiredTag && hasNoExcludedTag;
}
181+
143182/**
144183 * Normalize --output-messages value. Accepts a number (>= 1) or "all".
145184 * Defaults to 1 (last assistant message only).
@@ -304,6 +343,8 @@ function normalizeOptions(
304343 model : normalizeString ( rawOptions . model ) ,
305344 outputMessages : normalizeOutputMessages ( normalizeString ( rawOptions . outputMessages ) ) ,
306345 threshold : normalizeOptionalNumber ( rawOptions . threshold ) ,
346+ tags : normalizeStringArray ( rawOptions . tag ) ,
347+ excludeTags : normalizeStringArray ( rawOptions . excludeTag ) ,
307348 } satisfies NormalizedOptions ;
308349}
309350
@@ -434,6 +475,7 @@ async function prepareFileMetadata(params: {
434475 readonly totalBudgetUsd ?: number ;
435476 readonly failOnError ?: FailOnError ;
436477 readonly threshold ?: number ;
478+ readonly tags ?: readonly string [ ] ;
437479} > {
438480 const { testFilePath, repoRoot, cwd, options } = params ;
439481
@@ -524,6 +566,7 @@ async function prepareFileMetadata(params: {
524566 totalBudgetUsd : suite . totalBudgetUsd ,
525567 failOnError : suite . failOnError ,
526568 threshold : suite . threshold ,
569+ tags : suite . metadata ?. tags ,
527570 } ;
528571}
529572
@@ -970,6 +1013,7 @@ export async function runEvalCommand(
9701013 readonly totalBudgetUsd ?: number ;
9711014 readonly failOnError ?: FailOnError ;
9721015 readonly threshold ?: number ;
1016+ readonly tags ?: readonly string [ ] ;
9731017 }
9741018 > ( ) ;
9751019 // Separate TypeScript/JS eval files from YAML files.
@@ -1006,6 +1050,27 @@ export async function runEvalCommand(
10061050 fileMetadata . set ( testFilePath , meta ) ;
10071051 }
10081052
1053+ // Apply --tag / --exclude-tag filtering at the eval-file level
1054+ const hasTagFilters = options . tags . length > 0 || options . excludeTags . length > 0 ;
1055+ if ( hasTagFilters ) {
1056+ const skippedFiles : string [ ] = [ ] ;
1057+ for ( const [ testFilePath , meta ] of fileMetadata . entries ( ) ) {
1058+ if ( ! matchesTagFilters ( meta . tags , options . tags , options . excludeTags ) ) {
1059+ fileMetadata . delete ( testFilePath ) ;
1060+ skippedFiles . push ( path . relative ( cwd , testFilePath ) ) ;
1061+ }
1062+ }
1063+ if ( skippedFiles . length > 0 && options . verbose ) {
1064+ console . log (
1065+ `Skipped ${ skippedFiles . length } eval file(s) by tag filter: ${ skippedFiles . join ( ', ' ) } ` ,
1066+ ) ;
1067+ }
1068+ if ( fileMetadata . size === 0 ) {
1069+ console . log ( 'No eval files matched the tag filters. Nothing to run.' ) ;
1070+ return ;
1071+ }
1072+ }
1073+
10091074 // Resolve cache: combine CLI flags with YAML config
10101075 // Use first file's YAML config for cache settings (consistent across a run)
10111076 const firstMeta = fileMetadata . values ( ) . next ( ) . value ;
@@ -1116,8 +1181,11 @@ export async function runEvalCommand(
11161181 }
11171182 }
11181183
1184+ // Use only files that survived tag filtering (fileMetadata keys)
1185+ const activeTestFiles = resolvedTestFiles . filter ( ( f ) => fileMetadata . has ( f ) ) ;
1186+
11191187 try {
1120- await runWithLimit ( resolvedTestFiles , fileConcurrency , async ( testFilePath ) => {
1188+ await runWithLimit ( activeTestFiles , fileConcurrency , async ( testFilePath ) => {
11211189 const targetPrep = fileMetadata . get ( testFilePath ) ;
11221190 if ( ! targetPrep ) {
11231191 throw new Error ( `Missing metadata for ${ testFilePath } ` ) ;
@@ -1208,7 +1276,7 @@ export async function runEvalCommand(
12081276 }
12091277
12101278 if ( usesDefaultArtifactWorkspace ) {
1211- const evalFile = resolvedTestFiles . length === 1 ? resolvedTestFiles [ 0 ] : '' ;
1279+ const evalFile = activeTestFiles . length === 1 ? activeTestFiles [ 0 ] : '' ;
12121280 const workspaceDir = path . dirname ( outputPath ) ;
12131281 const {
12141282 testArtifactDir,
@@ -1230,7 +1298,7 @@ export async function runEvalCommand(
12301298 // Write companion artifacts (grading, timing, benchmark) if requested
12311299 if ( options . artifacts ) {
12321300 const artifactsDir = path . resolve ( options . artifacts ) ;
1233- const evalFile = resolvedTestFiles . length === 1 ? resolvedTestFiles [ 0 ] : '' ;
1301+ const evalFile = activeTestFiles . length === 1 ? activeTestFiles [ 0 ] : '' ;
12341302 const {
12351303 testArtifactDir,
12361304 indexPath,
@@ -1275,7 +1343,7 @@ export async function runEvalCommand(
12751343
12761344 // Suggest retry-errors command when execution errors are detected
12771345 if ( summary . executionErrorCount > 0 && ! options . retryErrors ) {
1278- const evalFileArgs = resolvedTestFiles . map ( ( f ) => path . relative ( cwd , f ) ) . join ( ' ' ) ;
1346+ const evalFileArgs = activeTestFiles . map ( ( f ) => path . relative ( cwd , f ) ) . join ( ' ' ) ;
12791347 const targetFlag = options . target ? ` --target ${ options . target } ` : '' ;
12801348 const relativeOutputPath = path . relative ( cwd , outputPath ) ;
12811349 console . log (
@@ -1287,7 +1355,7 @@ export async function runEvalCommand(
12871355 return {
12881356 executionErrorCount : summary . executionErrorCount ,
12891357 outputPath,
1290- testFiles : resolvedTestFiles ,
1358+ testFiles : activeTestFiles ,
12911359 target : options . target ,
12921360 thresholdFailed,
12931361 } ;
0 commit comments