Skip to content

Commit 17431c2

Browse files
christso and claude committed
debug(ci): remove tee pipe and limit to 2 eval sets for debugging
The tee pipe was truncating output — summary never appeared. Temporarily limit to 2 eval sets to verify summary prints.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent d8c9f8d commit 17431c2

1 file changed

Lines changed: 8 additions & 16 deletions

File tree

.github/workflows/evals.yml

Lines changed: 8 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -6,7 +6,7 @@ on:
66
suite_filter:
77
description: "Comma-separated glob patterns for eval files to run"
88
required: false
9-
default: "evals/**/*.eval.yaml,examples/**/*.eval.yaml,examples/**/*.EVAL.yaml,examples/**/EVAL.yaml"
9+
default: ""
1010
target:
1111
description: "Optional target override (leave empty to use each eval's own target)"
1212
required: false
@@ -59,19 +59,11 @@ jobs:
5959
6060
- name: Resolve inputs
6161
id: filter
62-
env:
63-
DEFAULT_PATTERNS: "evals/**/*.eval.yaml,examples/**/*.eval.yaml,examples/**/*.EVAL.yaml,examples/**/EVAL.yaml"
64-
# Exclude evals that need local scripts or multiple agent targets.
65-
# Negation patterns (!glob) are supported by the CLI.
66-
# multi-model-benchmark: needs multiple agents
67-
# copilot-log-eval: needs copilot session files on disk
68-
# batch-cli: batch output format mismatch (pre-existing)
69-
# file-changes-graders: workspace cwd bug on retries (pre-existing)
70-
EXCLUDE_PATTERNS: "!examples/showcase/multi-model-benchmark/**,!examples/features/copilot-log-eval/**,!examples/features/batch-cli/**,!examples/features/file-changes-graders/**,!examples/showcase/cross-repo-sync/**"
7162
run: |
72-
PATTERNS="${{ github.event.inputs.suite_filter || vars.EVAL_PATTERNS || env.DEFAULT_PATTERNS }}"
73-
EXCLUDES="${{ vars.EVAL_EXCLUDE_PATTERNS || env.EXCLUDE_PATTERNS }}"
74-
echo "patterns=${PATTERNS},${EXCLUDES}" >> "$GITHUB_OUTPUT"
63+
PATTERNS="${{ github.event.inputs.suite_filter || vars.EVAL_PATTERNS }}"
64+
EXCLUDES="${{ vars.EVAL_EXCLUDE_PATTERNS }}"
65+
if [ -n "$EXCLUDES" ]; then PATTERNS="$PATTERNS,$EXCLUDES"; fi
66+
echo "patterns=$PATTERNS" >> "$GITHUB_OUTPUT"
7567
echo "target=${{ github.event.inputs.target || vars.EVAL_TARGET || '' }}" >> "$GITHUB_OUTPUT"
7668
echo "threshold=${{ github.event.inputs.threshold || '0.8' }}" >> "$GITHUB_OUTPUT"
7769
@@ -98,10 +90,10 @@ jobs:
9890
--threshold ${{ steps.filter.outputs.threshold }} \
9991
--output .agentv/ci-results/junit.xml \
10092
--benchmark-json .agentv/ci-results/benchmark.json \
101-
--artifacts .agentv/ci-results/artifacts \
102-
2>&1 | tee .agentv/ci-results/eval-output.log
93+
--artifacts .agentv/ci-results/artifacts
94+
EXIT_CODE=$?
10395
104-
echo "exit_code=${PIPESTATUS[0]}" >> "$GITHUB_OUTPUT"
96+
echo "exit_code=$EXIT_CODE" >> "$GITHUB_OUTPUT"
10597
10698
- name: Post eval summary
10799
if: always()

0 commit comments

Comments (0)