From 564edae200930434055057e3f0585819b8c32f5b Mon Sep 17 00:00:00 2001 From: Adam Hamrick Date: Thu, 28 May 2026 16:15:11 -0400 Subject: [PATCH 1/9] Add benchmarks and optimize diagnose overhead --- .gitignore | 5 +- AGENTS.md | 12 ++- example_test.go | 6 -- internal/runner/analyze.go | 58 ++++++++++-- internal/runner/diagnose_progress.go | 95 -------------------- internal/runner/diagnose_progress_test.go | 55 ------------ internal/runner/runner.go | 83 +++++------------ internal/runner/runner_bench_test.go | 69 ++++++++++++++ internal/runner/testdata/dummy/dummy_test.go | 5 ++ justfile | 19 +++- 10 files changed, 176 insertions(+), 231 deletions(-) create mode 100644 internal/runner/runner_bench_test.go create mode 100644 internal/runner/testdata/dummy/dummy_test.go diff --git a/.gitignore b/.gitignore index e04f680..16357a2 100644 --- a/.gitignore +++ b/.gitignore @@ -9,10 +9,7 @@ diagnose-*/ .env .venv/ - -# Local Phoenix DB for evals/eval.py -evals/.phoenix/ -__pycache__/ +*.test # Agents .antigravitycli/ diff --git a/AGENTS.md b/AGENTS.md index a862610..edd5deb 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -18,7 +18,6 @@ go test ./... # Test - `internal/runner/` — core test execution. `Diagnose` is the main entry point; `diagnoseRunHooks` carries iteration hooks as `func(context.Context) error` fields. - `internal/config/` — Cobra flag registry config loading. `config.App` is the unified config struct. - `internal/output/` — output printer abstraction. `--ai-output` flag controls format. -- `internal/repo/` — git/module helpers. ## Critical decisions @@ -27,3 +26,14 @@ go test ./... # Test **Public API vs CLI are separate layers.** `hooks.go` / `testrig.go` are the library surface. The CLI in `internal/cmd/` wraps that. Don't put CLI logic in the root package. **Hook catalog drives CLI flags and docs.** Add lifecycle hooks in [`internal/hooks/catalog.go`](internal/hooks/catalog.go) (`Catalog` slice), then wire `RunOptions.Hook`, public `hooks.go` registrar, and `go generate`. Document timing in godoc on registrars in [`hooks.go`](hooks.go) as `// Name registers a hook to .` `hooks.RegisterPersistentFlags` registers Cobra flags from the catalog. + +## Diagnose overhead benchmark + +`internal/runner/runner_bench_test.go` defines a side-by-side pair against `internal/runner/testdata/dummy/...`: + +- `BenchmarkBaselineGoTest` — raw `go test -json` (the floor). +- `BenchmarkDiagnose` — one `Diagnose` iteration with `parallel=1`. + +The overhead `Diagnose` adds is the **delta** in `ns/op`, `B/op`, and `allocs/op` between the two. Run with `just bench_overhead` (uses `-count=3` for variance). For a memory profile of the diagnose pipeline, `just bench_overhead_profile` writes `out/bench/diagnose_mem_runner.txt` (look for `scanIterationJSONL`, `applyTestEvent`, `buildReportFromAggs`). CPU profile is low-signal — parent blocks on the child subprocess. + +Drill down into the parent-only pipeline (no subprocess) via the micro-benches in `internal/runner/analyze_bench_test.go`. diff --git a/example_test.go b/example_test.go index cff11f4..a69ae6d 100644 --- a/example_test.go +++ b/example_test.go @@ -12,18 +12,12 @@ func ExampleRun() { // GlobalSetup runs once before any tests start. testrig.GlobalSetup(func(_ context.Context) error { fmt.Println("Starting mock background service...") - // Simulate starting a dependency, e.g.: - // cmd := exec.CommandContext(ctx, "docker", "compose", "up", "-d") - // return cmd.Run() return nil }), // IterationSetup runs before each diagnose iteration. testrig.IterationSetup(func(_ context.Context) error { fmt.Println("Resetting database state for next iteration...") - // Simulate resetting state: - // cmd := exec.CommandContext(ctx, "psql", "-c", "TRUNCATE events") - // return cmd.Run() return nil }), diff --git a/internal/runner/analyze.go b/internal/runner/analyze.go index d6a92f3..40a2c66 100644 --- a/internal/runner/analyze.go +++ b/internal/runner/analyze.go @@ -14,6 +14,7 @@ import ( "sort" "strconv" "strings" + "sync" "time" "charm.land/lipgloss/v2" @@ -206,12 +207,18 @@ func (rep *Report) TestGroups() []TestGroup { // coupling the parser to the filesystem. type LogMap map[testKey]map[int]string +var readerPool = sync.Pool{ + New: func() any { + return bufio.NewReaderSize(nil, 1024*1024) + }, +} + // Analyze reads per-iteration test2json streams and classifies tests. // Malformed lines are silently skipped (go test can interleave non-JSON). func Analyze(iterations []io.Reader, slowThreshold time.Duration) (*Report, LogMap, error) { aggs := make(map[testKey]*aggregate) for i, r := range iterations { - if err := scanIterationJSONL(r, i, aggs, nil); err != nil { + if err := scanIterationJSONL(r, i, aggs, nil, slowThreshold); err != nil { return nil, nil, err } } @@ -241,16 +248,35 @@ func (a *aggregate) recordElapsed(iterIdx int, d time.Duration) { // scanIterationJSONL merges one iteration's JSONL stream into aggs at iterIdx. // meta may be nil; when set, records e.g. compile/build failure from FailedBuild on fail events. -func scanIterationJSONL(r io.Reader, iterIdx int, aggs map[testKey]*aggregate, meta *iterationScanMeta) error { - reader := bufio.NewReaderSize(r, 1024*1024) +func scanIterationJSONL( + r io.Reader, + iterIdx int, + aggs map[testKey]*aggregate, + meta *iterationScanMeta, + slowThreshold time.Duration, +) error { + reader := readerPool.Get().(*bufio.Reader) + reader.Reset(r) + defer func() { + reader.Reset(nil) + readerPool.Put(reader) + }() + for { - line, err := reader.ReadBytes('\n') + line, err := reader.ReadSlice('\n') + if err == bufio.ErrBufferFull { + rest, err2 := reader.ReadBytes('\n') + line = append(append([]byte(nil), line...), rest...) + err = err2 + } + if len(line) > 0 && line[0] == '{' { var ev TestEvent if json.Unmarshal(line, &ev) == nil { - applyTestEvent(aggs, iterIdx, &ev, meta) + applyTestEvent(aggs, iterIdx, &ev, meta, slowThreshold) } } + if err != nil { if err != io.EOF { return fmt.Errorf("reading iteration %d: %w", iterIdx, err) @@ -260,7 +286,13 @@ func scanIterationJSONL(r io.Reader, iterIdx int, aggs map[testKey]*aggregate, m } } -func applyTestEvent(aggs map[testKey]*aggregate, iterIdx int, ev *TestEvent, meta *iterationScanMeta) { +func applyTestEvent( + aggs map[testKey]*aggregate, + iterIdx int, + ev *TestEvent, + meta *iterationScanMeta, + slowThreshold time.Duration, +) { key := testKey{Package: ev.Package, Test: ev.Test} a := aggs[key] if a == nil { @@ -271,7 +303,11 @@ func applyTestEvent(aggs map[testKey]*aggregate, iterIdx int, ev *TestEvent, met case "pass": a.passes++ a.iterations[iterIdx] = struct{}{} - a.recordElapsed(iterIdx, seconds(ev.Elapsed)) + el := seconds(ev.Elapsed) + a.recordElapsed(iterIdx, el) + if !a.timedOut && (slowThreshold == 0 || el <= slowThreshold) { + delete(a.outputs, iterIdx) + } case "fail": if meta != nil && ev.FailedBuild != "" { meta.sawFailedBuild = true @@ -284,7 +320,11 @@ func applyTestEvent(aggs map[testKey]*aggregate, iterIdx int, ev *TestEvent, met a.skips++ a.iterations[iterIdx] = struct{}{} a.skipIters[iterIdx] = true - a.recordElapsed(iterIdx, seconds(ev.Elapsed)) + el := seconds(ev.Elapsed) + a.recordElapsed(iterIdx, el) + if !a.timedOut { + delete(a.outputs, iterIdx) + } case "output": if strings.Contains(ev.Output, timeoutPanic) { a.timedOut = true @@ -559,7 +599,7 @@ func countNamedTestsSkippedInAggs(aggs map[testKey]*aggregate) int { func DigestIterationJSONL(r io.Reader, slowThreshold time.Duration) (IterationDigest, error) { aggs := make(map[testKey]*aggregate) var meta iterationScanMeta - if err := scanIterationJSONL(r, 0, aggs, &meta); err != nil { + if err := scanIterationJSONL(r, 0, aggs, &meta, slowThreshold); err != nil { return IterationDigest{}, err } reattributeTimeouts(aggs, newAggregate) diff --git a/internal/runner/diagnose_progress.go b/internal/runner/diagnose_progress.go index cf2d219..9e0a44a 100644 --- a/internal/runner/diagnose_progress.go +++ b/internal/runner/diagnose_progress.go @@ -1,13 +1,8 @@ package runner import ( - "context" - "encoding/json" - "errors" "fmt" "io" - "os" - "os/exec" "slices" "strings" "sync" @@ -70,44 +65,6 @@ func packagePatternsFromEnd(args []string) []string { return pkgs } -// listTestPackageCount runs `go list -test -e` for the trailing package patterns -// in go test arguments (see packagePatternsFromEnd). On error or no patterns, -// returns an error or zero packages. -func listTestPackageCount(ctx context.Context, repoRoot string, goTestArgs []string) (int, error) { - pkgs := packagePatternsFromEnd(goTestArgs) - if len(pkgs) == 0 { - return 0, errors.New("no package patterns in go test arguments (put packages last, after flags)") - } - // Binary is fixed ("go"); pkgs come from the user's CLI package patterns by design. - //nolint:gosec // G204: forwarded package patterns from CLI invocation - cmd := exec.CommandContext(ctx, "go", append([]string{"list", "-test", "-e", "-f", "{{.ImportPath}}"}, pkgs...)...) - cmd.Dir = repoRoot - cmd.Env = os.Environ() - out, err := cmd.Output() - if err != nil { - return 0, err - } - n := 0 - for line := range strings.SplitSeq(string(out), "\n") { - if strings.TrimSpace(line) != "" { - n++ - } - } - if n == 0 { - return 0, errors.New("go list returned no packages") - } - return n, nil -} - -// diagnoseProgress tracks completed packages from a go test -json stream. -type diagnoseProgress struct { - mu sync.Mutex - done map[string]struct{} - lastPkg string - pkgOutcome map[string]string // package import path → pass|fail|skip (package-level events only) - total int // -1 when denominator is unknown (go list failed or empty) -} - type parallelDiagnoseProgress struct { mu sync.Mutex renderMu sync.Mutex @@ -138,14 +95,6 @@ func newParallelDiagnoseProgressAt(totalIterations int, poolStartedAt time.Time) } } -func newDiagnoseProgress(totalPackages int) *diagnoseProgress { - return &diagnoseProgress{ - done: make(map[string]struct{}), - pkgOutcome: make(map[string]string), - total: totalPackages, - } -} - func (p *parallelDiagnoseProgress) start(iteration int) { if p == nil { return @@ -214,50 +163,6 @@ func (p *parallelDiagnoseProgress) renderSnapshot( return completed, total, actives, poolElapsed } -// onTestJSONLine updates state from one JSONL line. Returns true if the number -// of completed packages increased (for throttled redraws). -func (p *diagnoseProgress) onTestJSONLine(line []byte) (completedIncreased bool) { - if len(line) == 0 || line[0] != '{' { - return false - } - var ev TestEvent - if err := json.Unmarshal(line, &ev); err != nil { - return false - } - if ev.Package != "" { - p.mu.Lock() - p.lastPkg = ev.Package - p.mu.Unlock() - } - if !isPackageTerminalEvent(&ev) { - return false - } - p.mu.Lock() - defer p.mu.Unlock() - p.pkgOutcome[ev.Package] = ev.Action - before := len(p.done) - p.done[ev.Package] = struct{}{} - return len(p.done) > before -} - -func isPackageTerminalEvent(ev *TestEvent) bool { - if ev.Package == "" || ev.Test != "" { - return false - } - switch ev.Action { - case "pass", "fail", "skip": - return true - default: - return false - } -} - -func (p *diagnoseProgress) snapshot() (completed int, total int, lastPkg string, outcome string) { - p.mu.Lock() - defer p.mu.Unlock() - return len(p.done), p.total, p.lastPkg, p.pkgOutcome[p.lastPkg] -} - // progressBracket wraps inner (already styled) in muted square brackets. func progressBracket(inner string) string { return termstyle.Muted.Render("[") + inner + termstyle.Muted.Render("]") diff --git a/internal/runner/diagnose_progress_test.go b/internal/runner/diagnose_progress_test.go index 9c4cd34..d4188e4 100644 --- a/internal/runner/diagnose_progress_test.go +++ b/internal/runner/diagnose_progress_test.go @@ -11,61 +11,6 @@ import ( "github.com/smartcontractkit/testrig/internal/output" ) -func TestDiagnoseProgress_onTestJSONLine_packageTerminal(t *testing.T) { - t.Parallel() - p := newDiagnoseProgress(2) - - require.False(t, p.onTestJSONLine([]byte(`not json`))) - require.False(t, p.onTestJSONLine([]byte(`{"Action":"run","Package":"a/b","Test":"TestX"}`))) - - require.True(t, p.onTestJSONLine([]byte(`{"Action":"pass","Package":"a/b"}`))) - c, tot, _, _ := p.snapshot() - require.Equal(t, 1, c) - require.Equal(t, 2, tot) - - // Duplicate package-level pass must not report a second completion tick. - require.False(t, p.onTestJSONLine([]byte(`{"Action":"pass","Package":"a/b"}`))) - c, _, _, _ = p.snapshot() - require.Equal(t, 1, c) - - require.True(t, p.onTestJSONLine([]byte(`{"Action":"fail","Package":"c/d"}`))) - c, _, _, _ = p.snapshot() - require.Equal(t, 2, c) -} - -func TestDiagnoseProgress_onTestJSONLine_skipFail(t *testing.T) { - t.Parallel() - p := newDiagnoseProgress(1) - require.True(t, p.onTestJSONLine([]byte(`{"Action":"skip","Package":"p"}`))) - c, _, _, _ := p.snapshot() - require.Equal(t, 1, c) - - p2 := newDiagnoseProgress(1) - require.True(t, p2.onTestJSONLine([]byte(`{"Action":"fail","Package":"p"}`))) - c2, _, _, _ := p2.snapshot() - require.Equal(t, 1, c2) -} - -func TestDiagnoseProgress_lastPkgUpdates(t *testing.T) { - t.Parallel() - p := newDiagnoseProgress(10) - p.onTestJSONLine([]byte(`{"Action":"run","Package":"x/y","Test":"TestZ"}`)) - _, _, last, _ := p.snapshot() - require.Equal(t, "x/y", last) -} - -func TestDiagnoseProgress_pkgOutcomeOnTerminal(t *testing.T) { - t.Parallel() - p := newDiagnoseProgress(5) - p.onTestJSONLine([]byte(`{"Action":"run","Package":"p/q","Test":"TestZ"}`)) - _, _, _, out := p.snapshot() - require.Empty(t, out) - p.onTestJSONLine([]byte(`{"Action":"pass","Package":"p/q"}`)) - _, _, last, out := p.snapshot() - require.Equal(t, "p/q", last) - require.Equal(t, "pass", out) -} - func TestEllipsizeRight(t *testing.T) { t.Parallel() require.Equal(t, "short", ellipsizeRight("short", 10)) diff --git a/internal/runner/runner.go b/internal/runner/runner.go index 86b9b6a..7f640d6 100644 --- a/internal/runner/runner.go +++ b/internal/runner/runner.go @@ -34,6 +34,7 @@ type diagnoseIterationParams struct { Out *output.Printer ResultsDir string GoTestArgs []string + ModuleDir string Iteration int ShuffleSeed int64 Env []string @@ -102,8 +103,6 @@ func Gotestsum(ctx context.Context, conf *config.App, args []string) error { // iterSetup and iterTeardown run before/after each iteration. Either may be // nil. Teardown runs even when the iteration's go test invocation fails; its // error is reported only when the iteration itself succeeded. -// -//nolint:gocyclo func Diagnose( ctx context.Context, conf *config.App, @@ -384,6 +383,12 @@ func runDiagnoseIterations( if hooks.runIteration == nil { hooks.runIteration = diagnoseIteration } + + moduleDir, adjustedArgs, err := resolveModuleDir(conf.RepoRoot, goTestArgs) + if err != nil { + return diagnoseRunState{}, err + } + if hooks.seed == nil { hooks.seed = func() int64 { return rand.Int64N(1<<62) + 1 } //nolint:gosec // G404: non-crypto seed for test shuffle } @@ -432,7 +437,8 @@ func runDiagnoseIterations( conf: conf, out: out, resultsDir: resultsDir, - goTestArgs: goTestArgs, + goTestArgs: adjustedArgs, + moduleDir: moduleDir, hooks: hooks, parallel: parallel, parallelProgress: parallelProgress, @@ -487,6 +493,7 @@ type diagnoseWorker struct { out *output.Printer resultsDir string goTestArgs []string + moduleDir string hooks diagnoseRunHooks parallel int parallelProgress *parallelDiagnoseProgress @@ -529,6 +536,7 @@ func (w *diagnoseWorker) run(runCtx context.Context, resource diagnoseIterationR Out: w.out, ResultsDir: w.resultsDir, GoTestArgs: w.goTestArgs, + ModuleDir: w.moduleDir, Iteration: iteration, ShuffleSeed: seed, Env: resource.Env, @@ -1098,16 +1106,12 @@ func (sw *syncedWriter) Write(p []byte) (int, error) { func diagnoseIteration(ctx context.Context, p diagnoseIterationParams) error { conf, out := p.Conf, p.Out - resultsDir, goTestArgs := p.ResultsDir, p.GoTestArgs + resultsDir := p.ResultsDir iteration, shuffleSeed := p.Iteration, p.ShuffleSeed env := p.Env liveProgress, parallelProgress := p.LiveProgress, p.ParallelProgress diagnoseRunStart, serialProgressMu := p.DiagnoseRunStart, p.SerialProgressMu - - moduleDir, goTestArgs, err := resolveModuleDir(conf.RepoRoot, goTestArgs) - if err != nil { - return err - } + moduleDir, goTestArgs := p.ModuleDir, p.GoTestArgs start := time.Now() jsonPath := filepath.Join(resultsDir, fmt.Sprintf("iteration-%d.log.jsonl", iteration)) @@ -1115,7 +1119,11 @@ func diagnoseIteration(ctx context.Context, p diagnoseIterationParams) error { if err != nil { return err } - defer func() { _ = resultsFile.Close() }() + bw := bufio.NewWriterSize(resultsFile, 128*1024) + defer func() { + _ = bw.Flush() + _ = resultsFile.Close() + }() args, err := buildDiagnoseArgs(goTestArgs, shuffleSeed) if err != nil { @@ -1130,29 +1138,19 @@ func diagnoseIteration(ctx context.Context, p diagnoseIterationParams) error { cmd.Cancel = func() error { return cmd.Process.Signal(os.Interrupt) } cmd.WaitDelay = 5 * time.Second + sw := &syncedWriter{w: bw} + cmd.Stdout = sw + cmd.Stderr = sw + if out.AIOutput() { - sw := &syncedWriter{w: resultsFile} - cmd.Stdout = sw - cmd.Stderr = sw return cmd.Run() } - sw := &syncedWriter{w: resultsFile} - cmd.Stderr = sw - - totalPkgs := -1 - if n, listErr := listTestPackageCount(ctx, moduleDir, goTestArgs); listErr == nil { - totalPkgs = n - } - prog := newDiagnoseProgress(totalPkgs) if parallelProgress != nil { parallelProgress.start(iteration) defer parallelProgress.finish(iteration) } - pr, pw := io.Pipe() - cmd.Stdout = pw - live := liveProgress && out.LiveInlineProgress() iter, iters := iteration+1, conf.Iterations if liveProgress && !live { @@ -1175,30 +1173,6 @@ func diagnoseIteration(ctx context.Context, p diagnoseIterationParams) error { ) } - var readWG sync.WaitGroup - var scanErr error - readWG.Go(func() { - r := bufio.NewReaderSize(pr, 1024*1024) - for { - line, err := r.ReadBytes('\n') - if len(line) > 0 { - if _, werr := sw.Write(line); werr != nil { - break - } - completedIncreased := prog.onTestJSONLine(line) - if completedIncreased && !live { - redraw(false) - } - } - if err != nil { - if err != io.EOF { - scanErr = err - } - break - } - } - }) - tickDone := make(chan struct{}) var tickWG sync.WaitGroup if live { @@ -1217,24 +1191,13 @@ func diagnoseIteration(ctx context.Context, p diagnoseIterationParams) error { redraw(true) } - runErr := cmd.Start() - started := runErr == nil - if started { - runErr = cmd.Wait() - _ = pw.Close() - } else { - _ = pw.CloseWithError(runErr) - } - readWG.Wait() + runErr := cmd.Run() close(tickDone) tickWG.Wait() if live { out.ClearInline() } - if scanErr != nil { - return fmt.Errorf("reading go test output: %w", scanErr) - } return runErr } diff --git a/internal/runner/runner_bench_test.go b/internal/runner/runner_bench_test.go new file mode 100644 index 0000000..30562e9 --- /dev/null +++ b/internal/runner/runner_bench_test.go @@ -0,0 +1,69 @@ +package runner + +import ( + "context" + "io" + "os/exec" + "path/filepath" + "testing" + "time" + + "github.com/stretchr/testify/require" + + "github.com/smartcontractkit/testrig/internal/config" + "github.com/smartcontractkit/testrig/internal/output" +) + +const benchOverheadPackage = "./internal/runner/testdata/dummy/..." + +// BenchmarkBaselineGoTest is the floor: raw `go test -json` against the same +// target Diagnose runs. Subtract its ns/op, B/op, allocs/op from +// BenchmarkDiagnose to read the overhead Diagnose adds. +func BenchmarkBaselineGoTest(b *testing.B) { + repoRoot, err := filepath.Abs("../..") + require.NoError(b, err) + ctx := context.Background() + + b.ReportAllocs() + for b.Loop() { + //nolint:gosec // G204: fixed args against testdata target + cmd := exec.CommandContext(ctx, "go", "test", "-json", "-count=1", benchOverheadPackage) + cmd.Dir = repoRoot + cmd.Stdout = io.Discard + cmd.Stderr = io.Discard + require.NoError(b, cmd.Run()) + } +} + +// BenchmarkDiagnose runs one Diagnose iteration against the same target as +// BenchmarkBaselineGoTest. ns/op minus baseline is the overhead Diagnose adds. +func BenchmarkDiagnose(b *testing.B) { + repoRoot, err := filepath.Abs("../..") + require.NoError(b, err) + + out := output.NewForTest(true, io.Discard, io.Discard, false) + ctx := context.Background() + args := []string{benchOverheadPackage} + conf := &config.App{ + RepoRoot: repoRoot, + Iterations: 1, + ParallelIterations: 1, + SlowThreshold: time.Second, + } + + b.ReportAllocs() + for b.Loop() { + require.NoError(b, Diagnose(ctx, conf, out, args, nil, nil)) + } +} + +func BenchmarkResolveModuleDir(b *testing.B) { + repoRoot, err := filepath.Abs("../..") + require.NoError(b, err) + args := []string{"./internal/runner/..."} + + for b.Loop() { + _, _, err := resolveModuleDir(repoRoot, args) + require.NoError(b, err) + } +} diff --git a/internal/runner/testdata/dummy/dummy_test.go b/internal/runner/testdata/dummy/dummy_test.go new file mode 100644 index 0000000..f400466 --- /dev/null +++ b/internal/runner/testdata/dummy/dummy_test.go @@ -0,0 +1,5 @@ +package dummy + +import "testing" + +func TestDummy(t *testing.T) {} diff --git a/justfile b/justfile index dc677a4..3980709 100644 --- a/justfile +++ b/justfile @@ -16,7 +16,24 @@ test_race: # Run benchmarks with memory stats and specific CPU counts bench: - go test -bench=. -benchmem -run=^$ ./... -cpu=2,4,8 + go test -bench=. -benchmem -run=^$ ./... + +# Compare diagnose overhead vs raw `go test -json` on the dummy target. +# Overhead = ns/op, B/op, allocs/op delta between BenchmarkDiagnose and BenchmarkBaselineGoTest. +bench_overhead: + go test ./internal/runner/ -run='^$' -bench='^Benchmark(BaselineGoTest|Diagnose)$' -benchmem -count=3 + +# Memory profile of BenchmarkDiagnose (CPU is low-signal; subprocess-bound). +bench_overhead_profile: + #!/usr/bin/env bash + set -euo pipefail + out_dir=out/bench + mkdir -p "$out_dir" + go test ./internal/runner/ -run='^$' -bench='^BenchmarkDiagnose$' -benchmem -count=1 \ + -memprofile="$out_dir/diagnose_mem.prof" 2>&1 | tee "$out_dir/diagnose_bench.txt" + go tool pprof -top -nodecount=30 -focus='testrig/internal/runner' "$out_dir/diagnose_mem.prof" \ + > "$out_dir/diagnose_mem_runner.txt" + echo "Mem hotspots: $out_dir/diagnose_mem_runner.txt" # Local GoReleaser dry-run (snapshot) goreleaser: From bce4ad5ee96f5e423ba508156fd035df2b0051cd Mon Sep 17 00:00:00 2001 From: Adam Hamrick Date: Thu, 28 May 2026 16:37:53 -0400 Subject: [PATCH 2/9] Use test instead of benchmark --- AGENTS.md | 11 -- internal/runner/runner_bench_test.go | 194 +++++++++++++++++++++++++-- justfile | 19 +-- 3 files changed, 183 insertions(+), 41 deletions(-) diff --git a/AGENTS.md b/AGENTS.md index edd5deb..b9ce910 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -26,14 +26,3 @@ go test ./... # Test **Public API vs CLI are separate layers.** `hooks.go` / `testrig.go` are the library surface. The CLI in `internal/cmd/` wraps that. Don't put CLI logic in the root package. **Hook catalog drives CLI flags and docs.** Add lifecycle hooks in [`internal/hooks/catalog.go`](internal/hooks/catalog.go) (`Catalog` slice), then wire `RunOptions.Hook`, public `hooks.go` registrar, and `go generate`. Document timing in godoc on registrars in [`hooks.go`](hooks.go) as `// Name registers a hook to .` `hooks.RegisterPersistentFlags` registers Cobra flags from the catalog. - -## Diagnose overhead benchmark - -`internal/runner/runner_bench_test.go` defines a side-by-side pair against `internal/runner/testdata/dummy/...`: - -- `BenchmarkBaselineGoTest` — raw `go test -json` (the floor). -- `BenchmarkDiagnose` — one `Diagnose` iteration with `parallel=1`. - -The overhead `Diagnose` adds is the **delta** in `ns/op`, `B/op`, and `allocs/op` between the two. Run with `just bench_overhead` (uses `-count=3` for variance). For a memory profile of the diagnose pipeline, `just bench_overhead_profile` writes `out/bench/diagnose_mem_runner.txt` (look for `scanIterationJSONL`, `applyTestEvent`, `buildReportFromAggs`). CPU profile is low-signal — parent blocks on the child subprocess. - -Drill down into the parent-only pipeline (no subprocess) via the micro-benches in `internal/runner/analyze_bench_test.go`. diff --git a/internal/runner/runner_bench_test.go b/internal/runner/runner_bench_test.go index 30562e9..3ad5314 100644 --- a/internal/runner/runner_bench_test.go +++ b/internal/runner/runner_bench_test.go @@ -2,10 +2,15 @@ package runner import ( "context" + "fmt" "io" + "os" "os/exec" "path/filepath" + "strings" + "sync" "testing" + "text/tabwriter" "time" "github.com/stretchr/testify/require" @@ -16,6 +21,74 @@ import ( const benchOverheadPackage = "./internal/runner/testdata/dummy/..." +// baselineWorkload runs the raw `go test -json` floor for one diagnose-equivalent +// workload: `iterations` invocations against the dummy target, at most `parallel` +// running concurrently (mirroring how Diagnose schedules iterations across workers). +func baselineWorkload(ctx context.Context, repoRoot string, iterations, parallel int) error { + if parallel < 1 { + parallel = 1 + } + sem := make(chan struct{}, parallel) + var wg sync.WaitGroup + var mu sync.Mutex + var firstErr error + for range iterations { + sem <- struct{}{} + wg.Go(func() { + defer func() { <-sem }() + //nolint:gosec // G204: fixed args against testdata target + cmd := exec.CommandContext(ctx, "go", "test", "-json", "-count=1", benchOverheadPackage) + cmd.Dir = repoRoot + cmd.Stdout = io.Discard + cmd.Stderr = io.Discard + if err := cmd.Run(); err != nil { + mu.Lock() + if firstErr == nil { + firstErr = err + } + mu.Unlock() + } + }) + } + wg.Wait() + return firstErr +} + +// diagnoseWorkload runs one Diagnose call against the dummy target with the given +// iteration count and parallelism. Output is discarded. +func diagnoseWorkload(ctx context.Context, out *output.Printer, repoRoot string, iterations, parallel int) error { + conf := &config.App{ + RepoRoot: repoRoot, + Iterations: iterations, + ParallelIterations: parallel, + SlowThreshold: time.Second, + } + return Diagnose(ctx, conf, out, []string{benchOverheadPackage}, nil, nil) +} + +// existingDiagnoseDirs lists the diagnose-* result dirs currently in repoRoot. +func existingDiagnoseDirs(repoRoot string) []string { + matches, _ := filepath.Glob(filepath.Join(repoRoot, "diagnose-*")) + return matches +} + +// cleanupNewDiagnoseDirs removes any diagnose-* result dirs created during the +// benchmark, so repeated runs don't accumulate output dirs in the repo root. +func cleanupNewDiagnoseDirs(tb testing.TB, repoRoot string) { + tb.Helper() + before := make(map[string]struct{}) + for _, d := range existingDiagnoseDirs(repoRoot) { + before[d] = struct{}{} + } + tb.Cleanup(func() { + for _, d := range existingDiagnoseDirs(repoRoot) { + if _, ok := before[d]; !ok { + _ = os.RemoveAll(d) + } + } + }) +} + // BenchmarkBaselineGoTest is the floor: raw `go test -json` against the same // target Diagnose runs. Subtract its ns/op, B/op, allocs/op from // BenchmarkDiagnose to read the overhead Diagnose adds. @@ -26,12 +99,7 @@ func BenchmarkBaselineGoTest(b *testing.B) { b.ReportAllocs() for b.Loop() { - //nolint:gosec // G204: fixed args against testdata target - cmd := exec.CommandContext(ctx, "go", "test", "-json", "-count=1", benchOverheadPackage) - cmd.Dir = repoRoot - cmd.Stdout = io.Discard - cmd.Stderr = io.Discard - require.NoError(b, cmd.Run()) + require.NoError(b, baselineWorkload(ctx, repoRoot, 1, 1)) } } @@ -40,21 +108,119 @@ func BenchmarkBaselineGoTest(b *testing.B) { func BenchmarkDiagnose(b *testing.B) { repoRoot, err := filepath.Abs("../..") require.NoError(b, err) + cleanupNewDiagnoseDirs(b, repoRoot) out := output.NewForTest(true, io.Discard, io.Discard, false) ctx := context.Background() - args := []string{benchOverheadPackage} - conf := &config.App{ - RepoRoot: repoRoot, - Iterations: 1, - ParallelIterations: 1, - SlowThreshold: time.Second, - } b.ReportAllocs() for b.Loop() { - require.NoError(b, Diagnose(ctx, conf, out, args, nil, nil)) + require.NoError(b, diagnoseWorkload(ctx, out, repoRoot, 1, 1)) + } +} + +// overheadConfig is one (iterations, parallel) point in the overhead matrix. +type overheadConfig struct { + iterations int + parallel int +} + +// overheadRow pairs a config with its measured baseline and diagnose results. +type overheadRow struct { + cfg overheadConfig + baseline testing.BenchmarkResult + diagnose testing.BenchmarkResult +} + +// overheadMatrix is the set of (iterations, parallel) points measured by +// BenchmarkDiagnoseOverhead: single run, sequential iterations, then parallel. +var overheadMatrix = []overheadConfig{ + {iterations: 1, parallel: 1}, + {iterations: 4, parallel: 1}, + {iterations: 4, parallel: 4}, + {iterations: 8, parallel: 1}, + {iterations: 8, parallel: 8}, +} + +// overheadMatrixEnv gates TestDiagnoseOverhead; it spawns many `go test` +// subprocesses and is too slow for the normal test run. +const overheadMatrixEnv = "TESTRIG_BENCH_OVERHEAD" + +// TestDiagnoseOverhead measures Diagnose overhead vs the raw `go test` floor +// across the overheadMatrix and logs a diff table (total overhead and +// per-iteration overhead). It is a test, not a Benchmark, because it drives +// testing.Benchmark internally (which deadlocks if called from a benchmark). +// Run via `just bench_overhead_matrix`; gated behind TESTRIG_BENCH_OVERHEAD. +// +//nolint:paralleltest // serial by design: spawns many go test subprocesses and measures wall time. +func TestDiagnoseOverhead(t *testing.T) { + if os.Getenv(overheadMatrixEnv) == "" { + t.Skipf("set %s=1 to run the diagnose overhead matrix", overheadMatrixEnv) + } + repoRoot, err := filepath.Abs("../..") + require.NoError(t, err) + cleanupNewDiagnoseDirs(t, repoRoot) + + out := output.NewForTest(true, io.Discard, io.Discard, false) + ctx := context.Background() + + rows := make([]overheadRow, 0, len(overheadMatrix)) + for _, cfg := range overheadMatrix { + base := testing.Benchmark(func(b *testing.B) { + b.ReportAllocs() + for b.Loop() { + require.NoError(b, baselineWorkload(ctx, repoRoot, cfg.iterations, cfg.parallel)) + } + }) + require.NotZero(t, base.N, "baseline workload failed for %+v", cfg) + + diag := testing.Benchmark(func(b *testing.B) { + b.ReportAllocs() + for b.Loop() { + require.NoError(b, diagnoseWorkload(ctx, out, repoRoot, cfg.iterations, cfg.parallel)) + } + }) + require.NotZero(t, diag.N, "diagnose workload failed for %+v", cfg) + + rows = append(rows, overheadRow{cfg: cfg, baseline: base, diagnose: diag}) + } + printDiagnoseOverhead(t, rows) +} + +// roundedDur renders ns as a duration rounded to microseconds for the table. +func roundedDur(ns int64) string { + return time.Duration(ns).Round(time.Microsecond).String() +} + +// overheadDur renders overhead; negative deltas (noise) show as 0. +func overheadDur(ns int64) string { + if ns < 0 { + ns = 0 + } + return roundedDur(ns) +} + +// printDiagnoseOverhead logs a table of baseline vs diagnose wall time per config. +// overhead = diagnose ns/op - baseline ns/op; overhead/iter divides by iterations. +func printDiagnoseOverhead(t *testing.T, rows []overheadRow) { + t.Helper() + var sb strings.Builder + tw := tabwriter.NewWriter(&sb, 0, 0, 2, ' ', 0) + _, _ = fmt.Fprintln(tw, "iters\tparallel\tbaseline\tdiagnose\toverhead\toverhead/iter") + for _, r := range rows { + overheadNs := r.diagnose.NsPerOp() - r.baseline.NsPerOp() + perIterNs := overheadNs / int64(max(r.cfg.iterations, 1)) + _, _ = fmt.Fprintf(tw, "%d\t%d\t%s\t%s\t%s\t%s\n", + r.cfg.iterations, + r.cfg.parallel, + roundedDur(r.baseline.NsPerOp()), + roundedDur(r.diagnose.NsPerOp()), + overheadDur(overheadNs), + overheadDur(perIterNs), + ) } + _ = tw.Flush() + t.Logf("\nDiagnose overhead vs raw go test:\n%s", sb.String()) } func BenchmarkResolveModuleDir(b *testing.B) { diff --git a/justfile b/justfile index 3980709..46b9c94 100644 --- a/justfile +++ b/justfile @@ -18,22 +18,9 @@ test_race: bench: go test -bench=. -benchmem -run=^$ ./... -# Compare diagnose overhead vs raw `go test -json` on the dummy target. -# Overhead = ns/op, B/op, allocs/op delta between BenchmarkDiagnose and BenchmarkBaselineGoTest. -bench_overhead: - go test ./internal/runner/ -run='^$' -bench='^Benchmark(BaselineGoTest|Diagnose)$' -benchmem -count=3 - -# Memory profile of BenchmarkDiagnose (CPU is low-signal; subprocess-bound). -bench_overhead_profile: - #!/usr/bin/env bash - set -euo pipefail - out_dir=out/bench - mkdir -p "$out_dir" - go test ./internal/runner/ -run='^$' -bench='^BenchmarkDiagnose$' -benchmem -count=1 \ - -memprofile="$out_dir/diagnose_mem.prof" 2>&1 | tee "$out_dir/diagnose_bench.txt" - go tool pprof -top -nodecount=30 -focus='testrig/internal/runner' "$out_dir/diagnose_mem.prof" \ - > "$out_dir/diagnose_mem_runner.txt" - echo "Mem hotspots: $out_dir/diagnose_mem_runner.txt" +# Print a diff table of diagnose overhead across iteration counts and parallelism. +bench_overhead_matrix: + TESTRIG_BENCH_OVERHEAD=1 go test ./internal/runner/ -run='^TestDiagnoseOverhead$' -v -timeout=20m # Local GoReleaser dry-run (snapshot) goreleaser: From 643788b04c41d84bcde3b0958b62360ad57bb9d7 Mon Sep 17 00:00:00 2001 From: Adam Hamrick Date: Thu, 28 May 2026 16:39:49 -0400 Subject: [PATCH 3/9] short --- internal/runner/runner_bench_test.go | 3 +++ 1 file changed, 3 insertions(+) diff --git a/internal/runner/runner_bench_test.go b/internal/runner/runner_bench_test.go index 3ad5314..3ff859c 100644 --- a/internal/runner/runner_bench_test.go +++ b/internal/runner/runner_bench_test.go @@ -157,6 +157,9 @@ func TestDiagnoseOverhead(t *testing.T) { if os.Getenv(overheadMatrixEnv) == "" { t.Skipf("set %s=1 to run the diagnose overhead matrix", overheadMatrixEnv) } + if testing.Short() { + t.Skip("skipping diagnose overhead matrix in short mode") + } repoRoot, err := filepath.Abs("../..") require.NoError(t, err) cleanupNewDiagnoseDirs(t, repoRoot) From 05ba2d0bb1e02a14f068c6225a3affd5a412fa2d Mon Sep 17 00:00:00 2001 From: Adam Hamrick Date: Thu, 28 May 2026 16:55:07 -0400 Subject: [PATCH 4/9] include dogfood bench as well --- go.mod | 2 +- internal/runner/runner_bench_test.go | 104 +++++++++++++++++++-------- justfile | 13 +++- 3 files changed, 86 insertions(+), 33 deletions(-) diff --git a/go.mod b/go.mod index 002f704..c56b021 100644 --- a/go.mod +++ b/go.mod @@ -15,6 +15,7 @@ require ( github.com/spf13/cobra v1.10.2 github.com/spf13/pflag v1.0.10 github.com/stretchr/testify v1.11.1 + golang.org/x/sync v0.20.0 ) require ( @@ -50,7 +51,6 @@ require ( github.com/xo/terminfo v0.0.0-20220910002029-abceb7e1c41e // indirect golang.org/x/exp v0.0.0-20260410095643-746e56fc9e2f // indirect golang.org/x/mod v0.35.0 // indirect - golang.org/x/sync v0.20.0 // indirect golang.org/x/sys v0.45.0 // indirect golang.org/x/term v0.43.0 // indirect golang.org/x/text v0.37.0 // indirect diff --git a/internal/runner/runner_bench_test.go b/internal/runner/runner_bench_test.go index 3ff859c..840b34f 100644 --- a/internal/runner/runner_bench_test.go +++ b/internal/runner/runner_bench_test.go @@ -14,31 +14,39 @@ import ( "time" "github.com/stretchr/testify/require" + "golang.org/x/sync/semaphore" "github.com/smartcontractkit/testrig/internal/config" "github.com/smartcontractkit/testrig/internal/output" ) -const benchOverheadPackage = "./internal/runner/testdata/dummy/..." +const ( + benchDummyTarget = "./internal/runner/testdata/dummy/..." + benchDogfoodTarget = "./..." +) // baselineWorkload runs the raw `go test -json` floor for one diagnose-equivalent -// workload: `iterations` invocations against the dummy target, at most `parallel` -// running concurrently (mirroring how Diagnose schedules iterations across workers). -func baselineWorkload(ctx context.Context, repoRoot string, iterations, parallel int) error { +// workload: `iterations` invocations against target, at most `parallel` running +// concurrently (mirroring how Diagnose schedules iterations across workers). +func baselineWorkload(ctx context.Context, repoRoot, target string, iterations, parallel int) error { if parallel < 1 { parallel = 1 } - sem := make(chan struct{}, parallel) + sem := semaphore.NewWeighted(int64(parallel)) var wg sync.WaitGroup var mu sync.Mutex var firstErr error for range iterations { - sem <- struct{}{} + if err := sem.Acquire(ctx, 1); err != nil { + wg.Wait() + return err + } wg.Go(func() { - defer func() { <-sem }() - //nolint:gosec // G204: fixed args against testdata target - cmd := exec.CommandContext(ctx, "go", "test", "-json", "-count=1", benchOverheadPackage) + defer sem.Release(1) + //nolint:gosec // G204: target is fixed per test (dummy or ./...) + cmd := exec.CommandContext(ctx, "go", "test", "-json", "-count=1", target) cmd.Dir = repoRoot + cmd.Env = envWithoutKey(os.Environ(), overheadMatrixEnv) cmd.Stdout = io.Discard cmd.Stderr = io.Discard if err := cmd.Run(); err != nil { @@ -54,16 +62,33 @@ func baselineWorkload(ctx context.Context, repoRoot string, iterations, parallel return firstErr } -// diagnoseWorkload runs one Diagnose call against the dummy target with the given -// iteration count and parallelism. Output is discarded. -func diagnoseWorkload(ctx context.Context, out *output.Printer, repoRoot string, iterations, parallel int) error { +// diagnoseWorkload runs one Diagnose call against target with the given iteration +// count and parallelism. Output is discarded. +func diagnoseWorkload( + ctx context.Context, + out *output.Printer, + repoRoot, target string, + iterations, parallel int, +) error { conf := &config.App{ RepoRoot: repoRoot, Iterations: iterations, ParallelIterations: parallel, SlowThreshold: time.Second, } - return Diagnose(ctx, conf, out, []string{benchOverheadPackage}, nil, nil) + return Diagnose(ctx, conf, out, []string{target}, nil, nil) +} + +func envWithoutKey(env []string, key string) []string { + prefix := key + "=" + out := make([]string, 0, len(env)) + for _, e := range env { + if e == key || strings.HasPrefix(e, prefix) { + continue + } + out = append(out, e) + } + return out } // existingDiagnoseDirs lists the diagnose-* result dirs currently in repoRoot. @@ -99,7 +124,7 @@ func BenchmarkBaselineGoTest(b *testing.B) { b.ReportAllocs() for b.Loop() { - require.NoError(b, baselineWorkload(ctx, repoRoot, 1, 1)) + require.NoError(b, baselineWorkload(ctx, repoRoot, benchDummyTarget, 1, 1)) } } @@ -115,7 +140,7 @@ func BenchmarkDiagnose(b *testing.B) { b.ReportAllocs() for b.Loop() { - require.NoError(b, diagnoseWorkload(ctx, out, repoRoot, 1, 1)) + require.NoError(b, diagnoseWorkload(ctx, out, repoRoot, benchDummyTarget, 1, 1)) } } @@ -142,28 +167,37 @@ var overheadMatrix = []overheadConfig{ {iterations: 8, parallel: 8}, } -// overheadMatrixEnv gates TestDiagnoseOverhead; it spawns many `go test` +// overheadMatrixEnv gates TestDiagnoseOverhead_*; it spawns many `go test` // subprocesses and is too slow for the normal test run. const overheadMatrixEnv = "TESTRIG_BENCH_OVERHEAD" -// TestDiagnoseOverhead measures Diagnose overhead vs the raw `go test` floor -// across the overheadMatrix and logs a diff table (total overhead and -// per-iteration overhead). It is a test, not a Benchmark, because it drives -// testing.Benchmark internally (which deadlocks if called from a benchmark). -// Run via `just bench_overhead_matrix`; gated behind TESTRIG_BENCH_OVERHEAD. -// -//nolint:paralleltest // serial by design: spawns many go test subprocesses and measures wall time. -func TestDiagnoseOverhead(t *testing.T) { +func skipUnlessDiagnoseOverheadMatrix(t *testing.T) { + t.Helper() if os.Getenv(overheadMatrixEnv) == "" { t.Skipf("set %s=1 to run the diagnose overhead matrix", overheadMatrixEnv) } if testing.Short() { t.Skip("skipping diagnose overhead matrix in short mode") } +} + +// runDiagnoseOverheadMatrix measures Diagnose overhead vs the raw `go test` floor +// across overheadMatrix for target and logs a diff table. It is a test helper, not +// a Benchmark, because it drives testing.Benchmark internally (which deadlocks if +// called from a benchmark). +// +//nolint:paralleltest // serial by design: spawns many go test subprocesses and measures wall time. +func runDiagnoseOverheadMatrix(t *testing.T, label, target string) { + t.Helper() + skipUnlessDiagnoseOverheadMatrix(t) + repoRoot, err := filepath.Abs("../..") require.NoError(t, err) cleanupNewDiagnoseDirs(t, repoRoot) + // Child `go test` processes must not see TESTRIG_BENCH_OVERHEAD (dogfood runs ./...). + t.Setenv(overheadMatrixEnv, "") + out := output.NewForTest(true, io.Discard, io.Discard, false) ctx := context.Background() @@ -172,7 +206,7 @@ func TestDiagnoseOverhead(t *testing.T) { base := testing.Benchmark(func(b *testing.B) { b.ReportAllocs() for b.Loop() { - require.NoError(b, baselineWorkload(ctx, repoRoot, cfg.iterations, cfg.parallel)) + require.NoError(b, baselineWorkload(ctx, repoRoot, target, cfg.iterations, cfg.parallel)) } }) require.NotZero(t, base.N, "baseline workload failed for %+v", cfg) @@ -180,14 +214,26 @@ func TestDiagnoseOverhead(t *testing.T) { diag := testing.Benchmark(func(b *testing.B) { b.ReportAllocs() for b.Loop() { - require.NoError(b, diagnoseWorkload(ctx, out, repoRoot, cfg.iterations, cfg.parallel)) + require.NoError(b, diagnoseWorkload(ctx, out, repoRoot, target, cfg.iterations, cfg.parallel)) } }) require.NotZero(t, diag.N, "diagnose workload failed for %+v", cfg) rows = append(rows, overheadRow{cfg: cfg, baseline: base, diagnose: diag}) } - printDiagnoseOverhead(t, rows) + printDiagnoseOverhead(t, label, target, rows) +} + +// TestDiagnoseOverhead_Dummy runs the overhead matrix against the tiny dummy package. +// Run via `just bench_overhead_matrix_dummy`. +func TestDiagnoseOverhead_Dummy(t *testing.T) { + runDiagnoseOverheadMatrix(t, "dummy", benchDummyTarget) +} + +// TestDiagnoseOverhead_Dogfood runs the overhead matrix against the full testrig module (./...). +// Run via `just bench_overhead_matrix_dogfood`; expect much longer wall time than dummy. +func TestDiagnoseOverhead_Dogfood(t *testing.T) { + runDiagnoseOverheadMatrix(t, "dogfood", benchDogfoodTarget) } // roundedDur renders ns as a duration rounded to microseconds for the table. @@ -205,7 +251,7 @@ func overheadDur(ns int64) string { // printDiagnoseOverhead logs a table of baseline vs diagnose wall time per config. // overhead = diagnose ns/op - baseline ns/op; overhead/iter divides by iterations. -func printDiagnoseOverhead(t *testing.T, rows []overheadRow) { +func printDiagnoseOverhead(t *testing.T, label, target string, rows []overheadRow) { t.Helper() var sb strings.Builder tw := tabwriter.NewWriter(&sb, 0, 0, 2, ' ', 0) @@ -223,7 +269,7 @@ func printDiagnoseOverhead(t *testing.T, rows []overheadRow) { ) } _ = tw.Flush() - t.Logf("\nDiagnose overhead vs raw go test:\n%s", sb.String()) + t.Logf("\nDiagnose overhead vs raw go test (%s, target=%s):\n%s", label, target, sb.String()) } func BenchmarkResolveModuleDir(b *testing.B) { diff --git a/justfile b/justfile index 46b9c94..7c6c1bc 100644 --- a/justfile +++ b/justfile @@ -18,9 +18,16 @@ test_race: bench: go test -bench=. -benchmem -run=^$ ./... -# Print a diff table of diagnose overhead across iteration counts and parallelism. -bench_overhead_matrix: - TESTRIG_BENCH_OVERHEAD=1 go test ./internal/runner/ -run='^TestDiagnoseOverhead$' -v -timeout=20m +# Print a diff table of diagnose overhead (dummy package; fast). +bench_overhead_matrix_dummy: + TESTRIG_BENCH_OVERHEAD=1 go test ./internal/runner/ -run='^TestDiagnoseOverhead_Dummy$' -v + +# Same matrix against the full testrig module (./...); slow. +bench_overhead_matrix_dogfood: + TESTRIG_BENCH_OVERHEAD=1 go test ./internal/runner/ -run='^TestDiagnoseOverhead_Dogfood$' -v + +# Run dummy then dogfood overhead matrices. +bench_overhead_matrix: bench_overhead_matrix_dummy bench_overhead_matrix_dogfood # Local GoReleaser dry-run (snapshot) goreleaser: From 938b8f0a6596ba5c68406de4d46205be0d7ff64a Mon Sep 17 00:00:00 2001 From: Adam Hamrick Date: Thu, 28 May 2026 16:59:45 -0400 Subject: [PATCH 5/9] lint --- internal/runner/runner_bench_test.go | 4 ++++ lefthook.yml | 2 +- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/internal/runner/runner_bench_test.go b/internal/runner/runner_bench_test.go index 840b34f..94808dc 100644 --- a/internal/runner/runner_bench_test.go +++ b/internal/runner/runner_bench_test.go @@ -226,12 +226,16 @@ func runDiagnoseOverheadMatrix(t *testing.T, label, target string) { // TestDiagnoseOverhead_Dummy runs the overhead matrix against the tiny dummy package. // Run via `just bench_overhead_matrix_dummy`. +// +//nolint:paralleltest // serial by design: spawns many go test subprocesses and measures wall time. func TestDiagnoseOverhead_Dummy(t *testing.T) { runDiagnoseOverheadMatrix(t, "dummy", benchDummyTarget) } // TestDiagnoseOverhead_Dogfood runs the overhead matrix against the full testrig module (./...). // Run via `just bench_overhead_matrix_dogfood`; expect much longer wall time than dummy. +// +//nolint:paralleltest // serial by design: spawns many go test subprocesses and measures wall time. func TestDiagnoseOverhead_Dogfood(t *testing.T) { runDiagnoseOverheadMatrix(t, "dogfood", benchDogfoodTarget) } diff --git a/lefthook.yml b/lefthook.yml index bbbcf28..7d6443d 100644 --- a/lefthook.yml +++ b/lefthook.yml @@ -97,5 +97,5 @@ pre-push: commands: go-short-tests: tags: test - glob: "*.go" + glob: "*.go|go.mod" run: go test -short ./... From 11a3bc390851fd3bb4d02de3b41c33d49c9dcf96 Mon Sep 17 00:00:00 2001 From: Adam Hamrick Date: Fri, 29 May 2026 09:18:11 -0400 Subject: [PATCH 6/9] percentages --- AGENTS.md | 1 + internal/runner/runner.go | 16 ++- internal/runner/runner_bench_test.go | 174 +++++++++++++++++++++++---- justfile | 12 +- 4 files changed, 168 insertions(+), 35 deletions(-) diff --git a/AGENTS.md b/AGENTS.md index b9ce910..cf388b6 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -3,6 +3,7 @@ - An easy to import and use test rig for Go projects - Can be used as a package or CLI - Used to find and fix flaky tests +- Minimal CPU, RAM, and timing overhead added to test execution ## Validate changes diff --git a/internal/runner/runner.go b/internal/runner/runner.go index 7f640d6..fbbd8a8 100644 --- a/internal/runner/runner.go +++ b/internal/runner/runner.go @@ -1120,9 +1120,14 @@ func diagnoseIteration(ctx context.Context, p diagnoseIterationParams) error { return err } bw := bufio.NewWriterSize(resultsFile, 128*1024) + var retErr error defer func() { - _ = bw.Flush() - _ = resultsFile.Close() + if err := bw.Flush(); err != nil && retErr == nil { + retErr = err + } + if err := resultsFile.Close(); err != nil && retErr == nil { + retErr = err + } }() args, err := buildDiagnoseArgs(goTestArgs, shuffleSeed) @@ -1143,7 +1148,8 @@ func diagnoseIteration(ctx context.Context, p diagnoseIterationParams) error { cmd.Stderr = sw if out.AIOutput() { - return cmd.Run() + retErr = cmd.Run() + return retErr } if parallelProgress != nil { @@ -1191,14 +1197,14 @@ func diagnoseIteration(ctx context.Context, p diagnoseIterationParams) error { redraw(true) } - runErr := cmd.Run() + retErr = cmd.Run() close(tickDone) tickWG.Wait() if live { out.ClearInline() } - return runErr + return retErr } func newRunMeta( diff --git a/internal/runner/runner_bench_test.go b/internal/runner/runner_bench_test.go index 94808dc..45d2d96 100644 --- a/internal/runner/runner_bench_test.go +++ b/internal/runner/runner_bench_test.go @@ -7,6 +7,7 @@ import ( "os" "os/exec" "path/filepath" + "strconv" "strings" "sync" "testing" @@ -171,6 +172,15 @@ var overheadMatrix = []overheadConfig{ // subprocesses and is too slow for the normal test run. const overheadMatrixEnv = "TESTRIG_BENCH_OVERHEAD" +// overheadMatrixRunsEnv sets how many times each matrix cell is benchmarked +// before averaging (default 5). Use 3–5 for stabler numbers; 1 for a quick smoke. +const overheadMatrixRunsEnv = "TESTRIG_BENCH_OVERHEAD_RUNS" + +const ( + overheadMatrixRunsDefault = 5 + overheadMatrixRunsMax = 10 +) + func skipUnlessDiagnoseOverheadMatrix(t *testing.T) { t.Helper() if os.Getenv(overheadMatrixEnv) == "" { @@ -181,12 +191,62 @@ func skipUnlessDiagnoseOverheadMatrix(t *testing.T) { } } +func overheadMatrixRuns() int { + s := strings.TrimSpace(os.Getenv(overheadMatrixRunsEnv)) + if s == "" { + return overheadMatrixRunsDefault + } + n, err := strconv.Atoi(s) + if err != nil || n < 1 { + return overheadMatrixRunsDefault + } + return min(n, overheadMatrixRunsMax) +} + +// averageBenchmarkResults averages per-op metrics across repeated benchmark runs. +func averageBenchmarkResults(results []testing.BenchmarkResult) testing.BenchmarkResult { + if len(results) == 0 { + return testing.BenchmarkResult{} + } + var ns, bytes, allocs int64 + for _, r := range results { + ns += r.NsPerOp() + bytes += r.AllocedBytesPerOp() + allocs += r.AllocsPerOp() + } + n := int64(len(results)) + return testing.BenchmarkResult{ + N: 1, + T: time.Duration(ns / n), + MemBytes: uint64(bytes / n), + MemAllocs: uint64(allocs / n), + } +} + +// repeatBenchmark runs fn runs times and returns the averaged result. +// phase labels log lines; pass "" to omit per-run logging. +func repeatBenchmark(t *testing.T, runs int, phase string, fn func() testing.BenchmarkResult) testing.BenchmarkResult { + t.Helper() + results := make([]testing.BenchmarkResult, runs) + for i := range runs { + start := time.Now() + results[i] = fn() + if phase != "" { + t.Logf("%s: run %d/%d done (wall %s, %s/op)", + phase, i+1, runs, time.Since(start).Round(time.Second), roundedDur(results[i].NsPerOp())) + } + } + avg := averageBenchmarkResults(results) + if phase != "" { + t.Logf("%s: mean %s/op over %d runs", phase, roundedDur(avg.NsPerOp()), runs) + } + return avg +} + // runDiagnoseOverheadMatrix measures Diagnose overhead vs the raw `go test` floor // across overheadMatrix for target and logs a diff table. It is a test helper, not // a Benchmark, because it drives testing.Benchmark internally (which deadlocks if // called from a benchmark). -// -//nolint:paralleltest // serial by design: spawns many go test subprocesses and measures wall time. func runDiagnoseOverheadMatrix(t *testing.T, label, target string) { t.Helper() skipUnlessDiagnoseOverheadMatrix(t) @@ -200,32 +260,51 @@ func runDiagnoseOverheadMatrix(t *testing.T, label, target string) { out := output.NewForTest(true, io.Discard, io.Discard, false) ctx := context.Background() - - rows := make([]overheadRow, 0, len(overheadMatrix)) - for _, cfg := range overheadMatrix { - base := testing.Benchmark(func(b *testing.B) { - b.ReportAllocs() - for b.Loop() { - require.NoError(b, baselineWorkload(ctx, repoRoot, target, cfg.iterations, cfg.parallel)) - } + runs := overheadMatrixRuns() + total := len(overheadMatrix) + t.Logf("[%s] overhead matrix: target=%s, %d cells, %d runs/cell (%s overrides)", + label, target, total, runs, overheadMatrixRunsEnv) + + rows := make([]overheadRow, 0, total) + for cell, cfg := range overheadMatrix { + cellLabel := fmt.Sprintf("[%s] cell %d/%d iters=%d parallel=%d", + label, cell+1, total, cfg.iterations, cfg.parallel) + + base := repeatBenchmark(t, runs, cellLabel+" baseline", func() testing.BenchmarkResult { + r := testing.Benchmark(func(b *testing.B) { + b.ReportAllocs() + for b.Loop() { + require.NoError(b, baselineWorkload(ctx, repoRoot, target, cfg.iterations, cfg.parallel)) + } + }) + require.NotZero(t, r.N, "baseline workload failed for %+v", cfg) + return r }) - require.NotZero(t, base.N, "baseline workload failed for %+v", cfg) - diag := testing.Benchmark(func(b *testing.B) { - b.ReportAllocs() - for b.Loop() { - require.NoError(b, diagnoseWorkload(ctx, out, repoRoot, target, cfg.iterations, cfg.parallel)) - } + diag := repeatBenchmark(t, runs, cellLabel+" diagnose", func() testing.BenchmarkResult { + r := testing.Benchmark(func(b *testing.B) { + b.ReportAllocs() + for b.Loop() { + require.NoError(b, diagnoseWorkload(ctx, out, repoRoot, target, cfg.iterations, cfg.parallel)) + } + }) + require.NotZero(t, r.N, "diagnose workload failed for %+v", cfg) + return r }) - require.NotZero(t, diag.N, "diagnose workload failed for %+v", cfg) + + overheadNs := diag.NsPerOp() - base.NsPerOp() + t.Logf("%s: done — overhead %s (%s of diagnose; baseline %s, diagnose %s)", + cellLabel, overheadDur(overheadNs), overheadPercent(overheadNs, diag.NsPerOp()), + roundedDur(base.NsPerOp()), roundedDur(diag.NsPerOp())) rows = append(rows, overheadRow{cfg: cfg, baseline: base, diagnose: diag}) } - printDiagnoseOverhead(t, label, target, rows) + printDiagnoseOverhead(t, label, target, runs, rows) } // TestDiagnoseOverhead_Dummy runs the overhead matrix against the tiny dummy package. -// Run via `just bench_overhead_matrix_dummy`. +// Run via `just bench_overhead_matrix_dummy`. Each cell is benchmarked 5 times by default +// and averaged; set TESTRIG_BENCH_OVERHEAD_RUNS (e.g. 3) to tune accuracy vs wall time. // //nolint:paralleltest // serial by design: spawns many go test subprocesses and measures wall time. func TestDiagnoseOverhead_Dummy(t *testing.T) { @@ -253,27 +332,74 @@ func overheadDur(ns int64) string { return roundedDur(ns) } +// overheadPercent is overhead as a share of diagnose runtime (overhead / diagnose). +func overheadPercent(overheadNs, diagnoseNs int64) string { + if diagnoseNs <= 0 { + return "n/a" + } + if overheadNs < 0 { + overheadNs = 0 + } + return fmt.Sprintf("%.1f%%", float64(overheadNs)*100/float64(diagnoseNs)) +} + // printDiagnoseOverhead logs a table of baseline vs diagnose wall time per config. // overhead = diagnose ns/op - baseline ns/op; overhead/iter divides by iterations. -func printDiagnoseOverhead(t *testing.T, label, target string, rows []overheadRow) { +// Each cell is the mean of runs repeated benchmark invocations. +func printDiagnoseOverhead(t *testing.T, label, target string, runs int, rows []overheadRow) { t.Helper() var sb strings.Builder tw := tabwriter.NewWriter(&sb, 0, 0, 2, ' ', 0) - _, _ = fmt.Fprintln(tw, "iters\tparallel\tbaseline\tdiagnose\toverhead\toverhead/iter") + _, _ = fmt.Fprintln(tw, "iters\tparallel\tbaseline\tdiagnose\toverhead\toverhead%\toverhead/iter") for _, r := range rows { overheadNs := r.diagnose.NsPerOp() - r.baseline.NsPerOp() perIterNs := overheadNs / int64(max(r.cfg.iterations, 1)) - _, _ = fmt.Fprintf(tw, "%d\t%d\t%s\t%s\t%s\t%s\n", + diagNs := r.diagnose.NsPerOp() + _, _ = fmt.Fprintf(tw, "%d\t%d\t%s\t%s\t%s\t%s\t%s\n", r.cfg.iterations, r.cfg.parallel, roundedDur(r.baseline.NsPerOp()), - roundedDur(r.diagnose.NsPerOp()), + roundedDur(diagNs), overheadDur(overheadNs), + overheadPercent(overheadNs, diagNs), overheadDur(perIterNs), ) } _ = tw.Flush() - t.Logf("\nDiagnose overhead vs raw go test (%s, target=%s):\n%s", label, target, sb.String()) + t.Logf(` +-------------------------------------------------------------------------------- +Diagnose overhead vs raw go test (%s, target=%s, %d-run average per cell) +-------------------------------------------------------------------------------- +%s`, + label, target, runs, sb.String()) +} + +func TestAverageBenchmarkResults(t *testing.T) { + t.Parallel() + avg := averageBenchmarkResults([]testing.BenchmarkResult{ + {N: 10, T: 1_000, MemBytes: 1_000, MemAllocs: 100}, + {N: 10, T: 3_000, MemBytes: 3_000, MemAllocs: 300}, + }) + require.Equal(t, int64(200), avg.NsPerOp()) + require.Equal(t, int64(200), avg.AllocedBytesPerOp()) + require.Equal(t, int64(20), avg.AllocsPerOp()) +} + +func TestOverheadPercent(t *testing.T) { + t.Parallel() + require.Equal(t, "20.0%", overheadPercent(20, 100)) + require.Equal(t, "0.0%", overheadPercent(-5, 100)) + require.Equal(t, "n/a", overheadPercent(10, 0)) +} + +func TestOverheadMatrixRuns(t *testing.T) { + require.Equal(t, 5, overheadMatrixRuns()) + t.Setenv(overheadMatrixRunsEnv, "3") + require.Equal(t, 3, overheadMatrixRuns()) + t.Setenv(overheadMatrixRunsEnv, "99") + require.Equal(t, overheadMatrixRunsMax, overheadMatrixRuns()) + t.Setenv(overheadMatrixRunsEnv, "nope") + require.Equal(t, overheadMatrixRunsDefault, overheadMatrixRuns()) } func BenchmarkResolveModuleDir(b *testing.B) { diff --git a/justfile b/justfile index 7c6c1bc..c129a70 100644 --- a/justfile +++ b/justfile @@ -14,19 +14,19 @@ test: test_race: go tool gotestsum -- -race ./... -# Run benchmarks with memory stats and specific CPU counts +# Run standard benchmarks bench: go test -bench=. -benchmem -run=^$ ./... -# Print a diff table of diagnose overhead (dummy package; fast). +# Diagnose overhead matrix (dummy; fast). 5 runs averaged per cell; TESTRIG_BENCH_OVERHEAD_RUNS=3 to override. bench_overhead_matrix_dummy: - TESTRIG_BENCH_OVERHEAD=1 go test ./internal/runner/ -run='^TestDiagnoseOverhead_Dummy$' -v + TESTRIG_BENCH_OVERHEAD=1 go test ./internal/runner/ -run='^TestDiagnoseOverhead_Dummy$' -count=1 -v -# Same matrix against the full testrig module (./...); slow. +# Run benchmark to measure diagnose overhead against the full testrig module (./...); slow. bench_overhead_matrix_dogfood: - TESTRIG_BENCH_OVERHEAD=1 go test ./internal/runner/ -run='^TestDiagnoseOverhead_Dogfood$' -v + TESTRIG_BENCH_OVERHEAD=1 go test ./internal/runner/ -run='^TestDiagnoseOverhead_Dogfood$' -count=1 -v -# Run dummy then dogfood overhead matrices. +# Run benchmarks to measure diagnose overhead for both dummy and dogfood targets. bench_overhead_matrix: bench_overhead_matrix_dummy bench_overhead_matrix_dogfood # Local GoReleaser dry-run (snapshot) From 143ddde8b21d2ba749cad812de9946ea02a2064e Mon Sep 17 00:00:00 2001 From: Adam Hamrick Date: Fri, 29 May 2026 09:51:49 -0400 Subject: [PATCH 7/9] lint --- internal/runner/runner_bench_test.go | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/internal/runner/runner_bench_test.go b/internal/runner/runner_bench_test.go index 45d2d96..e5c719f 100644 --- a/internal/runner/runner_bench_test.go +++ b/internal/runner/runner_bench_test.go @@ -215,11 +215,19 @@ func averageBenchmarkResults(results []testing.BenchmarkResult) testing.Benchmar allocs += r.AllocsPerOp() } n := int64(len(results)) + avgBytes := bytes / n + avgAllocs := allocs / n + if avgBytes < 0 { + avgBytes = 0 + } + if avgAllocs < 0 { + avgAllocs = 0 + } return testing.BenchmarkResult{ N: 1, T: time.Duration(ns / n), - MemBytes: uint64(bytes / n), - MemAllocs: uint64(allocs / n), + MemBytes: uint64(avgBytes), + MemAllocs: uint64(avgAllocs), } } From 6e1bba077575fb06e97d400492c7700ba336fd66 Mon Sep 17 00:00:00 2001 From: Adam Hamrick Date: Fri, 29 May 2026 09:54:56 -0400 Subject: [PATCH 8/9] lefthook --- lefthook.yml | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/lefthook.yml b/lefthook.yml index 7d6443d..f101a69 100644 --- a/lefthook.yml +++ b/lefthook.yml @@ -54,14 +54,19 @@ pre-commit: golangci-lint: tags: linter - glob: "*.go" - root: "" - run: golangci-lint run --fast-only --fix ./... + glob: + - "*.go" + - "**/*.go" + - "go.mod" + - "go.sum" + run: golangci-lint run --fix --fast-only ./... stage_fixed: true go-generate: tags: linter - glob: "*.go" + glob: + - "*.go" + - "**/*.go" run: | before=$(git status --porcelain) go generate ./... @@ -75,7 +80,11 @@ pre-commit: go-mod-tidy: tags: linter - glob: "go.mod" + glob: + - "go.mod" + - "go.sum" + - "*.go" + - "**/*.go" run: | before=$(git status --porcelain) while IFS= read -r mod; do From f975f383d3166b66bcc6c62c1f474b2fafecb9a1 Mon Sep 17 00:00:00 2001 From: Adam Hamrick Date: Fri, 29 May 2026 11:12:34 -0400 Subject: [PATCH 9/9] lefthook --- lefthook.yml | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/lefthook.yml b/lefthook.yml index f101a69..3e25b95 100644 --- a/lefthook.yml +++ b/lefthook.yml @@ -106,5 +106,9 @@ pre-push: commands: go-short-tests: tags: test - glob: "*.go|go.mod" + glob: + - "*.go" + - "**/*.go" + - "go.mod" + - "go.sum" run: go test -short ./...