diff --git a/.gitignore b/.gitignore index e04f680..16357a2 100644 --- a/.gitignore +++ b/.gitignore @@ -9,10 +9,7 @@ diagnose-*/ .env .venv/ - -# Local Phoenix DB for evals/eval.py -evals/.phoenix/ -__pycache__/ +*.test # Agents .antigravitycli/ diff --git a/AGENTS.md b/AGENTS.md index a862610..cf388b6 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -3,6 +3,7 @@ - An easy to import and use test rig for Go projects - Can be used as a package or CLI - Used to find and fix flaky tests +- Minimal CPU, RAM, and timing overhead added to test execution ## Validate changes @@ -18,7 +19,6 @@ go test ./... # Test - `internal/runner/` — core test execution. `Diagnose` is the main entry point; `diagnoseRunHooks` carries iteration hooks as `func(context.Context) error` fields. - `internal/config/` — Cobra flag registry config loading. `config.App` is the unified config struct. - `internal/output/` — output printer abstraction. `--ai-output` flag controls format. -- `internal/repo/` — git/module helpers. ## Critical decisions diff --git a/example_test.go b/example_test.go index cff11f4..a69ae6d 100644 --- a/example_test.go +++ b/example_test.go @@ -12,18 +12,12 @@ func ExampleRun() { // GlobalSetup runs once before any tests start. testrig.GlobalSetup(func(_ context.Context) error { fmt.Println("Starting mock background service...") - // Simulate starting a dependency, e.g.: - // cmd := exec.CommandContext(ctx, "docker", "compose", "up", "-d") - // return cmd.Run() return nil }), // IterationSetup runs before each diagnose iteration. 
testrig.IterationSetup(func(_ context.Context) error { fmt.Println("Resetting database state for next iteration...") - // Simulate resetting state: - // cmd := exec.CommandContext(ctx, "psql", "-c", "TRUNCATE events") - // return cmd.Run() return nil }), diff --git a/go.mod b/go.mod index 002f704..c56b021 100644 --- a/go.mod +++ b/go.mod @@ -15,6 +15,7 @@ require ( github.com/spf13/cobra v1.10.2 github.com/spf13/pflag v1.0.10 github.com/stretchr/testify v1.11.1 + golang.org/x/sync v0.20.0 ) require ( @@ -50,7 +51,6 @@ require ( github.com/xo/terminfo v0.0.0-20220910002029-abceb7e1c41e // indirect golang.org/x/exp v0.0.0-20260410095643-746e56fc9e2f // indirect golang.org/x/mod v0.35.0 // indirect - golang.org/x/sync v0.20.0 // indirect golang.org/x/sys v0.45.0 // indirect golang.org/x/term v0.43.0 // indirect golang.org/x/text v0.37.0 // indirect diff --git a/internal/runner/analyze.go b/internal/runner/analyze.go index d6a92f3..40a2c66 100644 --- a/internal/runner/analyze.go +++ b/internal/runner/analyze.go @@ -14,6 +14,7 @@ import ( "sort" "strconv" "strings" + "sync" "time" "charm.land/lipgloss/v2" @@ -206,12 +207,18 @@ func (rep *Report) TestGroups() []TestGroup { // coupling the parser to the filesystem. type LogMap map[testKey]map[int]string +var readerPool = sync.Pool{ + New: func() any { + return bufio.NewReaderSize(nil, 1024*1024) + }, +} + // Analyze reads per-iteration test2json streams and classifies tests. // Malformed lines are silently skipped (go test can interleave non-JSON). 
func Analyze(iterations []io.Reader, slowThreshold time.Duration) (*Report, LogMap, error) { aggs := make(map[testKey]*aggregate) for i, r := range iterations { - if err := scanIterationJSONL(r, i, aggs, nil); err != nil { + if err := scanIterationJSONL(r, i, aggs, nil, slowThreshold); err != nil { return nil, nil, err } } @@ -241,16 +248,35 @@ func (a *aggregate) recordElapsed(iterIdx int, d time.Duration) { // scanIterationJSONL merges one iteration's JSONL stream into aggs at iterIdx. // meta may be nil; when set, records e.g. compile/build failure from FailedBuild on fail events. -func scanIterationJSONL(r io.Reader, iterIdx int, aggs map[testKey]*aggregate, meta *iterationScanMeta) error { - reader := bufio.NewReaderSize(r, 1024*1024) +func scanIterationJSONL( + r io.Reader, + iterIdx int, + aggs map[testKey]*aggregate, + meta *iterationScanMeta, + slowThreshold time.Duration, +) error { + reader := readerPool.Get().(*bufio.Reader) + reader.Reset(r) + defer func() { + reader.Reset(nil) + readerPool.Put(reader) + }() + for { - line, err := reader.ReadBytes('\n') + line, err := reader.ReadSlice('\n') + if err == bufio.ErrBufferFull { + line = append([]byte(nil), line...) // copy first: the ReadSlice result aliases the reader's buffer and is overwritten by the next read + rest, err2 := reader.ReadBytes('\n')
+ line, err = append(line, rest...), err2 + } + + if len(line) > 0 && line[0] == '{' { var ev TestEvent if json.Unmarshal(line, &ev) == nil { - applyTestEvent(aggs, iterIdx, &ev, meta) + applyTestEvent(aggs, iterIdx, &ev, meta, slowThreshold) } } + if err != nil { if err != io.EOF { return fmt.Errorf("reading iteration %d: %w", iterIdx, err) @@ -260,7 +286,13 @@ func scanIterationJSONL(r io.Reader, iterIdx int, aggs map[testKey]*aggregate, m } } -func applyTestEvent(aggs map[testKey]*aggregate, iterIdx int, ev *TestEvent, meta *iterationScanMeta) { +func applyTestEvent( + aggs map[testKey]*aggregate, + iterIdx int, + ev *TestEvent, + meta *iterationScanMeta, + slowThreshold time.Duration, +) { key := testKey{Package: ev.Package, Test: ev.Test} a := aggs[key] if a == nil { @@ -271,7 +303,11 @@ func applyTestEvent(aggs map[testKey]*aggregate, iterIdx int, ev *TestEvent, met case "pass": a.passes++ a.iterations[iterIdx] = struct{}{} - a.recordElapsed(iterIdx, seconds(ev.Elapsed)) + el := seconds(ev.Elapsed) + a.recordElapsed(iterIdx, el) + if !a.timedOut && (slowThreshold == 0 || el <= slowThreshold) { + delete(a.outputs, iterIdx) + } case "fail": if meta != nil && ev.FailedBuild != "" { meta.sawFailedBuild = true @@ -284,7 +320,11 @@ func applyTestEvent(aggs map[testKey]*aggregate, iterIdx int, ev *TestEvent, met a.skips++ a.iterations[iterIdx] = struct{}{} a.skipIters[iterIdx] = true - a.recordElapsed(iterIdx, seconds(ev.Elapsed)) + el := seconds(ev.Elapsed) + a.recordElapsed(iterIdx, el) + if !a.timedOut { + delete(a.outputs, iterIdx) + } case "output": if strings.Contains(ev.Output, timeoutPanic) { a.timedOut = true @@ -559,7 +599,7 @@ func countNamedTestsSkippedInAggs(aggs map[testKey]*aggregate) int { func DigestIterationJSONL(r io.Reader, slowThreshold time.Duration) (IterationDigest, error) { aggs := make(map[testKey]*aggregate) var meta iterationScanMeta - if err := scanIterationJSONL(r, 0, aggs, &meta); err != nil { + if err := scanIterationJSONL(r, 0, aggs, &meta, slowThreshold); err !=
nil { return IterationDigest{}, err } reattributeTimeouts(aggs, newAggregate) diff --git a/internal/runner/diagnose_progress.go b/internal/runner/diagnose_progress.go index cf2d219..9e0a44a 100644 --- a/internal/runner/diagnose_progress.go +++ b/internal/runner/diagnose_progress.go @@ -1,13 +1,8 @@ package runner import ( - "context" - "encoding/json" - "errors" "fmt" "io" - "os" - "os/exec" "slices" "strings" "sync" @@ -70,44 +65,6 @@ func packagePatternsFromEnd(args []string) []string { return pkgs } -// listTestPackageCount runs `go list -test -e` for the trailing package patterns -// in go test arguments (see packagePatternsFromEnd). On error or no patterns, -// returns an error or zero packages. -func listTestPackageCount(ctx context.Context, repoRoot string, goTestArgs []string) (int, error) { - pkgs := packagePatternsFromEnd(goTestArgs) - if len(pkgs) == 0 { - return 0, errors.New("no package patterns in go test arguments (put packages last, after flags)") - } - // Binary is fixed ("go"); pkgs come from the user's CLI package patterns by design. - //nolint:gosec // G204: forwarded package patterns from CLI invocation - cmd := exec.CommandContext(ctx, "go", append([]string{"list", "-test", "-e", "-f", "{{.ImportPath}}"}, pkgs...)...) - cmd.Dir = repoRoot - cmd.Env = os.Environ() - out, err := cmd.Output() - if err != nil { - return 0, err - } - n := 0 - for line := range strings.SplitSeq(string(out), "\n") { - if strings.TrimSpace(line) != "" { - n++ - } - } - if n == 0 { - return 0, errors.New("go list returned no packages") - } - return n, nil -} - -// diagnoseProgress tracks completed packages from a go test -json stream. 
-type diagnoseProgress struct { - mu sync.Mutex - done map[string]struct{} - lastPkg string - pkgOutcome map[string]string // package import path → pass|fail|skip (package-level events only) - total int // -1 when denominator is unknown (go list failed or empty) -} - type parallelDiagnoseProgress struct { mu sync.Mutex renderMu sync.Mutex @@ -138,14 +95,6 @@ func newParallelDiagnoseProgressAt(totalIterations int, poolStartedAt time.Time) } } -func newDiagnoseProgress(totalPackages int) *diagnoseProgress { - return &diagnoseProgress{ - done: make(map[string]struct{}), - pkgOutcome: make(map[string]string), - total: totalPackages, - } -} - func (p *parallelDiagnoseProgress) start(iteration int) { if p == nil { return @@ -214,50 +163,6 @@ func (p *parallelDiagnoseProgress) renderSnapshot( return completed, total, actives, poolElapsed } -// onTestJSONLine updates state from one JSONL line. Returns true if the number -// of completed packages increased (for throttled redraws). -func (p *diagnoseProgress) onTestJSONLine(line []byte) (completedIncreased bool) { - if len(line) == 0 || line[0] != '{' { - return false - } - var ev TestEvent - if err := json.Unmarshal(line, &ev); err != nil { - return false - } - if ev.Package != "" { - p.mu.Lock() - p.lastPkg = ev.Package - p.mu.Unlock() - } - if !isPackageTerminalEvent(&ev) { - return false - } - p.mu.Lock() - defer p.mu.Unlock() - p.pkgOutcome[ev.Package] = ev.Action - before := len(p.done) - p.done[ev.Package] = struct{}{} - return len(p.done) > before -} - -func isPackageTerminalEvent(ev *TestEvent) bool { - if ev.Package == "" || ev.Test != "" { - return false - } - switch ev.Action { - case "pass", "fail", "skip": - return true - default: - return false - } -} - -func (p *diagnoseProgress) snapshot() (completed int, total int, lastPkg string, outcome string) { - p.mu.Lock() - defer p.mu.Unlock() - return len(p.done), p.total, p.lastPkg, p.pkgOutcome[p.lastPkg] -} - // progressBracket wraps inner (already styled) in 
muted square brackets. func progressBracket(inner string) string { return termstyle.Muted.Render("[") + inner + termstyle.Muted.Render("]") diff --git a/internal/runner/diagnose_progress_test.go b/internal/runner/diagnose_progress_test.go index 9c4cd34..d4188e4 100644 --- a/internal/runner/diagnose_progress_test.go +++ b/internal/runner/diagnose_progress_test.go @@ -11,61 +11,6 @@ import ( "github.com/smartcontractkit/testrig/internal/output" ) -func TestDiagnoseProgress_onTestJSONLine_packageTerminal(t *testing.T) { - t.Parallel() - p := newDiagnoseProgress(2) - - require.False(t, p.onTestJSONLine([]byte(`not json`))) - require.False(t, p.onTestJSONLine([]byte(`{"Action":"run","Package":"a/b","Test":"TestX"}`))) - - require.True(t, p.onTestJSONLine([]byte(`{"Action":"pass","Package":"a/b"}`))) - c, tot, _, _ := p.snapshot() - require.Equal(t, 1, c) - require.Equal(t, 2, tot) - - // Duplicate package-level pass must not report a second completion tick. - require.False(t, p.onTestJSONLine([]byte(`{"Action":"pass","Package":"a/b"}`))) - c, _, _, _ = p.snapshot() - require.Equal(t, 1, c) - - require.True(t, p.onTestJSONLine([]byte(`{"Action":"fail","Package":"c/d"}`))) - c, _, _, _ = p.snapshot() - require.Equal(t, 2, c) -} - -func TestDiagnoseProgress_onTestJSONLine_skipFail(t *testing.T) { - t.Parallel() - p := newDiagnoseProgress(1) - require.True(t, p.onTestJSONLine([]byte(`{"Action":"skip","Package":"p"}`))) - c, _, _, _ := p.snapshot() - require.Equal(t, 1, c) - - p2 := newDiagnoseProgress(1) - require.True(t, p2.onTestJSONLine([]byte(`{"Action":"fail","Package":"p"}`))) - c2, _, _, _ := p2.snapshot() - require.Equal(t, 1, c2) -} - -func TestDiagnoseProgress_lastPkgUpdates(t *testing.T) { - t.Parallel() - p := newDiagnoseProgress(10) - p.onTestJSONLine([]byte(`{"Action":"run","Package":"x/y","Test":"TestZ"}`)) - _, _, last, _ := p.snapshot() - require.Equal(t, "x/y", last) -} - -func TestDiagnoseProgress_pkgOutcomeOnTerminal(t *testing.T) { - t.Parallel() - p := 
newDiagnoseProgress(5) - p.onTestJSONLine([]byte(`{"Action":"run","Package":"p/q","Test":"TestZ"}`)) - _, _, _, out := p.snapshot() - require.Empty(t, out) - p.onTestJSONLine([]byte(`{"Action":"pass","Package":"p/q"}`)) - _, _, last, out := p.snapshot() - require.Equal(t, "p/q", last) - require.Equal(t, "pass", out) -} - func TestEllipsizeRight(t *testing.T) { t.Parallel() require.Equal(t, "short", ellipsizeRight("short", 10)) diff --git a/internal/runner/runner.go b/internal/runner/runner.go index 86b9b6a..fbbd8a8 100644 --- a/internal/runner/runner.go +++ b/internal/runner/runner.go @@ -34,6 +34,7 @@ type diagnoseIterationParams struct { Out *output.Printer ResultsDir string GoTestArgs []string + ModuleDir string Iteration int ShuffleSeed int64 Env []string @@ -102,8 +103,6 @@ func Gotestsum(ctx context.Context, conf *config.App, args []string) error { // iterSetup and iterTeardown run before/after each iteration. Either may be // nil. Teardown runs even when the iteration's go test invocation fails; its // error is reported only when the iteration itself succeeded. 
-// -//nolint:gocyclo func Diagnose( ctx context.Context, conf *config.App, @@ -384,6 +383,12 @@ func runDiagnoseIterations( if hooks.runIteration == nil { hooks.runIteration = diagnoseIteration } + + moduleDir, adjustedArgs, err := resolveModuleDir(conf.RepoRoot, goTestArgs) + if err != nil { + return diagnoseRunState{}, err + } + if hooks.seed == nil { hooks.seed = func() int64 { return rand.Int64N(1<<62) + 1 } //nolint:gosec // G404: non-crypto seed for test shuffle } @@ -432,7 +437,8 @@ func runDiagnoseIterations( conf: conf, out: out, resultsDir: resultsDir, - goTestArgs: goTestArgs, + goTestArgs: adjustedArgs, + moduleDir: moduleDir, hooks: hooks, parallel: parallel, parallelProgress: parallelProgress, @@ -487,6 +493,7 @@ type diagnoseWorker struct { out *output.Printer resultsDir string goTestArgs []string + moduleDir string hooks diagnoseRunHooks parallel int parallelProgress *parallelDiagnoseProgress @@ -529,6 +536,7 @@ func (w *diagnoseWorker) run(runCtx context.Context, resource diagnoseIterationR Out: w.out, ResultsDir: w.resultsDir, GoTestArgs: w.goTestArgs, + ModuleDir: w.moduleDir, Iteration: iteration, ShuffleSeed: seed, Env: resource.Env, @@ -1098,16 +1106,12 @@ func (sw *syncedWriter) Write(p []byte) (int, error) { func diagnoseIteration(ctx context.Context, p diagnoseIterationParams) error { conf, out := p.Conf, p.Out - resultsDir, goTestArgs := p.ResultsDir, p.GoTestArgs + resultsDir := p.ResultsDir iteration, shuffleSeed := p.Iteration, p.ShuffleSeed env := p.Env liveProgress, parallelProgress := p.LiveProgress, p.ParallelProgress diagnoseRunStart, serialProgressMu := p.DiagnoseRunStart, p.SerialProgressMu - - moduleDir, goTestArgs, err := resolveModuleDir(conf.RepoRoot, goTestArgs) - if err != nil { - return err - } + moduleDir, goTestArgs := p.ModuleDir, p.GoTestArgs start := time.Now() jsonPath := filepath.Join(resultsDir, fmt.Sprintf("iteration-%d.log.jsonl", iteration)) @@ -1115,7 +1119,16 @@ func diagnoseIteration(ctx context.Context, p 
diagnoseIterationParams) error { if err != nil { return err } - defer func() { _ = resultsFile.Close() }() + bw := bufio.NewWriterSize(resultsFile, 128*1024) + var retErr error + defer func() { + if err := bw.Flush(); err != nil && retErr == nil { + retErr = err + } + if err := resultsFile.Close(); err != nil && retErr == nil { + retErr = err + } + }() args, err := buildDiagnoseArgs(goTestArgs, shuffleSeed) if err != nil { @@ -1130,29 +1143,20 @@ func diagnoseIteration(ctx context.Context, p diagnoseIterationParams) error { cmd.Cancel = func() error { return cmd.Process.Signal(os.Interrupt) } cmd.WaitDelay = 5 * time.Second - if out.AIOutput() { - sw := &syncedWriter{w: resultsFile} - cmd.Stdout = sw - cmd.Stderr = sw - return cmd.Run() - } - - sw := &syncedWriter{w: resultsFile} + sw := &syncedWriter{w: bw} + cmd.Stdout = sw cmd.Stderr = sw - totalPkgs := -1 - if n, listErr := listTestPackageCount(ctx, moduleDir, goTestArgs); listErr == nil { - totalPkgs = n + if out.AIOutput() { + retErr = cmd.Run() + return retErr } - prog := newDiagnoseProgress(totalPkgs) + if parallelProgress != nil { parallelProgress.start(iteration) defer parallelProgress.finish(iteration) } - pr, pw := io.Pipe() - cmd.Stdout = pw - live := liveProgress && out.LiveInlineProgress() iter, iters := iteration+1, conf.Iterations if liveProgress && !live { @@ -1175,30 +1179,6 @@ func diagnoseIteration(ctx context.Context, p diagnoseIterationParams) error { ) } - var readWG sync.WaitGroup - var scanErr error - readWG.Go(func() { - r := bufio.NewReaderSize(pr, 1024*1024) - for { - line, err := r.ReadBytes('\n') - if len(line) > 0 { - if _, werr := sw.Write(line); werr != nil { - break - } - completedIncreased := prog.onTestJSONLine(line) - if completedIncreased && !live { - redraw(false) - } - } - if err != nil { - if err != io.EOF { - scanErr = err - } - break - } - } - }) - tickDone := make(chan struct{}) var tickWG sync.WaitGroup if live { @@ -1217,25 +1197,14 @@ func diagnoseIteration(ctx 
context.Context, p diagnoseIterationParams) error { redraw(true) } - runErr := cmd.Start() - started := runErr == nil - if started { - runErr = cmd.Wait() - _ = pw.Close() - } else { - _ = pw.CloseWithError(runErr) - } - readWG.Wait() + retErr = cmd.Run() close(tickDone) tickWG.Wait() if live { out.ClearInline() } - if scanErr != nil { - return fmt.Errorf("reading go test output: %w", scanErr) - } - return runErr + return retErr } func newRunMeta( diff --git a/internal/runner/runner_bench_test.go b/internal/runner/runner_bench_test.go new file mode 100644 index 0000000..e5c719f --- /dev/null +++ b/internal/runner/runner_bench_test.go @@ -0,0 +1,422 @@ +package runner + +import ( + "context" + "fmt" + "io" + "os" + "os/exec" + "path/filepath" + "strconv" + "strings" + "sync" + "testing" + "text/tabwriter" + "time" + + "github.com/stretchr/testify/require" + "golang.org/x/sync/semaphore" + + "github.com/smartcontractkit/testrig/internal/config" + "github.com/smartcontractkit/testrig/internal/output" +) + +const ( + benchDummyTarget = "./internal/runner/testdata/dummy/..." + benchDogfoodTarget = "./..." +) + +// baselineWorkload runs the raw `go test -json` floor for one diagnose-equivalent +// workload: `iterations` invocations against target, at most `parallel` running +// concurrently (mirroring how Diagnose schedules iterations across workers). +func baselineWorkload(ctx context.Context, repoRoot, target string, iterations, parallel int) error { + if parallel < 1 { + parallel = 1 + } + sem := semaphore.NewWeighted(int64(parallel)) + var wg sync.WaitGroup + var mu sync.Mutex + var firstErr error + for range iterations { + if err := sem.Acquire(ctx, 1); err != nil { + wg.Wait() + return err + } + wg.Go(func() { + defer sem.Release(1) + //nolint:gosec // G204: target is fixed per test (dummy or ./...) 
+ cmd := exec.CommandContext(ctx, "go", "test", "-json", "-count=1", target) + cmd.Dir = repoRoot + cmd.Env = envWithoutKey(os.Environ(), overheadMatrixEnv) + cmd.Stdout = io.Discard + cmd.Stderr = io.Discard + if err := cmd.Run(); err != nil { + mu.Lock() + if firstErr == nil { + firstErr = err + } + mu.Unlock() + } + }) + } + wg.Wait() + return firstErr +} + +// diagnoseWorkload runs one Diagnose call against target with the given iteration +// count and parallelism. Output is discarded. +func diagnoseWorkload( + ctx context.Context, + out *output.Printer, + repoRoot, target string, + iterations, parallel int, +) error { + conf := &config.App{ + RepoRoot: repoRoot, + Iterations: iterations, + ParallelIterations: parallel, + SlowThreshold: time.Second, + } + return Diagnose(ctx, conf, out, []string{target}, nil, nil) +} + +func envWithoutKey(env []string, key string) []string { + prefix := key + "=" + out := make([]string, 0, len(env)) + for _, e := range env { + if e == key || strings.HasPrefix(e, prefix) { + continue + } + out = append(out, e) + } + return out +} + +// existingDiagnoseDirs lists the diagnose-* result dirs currently in repoRoot. +func existingDiagnoseDirs(repoRoot string) []string { + matches, _ := filepath.Glob(filepath.Join(repoRoot, "diagnose-*")) + return matches +} + +// cleanupNewDiagnoseDirs removes any diagnose-* result dirs created during the +// benchmark, so repeated runs don't accumulate output dirs in the repo root. +func cleanupNewDiagnoseDirs(tb testing.TB, repoRoot string) { + tb.Helper() + before := make(map[string]struct{}) + for _, d := range existingDiagnoseDirs(repoRoot) { + before[d] = struct{}{} + } + tb.Cleanup(func() { + for _, d := range existingDiagnoseDirs(repoRoot) { + if _, ok := before[d]; !ok { + _ = os.RemoveAll(d) + } + } + }) +} + +// BenchmarkBaselineGoTest is the floor: raw `go test -json` against the same +// target Diagnose runs. 
Subtract its ns/op, B/op, allocs/op from +// BenchmarkDiagnose to read the overhead Diagnose adds. +func BenchmarkBaselineGoTest(b *testing.B) { + repoRoot, err := filepath.Abs("../..") + require.NoError(b, err) + ctx := context.Background() + + b.ReportAllocs() + for b.Loop() { + require.NoError(b, baselineWorkload(ctx, repoRoot, benchDummyTarget, 1, 1)) + } +} + +// BenchmarkDiagnose runs one Diagnose iteration against the same target as +// BenchmarkBaselineGoTest. ns/op minus baseline is the overhead Diagnose adds. +func BenchmarkDiagnose(b *testing.B) { + repoRoot, err := filepath.Abs("../..") + require.NoError(b, err) + cleanupNewDiagnoseDirs(b, repoRoot) + + out := output.NewForTest(true, io.Discard, io.Discard, false) + ctx := context.Background() + + b.ReportAllocs() + for b.Loop() { + require.NoError(b, diagnoseWorkload(ctx, out, repoRoot, benchDummyTarget, 1, 1)) + } +} + +// overheadConfig is one (iterations, parallel) point in the overhead matrix. +type overheadConfig struct { + iterations int + parallel int +} + +// overheadRow pairs a config with its measured baseline and diagnose results. +type overheadRow struct { + cfg overheadConfig + baseline testing.BenchmarkResult + diagnose testing.BenchmarkResult +} + +// overheadMatrix is the set of (iterations, parallel) points measured by +// runDiagnoseOverheadMatrix: single run, sequential iterations, then parallel. +var overheadMatrix = []overheadConfig{ + {iterations: 1, parallel: 1}, + {iterations: 4, parallel: 1}, + {iterations: 4, parallel: 4}, + {iterations: 8, parallel: 1}, + {iterations: 8, parallel: 8}, +} + +// overheadMatrixEnv gates TestDiagnoseOverhead_*; it spawns many `go test` +// subprocesses and is too slow for the normal test run. +const overheadMatrixEnv = "TESTRIG_BENCH_OVERHEAD" + +// overheadMatrixRunsEnv sets how many times each matrix cell is benchmarked +// before averaging (default 5). Use 3–5 for stabler numbers; 1 for a quick smoke.
+const overheadMatrixRunsEnv = "TESTRIG_BENCH_OVERHEAD_RUNS" + +const ( + overheadMatrixRunsDefault = 5 + overheadMatrixRunsMax = 10 +) + +func skipUnlessDiagnoseOverheadMatrix(t *testing.T) { + t.Helper() + if os.Getenv(overheadMatrixEnv) == "" { + t.Skipf("set %s=1 to run the diagnose overhead matrix", overheadMatrixEnv) + } + if testing.Short() { + t.Skip("skipping diagnose overhead matrix in short mode") + } +} + +func overheadMatrixRuns() int { + s := strings.TrimSpace(os.Getenv(overheadMatrixRunsEnv)) + if s == "" { + return overheadMatrixRunsDefault + } + n, err := strconv.Atoi(s) + if err != nil || n < 1 { + return overheadMatrixRunsDefault + } + return min(n, overheadMatrixRunsMax) +} + +// averageBenchmarkResults averages per-op metrics across repeated benchmark runs. +func averageBenchmarkResults(results []testing.BenchmarkResult) testing.BenchmarkResult { + if len(results) == 0 { + return testing.BenchmarkResult{} + } + var ns, bytes, allocs int64 + for _, r := range results { + ns += r.NsPerOp() + bytes += r.AllocedBytesPerOp() + allocs += r.AllocsPerOp() + } + n := int64(len(results)) + avgBytes := bytes / n + avgAllocs := allocs / n + if avgBytes < 0 { + avgBytes = 0 + } + if avgAllocs < 0 { + avgAllocs = 0 + } + return testing.BenchmarkResult{ + N: 1, + T: time.Duration(ns / n), + MemBytes: uint64(avgBytes), + MemAllocs: uint64(avgAllocs), + } +} + +// repeatBenchmark runs fn runs times and returns the averaged result. +// phase labels log lines; pass "" to omit per-run logging. 
+func repeatBenchmark(t *testing.T, runs int, phase string, fn func() testing.BenchmarkResult) testing.BenchmarkResult { + t.Helper() + results := make([]testing.BenchmarkResult, runs) + for i := range runs { + start := time.Now() + results[i] = fn() + if phase != "" { + t.Logf("%s: run %d/%d done (wall %s, %s/op)", + phase, i+1, runs, time.Since(start).Round(time.Second), roundedDur(results[i].NsPerOp())) + } + } + avg := averageBenchmarkResults(results) + if phase != "" { + t.Logf("%s: mean %s/op over %d runs", phase, roundedDur(avg.NsPerOp()), runs) + } + return avg +} + +// runDiagnoseOverheadMatrix measures Diagnose overhead vs the raw `go test` floor +// across overheadMatrix for target and logs a diff table. It is a test helper, not +// a Benchmark, because it drives testing.Benchmark internally (which deadlocks if +// called from a benchmark). +func runDiagnoseOverheadMatrix(t *testing.T, label, target string) { + t.Helper() + skipUnlessDiagnoseOverheadMatrix(t) + + repoRoot, err := filepath.Abs("../..") + require.NoError(t, err) + cleanupNewDiagnoseDirs(t, repoRoot) + + // Child `go test` processes must not see TESTRIG_BENCH_OVERHEAD (dogfood runs ./...). 
+ t.Setenv(overheadMatrixEnv, "") + + out := output.NewForTest(true, io.Discard, io.Discard, false) + ctx := context.Background() + runs := overheadMatrixRuns() + total := len(overheadMatrix) + t.Logf("[%s] overhead matrix: target=%s, %d cells, %d runs/cell (%s overrides)", + label, target, total, runs, overheadMatrixRunsEnv) + + rows := make([]overheadRow, 0, total) + for cell, cfg := range overheadMatrix { + cellLabel := fmt.Sprintf("[%s] cell %d/%d iters=%d parallel=%d", + label, cell+1, total, cfg.iterations, cfg.parallel) + + base := repeatBenchmark(t, runs, cellLabel+" baseline", func() testing.BenchmarkResult { + r := testing.Benchmark(func(b *testing.B) { + b.ReportAllocs() + for b.Loop() { + require.NoError(b, baselineWorkload(ctx, repoRoot, target, cfg.iterations, cfg.parallel)) + } + }) + require.NotZero(t, r.N, "baseline workload failed for %+v", cfg) + return r + }) + + diag := repeatBenchmark(t, runs, cellLabel+" diagnose", func() testing.BenchmarkResult { + r := testing.Benchmark(func(b *testing.B) { + b.ReportAllocs() + for b.Loop() { + require.NoError(b, diagnoseWorkload(ctx, out, repoRoot, target, cfg.iterations, cfg.parallel)) + } + }) + require.NotZero(t, r.N, "diagnose workload failed for %+v", cfg) + return r + }) + + overheadNs := diag.NsPerOp() - base.NsPerOp() + t.Logf("%s: done — overhead %s (%s of diagnose; baseline %s, diagnose %s)", + cellLabel, overheadDur(overheadNs), overheadPercent(overheadNs, diag.NsPerOp()), + roundedDur(base.NsPerOp()), roundedDur(diag.NsPerOp())) + + rows = append(rows, overheadRow{cfg: cfg, baseline: base, diagnose: diag}) + } + printDiagnoseOverhead(t, label, target, runs, rows) +} + +// TestDiagnoseOverhead_Dummy runs the overhead matrix against the tiny dummy package. +// Run via `just bench_overhead_matrix_dummy`. Each cell is benchmarked 5 times by default +// and averaged; set TESTRIG_BENCH_OVERHEAD_RUNS (e.g. 3) to tune accuracy vs wall time. 
+// +//nolint:paralleltest // serial by design: spawns many go test subprocesses and measures wall time. +func TestDiagnoseOverhead_Dummy(t *testing.T) { + runDiagnoseOverheadMatrix(t, "dummy", benchDummyTarget) +} + +// TestDiagnoseOverhead_Dogfood runs the overhead matrix against the full testrig module (./...). +// Run via `just bench_overhead_matrix_dogfood`; expect much longer wall time than dummy. +// +//nolint:paralleltest // serial by design: spawns many go test subprocesses and measures wall time. +func TestDiagnoseOverhead_Dogfood(t *testing.T) { + runDiagnoseOverheadMatrix(t, "dogfood", benchDogfoodTarget) +} + +// roundedDur renders ns as a duration rounded to microseconds for the table. +func roundedDur(ns int64) string { + return time.Duration(ns).Round(time.Microsecond).String() +} + +// overheadDur renders overhead; negative deltas (noise) show as 0. +func overheadDur(ns int64) string { + if ns < 0 { + ns = 0 + } + return roundedDur(ns) +} + +// overheadPercent is overhead as a share of diagnose runtime (overhead / diagnose). +func overheadPercent(overheadNs, diagnoseNs int64) string { + if diagnoseNs <= 0 { + return "n/a" + } + if overheadNs < 0 { + overheadNs = 0 + } + return fmt.Sprintf("%.1f%%", float64(overheadNs)*100/float64(diagnoseNs)) +} + +// printDiagnoseOverhead logs a table of baseline vs diagnose wall time per config. +// overhead = diagnose ns/op - baseline ns/op; overhead/iter divides by iterations. +// Each cell is the mean of runs repeated benchmark invocations. 
+func printDiagnoseOverhead(t *testing.T, label, target string, runs int, rows []overheadRow) { + t.Helper() + var sb strings.Builder + tw := tabwriter.NewWriter(&sb, 0, 0, 2, ' ', 0) + _, _ = fmt.Fprintln(tw, "iters\tparallel\tbaseline\tdiagnose\toverhead\toverhead%\toverhead/iter") + for _, r := range rows { + overheadNs := r.diagnose.NsPerOp() - r.baseline.NsPerOp() + perIterNs := overheadNs / int64(max(r.cfg.iterations, 1)) + diagNs := r.diagnose.NsPerOp() + _, _ = fmt.Fprintf(tw, "%d\t%d\t%s\t%s\t%s\t%s\t%s\n", + r.cfg.iterations, + r.cfg.parallel, + roundedDur(r.baseline.NsPerOp()), + roundedDur(diagNs), + overheadDur(overheadNs), + overheadPercent(overheadNs, diagNs), + overheadDur(perIterNs), + ) + } + _ = tw.Flush() + t.Logf(` +-------------------------------------------------------------------------------- +Diagnose overhead vs raw go test (%s, target=%s, %d-run average per cell) +-------------------------------------------------------------------------------- +%s`, + label, target, runs, sb.String()) +} + +func TestAverageBenchmarkResults(t *testing.T) { + t.Parallel() + avg := averageBenchmarkResults([]testing.BenchmarkResult{ + {N: 10, T: 1_000, MemBytes: 1_000, MemAllocs: 100}, + {N: 10, T: 3_000, MemBytes: 3_000, MemAllocs: 300}, + }) + require.Equal(t, int64(200), avg.NsPerOp()) + require.Equal(t, int64(200), avg.AllocedBytesPerOp()) + require.Equal(t, int64(20), avg.AllocsPerOp()) +} + +func TestOverheadPercent(t *testing.T) { + t.Parallel() + require.Equal(t, "20.0%", overheadPercent(20, 100)) + require.Equal(t, "0.0%", overheadPercent(-5, 100)) + require.Equal(t, "n/a", overheadPercent(10, 0)) +} + +func TestOverheadMatrixRuns(t *testing.T) { + require.Equal(t, 5, overheadMatrixRuns()) + t.Setenv(overheadMatrixRunsEnv, "3") + require.Equal(t, 3, overheadMatrixRuns()) + t.Setenv(overheadMatrixRunsEnv, "99") + require.Equal(t, overheadMatrixRunsMax, overheadMatrixRuns()) + t.Setenv(overheadMatrixRunsEnv, "nope") + require.Equal(t, 
overheadMatrixRunsDefault, overheadMatrixRuns()) +} + +func BenchmarkResolveModuleDir(b *testing.B) { + repoRoot, err := filepath.Abs("../..") + require.NoError(b, err) + args := []string{"./internal/runner/..."} + + for b.Loop() { + _, _, err := resolveModuleDir(repoRoot, args) + require.NoError(b, err) + } +} diff --git a/internal/runner/testdata/dummy/dummy_test.go b/internal/runner/testdata/dummy/dummy_test.go new file mode 100644 index 0000000..f400466 --- /dev/null +++ b/internal/runner/testdata/dummy/dummy_test.go @@ -0,0 +1,5 @@ +package dummy + +import "testing" + +func TestDummy(t *testing.T) {} diff --git a/justfile b/justfile index dc677a4..c129a70 100644 --- a/justfile +++ b/justfile @@ -14,9 +14,20 @@ test: test_race: go tool gotestsum -- -race ./... -# Run benchmarks with memory stats and specific CPU counts +# Run standard benchmarks bench: - go test -bench=. -benchmem -run=^$ ./... -cpu=2,4,8 + go test -bench=. -benchmem -run=^$ ./... + +# Diagnose overhead matrix (dummy; fast). 5 runs averaged per cell; TESTRIG_BENCH_OVERHEAD_RUNS=3 to override. +bench_overhead_matrix_dummy: + TESTRIG_BENCH_OVERHEAD=1 go test ./internal/runner/ -run='^TestDiagnoseOverhead_Dummy$' -count=1 -v + +# Run benchmark to measure diagnose overhead against the full testrig module (./...); slow. +bench_overhead_matrix_dogfood: + TESTRIG_BENCH_OVERHEAD=1 go test ./internal/runner/ -run='^TestDiagnoseOverhead_Dogfood$' -count=1 -v + +# Run benchmarks to measure diagnose overhead for both dummy and dogfood targets. +bench_overhead_matrix: bench_overhead_matrix_dummy bench_overhead_matrix_dogfood # Local GoReleaser dry-run (snapshot) goreleaser: diff --git a/lefthook.yml b/lefthook.yml index bbbcf28..3e25b95 100644 --- a/lefthook.yml +++ b/lefthook.yml @@ -54,14 +54,19 @@ pre-commit: golangci-lint: tags: linter - glob: "*.go" - root: "" - run: golangci-lint run --fast-only --fix ./... 
+ glob: + - "*.go" + - "**/*.go" + - "go.mod" + - "go.sum" + run: golangci-lint run --fix --fast-only ./... stage_fixed: true go-generate: tags: linter - glob: "*.go" + glob: + - "*.go" + - "**/*.go" run: | before=$(git status --porcelain) go generate ./... @@ -75,7 +80,11 @@ pre-commit: go-mod-tidy: tags: linter - glob: "go.mod" + glob: + - "go.mod" + - "go.sum" + - "*.go" + - "**/*.go" run: | before=$(git status --porcelain) while IFS= read -r mod; do @@ -97,5 +106,9 @@ pre-push: commands: go-short-tests: tags: test - glob: "*.go" + glob: + - "*.go" + - "**/*.go" + - "go.mod" + - "go.sum" run: go test -short ./...