Skip to content

Commit 5e3db23

Browse files
authored
feat: structured benchmark result output via BENCH_RESULT_OUTPUT (#3195)
* feat(benchmarking): add structured result output via BENCH_RESULT_OUTPUT
  Emit full benchmark run metadata (config, tags, metrics, block range, spamoor stats) as JSON when BENCH_RESULT_OUTPUT is set. Consumed by an external matrix runner for table generation.
* fix: address PR review feedback for structured benchmark output
  - Deduplicate overhead/reth-rate computation: move stats-based helpers to helpers.go, make span-based wrappers delegate to them
  - Fix sub-millisecond precision loss in engine span timings by using microsecond-based float division instead of integer truncation
  - Add spamoor stats to TestGasBurner for consistency with other tests
* refactor: make spamoor config fully configurable via BENCH_* env vars
  - Add MaxPending, Rebroadcast, BaseFee, TipFee to benchConfig
  - Fix ERC20 test hardcoding max_wallets=200 instead of using cfg
  - Replace all hardcoded spamoor params with cfg fields across tests
* feat: extract host metadata from OTEL resource attributes in trace spans
  - Add resourceAttrs struct with host, OS, and service fields
  - Extract attributes from VictoriaTraces LogsQL span data via resourceAttrCollector interface
  - Include host metadata in structured benchmark result JSON
* fix: defer emitRunResult so results are written even on test failure
  Move emitRunResult into a deferred closure in all three test functions. If the test fails after metrics are collected, the structured JSON is still written. If it fails before result data exists, the defer is a no-op.
* fix: state pressure benchmark CI failure and align with other tests
  Remove the 3-second sleep before requireSpammersRunning that caused all transactions to be mined before the measurement window started, leaving SteadyState at 0s. Also add deferred emitRunResult, configurable spamoor params, and spamoorStats collection to match the other benchmark tests.
* fix: use deployment-level service names for trace queries in external mode
  In external mode the sequencer reports spans as "ev-node" (not the test-specific name like "ev-node-erc20"), so trace queries returned zero spans. Store service names on env: local mode uses the test-specific name; external mode defaults to "ev-node"/"ev-reth", with BENCH_EVNODE_SERVICE_NAME/BENCH_EVRETH_SERVICE_NAME overrides.
* perf: use limit=1 for resource attribute trace queries
  fetchResourceAttrs only needs one span but was streaming the full result set from VictoriaTraces. Add limit=1 to the LogsQL query to avoid wasting bandwidth on long-lived instances with many spans.
* docs: add missing doc comments to run_result.go functions
1 parent 146e6e1 commit 5e3db23

11 files changed

Lines changed: 533 additions & 68 deletions

test/e2e/benchmark/config.go

Lines changed: 8 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -27,6 +27,10 @@ type benchConfig struct {
2727
WarmupTxs int
2828
GasUnitsToBurn int
2929
MaxWallets int
30+
MaxPending int
31+
Rebroadcast int
32+
BaseFee int
33+
TipFee int
3034
WaitTimeout time.Duration
3135
}
3236

@@ -43,6 +47,10 @@ func newBenchConfig(serviceName string) benchConfig {
4347
WarmupTxs: envInt("BENCH_WARMUP_TXS", 200),
4448
GasUnitsToBurn: envInt("BENCH_GAS_UNITS_TO_BURN", 1_000_000),
4549
MaxWallets: envInt("BENCH_MAX_WALLETS", 500),
50+
MaxPending: envInt("BENCH_MAX_PENDING", 50_000),
51+
Rebroadcast: envInt("BENCH_REBROADCAST", 0),
52+
BaseFee: envInt("BENCH_BASE_FEE", 20),
53+
TipFee: envInt("BENCH_TIP_FEE", 2),
4654
WaitTimeout: envDuration("BENCH_WAIT_TIMEOUT", 10*time.Minute),
4755
}
4856
}

test/e2e/benchmark/gasburner_test.go

Lines changed: 22 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -23,6 +23,15 @@ func (s *SpamoorSuite) TestGasBurner() {
2323

2424
cfg.log(t)
2525

26+
var result *benchmarkResult
27+
var wallClock time.Duration
28+
var spamoorStats *runSpamoorStats
29+
defer func() {
30+
if result != nil {
31+
emitRunResult(t, cfg, result, wallClock, spamoorStats)
32+
}
33+
}()
34+
2635
e := s.setupEnv(cfg)
2736
api := e.spamoorAPI
2837

@@ -32,11 +41,11 @@ func (s *SpamoorSuite) TestGasBurner() {
3241
"gas_units_to_burn": cfg.GasUnitsToBurn,
3342
"total_count": cfg.CountPerSpammer,
3443
"throughput": cfg.Throughput,
35-
"max_pending": 50000,
44+
"max_pending": cfg.MaxPending,
3645
"max_wallets": cfg.MaxWallets,
37-
"rebroadcast": 5,
38-
"base_fee": 100,
39-
"tip_fee": 50,
46+
"rebroadcast": cfg.Rebroadcast,
47+
"base_fee": cfg.BaseFee,
48+
"tip_fee": cfg.TipFee,
4049
"refill_amount": "500000000000000000000",
4150
"refill_balance": "200000000000000000000",
4251
"refill_interval": 300,
@@ -83,7 +92,7 @@ func (s *SpamoorSuite) TestGasBurner() {
8392
if err := waitForDrain(drainCtx, t.Logf, e.ethClient, 10); err != nil {
8493
t.Logf("warning: %v", err)
8594
}
86-
wallClock := time.Since(loadStart)
95+
wallClock = time.Since(loadStart)
8796

8897
endHeader, err := e.ethClient.HeaderByNumber(ctx, nil)
8998
s.Require().NoError(err, "failed to get end block header")
@@ -94,10 +103,16 @@ func (s *SpamoorSuite) TestGasBurner() {
94103
bm, err := collectBlockMetrics(ctx, e.ethClient, startBlock, endBlock)
95104
s.Require().NoError(err, "failed to collect block metrics")
96105

97-
traces := s.collectTraces(e, cfg.ServiceName)
106+
traces := s.collectTraces(e)
98107

99-
result := newBenchmarkResult("GasBurner", bm, traces)
108+
result = newBenchmarkResult("GasBurner", bm, traces)
100109
s.Require().Greater(result.summary.SteadyState, time.Duration(0), "expected non-zero steady-state duration")
101110
result.log(t, wallClock)
102111
w.addEntries(result.entries())
112+
113+
metrics, mErr := api.GetMetrics()
114+
s.Require().NoError(mErr, "failed to get final metrics")
115+
sent := sumCounter(metrics["spamoor_transactions_sent_total"])
116+
failed := sumCounter(metrics["spamoor_transactions_failed_total"])
117+
spamoorStats = &runSpamoorStats{Sent: sent, Failed: failed}
103118
}

test/e2e/benchmark/helpers.go

Lines changed: 22 additions & 22 deletions
Original file line number | Diff line number | Diff line change
@@ -382,23 +382,16 @@ func (s *blockMetricsSummary) entries(prefix string) []entry {
382382
}
383383
}
384384

385-
// evNodeOverhead computes the fraction of block production time spent outside
386-
// EVM execution. It looks up the average durations of BlockExecutor.ProduceBlock
387-
// (the outer span covering the full block lifecycle) and Executor.ExecuteTxs
388-
// (the inner span covering only EVM tx execution), then returns:
385+
// overheadFromStats computes ev-node overhead from pre-aggregated span stats.
389386
//
390387
// overhead% = (avgProduce - avgExecute) / avgProduce * 100
391-
//
392-
// This captures time spent on sequencing, DA submission, header construction,
393-
// and other ev-node orchestration work. Returns false if either span is missing.
394-
func evNodeOverhead(spans []e2e.TraceSpan) (float64, bool) {
395-
stats := e2e.AggregateSpanStats(spans)
388+
func overheadFromStats(stats map[string]*e2e.SpanStats) (float64, bool) {
396389
produce, ok := stats[spanProduceBlock]
397-
if !ok {
390+
if !ok || produce.Count == 0 {
398391
return 0, false
399392
}
400393
execute, ok := stats[spanExecuteTxs]
401-
if !ok {
394+
if !ok || execute.Count == 0 {
402395
return 0, false
403396
}
404397
produceAvg := float64(produce.Total.Microseconds()) / float64(produce.Count)
@@ -409,20 +402,25 @@ func evNodeOverhead(spans []e2e.TraceSpan) (float64, bool) {
409402
return (produceAvg - executeAvg) / produceAvg * 100, true
410403
}
411404

412-
// rethExecutionRate computes ev-reth's effective execution throughput in GGas/s
413-
// based on the total gas processed and the cumulative Engine.NewPayload duration.
414-
// NewPayload is the engine API call where reth validates and executes all state
415-
// transitions for a block (EVM execution + state root + disk commit).
416-
func rethExecutionRate(spans []e2e.TraceSpan, totalGasUsed uint64) (float64, bool) {
417-
stats := e2e.AggregateSpanStats(spans)
405+
// evNodeOverhead aggregates spans then computes overhead.
406+
func evNodeOverhead(spans []e2e.TraceSpan) (float64, bool) {
407+
return overheadFromStats(e2e.AggregateSpanStats(spans))
408+
}
409+
410+
// rethRateFromStats computes ev-reth GGas/s from pre-aggregated span stats.
411+
func rethRateFromStats(stats map[string]*e2e.SpanStats, totalGasUsed uint64) (float64, bool) {
418412
np, ok := stats[spanNewPayload]
419413
if !ok || np.Total <= 0 || totalGasUsed == 0 {
420414
return 0, false
421415
}
422-
// GGas/s = totalGas / newPayloadSeconds / 1e9
423416
return float64(totalGasUsed) / np.Total.Seconds() / 1e9, true
424417
}
425418

419+
// rethExecutionRate aggregates spans then computes GGas/s.
420+
func rethExecutionRate(spans []e2e.TraceSpan, totalGasUsed uint64) (float64, bool) {
421+
return rethRateFromStats(e2e.AggregateSpanStats(spans), totalGasUsed)
422+
}
423+
426424
// engineSpanEntries extracts ProduceBlock, Engine.GetPayload, and
427425
// Engine.NewPayload timing stats from ev-node spans and returns them as
428426
// result writer entries. these are the key metrics for answering "does block
@@ -443,11 +441,13 @@ func engineSpanEntries(prefix string, spans []e2e.TraceSpan) []entry {
443441
if !ok || s.Count == 0 {
444442
continue
445443
}
446-
avg := s.Total / time.Duration(s.Count)
444+
avg := float64(s.Total.Microseconds()) / float64(s.Count) / 1000.0
445+
min := float64(s.Min.Microseconds()) / 1000.0
446+
max := float64(s.Max.Microseconds()) / 1000.0
447447
entries = append(entries,
448-
entry{Name: prefix + " - " + k.label + " avg", Unit: "ms", Value: float64(avg.Milliseconds())},
449-
entry{Name: prefix + " - " + k.label + " min", Unit: "ms", Value: float64(s.Min.Milliseconds())},
450-
entry{Name: prefix + " - " + k.label + " max", Unit: "ms", Value: float64(s.Max.Milliseconds())},
448+
entry{Name: prefix + " - " + k.label + " avg", Unit: "ms", Value: avg},
449+
entry{Name: prefix + " - " + k.label + " min", Unit: "ms", Value: min},
450+
entry{Name: prefix + " - " + k.label + " max", Unit: "ms", Value: max},
451451
)
452452
}
453453
return entries

test/e2e/benchmark/result.go

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -20,6 +20,10 @@ type traceResult struct {
2020
// empty when the trace provider doesn't support rich span collection.
2121
evNodeRich []richSpan
2222
evRethRich []richSpan
23+
24+
// resource attributes extracted from trace spans (OTEL_RESOURCE_ATTRIBUTES).
25+
evNodeAttrs *resourceAttrs
26+
evRethAttrs *resourceAttrs
2327
}
2428

2529
// displayFlowcharts renders ASCII flowcharts from rich spans. Falls back to

0 commit comments

Comments
 (0)