From 69c1e2e24989201dc7070e779730bcd62b42f268 Mon Sep 17 00:00:00 2001
From: Glittersup
Date: Fri, 26 Jun 2026 12:19:36 +0100
Subject: [PATCH] feat: automated performance benchmark suite for compiler endpoints

---
 backend/benchmarks/README.md          |  67 ++++++++++++
 backend/benchmarks/config.ts          | 129 +++++++++++++++++++++++
 backend/benchmarks/lib/stats.ts       | 141 ++++++++++++++++++++++++++
 backend/benchmarks/results/.gitignore |   3 +
 backend/benchmarks/runBenchmarks.ts   |  90 ++++++++++++++++
 backend/package.json                  |   3 +
 backend/tests/benchmark-stats.test.ts |  95 +++++++++++++++++
 7 files changed, 528 insertions(+)
 create mode 100644 backend/benchmarks/README.md
 create mode 100644 backend/benchmarks/config.ts
 create mode 100644 backend/benchmarks/lib/stats.ts
 create mode 100644 backend/benchmarks/results/.gitignore
 create mode 100644 backend/benchmarks/runBenchmarks.ts
 create mode 100644 backend/tests/benchmark-stats.test.ts

diff --git a/backend/benchmarks/README.md b/backend/benchmarks/README.md
new file mode 100644
index 00000000..36a14004
--- /dev/null
+++ b/backend/benchmarks/README.md
@@ -0,0 +1,67 @@
+# Automated Performance Benchmark Suite
+
+Load-tests the playground **compiler endpoints** under simulated traffic peaks
+and reports latency + success ratios, using [autocannon](https://github.com/mcollina/autocannon).
+
+## What it does
+
+- Floods `POST /api/v1/contracts/compile` (and is easily extended to other
+  endpoints) with concurrent connections across several scenarios.
+- Measures latency percentiles (mean/p50/p90/p99/max), throughput, and the
+  ratio of `2xx` responses to total attempts.
+- Checks each scenario against pass/fail thresholds and writes statistical logs
+  (JSON + text) to `benchmarks/results/`.
+- Exits non-zero if any scenario misses its thresholds (CI-gating friendly).
+
+## Layout
+
+| File | Responsibility |
+|------|----------------|
+| `config.ts` | Scenario definitions (endpoint, payload, connections, duration, thresholds) + env overrides. |
+| `lib/stats.ts` | Pure stats/reporting: `summarize`, `formatSummary`, success-ratio + threshold logic. Unit tested. |
+| `runBenchmarks.ts` | Thin runner: drives autocannon per scenario and persists logs. |
+| `results/` | Generated logs (git-ignored). |
+
+The number-crunching lives in `lib/stats.ts` with **no autocannon/network
+dependency**, so it is unit-tested deterministically in
+`tests/benchmark-stats.test.ts` without running a load test.
+
+## Running
+
+```bash
+cd backend
+npm install           # installs autocannon (added as a devDependency)
+npm run start &       # start the API under test (or: npm run dev)
+npm run bench         # run all scenarios against http://localhost:8080/api/v1
+```
+
+### Environment overrides
+
+| Variable | Default | Purpose |
+|----------|---------|---------|
+| `BENCH_BASE_URL` | `http://localhost:8080/api/v1` | API base URL. |
+| `BENCH_WORKSPACE_ID` | `default` | Value for the required `x-workspace-id` header. |
+| `BENCH_CONNECTIONS` | `50` (peak) | Concurrency for the peak scenario. |
+| `BENCH_DURATION` | per-scenario | Override duration (seconds) for all scenarios. |
+
+```bash
+BENCH_CONNECTIONS=150 BENCH_DURATION=10 npm run bench
+```
+
+## Sample output
+
+```
+[PASS] compile-peak
+ duration: 20s @ 50 connections
+ requests: 10000 total (500/s)
+ latency (ms): mean 120 | p50 100 | p90 200 | p99 900 | max 1500
+ responses: 2xx 9990 | non2xx 10 | errors 0 | timeouts 0
+ success: 99.9%
+```
+
+## Tests
+
+```bash
+cd backend
+npm test -- benchmark-stats
+```
diff --git a/backend/benchmarks/config.ts b/backend/benchmarks/config.ts
new file mode 100644
index 00000000..e1819a84
--- /dev/null
+++ b/backend/benchmarks/config.ts
@@ -0,0 +1,129 @@
+/**
+ * Benchmark scenario configuration for the playground compiler endpoints.
+ *
+ * Each scenario is a self-contained load test: which endpoint to flood, what
+ * payload to send, how many concurrent connections to open, and for how long.
+ * The runner (`runBenchmarks.ts`) executes these with autocannon and reports
+ * latency + success ratios via the pure stats module (`lib/stats.ts`).
+ *
+ * Tune scenarios with environment variables (so CI and local runs differ
+ * without code changes):
+ *   BENCH_BASE_URL       base API url (default http://localhost:8080/api/v1)
+ *   BENCH_WORKSPACE_ID   x-workspace-id (default "default")
+ *   BENCH_CONNECTIONS    override concurrency for the peak scenario
+ *   BENCH_DURATION       override duration (seconds) for every scenario
+ */
+
+/** Pass/fail thresholds applied to a scenario's results. */
+export interface BenchmarkThresholds {
+  /** Minimum fraction of 2xx responses (0–1) for the scenario to pass. */
+  minSuccessRatio: number;
+  /** Maximum tolerated p99 latency in milliseconds. */
+  maxP99LatencyMs: number;
+}
+
+/** A single load-test scenario. */
+export interface BenchmarkScenario {
+  name: string;
+  description: string;
+  /** Path appended to the base url, e.g. "/contracts/compile". */
+  path: string;
+  method: 'GET' | 'POST';
+  /** Concurrent open connections (the load). */
+  connections: number;
+  /** Test duration in seconds. */
+  duration: number;
+  /** Requests pipelined per connection. */
+  pipelining?: number;
+  /** JSON body sent with each request (stringified by the runner). */
+  body?: unknown;
+  thresholds: BenchmarkThresholds;
+}
+
+/** Resolve the base URL: blank/unset env falls back to the local default; trailing slashes are dropped. */
+export function resolveBaseUrl(env: NodeJS.ProcessEnv = process.env): string {
+  return (env.BENCH_BASE_URL?.trim() || 'http://localhost:8080/api/v1').replace(/\/+$/, '');
+}
+
+/** Resolve the workspace id header value; a blank/unset env value falls back to "default". */
+export function resolveWorkspaceId(env: NodeJS.ProcessEnv = process.env): string {
+  return env.BENCH_WORKSPACE_ID?.trim() || 'default';
+}
+
+/** Headers sent with every benchmarked request (JSON content type + workspace id). */
+export function resolveHeaders(env: NodeJS.ProcessEnv = process.env): Record<string, string> {
+  return {
+    'content-type': 'application/json',
+    'x-workspace-id': resolveWorkspaceId(env),
+  };
+}
+
+// A minimal but valid Soroban contract (>= 32 chars) that satisfies
+// contractCompileSchema, so the compiler does real work under load.
+const SAMPLE_SOURCE = `#![no_std]
+use soroban_sdk::{contract, contractimpl, Env, Symbol, symbol_short};
+
+#[contract]
+pub struct BenchContract;
+
+#[contractimpl]
+impl BenchContract {
+    pub fn ping(_env: Env) -> Symbol {
+        symbol_short!("pong")
+    }
+}`;
+
+const COMPILE_BODY = {
+  sourceCode: SAMPLE_SOURCE,
+  compilerVersion: '0.8.10',
+  optimization: true,
+  target: 'soroban',
+  entryPoint: 'ping',
+};
+
+/** Return `envVar` parsed as a positive finite number, or `value` when it is unset or invalid. */
+function override(value: number, envVar: string | undefined): number {
+  const parsed = envVar ? Number(envVar) : NaN;
+  return Number.isFinite(parsed) && parsed > 0 ? parsed : value;
+}
+
+/** Build the scenario list; BENCH_DURATION applies to all, BENCH_CONNECTIONS to the peak only. */
+export function buildScenarios(env: NodeJS.ProcessEnv = process.env): BenchmarkScenario[] {
+  const duration = (d: number) => override(d, env.BENCH_DURATION);
+
+  return [
+    {
+      name: 'compile-warmup',
+      description: 'Light warm-up load to prime the compiler endpoint.',
+      path: '/contracts/compile',
+      method: 'POST',
+      connections: 5,
+      duration: duration(5),
+      body: COMPILE_BODY,
+      thresholds: { minSuccessRatio: 0.99, maxP99LatencyMs: 1500 },
+    },
+    {
+      name: 'compile-peak',
+      description: 'Simulated load peak flooding the compiler endpoint.',
+      path: '/contracts/compile',
+      method: 'POST',
+      connections: override(50, env.BENCH_CONNECTIONS),
+      duration: duration(20),
+      pipelining: 1,
+      body: COMPILE_BODY,
+      thresholds: { minSuccessRatio: 0.97, maxP99LatencyMs: 4000 },
+    },
+    {
+      name: 'compile-sustained',
+      description: 'Sustained moderate load to observe steady-state latency.',
+      path: '/contracts/compile',
+      method: 'POST',
+      connections: 20,
+      duration: duration(30),
+      body: COMPILE_BODY,
+      thresholds: { minSuccessRatio: 0.98, maxP99LatencyMs: 3000 },
+    },
+  ];
+}
+
+export const scenarios = buildScenarios();
diff --git a/backend/benchmarks/lib/stats.ts b/backend/benchmarks/lib/stats.ts
new file mode 100644
index 00000000..8d17effe
--- /dev/null
+++ b/backend/benchmarks/lib/stats.ts
@@ -0,0 +1,141 @@
+/**
+ * Pure statistics & reporting helpers for the performance benchmark suite.
+ *
+ * These functions take an autocannon-style result object and turn it into a
+ * normalised summary, a human-readable report, and a pass/fail verdict against
+ * thresholds. They have **no I/O and no autocannon dependency**, so they can be
+ * unit-tested deterministically without running a load test or a live server.
+ */
+
+import type { BenchmarkThresholds } from '../config.js';
+
+/**
+ * The subset of an autocannon result we consume. Autocannon returns much more,
+ * but the suite only needs latency percentiles and response-class counts.
+ * @see https://github.com/mcollina/autocannon#result
+ */
+export interface AutocannonResultLike {
+  duration?: number;
+  connections?: number;
+  latency?: { mean?: number; p50?: number; p90?: number; p99?: number; max?: number };
+  requests?: { total?: number; mean?: number };
+  '1xx'?: number;
+  '2xx'?: number;
+  '3xx'?: number;
+  '4xx'?: number;
+  '5xx'?: number;
+  non2xx?: number;
+  errors?: number; // connection errors; autocannon documents this count as including timeouts
+  timeouts?: number; // connection timeouts (already part of `errors` in real autocannon results)
+}
+
+/** A normalised, report-ready summary of one scenario run. */
+export interface BenchmarkSummary {
+  name: string;
+  durationSec: number;
+  connections: number;
+  totalRequests: number;
+  requestsPerSec: number;
+  latencyMs: { mean: number; p50: number; p90: number; p99: number; max: number };
+  responses: { '2xx': number; non2xx: number; errors: number; timeouts: number };
+  /** Fraction of attempts that returned 2xx (0–1). */
+  successRatio: number;
+  passed: boolean;
+  /** Human-readable reasons when `passed` is false. */
+  failures: string[];
+}
+
+function num(value: number | undefined): number { // missing, NaN or infinite → 0
+  return typeof value === 'number' && Number.isFinite(value) ? value : 0;
+}
+
+/** Round to a fixed number of decimal places. */
+function round(value: number, dp = 2): number {
+  const f = 10 ** dp;
+  return Math.round(value * f) / f;
+}
+
+/**
+ * Total request attempts = 2xx + non-2xx + transport errors (success-ratio denominator).
+ * Autocannon's `errors` already includes timeouts, so the larger of the two counts is
+ * used: a timeout is never counted twice, yet a result that only sets `timeouts` still counts.
+ */
+export function totalAttempts(result: AutocannonResultLike): number {
+  return num(result['2xx']) + num(result.non2xx) + Math.max(num(result.errors), num(result.timeouts));
+}
+
+/** Success ratio (0–1): 2xx responses over all attempts. Zero attempts → 0. */
+export function computeSuccessRatio(result: AutocannonResultLike): number {
+  const total = totalAttempts(result);
+  if (total === 0) return 0;
+  return num(result['2xx']) / total;
+}
+
+/** Normalise an autocannon result into a {@link BenchmarkSummary}. */
+export function summarize(
+  name: string,
+  result: AutocannonResultLike,
+  thresholds: BenchmarkThresholds
+): BenchmarkSummary {
+  const successRatio = computeSuccessRatio(result);
+  const latency = result.latency ?? {};
+  const latencyMs = {
+    mean: num(latency.mean),
+    p50: num(latency.p50),
+    p90: num(latency.p90),
+    p99: num(latency.p99),
+    max: num(latency.max),
+  };
+
+  const failures: string[] = [];
+  if (successRatio < thresholds.minSuccessRatio) {
+    failures.push(
+      `success ratio ${round(successRatio * 100)}% < required ${round(
+        thresholds.minSuccessRatio * 100
+      )}%`
+    );
+  }
+  if (latencyMs.p99 > thresholds.maxP99LatencyMs) {
+    failures.push(`p99 latency ${latencyMs.p99}ms > max ${thresholds.maxP99LatencyMs}ms`);
+  }
+
+  return {
+    name,
+    durationSec: num(result.duration),
+    connections: num(result.connections),
+    totalRequests: num(result.requests?.total),
+    requestsPerSec: round(num(result.requests?.mean)),
+    latencyMs,
+    responses: {
+      '2xx': num(result['2xx']),
+      non2xx: num(result.non2xx),
+      errors: num(result.errors),
+      timeouts: num(result.timeouts),
+    },
+    successRatio: round(successRatio, 4),
+    passed: failures.length === 0,
+    failures,
+  };
+}
+
+/** Render a single summary as an aligned, human-readable log block. */
+export function formatSummary(summary: BenchmarkSummary): string {
+  const status = summary.passed ? 'PASS' : 'FAIL';
+  const lines = [
+    `[${status}] ${summary.name}`,
+    ` duration: ${summary.durationSec}s @ ${summary.connections} connections`,
+    ` requests: ${summary.totalRequests} total (${summary.requestsPerSec}/s)`,
+    ` latency (ms): mean ${summary.latencyMs.mean} | p50 ${summary.latencyMs.p50} | p90 ${summary.latencyMs.p90} | p99 ${summary.latencyMs.p99} | max ${summary.latencyMs.max}`,
+    ` responses: 2xx ${summary.responses['2xx']} | non2xx ${summary.responses.non2xx} | errors ${summary.responses.errors} | timeouts ${summary.responses.timeouts}`,
+    ` success: ${round(summary.successRatio * 100)}%`,
+  ];
+  if (!summary.passed) {
+    lines.push(` threshold: ${summary.failures.join('; ')}`);
+  }
+  return lines.join('\n');
+}
+
+/** True only if every scenario passed its thresholds. */
+export function allPassed(summaries: BenchmarkSummary[]): boolean {
+  return summaries.length > 0 && summaries.every((s) => s.passed);
+}
diff --git a/backend/benchmarks/results/.gitignore b/backend/benchmarks/results/.gitignore
new file mode 100644
index 00000000..3bf109a5
--- /dev/null
+++ b/backend/benchmarks/results/.gitignore
@@ -0,0 +1,3 @@
+# Generated benchmark logs — keep the directory, ignore the output files.
+*
+!.gitignore
diff --git a/backend/benchmarks/runBenchmarks.ts b/backend/benchmarks/runBenchmarks.ts
new file mode 100644
index 00000000..28b98dd6
--- /dev/null
+++ b/backend/benchmarks/runBenchmarks.ts
@@ -0,0 +1,90 @@
+/**
+ * Automated performance benchmark runner.
+ *
+ * Floods the playground compiler endpoints with autocannon according to the
+ * scenarios in `config.ts`, then reports latency and success ratios using the
+ * pure stats module and writes statistical logs (JSON + text) to disk.
+ *
+ * Usage:
+ *   npm run bench                       # run all scenarios against BENCH_BASE_URL
+ *   BENCH_CONNECTIONS=100 npm run bench
+ *
+ * Exit code is non-zero if any scenario misses its thresholds, so this can gate
+ * CI. The target API must already be running.
+ */
+
+import autocannon from 'autocannon';
+import { mkdirSync, writeFileSync } from 'node:fs';
+import { dirname, join } from 'node:path';
+import { fileURLToPath } from 'node:url';
+import { buildScenarios, resolveBaseUrl, resolveHeaders, type BenchmarkScenario } from './config.js';
+import { allPassed, formatSummary, summarize, type BenchmarkSummary } from './lib/stats.js';
+
+const RESULTS_DIR = join(dirname(fileURLToPath(import.meta.url)), 'results');
+
+/** Run a single scenario with autocannon and return its normalised summary. */
+async function runScenario(
+  baseUrl: string,
+  headers: Record<string, string>,
+  scenario: BenchmarkScenario
+): Promise<BenchmarkSummary> {
+  console.log(`\n▶ ${scenario.name} — ${scenario.description}`);
+
+  const result = await autocannon({
+    url: `${baseUrl}${scenario.path}`,
+    method: scenario.method,
+    connections: scenario.connections,
+    duration: scenario.duration,
+    pipelining: scenario.pipelining ?? 1,
+    headers,
+    body: scenario.body !== undefined ? JSON.stringify(scenario.body) : undefined,
+  });
+
+  return summarize(scenario.name, result, scenario.thresholds);
+}
+
+async function main(): Promise<void> {
+  const baseUrl = resolveBaseUrl();
+  const headers = resolveHeaders();
+  const scenarios = buildScenarios();
+
+  console.log(`Performance Benchmark Suite → ${baseUrl}`);
+  console.log(`Scenarios: ${scenarios.map((s) => s.name).join(', ')}`);
+
+  const summaries: BenchmarkSummary[] = [];
+  for (const scenario of scenarios) {
+    try {
+      const summary = await runScenario(baseUrl, headers, scenario);
+      summaries.push(summary);
+      console.log(formatSummary(summary));
+    } catch (error) {
+      console.error(`✖ ${scenario.name} failed to run:`, error instanceof Error ? error.message : error);
+      summaries.push(
+        summarize(scenario.name, { errors: 1 }, scenario.thresholds) // record as a failure
+      );
+    }
+  }
+
+  // Persist statistical logs.
+  mkdirSync(RESULTS_DIR, { recursive: true });
+  const stamp = new Date().toISOString().replace(/[:.]/g, '-');
+  const report = { baseUrl, generatedAt: new Date().toISOString(), summaries };
+  const jsonPath = join(RESULTS_DIR, `benchmark-${stamp}.json`);
+  const textPath = join(RESULTS_DIR, `benchmark-${stamp}.log`);
+  writeFileSync(jsonPath, JSON.stringify(report, null, 2));
+  writeFileSync(textPath, summaries.map(formatSummary).join('\n\n'));
+
+  console.log(`\nStatistical logs written:\n ${jsonPath}\n ${textPath}`);
+
+  if (!allPassed(summaries)) {
+    console.error('\nOne or more scenarios missed their thresholds.');
+    process.exitCode = 1;
+  } else {
+    console.log('\nAll scenarios passed their thresholds.');
+  }
+}
+
+main().catch((error) => {
+  console.error('Benchmark run crashed:', error);
+  process.exitCode = 1;
+});
diff --git a/backend/package.json b/backend/package.json
index a3695fdd..88854eb1 100644
--- a/backend/package.json
+++ b/backend/package.json
@@ -10,6 +10,7 @@
     "dev": "tsx watch src/index.ts",
     "test": "jest --runInBand --forceExit",
     "test:coverage": "jest --coverage --runInBand --forceExit",
+    "bench": "tsx benchmarks/runBenchmarks.ts",
     "collaboration": "tsx src/collaborationServer.ts"
   },
   "keywords": [],
@@ -61,10 +62,12 @@
     "@types/node": "^25.5.0",
     "@types/qrcode": "^1.5.5",
     "@types/sanitize-html": "^2.16.1",
+    "@types/autocannon": "^7.12.7",
     "@types/supertest": "^7.2.0",
     "@types/swagger-jsdoc": "^6.0.4",
     "@types/swagger-ui-express": "^4.1.8",
     "@types/ws": "^8.18.1",
+    "autocannon": "^8.0.0",
     "ioredis-mock": "^8.13.1",
     "jest": "^30.4.2",
     "supertest": "^7.2.2",
diff --git a/backend/tests/benchmark-stats.test.ts b/backend/tests/benchmark-stats.test.ts
new file mode 100644
index 00000000..b4c2e632
--- /dev/null
+++ b/backend/tests/benchmark-stats.test.ts
@@ -0,0 +1,95 @@
+import {
+  allPassed,
+  computeSuccessRatio,
+  formatSummary,
+  summarize,
+  totalAttempts,
+  type AutocannonResultLike,
+} from '../benchmarks/lib/stats.js';
+import type { BenchmarkThresholds } from '../benchmarks/config.js';
+
+const thresholds: BenchmarkThresholds = { minSuccessRatio: 0.97, maxP99LatencyMs: 4000 };
+
+const healthyResult: AutocannonResultLike = {
+  duration: 20,
+  connections: 50,
+  latency: { mean: 120, p50: 100, p90: 200, p99: 900, max: 1500 },
+  requests: { total: 10000, mean: 500 },
+  '2xx': 9990,
+  non2xx: 10,
+  errors: 0,
+  timeouts: 0,
+};
+
+describe('benchmark stats', () => {
+  describe('totalAttempts / computeSuccessRatio', () => {
+    it('counts 2xx, non2xx and transport errors', () => {
+      expect(totalAttempts(healthyResult)).toBe(10000);
+      expect(computeSuccessRatio(healthyResult)).toBeCloseTo(0.999, 3);
+    });
+
+    it('returns 0 (no NaN) when there are no attempts', () => {
+      expect(totalAttempts({})).toBe(0);
+      expect(computeSuccessRatio({})).toBe(0);
+    });
+
+    it('counts a timeout once even though autocannon also reports it in errors', () => {
+      const result: AutocannonResultLike = { '2xx': 50, errors: 50, timeouts: 25 };
+      expect(computeSuccessRatio(result)).toBe(0.5);
+    });
+  });
+
+  describe('summarize', () => {
+    it('marks a healthy run as passed', () => {
+      const summary = summarize('compile-peak', healthyResult, thresholds);
+      expect(summary.passed).toBe(true);
+      expect(summary.failures).toEqual([]);
+      expect(summary.totalRequests).toBe(10000);
+      expect(summary.requestsPerSec).toBe(500);
+      expect(summary.latencyMs.p99).toBe(900);
+    });
+
+    it('fails when success ratio is below threshold', () => {
+      const summary = summarize(
+        'compile-peak',
+        { '2xx': 80, non2xx: 20, latency: { p99: 100 } },
+        thresholds
+      );
+      expect(summary.passed).toBe(false);
+      expect(summary.failures.join(' ')).toMatch(/success ratio/);
+    });
+
+    it('fails when p99 latency exceeds the threshold', () => {
+      const summary = summarize(
+        'compile-peak',
+        { '2xx': 100, latency: { p99: 9000 } },
+        thresholds
+      );
+      expect(summary.passed).toBe(false);
+      expect(summary.failures.join(' ')).toMatch(/p99 latency/);
+    });
+
+    it('handles a missing latency object without throwing', () => {
+      const summary = summarize('compile-peak', { '2xx': 100 }, thresholds);
+      expect(summary.latencyMs).toEqual({ mean: 0, p50: 0, p90: 0, p99: 0, max: 0 });
+      expect(summary.passed).toBe(true);
+    });
+  });
+
+  describe('formatSummary / allPassed', () => {
+    it('renders a readable PASS block', () => {
+      const text = formatSummary(summarize('compile-warmup', healthyResult, thresholds));
+      expect(text).toContain('[PASS] compile-warmup');
+      expect(text).toContain('latency (ms)');
+      expect(text).toContain('success:');
+    });
+
+    it('aggregates pass/fail across scenarios', () => {
+      const pass = summarize('a', healthyResult, thresholds);
+      const fail = summarize('b', { '2xx': 1, errors: 99, latency: { p99: 1 } }, thresholds);
+      expect(allPassed([pass])).toBe(true);
+      expect(allPassed([pass, fail])).toBe(false);
+      expect(allPassed([])).toBe(false);
+    });
+  });
+});