From 69c1e2e24989201dc7070e779730bcd62b42f268 Mon Sep 17 00:00:00 2001
From: Glittersup <fasholagifty@gmail.com>
Date: Fri, 26 Jun 2026 12:19:36 +0100
Subject: [PATCH] feat: automated performance benchmark suite for compiler
 endpoints

---
 backend/benchmarks/README.md          |  67 ++++++++++++
 backend/benchmarks/config.ts          | 129 +++++++++++++++++++++++
 backend/benchmarks/lib/stats.ts       | 141 ++++++++++++++++++++++++++
 backend/benchmarks/results/.gitignore |   3 +
 backend/benchmarks/runBenchmarks.ts   |  90 ++++++++++++++++
 backend/package.json                  |   3 +
 backend/tests/benchmark-stats.test.ts |  95 +++++++++++++++++
 7 files changed, 528 insertions(+)
 create mode 100644 backend/benchmarks/README.md
 create mode 100644 backend/benchmarks/config.ts
 create mode 100644 backend/benchmarks/lib/stats.ts
 create mode 100644 backend/benchmarks/results/.gitignore
 create mode 100644 backend/benchmarks/runBenchmarks.ts
 create mode 100644 backend/tests/benchmark-stats.test.ts

diff --git a/backend/benchmarks/README.md b/backend/benchmarks/README.md
new file mode 100644
index 00000000..36a14004
--- /dev/null
+++ b/backend/benchmarks/README.md
@@ -0,0 +1,67 @@
+# Automated Performance Benchmark Suite
+
+Load-tests the playground **compiler endpoints** under simulated traffic peaks
+and reports latency + success ratios, using [autocannon](https://github.com/mcollina/autocannon).
+
+## What it does
+
+- Floods `POST /api/v1/contracts/compile` (and is easily extended to other
+  endpoints) with concurrent connections across several scenarios.
+- Measures latency percentiles (mean/p50/p90/p99/max), throughput, and the
+  ratio of `2xx` responses to total attempts.
+- Checks each scenario against pass/fail thresholds and writes statistical logs
+  (JSON + text) to `benchmarks/results/`.
+- Exits non-zero if any scenario misses its thresholds (CI-gating friendly).
+
+## Layout
+
+| File | Responsibility |
+|------|----------------|
+| `config.ts` | Scenario definitions (endpoint, payload, connections, duration, thresholds) + env overrides. |
+| `lib/stats.ts` | Pure stats/reporting: `summarize`, `formatSummary`, success-ratio + threshold logic. Unit tested. |
+| `runBenchmarks.ts` | Thin runner: drives autocannon per scenario and persists logs. |
+| `results/` | Generated logs (git-ignored). |
+
+The number-crunching lives in `lib/stats.ts` with **no autocannon/network
+dependency**, so it is unit-tested deterministically in
+`tests/benchmark-stats.test.ts` without running a load test.
+
+## Running
+
+```bash
+cd backend
+npm install                 # installs autocannon (added as a devDependency)
+npm run start &             # start the API under test (or: npm run dev)
+npm run bench               # run all scenarios against http://localhost:8080/api/v1
+```
+
+### Environment overrides
+
+| Variable | Default | Purpose |
+|----------|---------|---------|
+| `BENCH_BASE_URL` | `http://localhost:8080/api/v1` | API base URL; each scenario path is appended to it. |
+| `BENCH_WORKSPACE_ID` | `default` | Value for the required `x-workspace-id` header. |
+| `BENCH_CONNECTIONS` | `50` (peak) | Concurrency for the peak scenario. |
+| `BENCH_DURATION` | per-scenario | Override duration (seconds) for all scenarios. |
+
+```bash
+BENCH_CONNECTIONS=150 BENCH_DURATION=10 npm run bench
+```
+
+## Sample output
+
+```
+[PASS] compile-peak
+  duration:     20s @ 50 connections
+  requests:     10000 total (500/s)
+  latency (ms):  mean 120 | p50 100 | p90 200 | p99 900 | max 1500
+  responses:    2xx 9990 | non2xx 10 | errors 0 | timeouts 0
+  success:      99.9%
+```
+
+## Tests
+
+```bash
+cd backend
+npm test -- benchmark-stats
+```
diff --git a/backend/benchmarks/config.ts b/backend/benchmarks/config.ts
new file mode 100644
index 00000000..e1819a84
--- /dev/null
+++ b/backend/benchmarks/config.ts
@@ -0,0 +1,129 @@
+/**
+ * Benchmark scenario configuration for the playground compiler endpoints.
+ *
+ * Each scenario is a self-contained load test: which endpoint to flood, what
+ * payload to send, how many concurrent connections to open, and for how long.
+ * The runner (`runBenchmarks.ts`) executes these with autocannon and reports
+ * latency + success ratios via the pure stats module (`lib/stats.ts`).
+ *
+ * Tune scenarios with environment variables (so CI and local runs differ
+ * without code changes):
+ *   BENCH_BASE_URL     base API url      (default http://localhost:8080/api/v1)
+ *   BENCH_WORKSPACE_ID x-workspace-id    (default "default")
+ *   BENCH_CONNECTIONS  override concurrency for the peak scenario
+ *   BENCH_DURATION     override duration (seconds) for every scenario
+ */
+
+/** Pass/fail thresholds applied to a scenario's results. */
+export interface BenchmarkThresholds {
+  /** Minimum fraction of 2xx responses (0–1) for the scenario to pass. */
+  minSuccessRatio: number;
+  /** Maximum tolerated p99 latency in milliseconds. */
+  maxP99LatencyMs: number;
+}
+
+/** A single load-test scenario. */
+export interface BenchmarkScenario {
+  name: string;
+  description: string;
+  /** Path appended to the base url, e.g. "/contracts/compile". */
+  path: string;
+  method: 'GET' | 'POST';
+  /** Concurrent open connections (the load). */
+  connections: number;
+  /** Test duration in seconds. */
+  duration: number;
+  /** Requests pipelined per connection. */
+  pipelining?: number;
+  /** JSON body sent with each request (stringified by the runner). */
+  body?: unknown;
+  thresholds: BenchmarkThresholds;
+}
+
+/** Resolve the base URL from the environment, with a sensible local default. */
+export function resolveBaseUrl(env: NodeJS.ProcessEnv = process.env): string {
+  return env.BENCH_BASE_URL ?? 'http://localhost:8080/api/v1';
+}
+
+/** Resolve the workspace id header value. */
+export function resolveWorkspaceId(env: NodeJS.ProcessEnv = process.env): string {
+  return env.BENCH_WORKSPACE_ID ?? 'default';
+}
+
+/** Headers sent with every benchmarked request. */
+export function resolveHeaders(env: NodeJS.ProcessEnv = process.env): Record<string, string> {
+  return {
+    'content-type': 'application/json',
+    'x-workspace-id': resolveWorkspaceId(env),
+  };
+}
+
+// A minimal but valid Soroban contract (>= 32 chars) that satisfies
+// contractCompileSchema, so the compiler does real work under load.
+const SAMPLE_SOURCE = `#![no_std]
+use soroban_sdk::{contract, contractimpl, Env, Symbol, symbol_short};
+
+#[contract]
+pub struct BenchContract;
+
+#[contractimpl]
+impl BenchContract {
+    pub fn ping(_env: Env) -> Symbol {
+        symbol_short!("pong")
+    }
+}`;
+
+const COMPILE_BODY = {
+  sourceCode: SAMPLE_SOURCE,
+  compilerVersion: '0.8.10',
+  optimization: true,
+  target: 'soroban',
+  entryPoint: 'ping',
+};
+
+/** Use `envVar` when it parses to a finite number > 0; otherwise fall back to `value`. */
+function override(value: number, envVar: string | undefined): number {
+  const parsed = envVar ? Number(envVar) : NaN;
+  return Number.isFinite(parsed) && parsed > 0 ? parsed : value;
+}
+
+/** Build the scenario list, honouring environment overrides. */
+export function buildScenarios(env: NodeJS.ProcessEnv = process.env): BenchmarkScenario[] {
+  const duration = (d: number) => override(d, env.BENCH_DURATION);
+
+  return [
+    {
+      name: 'compile-warmup',
+      description: 'Light warm-up load to prime the compiler endpoint.',
+      path: '/contracts/compile',
+      method: 'POST',
+      connections: 5,
+      duration: duration(5),
+      body: COMPILE_BODY,
+      thresholds: { minSuccessRatio: 0.99, maxP99LatencyMs: 1500 },
+    },
+    {
+      name: 'compile-peak',
+      description: 'Simulated load peak flooding the compiler endpoint.',
+      path: '/contracts/compile',
+      method: 'POST',
+      connections: override(50, env.BENCH_CONNECTIONS),
+      duration: duration(20),
+      pipelining: 1,
+      body: COMPILE_BODY,
+      thresholds: { minSuccessRatio: 0.97, maxP99LatencyMs: 4000 },
+    },
+    {
+      name: 'compile-sustained',
+      description: 'Sustained moderate load to observe steady-state latency.',
+      path: '/contracts/compile',
+      method: 'POST',
+      connections: 20,
+      duration: duration(30),
+      body: COMPILE_BODY,
+      thresholds: { minSuccessRatio: 0.98, maxP99LatencyMs: 3000 },
+    },
+  ];
+}
+
+export const scenarios = buildScenarios();
diff --git a/backend/benchmarks/lib/stats.ts b/backend/benchmarks/lib/stats.ts
new file mode 100644
index 00000000..8d17effe
--- /dev/null
+++ b/backend/benchmarks/lib/stats.ts
@@ -0,0 +1,141 @@
+/**
+ * Pure statistics & reporting helpers for the performance benchmark suite.
+ *
+ * These functions take an autocannon-style result object and turn it into a
+ * normalised summary, a human-readable report, and a pass/fail verdict against
+ * thresholds. They have **no I/O and no autocannon dependency**, so they can be
+ * unit-tested deterministically without running a load test or a live server.
+ */
+
+import type { BenchmarkThresholds } from '../config.js';
+
+/**
+ * The subset of an autocannon result we consume. Autocannon returns much more,
+ * but the suite only needs latency percentiles and response-class counts.
+ * @see https://github.com/mcollina/autocannon#result
+ */
+export interface AutocannonResultLike {
+  duration?: number;
+  connections?: number;
+  latency?: { mean?: number; p50?: number; p90?: number; p99?: number; max?: number };
+  requests?: { total?: number; mean?: number };
+  '1xx'?: number;
+  '2xx'?: number;
+  '3xx'?: number;
+  '4xx'?: number;
+  '5xx'?: number;
+  non2xx?: number;
+  errors?: number;
+  timeouts?: number;
+}
+
+/** A normalised, report-ready summary of one scenario run. */
+export interface BenchmarkSummary {
+  name: string;
+  durationSec: number;
+  connections: number;
+  totalRequests: number;
+  requestsPerSec: number;
+  latencyMs: { mean: number; p50: number; p90: number; p99: number; max: number };
+  responses: { '2xx': number; non2xx: number; errors: number; timeouts: number };
+  /** Fraction of attempts that returned 2xx (0–1). */
+  successRatio: number;
+  passed: boolean;
+  /** Human-readable reasons when `passed` is false. */
+  failures: string[];
+}
+
+function num(value: number | undefined): number {
+  return typeof value === 'number' && Number.isFinite(value) ? value : 0; // missing/non-finite → 0
+}
+
+/** Round to a fixed number of decimal places. */
+function round(value: number, dp = 2): number {
+  const f = 10 ** dp;
+  return Math.round(value * f) / f;
+}
+
+/**
+ * Total attempts = 2xx + non-2xx + errors + timeouts. It is the success-ratio
+ * denominator, so failed connections (not only HTTP errors) lower the ratio.
+ * NOTE(review): autocannon documents `errors` as including timeouts — confirm no double count.
+ */
+export function totalAttempts(result: AutocannonResultLike): number {
+  return num(result['2xx']) + num(result.non2xx) + num(result.errors) + num(result.timeouts);
+}
+
+/** Success ratio (0–1): 2xx responses over all attempts. Zero attempts → 0. */
+export function computeSuccessRatio(result: AutocannonResultLike): number {
+  const total = totalAttempts(result);
+  if (total === 0) return 0;
+  return num(result['2xx']) / total;
+}
+
+/** Normalise an autocannon result into a {@link BenchmarkSummary}. */
+export function summarize(
+  name: string,
+  result: AutocannonResultLike,
+  thresholds: BenchmarkThresholds
+): BenchmarkSummary {
+  const successRatio = computeSuccessRatio(result);
+  const latency = result.latency ?? {};
+  const latencyMs = {
+    mean: num(latency.mean),
+    p50: num(latency.p50),
+    p90: num(latency.p90),
+    p99: num(latency.p99),
+    max: num(latency.max),
+  };
+
+  const failures: string[] = [];
+  if (successRatio < thresholds.minSuccessRatio) {
+    failures.push(
+      `success ratio ${round(successRatio * 100)}% < required ${round(
+        thresholds.minSuccessRatio * 100
+      )}%`
+    );
+  }
+  if (latencyMs.p99 > thresholds.maxP99LatencyMs) {
+    failures.push(`p99 latency ${latencyMs.p99}ms > max ${thresholds.maxP99LatencyMs}ms`);
+  }
+
+  return {
+    name,
+    durationSec: num(result.duration),
+    connections: num(result.connections),
+    totalRequests: num(result.requests?.total),
+    requestsPerSec: round(num(result.requests?.mean)),
+    latencyMs,
+    responses: {
+      '2xx': num(result['2xx']),
+      non2xx: num(result.non2xx),
+      errors: num(result.errors),
+      timeouts: num(result.timeouts),
+    },
+    successRatio: round(successRatio, 4),
+    passed: failures.length === 0,
+    failures,
+  };
+}
+
+/** Render a single summary as an aligned, human-readable log block. */
+export function formatSummary(summary: BenchmarkSummary): string {
+  const status = summary.passed ? 'PASS' : 'FAIL';
+  const lines = [
+    `[${status}] ${summary.name}`,
+    `  duration:     ${summary.durationSec}s @ ${summary.connections} connections`,
+    `  requests:     ${summary.totalRequests} total (${summary.requestsPerSec}/s)`,
+    `  latency (ms):  mean ${summary.latencyMs.mean} | p50 ${summary.latencyMs.p50} | p90 ${summary.latencyMs.p90} | p99 ${summary.latencyMs.p99} | max ${summary.latencyMs.max}`,
+    `  responses:    2xx ${summary.responses['2xx']} | non2xx ${summary.responses.non2xx} | errors ${summary.responses.errors} | timeouts ${summary.responses.timeouts}`,
+    `  success:      ${round(summary.successRatio * 100)}%`,
+  ];
+  if (!summary.passed) {
+    lines.push(`  threshold:    ${summary.failures.join('; ')}`);
+  }
+  return lines.join('\n');
+}
+
+/** True only when `summaries` is non-empty and every scenario passed (an empty run fails). */
+export function allPassed(summaries: BenchmarkSummary[]): boolean {
+  return summaries.length > 0 && summaries.every((s) => s.passed);
+}
diff --git a/backend/benchmarks/results/.gitignore b/backend/benchmarks/results/.gitignore
new file mode 100644
index 00000000..3bf109a5
--- /dev/null
+++ b/backend/benchmarks/results/.gitignore
@@ -0,0 +1,3 @@
+# Generated benchmark logs — keep the directory, ignore the output files.
+*
+!.gitignore
diff --git a/backend/benchmarks/runBenchmarks.ts b/backend/benchmarks/runBenchmarks.ts
new file mode 100644
index 00000000..28b98dd6
--- /dev/null
+++ b/backend/benchmarks/runBenchmarks.ts
@@ -0,0 +1,90 @@
+/**
+ * Automated performance benchmark runner.
+ *
+ * Floods the playground compiler endpoints with autocannon according to the
+ * scenarios in `config.ts`, then reports latency and success ratios using the
+ * pure stats module and writes statistical logs (JSON + text) to disk.
+ *
+ * Usage:
+ *   npm run bench                 # run all scenarios against BENCH_BASE_URL
+ *   BENCH_CONNECTIONS=100 npm run bench
+ *
+ * Exit code is non-zero if any scenario misses its thresholds, so this can gate
+ * CI. The target API must already be running.
+ */
+
+import autocannon from 'autocannon';
+import { mkdirSync, writeFileSync } from 'node:fs';
+import { dirname, join } from 'node:path';
+import { fileURLToPath } from 'node:url';
+import { buildScenarios, resolveBaseUrl, resolveHeaders, type BenchmarkScenario } from './config.js';
+import { allPassed, formatSummary, summarize, type BenchmarkSummary } from './lib/stats.js';
+
+const RESULTS_DIR = join(dirname(fileURLToPath(import.meta.url)), 'results');
+
+/** Run a single scenario with autocannon and return its normalised summary. */
+async function runScenario(
+  baseUrl: string,
+  headers: Record<string, string>,
+  scenario: BenchmarkScenario
+): Promise<BenchmarkSummary> {
+  console.log(`\n▶ ${scenario.name} — ${scenario.description}`);
+
+  const result = await autocannon({
+    url: `${baseUrl}${scenario.path}`,
+    method: scenario.method,
+    connections: scenario.connections,
+    duration: scenario.duration,
+    pipelining: scenario.pipelining ?? 1,
+    headers,
+    body: scenario.body !== undefined ? JSON.stringify(scenario.body) : undefined,
+  });
+
+  return summarize(scenario.name, result, scenario.thresholds);
+}
+
+/** Run all scenarios (a thrown run is recorded as a failure), persist logs, set the exit code. */
+async function main(): Promise<void> {
+  const baseUrl = resolveBaseUrl();
+  const headers = resolveHeaders();
+  const scenarios = buildScenarios();
+
+  console.log(`Performance Benchmark Suite → ${baseUrl}`);
+  console.log(`Scenarios: ${scenarios.map((s) => s.name).join(', ')}`);
+
+  const summaries: BenchmarkSummary[] = [];
+  for (const scenario of scenarios) {
+    try {
+      const summary = await runScenario(baseUrl, headers, scenario);
+      summaries.push(summary);
+      console.log(formatSummary(summary));
+    } catch (error) {
+      const reason = error instanceof Error ? error.message : String(error);
+      console.error(`✖ ${scenario.name} failed to run:`, reason);
+      summaries.push(summarize(scenario.name, { errors: 1 }, scenario.thresholds));
+    }
+  }
+
+  // Persist statistical logs.
+  mkdirSync(RESULTS_DIR, { recursive: true });
+  const stamp = new Date().toISOString().replace(/[:.]/g, '-');
+  const report = { baseUrl, generatedAt: new Date().toISOString(), summaries };
+  const jsonPath = join(RESULTS_DIR, `benchmark-${stamp}.json`);
+  const textPath = join(RESULTS_DIR, `benchmark-${stamp}.log`);
+  writeFileSync(jsonPath, JSON.stringify(report, null, 2));
+  writeFileSync(textPath, summaries.map(formatSummary).join('\n\n'));
+
+  console.log(`\nStatistical logs written:\n  ${jsonPath}\n  ${textPath}`);
+
+  if (!allPassed(summaries)) {
+    console.error('\nOne or more scenarios missed their thresholds.');
+    process.exitCode = 1;
+  } else {
+    console.log('\nAll scenarios passed their thresholds.');
+  }
+}
+
+main().catch((error) => {
+  console.error('Benchmark run crashed:', error);
+  process.exitCode = 1;
+});
diff --git a/backend/package.json b/backend/package.json
index a3695fdd..88854eb1 100644
--- a/backend/package.json
+++ b/backend/package.json
@@ -10,6 +10,7 @@
     "dev": "tsx watch src/index.ts",
     "test": "jest --runInBand --forceExit",
     "test:coverage": "jest --coverage --runInBand --forceExit",
+    "bench": "tsx benchmarks/runBenchmarks.ts",
     "collaboration": "tsx src/collaborationServer.ts"
   },
   "keywords": [],
@@ -61,10 +62,12 @@
     "@types/node": "^25.5.0",
     "@types/qrcode": "^1.5.5",
     "@types/sanitize-html": "^2.16.1",
+    "@types/autocannon": "^7.12.7",
     "@types/supertest": "^7.2.0",
     "@types/swagger-jsdoc": "^6.0.4",
     "@types/swagger-ui-express": "^4.1.8",
     "@types/ws": "^8.18.1",
+    "autocannon": "^8.0.0",
     "ioredis-mock": "^8.13.1",
     "jest": "^30.4.2",
     "supertest": "^7.2.2",
diff --git a/backend/tests/benchmark-stats.test.ts b/backend/tests/benchmark-stats.test.ts
new file mode 100644
index 00000000..b4c2e632
--- /dev/null
+++ b/backend/tests/benchmark-stats.test.ts
@@ -0,0 +1,95 @@
+import {
+  allPassed,
+  computeSuccessRatio,
+  formatSummary,
+  summarize,
+  totalAttempts,
+  type AutocannonResultLike,
+} from '../benchmarks/lib/stats.js';
+import type { BenchmarkThresholds } from '../benchmarks/config.js';
+
+const thresholds: BenchmarkThresholds = { minSuccessRatio: 0.97, maxP99LatencyMs: 4000 };
+
+const healthyResult: AutocannonResultLike = {
+  duration: 20,
+  connections: 50,
+  latency: { mean: 120, p50: 100, p90: 200, p99: 900, max: 1500 },
+  requests: { total: 10000, mean: 500 },
+  '2xx': 9990,
+  non2xx: 10,
+  errors: 0,
+  timeouts: 0,
+};
+
+describe('benchmark stats', () => {
+  describe('totalAttempts / computeSuccessRatio', () => {
+    it('counts 2xx, non2xx, errors and timeouts', () => {
+      expect(totalAttempts(healthyResult)).toBe(10000);
+      expect(computeSuccessRatio(healthyResult)).toBeCloseTo(0.999, 3);
+    });
+
+    it('returns 0 (no NaN) when there are no attempts', () => {
+      expect(totalAttempts({})).toBe(0);
+      expect(computeSuccessRatio({})).toBe(0);
+    });
+
+    it('treats transport errors and timeouts as failures', () => {
+      const result: AutocannonResultLike = { '2xx': 50, errors: 25, timeouts: 25 };
+      expect(computeSuccessRatio(result)).toBe(0.5);
+    });
+  });
+
+  describe('summarize', () => {
+    it('marks a healthy run as passed', () => {
+      const summary = summarize('compile-peak', healthyResult, thresholds);
+      expect(summary.passed).toBe(true);
+      expect(summary.failures).toEqual([]);
+      expect(summary.totalRequests).toBe(10000);
+      expect(summary.requestsPerSec).toBe(500);
+      expect(summary.latencyMs.p99).toBe(900);
+    });
+
+    it('fails when success ratio is below threshold', () => {
+      const summary = summarize(
+        'compile-peak',
+        { '2xx': 80, non2xx: 20, latency: { p99: 100 } },
+        thresholds
+      );
+      expect(summary.passed).toBe(false);
+      expect(summary.failures.join(' ')).toMatch(/success ratio/);
+    });
+
+    it('fails when p99 latency exceeds the threshold', () => {
+      const summary = summarize(
+        'compile-peak',
+        { '2xx': 100, latency: { p99: 9000 } },
+        thresholds
+      );
+      expect(summary.passed).toBe(false);
+      expect(summary.failures.join(' ')).toMatch(/p99 latency/);
+    });
+
+    it('handles a missing latency object without throwing', () => {
+      const summary = summarize('compile-peak', { '2xx': 100 }, thresholds);
+      expect(summary.latencyMs).toEqual({ mean: 0, p50: 0, p90: 0, p99: 0, max: 0 });
+      expect(summary.passed).toBe(true);
+    });
+  });
+
+  describe('formatSummary / allPassed', () => {
+    it('renders a readable PASS block', () => {
+      const text = formatSummary(summarize('compile-warmup', healthyResult, thresholds));
+      expect(text).toContain('[PASS] compile-warmup');
+      expect(text).toContain('latency (ms)');
+      expect(text).toContain('success:');
+    });
+
+    it('aggregates pass/fail across scenarios', () => {
+      const pass = summarize('a', healthyResult, thresholds);
+      const fail = summarize('b', { '2xx': 1, errors: 99, latency: { p99: 1 } }, thresholds);
+      expect(allPassed([pass])).toBe(true);
+      expect(allPassed([pass, fail])).toBe(false);
+      expect(allPassed([])).toBe(false);
+    });
+  });
+});