StellarDevHub · Glittersup · Jun 26, 2026
diff --git a/backend/benchmarks/README.md b/backend/benchmarks/README.md
@@ -0,0 +1,67 @@
+# Automated Performance Benchmark Suite
+
+Load-tests the playground **compiler endpoints** under simulated traffic peaks
+and reports latency + success ratios, using [autocannon](https://github.com/mcollina/autocannon).
+
+## What it does
+
+- Floods `POST /api/v1/contracts/compile` with concurrent connections across
+  several scenarios; other endpoints can be added as scenarios in `config.ts`.
+- Measures latency percentiles (mean/p50/p90/p99/max), throughput, and the
+  ratio of `2xx` responses to total attempts.
+- Checks each scenario against pass/fail thresholds and writes statistical logs
+  (JSON + text) to `benchmarks/results/`.
+- Exits non-zero if any scenario misses its thresholds (CI-gating friendly).
+
+## Layout
+
+| File | Responsibility |
+|------|----------------|
+| `config.ts` | Scenario definitions (endpoint, payload, connections, duration, thresholds) + env overrides. |
+| `lib/stats.ts` | Pure stats/reporting: `summarize`, `formatSummary`, success-ratio + threshold logic. Unit tested. |
+| `runBenchmarks.ts` | Thin runner: drives autocannon per scenario and persists logs. |
+| `results/` | Generated logs (git-ignored). |
+
+The number-crunching lives in `lib/stats.ts` with **no autocannon/network
+dependency**, so it is unit-tested deterministically in
+`tests/benchmark-stats.test.ts` without running a load test.
+
+## Running
+
+```bash
+cd backend
+npm install                 # installs autocannon (added as a devDependency)
+npm run start &             # start the API under test (or: npm run dev)
+npm run bench               # run all scenarios against http://localhost:8080/api/v1
+```
+
+### Environment overrides
+
+| Variable | Default | Purpose |
+|----------|---------|---------|
+| `BENCH_BASE_URL` | `http://localhost:8080/api/v1` | API base URL. |
+| `BENCH_WORKSPACE_ID` | `default` | Value for the required `x-workspace-id` header. |
+| `BENCH_CONNECTIONS` | `50` (peak) | Concurrency for the `compile-peak` scenario only; non-numeric or non-positive values are ignored. |
+| `BENCH_DURATION` | per-scenario | Override duration (seconds) for all scenarios. |
+
+```bash
+BENCH_CONNECTIONS=150 BENCH_DURATION=10 npm run bench
+```
+
+## Sample output
+
+```
+[PASS] compile-peak
+  duration:     20s @ 50 connections
+  requests:     10000 total (500/s)
+  latency (ms):  mean 120 | p50 100 | p90 200 | p99 900 | max 1500
+  responses:    2xx 9990 | non2xx 10 | errors 0 | timeouts 0
+  success:      99.9%
+```
+
+## Tests
+
+```bash
+cd backend
+npm test -- benchmark-stats
+```
diff --git a/backend/benchmarks/config.ts b/backend/benchmarks/config.ts
@@ -0,0 +1,129 @@
+/**
+ * Benchmark scenario configuration for the playground compiler endpoints.
+ *
+ * Each scenario is a self-contained load test: which endpoint to flood, what
+ * payload to send, how many concurrent connections to open, and for how long.
+ * The runner (`runBenchmarks.ts`) executes these with autocannon and reports
+ * latency + success ratios via the pure stats module (`lib/stats.ts`).
+ *
+ * Tune scenarios with environment variables (so CI and local runs differ
+ * without code changes):
+ *   BENCH_BASE_URL     base API url      (default http://localhost:8080/api/v1)
+ *   BENCH_WORKSPACE_ID x-workspace-id    (default "default")
+ *   BENCH_CONNECTIONS  override concurrency for the peak scenario
+ *   BENCH_DURATION     override duration (seconds) for every scenario
+ */
+
+/** Pass/fail limits a scenario's measured results are checked against (see `summarize` in `lib/stats.ts`). */
+export interface BenchmarkThresholds {
+  /** Lowest acceptable success ratio as a fraction in 0–1 (not a percentage); results below it fail. */
+  minSuccessRatio: number;
+  /** Highest acceptable p99 latency in milliseconds; results above it fail. */
+  maxP99LatencyMs: number;
+}
+
+/** One self-contained load-test scenario: target endpoint, payload, load shape and pass/fail thresholds. */
+export interface BenchmarkScenario {
+  name: string; // short identifier, e.g. 'compile-peak'
+  description: string; // one-line human-readable summary
+  /** Path appended to the base url, e.g. "/contracts/compile". */
+  path: string;
+  method: 'GET' | 'POST'; // HTTP method of the benchmarked request
+  /** Number of concurrent open connections (the load level). */
+  connections: number;
+  /** Test duration in seconds. */
+  duration: number;
+  /** Requests pipelined per connection; optional. */
+  pipelining?: number;
+  /** JSON body sent with each request (stringified by the runner); optional. */
+  body?: unknown;
+  thresholds: BenchmarkThresholds; // limits the run must meet to pass
+}
+
+/** Resolve the base URL from `BENCH_BASE_URL`; unset, empty or blank values fall back to the local default. */
+export function resolveBaseUrl(env: NodeJS.ProcessEnv = process.env): string {
+  return env.BENCH_BASE_URL?.trim() || 'http://localhost:8080/api/v1';
+}
+
+/** Resolve the `x-workspace-id` header value; unset, empty or blank values fall back to "default". */
+export function resolveWorkspaceId(env: NodeJS.ProcessEnv = process.env): string {
+  return env.BENCH_WORKSPACE_ID?.trim() || 'default';
+}
+
+/** Build the header set attached to every benchmarked request (JSON content type + workspace id). */
+export function resolveHeaders(env: NodeJS.ProcessEnv = process.env): Record<string, string> {
+  const workspaceId = resolveWorkspaceId(env);
+  const headers: Record<string, string> = { 'content-type': 'application/json' };
+  headers['x-workspace-id'] = workspaceId;
+  return headers;
+}
+
+// A minimal Soroban contract (>= 32 chars) intended to satisfy contractCompileSchema so the
+// compiler does real work under load. NOTE(review): the schema is defined elsewhere — confirm.
+const SAMPLE_SOURCE = `#![no_std]
+use soroban_sdk::{contract, contractimpl, Env, Symbol, symbol_short};
+
+#[contract]
+pub struct BenchContract;
+
+#[contractimpl]
+impl BenchContract {
+    pub fn ping(_env: Env) -> Symbol {
+        symbol_short!("pong")
+    }
+}`;
+
+const COMPILE_BODY = { // request body shared by every compile scenario
+  sourceCode: SAMPLE_SOURCE,
+  compilerVersion: '0.8.10', // NOTE(review): confirm this version string is what the endpoint expects
+  optimization: true,
+  target: 'soroban',
+  entryPoint: 'ping', // matches the `ping` function in SAMPLE_SOURCE
+};
+
+/** Return `envVar` parsed as a number when it is finite and above zero; otherwise keep `value`. */
+function override(value: number, envVar: string | undefined): number {
+  const candidate = envVar === undefined || envVar === '' ? Number.NaN : Number(envVar);
+  return !Number.isFinite(candidate) || candidate <= 0 ? value : candidate;
+}
+
+/** Assemble the warm-up, peak and sustained compile scenarios, applying environment overrides. */
+export function buildScenarios(env: NodeJS.ProcessEnv = process.env): BenchmarkScenario[] {
+  // BENCH_DURATION applies to every scenario; BENCH_CONNECTIONS only to the peak one.
+  const durationOverride = env.BENCH_DURATION;
+  // All three scenarios currently target the same endpoint.
+  const endpoint = { path: '/contracts/compile', method: 'POST' } as const;
+
+  const warmup: BenchmarkScenario = {
+    name: 'compile-warmup',
+    description: 'Light warm-up load to prime the compiler endpoint.',
+    ...endpoint,
+    connections: 5,
+    duration: override(5, durationOverride),
+    body: COMPILE_BODY,
+    thresholds: { minSuccessRatio: 0.99, maxP99LatencyMs: 1500 },
+  };
+  const peak: BenchmarkScenario = {
+    name: 'compile-peak',
+    description: 'Simulated load peak flooding the compiler endpoint.',
+    ...endpoint,
+    connections: override(50, env.BENCH_CONNECTIONS),
+    duration: override(20, durationOverride),
+    pipelining: 1,
+    body: COMPILE_BODY,
+    thresholds: { minSuccessRatio: 0.97, maxP99LatencyMs: 4000 },
+  };
+  const sustained: BenchmarkScenario = {
+    name: 'compile-sustained',
+    description: 'Sustained moderate load to observe steady-state latency.',
+    ...endpoint,
+    connections: 20,
+    duration: override(30, durationOverride),
+    body: COMPILE_BODY,
+    thresholds: { minSuccessRatio: 0.98, maxP99LatencyMs: 3000 },
+  };
+
+  return [warmup, peak, sustained];
+}
+
+export const scenarios = buildScenarios(); // built once at module load from process.env
diff --git a/backend/benchmarks/lib/stats.ts b/backend/benchmarks/lib/stats.ts
@@ -0,0 +1,141 @@
+/**
+ * Pure statistics & reporting helpers for the performance benchmark suite.
+ *
+ * These functions take an autocannon-style result object and turn it into a
+ * normalised summary, a human-readable report, and a pass/fail verdict against
+ * thresholds. They have **no I/O and no autocannon dependency**, so they can be
+ * unit-tested deterministically without running a load test or a live server.
+ */
+
+import type { BenchmarkThresholds } from '../config.js';
+
+/**
+ * The subset of an autocannon result this suite consumes (latency percentiles and response
+ * counts). Every field is optional; helpers in this module read absent/non-finite numbers as 0.
+ * @see https://github.com/mcollina/autocannon#result
+ */
+export interface AutocannonResultLike {
+  duration?: number;
+  connections?: number;
+  latency?: { mean?: number; p50?: number; p90?: number; p99?: number; max?: number };
+  requests?: { total?: number; mean?: number }; // `mean` is surfaced as requests/sec in the summary
+  '1xx'?: number;
+  '2xx'?: number; // the only response class counted as success
+  '3xx'?: number;
+  '4xx'?: number;
+  '5xx'?: number;
+  non2xx?: number;
+  errors?: number; // NOTE(review): autocannon documents this as including timeouts — confirm
+  timeouts?: number;
+}
+
+/** A normalised, report-ready summary of one scenario run, as produced by `summarize`. */
+export interface BenchmarkSummary {
+  name: string; // label passed to `summarize`, printed in the report header
+  durationSec: number; // taken from the result's `duration`
+  connections: number;
+  totalRequests: number; // taken from `requests.total`
+  requestsPerSec: number; // taken from `requests.mean`, rounded to 2 decimals
+  latencyMs: { mean: number; p50: number; p90: number; p99: number; max: number };
+  responses: { '2xx': number; non2xx: number; errors: number; timeouts: number };
+  /** Fraction of attempts that returned 2xx (0–1), rounded to 4 decimal places. */
+  successRatio: number;
+  passed: boolean; // true when `failures` is empty
+  /** Human-readable reasons when `passed` is false; empty otherwise. */
+  failures: string[];
+}
+
+function num(value: number | undefined): number { // absent, NaN and ±Infinity all become 0
+  return value === undefined || !Number.isFinite(value) ? 0 : value;
+}
+
+/** Round `value` to `dp` decimal places (two when `dp` is omitted). */
+function round(value: number, dp = 2): number {
+  const scale = Math.pow(10, dp);
+  return Math.round(scale * value) / scale;
+}
+
+/**
+ * Total request attempts = 2xx + non-2xx + failed requests. Autocannon's `errors` already
+ * includes timeouts, so `timeouts` is not added a second time; `Math.max` only covers results
+ * that report `timeouts` without a matching `errors` count. Denominator of the success ratio.
+ */
+export function totalAttempts(result: AutocannonResultLike): number {
+  return num(result['2xx']) + num(result.non2xx) + Math.max(num(result.errors), num(result.timeouts));
+}
+
+/** Share of attempts answered with 2xx, as a fraction in 0–1; defined as 0 when nothing was attempted. */
+export function computeSuccessRatio(result: AutocannonResultLike): number {
+  const attempts = totalAttempts(result);
+  const successes = num(result['2xx']);
+  return attempts === 0 ? 0 : successes / attempts;
+}
+
+/** Condense an autocannon-style result into a {@link BenchmarkSummary} with a pass/fail verdict. */
+export function summarize(
+  name: string,
+  result: AutocannonResultLike,
+  thresholds: BenchmarkThresholds
+): BenchmarkSummary {
+  const rawLatency = result.latency ?? {};
+  const ratio = computeSuccessRatio(result);
+  const p99 = num(rawLatency.p99);
+
+  // One human-readable reason per violated threshold; an empty list means the run passed.
+  const failures: string[] = [];
+  if (ratio < thresholds.minSuccessRatio) {
+    const actualPct = round(ratio * 100);
+    const requiredPct = round(thresholds.minSuccessRatio * 100);
+    failures.push(`success ratio ${actualPct}% < required ${requiredPct}%`);
+  }
+  if (p99 > thresholds.maxP99LatencyMs) {
+    failures.push(`p99 latency ${p99}ms > max ${thresholds.maxP99LatencyMs}ms`);
+  }
+  const latencyMs = {
+    mean: num(rawLatency.mean),
+    p50: num(rawLatency.p50),
+    p90: num(rawLatency.p90),
+    p99,
+    max: num(rawLatency.max),
+  };
+  const responses = {
+    '2xx': num(result['2xx']),
+    non2xx: num(result.non2xx),
+    errors: num(result.errors),
+    timeouts: num(result.timeouts),
+  };
+  return {
+    name,
+    durationSec: num(result.duration),
+    connections: num(result.connections),
+    totalRequests: num(result.requests?.total),
+    requestsPerSec: round(num(result.requests?.mean)),
+    latencyMs,
+    responses,
+    successRatio: round(ratio, 4),
+    passed: failures.length === 0,
+    failures,
+  };
+}
+
+/** Render one summary as a multi-line, human-readable log block; failure reasons are appended last. */
+export function formatSummary(summary: BenchmarkSummary): string {
+  const { latencyMs: lat, responses: res } = summary;
+  const verdict = summary.passed ? 'PASS' : 'FAIL';
+  const thresholdLine = summary.passed ? [] : [`  threshold:    ${summary.failures.join('; ')}`];
+  const report = [
+    `[${verdict}] ${summary.name}`,
+    `  duration:     ${summary.durationSec}s @ ${summary.connections} connections`,
+    `  requests:     ${summary.totalRequests} total (${summary.requestsPerSec}/s)`,
+    `  latency (ms):  mean ${lat.mean} | p50 ${lat.p50} | p90 ${lat.p90} | p99 ${lat.p99} | max ${lat.max}`,
+    `  responses:    2xx ${res['2xx']} | non2xx ${res.non2xx} | errors ${res.errors} | timeouts ${res.timeouts}`,
+    `  success:      ${round(summary.successRatio * 100)}%`,
+    ...thresholdLine,
+  ];
+  return report.join('\n');
+}
+
+/** Overall verdict: false for an empty list, otherwise true only when no scenario failed. */
+export function allPassed(summaries: BenchmarkSummary[]): boolean {
+  return summaries.length !== 0 && !summaries.some((summary) => !summary.passed);
+}
diff --git a/backend/benchmarks/results/.gitignore b/backend/benchmarks/results/.gitignore
@@ -0,0 +1,3 @@
+# Generated benchmark logs — keep the directory, ignore the output files.
+*
+!.gitignore