From d636756c2c03260d19bd023dcc79f142caf4ad93 Mon Sep 17 00:00:00 2001
From: intech <decaf-comfy-groin@duck.com>
Date: Sun, 19 Apr 2026 04:31:00 +0400
Subject: [PATCH 1/2] feat(performance-test-server): add OTel OTLP export
 overhead scenario

Adds a 6th server configuration (port 8085) with full interceptor chain and
a real OTLP/gRPC exporter pointed at a local OTel collector, plus a k6
scenario that measures p50/p95/p99 latency and throughput delta between the
baseline (port 8081) and otel-export configurations under 100 VUs sustained
load.

Motivation: the existing interceptor-overhead scenario runs the OTel
interceptor with the provider uninitialized (no-op spans/metrics), so it
cannot answer "what is the CPU cost of actually shipping spans in
production". This closes that gap and gives us an end-to-end harness for
validating future @opentelemetry/otlp-transformer bumps (R1.2).

The OTel scenario is opt-in:
  - port 8085 is bound only when OTEL_EXPORT_ENABLED=1
  - the otel-collector service lives under the "otel-export" docker-compose
    profile, so default compose runs are unaffected
  - all other servers/ports and existing scenarios work unchanged

Files:
  - src/index.ts: +6th server, conditional on OTEL_EXPORT_ENABLED, eager
    initProvider + graceful shutdownProvider
  - k6/otel-export-overhead.js: new scenario (100 VUs, ~5 min, shuffled
    baseline vs otel-export calls, JSON metrics output via K6_OUT)
  - docker-compose.yml: otel-collector service (profile: otel-export),
    k6-otel-export runner, OTEL_* env var surface with production-ish
    BatchSpanProcessor defaults
  - otel-collector-config.yaml: OTLP gRPC+HTTP receivers -> debug exporter
    (the goal is export-side CPU measurement, not backend write throughput)
  - Dockerfile: EXPOSE 8085
  - README.md: scenario docs, env-var reference, expected overhead table,
    links to upstream issues (#6221, #6225, #6390, #6570)

Smoke test: 10 requests to :8085 verified spans arrive at collector with
correct rpc.system / rpc.service / rpc.method attributes. Quick 20-sample
comparison on the same host: baseline p95 ~1.87ms, otel-export p95 ~2.25ms
(+20%), in line with the expected-overhead range documented in the README.

pnpm-lock.yaml is updated to resolve pre-existing drift between manifest
(1.0.0-rc.10) and lockfile (1.0.0-rc.7) introduced by the earlier rc.10
bump; install now succeeds with --frozen-lockfile.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 performance-test-server/Dockerfile            |   5 +-
 performance-test-server/README.md             |  66 +++++-
 performance-test-server/docker-compose.yml    |  59 +++++
 .../k6/otel-export-overhead.js                | 221 ++++++++++++++++++
 performance-test-server/k6/results/.gitignore |   2 +
 .../otel-collector-config.yaml                |  55 +++++
 performance-test-server/pnpm-lock.yaml        |  38 +--
 performance-test-server/src/index.ts          | 110 +++++++--
 8 files changed, 513 insertions(+), 43 deletions(-)
 create mode 100644 performance-test-server/k6/otel-export-overhead.js
 create mode 100644 performance-test-server/k6/results/.gitignore
 create mode 100644 performance-test-server/otel-collector-config.yaml

diff --git a/performance-test-server/Dockerfile b/performance-test-server/Dockerfile
index 4d2dd4a..70bb7cc 100644
--- a/performance-test-server/Dockerfile
+++ b/performance-test-server/Dockerfile
@@ -71,7 +71,8 @@ ENV NODE_ENV=production
 ENV TLS_DIR=/app/certs
 
 # Ports: 8080 (full chain), 8081 (baseline), 8082 (validation),
-#        8083 (logger), 8084 (otel)
-EXPOSE 8080 8081 8082 8083 8084
+#        8083 (logger), 8084 (otel no-op), 8085 (otel real OTLP export —
+#        only bound when OTEL_EXPORT_ENABLED=1)
+EXPOSE 8080 8081 8082 8083 8084 8085
 
 CMD ["node", "src/index.ts"]
diff --git a/performance-test-server/README.md b/performance-test-server/README.md
index 764bbfb..0d99753 100644
--- a/performance-test-server/README.md
+++ b/performance-test-server/README.md
@@ -4,17 +4,18 @@ Dedicated server for k6 performance benchmarking with configurable interceptor c
 
 ## Purpose
 
-This server runs **5 parallel instances** on different ports, each with a different interceptor configuration:
+This server runs **5 parallel instances** on different ports, each with a different interceptor configuration, plus an **optional 6th instance** for measuring OTLP export overhead end-to-end:
 
 | Port | Configuration | Purpose |
 |------|---------------|---------|
 | 8081 | **Baseline** (no interceptors) | Measure baseline latency without any overhead |
 | 8082 | **Validation only** | Measure validation interceptor overhead |
 | 8083 | **Logger only** | Measure logger interceptor overhead |
-| 8084 | **Tracing only** | Measure tracing interceptor overhead |
-| 8080 | **Full chain** (all interceptors) | Measure total overhead with all interceptors |
+| 8084 | **Tracing only** (no-op exporter) | Measure tracing interceptor overhead |
+| 8080 | **Full chain** (all interceptors, no-op exporter) | Measure total overhead with all interceptors |
+| 8085 | **OTel export** — full chain + real OTLP exporter (opt-in via `OTEL_EXPORT_ENABLED=1`) | Measure end-to-end cost of the stock `@connectum/otel` export path (BatchSpanProcessor + otlp-transformer + OTLP/gRPC) |
 
-This allows k6 benchmarks to accurately measure the overhead introduced by each interceptor.
+This allows k6 benchmarks to accurately measure the overhead introduced by each interceptor, and — with the OTel export scenario — the CPU cost of actually shipping spans over the wire.
 
 ## Requirements
 
@@ -94,10 +95,39 @@ Stress-tests the full-chain configuration with 100 concurrent VUs for 7 minutes:
 docker compose --profile load up k6-basic-load --build --abort-on-container-exit
 ```
 
+### OTel OTLP Export Overhead
+
+Measures the p50/p95/p99 latency delta and throughput delta between the baseline (port 8081) and the full-chain-with-real-OTLP-exporter configuration (port 8085). Runs for ~5 minutes at 100 VUs:
+
+```bash
+OTEL_EXPORT_ENABLED=1 docker compose --profile otel-export up \
+  --build --abort-on-container-exit
+```
+
+What this measures that the `k6-interceptor-overhead` scenario does *not*:
+
+- Real `BatchSpanProcessor` + `@opentelemetry/otlp-transformer` serialization cost per exported span
+- OTLP/gRPC wire transport cost (`@grpc/grpc-js`)
+- End-to-end CPU pressure of the full OTel export pipeline under sustained load
+
+The collector runs locally in Docker and drops all telemetry via a `debug` exporter — the goal is export-side CPU profiling, not backend write throughput. See `otel-collector-config.yaml`.
+
+k6 streams raw metric samples as JSON lines to `k6/results/otel-export-overhead.json` (gitignored, configured via `K6_OUT`) for CI / bench-tracking tooling. This is per-sample output, not the end-of-test summary.
+
+**Expected overhead range** (informational — actual numbers depend on the installed `@opentelemetry/otlp-transformer` version):
+
+| Metric | Baseline (8081) | OTel export (8085) | Overhead | Relative |
+|--------|-----------------|--------------------|----------|----------|
+| p50 latency | ~1–3 ms | ~1.5–4 ms | +0.5–1 ms | 1.2×–1.5× |
+| p95 latency | ~2–5 ms | ~3–8 ms | +1–3 ms | 1.3×–2× |
+| p99 latency | ~5–10 ms | ~8–20 ms | +3–10 ms | 1.5×–2.5× |
+
+A **relative overhead >1.5×** on p95 — or any sudden jump from a previous run — is a signal to investigate the `@opentelemetry/otlp-transformer` version. See Connectum recommendations R1.2 and upstream issues [#6221](https://github.com/open-telemetry/opentelemetry-js/issues/6221), PR [#6225](https://github.com/open-telemetry/opentelemetry-js/pull/6225), PR [#6390](https://github.com/open-telemetry/opentelemetry-js/pull/6390), issue [#6570](https://github.com/open-telemetry/opentelemetry-js/issues/6570).
+
 ### Cleanup
 
 ```bash
-docker compose --profile load down --rmi local -v
+docker compose --profile load --profile otel-export down --rmi local -v
 ```
 
 ### Environment Variables
@@ -106,14 +136,30 @@ k6 scripts accept the following environment variables (set via `docker-compose.y
 
 | Variable | Default | Used by |
 |----------|---------|---------|
-| `PROTOCOL` | `https` | interceptor-overhead |
-| `BASE_HOST` | `server` | interceptor-overhead |
+| `PROTOCOL` | `https` | interceptor-overhead, otel-export-overhead |
+| `BASE_HOST` | `server` | interceptor-overhead, otel-export-overhead |
 | `BASE_URL` | `https://server:8080` | basic-load |
-| `BASELINE_PORT` | `8081` | interceptor-overhead |
+| `BASELINE_PORT` | `8081` | interceptor-overhead, otel-export-overhead |
 | `VALIDATION_PORT` | `8082` | interceptor-overhead |
 | `LOGGER_PORT` | `8083` | interceptor-overhead |
 | `TRACING_PORT` | `8084` | interceptor-overhead |
 | `FULLCHAIN_PORT` | `8080` | interceptor-overhead |
+| `OTEL_EXPORT_PORT` | `8085` | otel-export-overhead |
+
+The server-side OTel export scenario (port 8085) is controlled via standard `OTEL_*` env vars. Defaults are set in `docker-compose.yml`; override by exporting before `docker compose up`:
+
+| Variable | Default | Meaning |
+|----------|---------|---------|
+| `OTEL_EXPORT_ENABLED` | `0` | Set to `1` to bind port 8085 and initialize the OTel provider |
+| `OTEL_SERVICE_NAME` | `performance-test-server` | Resource `service.name` attribute |
+| `OTEL_TRACES_EXPORTER` | `otlp/grpc` | `console`, `otlp/http`, `otlp/grpc`, or `none` |
+| `OTEL_METRICS_EXPORTER` | `otlp/grpc` | same values as above |
+| `OTEL_LOGS_EXPORTER` | `none` | same values as above |
+| `OTEL_EXPORTER_OTLP_ENDPOINT` | `http://otel-collector:4317` | Collector endpoint |
+| `OTEL_BSP_MAX_EXPORT_BATCH_SIZE` | `512` | BatchSpanProcessor batch size |
+| `OTEL_BSP_MAX_QUEUE_SIZE` | `2048` | BatchSpanProcessor queue size |
+| `OTEL_BSP_SCHEDULE_DELAY` | `1000` | BatchSpanProcessor flush interval (ms) |
+| `OTEL_BSP_EXPORT_TIMEOUT` | `10000` | Single export attempt timeout (ms) |
 
 ## Testing
 
@@ -136,6 +182,9 @@ curl http://localhost:8084/grpc.health.v1.Health/Check
 
 # Full chain
 curl http://localhost:8080/grpc.health.v1.Health/Check
+
+# OTel export (only when OTEL_EXPORT_ENABLED=1)
+curl http://localhost:8085/grpc.health.v1.Health/Check
 ```
 
 ### Manual Test
@@ -261,6 +310,7 @@ Benchmark scripts are located in the `k6/` directory:
 
 - `k6/interceptor-overhead.js` - Uses **all ports** to compare interceptor overhead
 - `k6/basic-load.js` - Uses port 8080 (full chain) with ramping VUs
+- `k6/otel-export-overhead.js` - Uses **port 8081 (baseline) + port 8085 (OTel export)** to measure end-to-end OTLP export cost under 100 VUs sustained load
 
 ## Troubleshooting
 
diff --git a/performance-test-server/docker-compose.yml b/performance-test-server/docker-compose.yml
index 24c7929..20bc297 100644
--- a/performance-test-server/docker-compose.yml
+++ b/performance-test-server/docker-compose.yml
@@ -1,6 +1,22 @@
 services:
   server:
     build: .
+    # OTel export scenario (port 8085) is opt-in. When OTEL_EXPORT_ENABLED=1
+    # the server initializes the @connectum/otel provider with env-driven OTLP
+    # exporter settings and binds the extra port. All other scenarios work
+    # without these env vars; the provider stays uninitialized.
+    environment:
+      OTEL_EXPORT_ENABLED: "${OTEL_EXPORT_ENABLED:-0}"
+      OTEL_SERVICE_NAME: "${OTEL_SERVICE_NAME:-performance-test-server}"
+      OTEL_TRACES_EXPORTER: "${OTEL_TRACES_EXPORTER:-otlp/grpc}"
+      OTEL_METRICS_EXPORTER: "${OTEL_METRICS_EXPORTER:-otlp/grpc}"
+      OTEL_LOGS_EXPORTER: "${OTEL_LOGS_EXPORTER:-none}"
+      OTEL_EXPORTER_OTLP_ENDPOINT: "${OTEL_EXPORTER_OTLP_ENDPOINT:-http://otel-collector:4317}"
+      # BatchSpanProcessor tuning — realistic production-ish defaults
+      OTEL_BSP_MAX_EXPORT_BATCH_SIZE: "${OTEL_BSP_MAX_EXPORT_BATCH_SIZE:-512}"
+      OTEL_BSP_MAX_QUEUE_SIZE: "${OTEL_BSP_MAX_QUEUE_SIZE:-2048}"
+      OTEL_BSP_SCHEDULE_DELAY: "${OTEL_BSP_SCHEDULE_DELAY:-1000}"
+      OTEL_BSP_EXPORT_TIMEOUT: "${OTEL_BSP_EXPORT_TIMEOUT:-10000}"
     healthcheck:
       test: >
         node -e "const h=require('node:http2'),c=h.connect('https://localhost:8080',
@@ -15,6 +31,25 @@ services:
       retries: 10
       start_period: 5s
 
+  # =========================================================================
+  # OpenTelemetry Collector (only started for the otel-export profile)
+  # =========================================================================
+  # Accepts OTLP/gRPC on :4317 and OTLP/HTTP on :4318, then drops everything
+  # via a debug exporter. The goal is to measure export-side CPU cost, not
+  # backend write throughput — see otel-collector-config.yaml for rationale.
+  otel-collector:
+    image: otel/opentelemetry-collector-contrib:0.120.0
+    profiles: ["otel-export"]
+    volumes:
+      - ./otel-collector-config.yaml:/etc/otelcol-contrib/config.yaml:ro
+    command: ["--config=/etc/otelcol-contrib/config.yaml"]
+    healthcheck:
+      test: ["CMD", "/otelcol-contrib", "components"]
+      interval: 5s
+      timeout: 3s
+      retries: 5
+      start_period: 5s
+
   k6-interceptor-overhead:
     image: grafana/k6:latest
     volumes:
@@ -36,3 +71,27 @@ services:
       server: { condition: service_healthy }
     command: run /scripts/basic-load.js
     profiles: ["load"]
+
+  # =========================================================================
+  # OTel OTLP export overhead scenario (profile: otel-export)
+  # =========================================================================
+  # Measures p50/p95/p99 latency delta and throughput delta between the
+  # baseline (port 8081) and full-chain-with-real-OTLP-exporter (port 8085).
+  # Requires OTEL_EXPORT_ENABLED=1 on the server and a running otel-collector.
+  k6-otel-export:
+    image: grafana/k6:latest
+    volumes:
+      - ./k6:/scripts
+      - ./k6/results:/results
+    environment:
+      PROTOCOL: https
+      BASE_HOST: server
+      BASELINE_PORT: "8081"
+      OTEL_EXPORT_PORT: "8085"
+      # k6 streams raw metric samples (JSON lines) here for CI/bench tracking.
+      K6_OUT: "json=/results/otel-export-overhead.json"
+    depends_on:
+      server: { condition: service_healthy }
+      otel-collector: { condition: service_healthy }
+    command: run /scripts/otel-export-overhead.js
+    profiles: ["otel-export"]
diff --git a/performance-test-server/k6/otel-export-overhead.js b/performance-test-server/k6/otel-export-overhead.js
new file mode 100644
index 0000000..1c13f72
--- /dev/null
+++ b/performance-test-server/k6/otel-export-overhead.js
@@ -0,0 +1,221 @@
+/**
+ * OTel OTLP Export Overhead Benchmark
+ *
+ * Purpose: Measure the server-side CPU/latency cost of enabling the stock
+ *          @connectum/otel export path (BatchSpanProcessor + otlp-transformer
+ *          + OTLP/gRPC) under a production-like RPC workload.
+ *
+ * Why this exists (R1.3, connectum-recommendations.md):
+ *   The existing interceptor-overhead scenario runs the OTel interceptor with
+ *   the provider UNSET — it emits no-op spans/metrics. That's correct for
+ *   measuring interceptor *wiring* cost, but it tells us nothing about the
+ *   expensive parts: span serialization and OTLP export. Those only run when
+ *   a real provider + exporter is initialized. This scenario fills that gap.
+ *
+ * Configurations under test:
+ *   - Baseline       (port 8081) — no interceptors, no OTel
+ *   - OTel export    (port 8085) — full chain + real OTLP/gRPC exporter
+ *
+ * Load profile:
+ *   100 VUs, ramp 30s → 4m steady → 30s ramp down = ~5 min total.
+ *   High enough throughput that BatchSpanProcessor exports continuously.
+ *
+ * Output:
+ *   p50/p95/p99 latency per config
+ *   Throughput (requests/sec) per config
+ *   How to compute the export-overhead delta (printed in teardown)
+ *   Raw metric samples (JSON lines) written to /results/otel-export-overhead.json
+ *   when run via docker compose (K6_OUT env var).
+ */
+
+import { check, sleep } from "k6";
+import http from "k6/http";
+import { Rate, Trend } from "k6/metrics";
+
+// ============================================================================
+// Custom Metrics
+// ============================================================================
+
+const baselineDuration = new Trend("baseline_no_otel", true);
+const otelExportDuration = new Trend("with_otel_export", true);
+
+const baselineSuccess = new Rate("baseline_success");
+const otelExportSuccess = new Rate("otel_export_success");
+
+// ============================================================================
+// Test Configuration
+// ============================================================================
+
+export const options = {
+    scenarios: {
+        sustained: {
+            executor: "ramping-vus",
+            startVUs: 0,
+            stages: [
+                { duration: "30s", target: 100 }, // ramp up
+                { duration: "4m", target: 100 }, // steady load
+                { duration: "30s", target: 0 }, // ramp down
+            ],
+            gracefulRampDown: "10s",
+        },
+    },
+
+    thresholds: {
+        // Both configs should stay healthy under 100 VUs.
+        baseline_no_otel: ["p(95)<50"],
+        // Stock OTel export adds BatchSpanProcessor + otlp-transformer on the
+        // critical path of every 1s batch flush. We set a loose threshold so
+        // the scenario reports instead of failing — the delta itself is the
+        // deliverable, not a SLA.
+        with_otel_export: ["p(95)<200"],
+
+        baseline_success: ["rate>0.99"],
+        otel_export_success: ["rate>0.99"],
+    },
+
+    tags: {
+        test_type: "otel-export-overhead",
+        environment: "docker",
+    },
+
+    insecureSkipTLSVerify: true,
+
+    // Compact summary — full percentiles for both custom trends.
+    summaryTrendStats: ["avg", "min", "med", "max", "p(50)", "p(90)", "p(95)", "p(99)"],
+};
+
+// ============================================================================
+// Server Ports
+// ============================================================================
+
+const BASELINE_PORT = __ENV.BASELINE_PORT || "8081";
+const OTEL_EXPORT_PORT = __ENV.OTEL_EXPORT_PORT || "8085";
+
+const BASE_HOST = __ENV.BASE_HOST || "server";
+const PROTOCOL = __ENV.PROTOCOL || "https";
+const SERVICE_PATH = "/greeter.v1.GreeterService/SayHello";
+
+// ============================================================================
+// Helpers
+// ============================================================================
+
+function callService(port, configName) {
+    const payload = JSON.stringify({
+        name: `OtelBench-${configName}-${__VU}-${__ITER}`,
+    });
+
+    const response = http.post(`${PROTOCOL}://${BASE_HOST}:${port}${SERVICE_PATH}`, payload, {
+        headers: {
+            "Content-Type": "application/json",
+            "Connect-Protocol-Version": "1",
+            "User-Agent": "k6-otel-export-benchmark/1.0",
+        },
+        tags: {
+            name: "SayHello",
+            config: configName,
+        },
+    });
+
+    const success = check(response, {
+        [`${configName}: status is 200`]: (r) => r.status === 200,
+    });
+
+    return { response, success };
+}
+
+// ============================================================================
+// Test Scenario
+// ============================================================================
+
+export default function () {
+    // Hit baseline and otel-export once each per iteration, in random order
+    // (shuffled below), matching the interceptor-overhead.js pattern. Request
+    // counts per config are therefore equal; compare latency, not req/s.
+    const testCases = [
+        {
+            run() {
+                const { response, success } = callService(BASELINE_PORT, "baseline");
+                baselineDuration.add(response.timings.duration);
+                baselineSuccess.add(success);
+            },
+        },
+        {
+            run() {
+                const { response, success } = callService(OTEL_EXPORT_PORT, "otel_export");
+                otelExportDuration.add(response.timings.duration);
+                otelExportSuccess.add(success);
+            },
+        },
+    ];
+
+    // Fisher-Yates shuffle — eliminate ordering bias.
+    for (let i = testCases.length - 1; i > 0; i--) {
+        const j = Math.floor(Math.random() * (i + 1));
+        [testCases[i], testCases[j]] = [testCases[j], testCases[i]];
+    }
+
+    for (const testCase of testCases) {
+        testCase.run();
+    }
+
+    // Small think time to keep the offered load realistic and to give
+    // BatchSpanProcessor room to batch exports rather than flush per-request.
+    sleep(0.05);
+}
+
+// ============================================================================
+// Setup (runs once before test)
+// ============================================================================
+
+export function setup() {
+    console.log("\n  Starting OTel OTLP Export Overhead Benchmark");
+    console.log("   Duration: ~5 minutes (30s ramp + 4m steady + 30s ramp down)");
+    console.log("   VUs: 100");
+    console.log("\n  Configurations to test:");
+    console.log(`   1. Baseline (no interceptors, no OTel) - :${BASELINE_PORT}`);
+    console.log(`   2. OTel export (full chain + real OTLP/gRPC)  - :${OTEL_EXPORT_PORT}`);
+    console.log("\n  Goal: measure p50/p95/p99 latency delta and throughput delta");
+    console.log("        caused by the stock @connectum/otel export path.");
+
+    const ports = [
+        { port: BASELINE_PORT, name: "Baseline" },
+        { port: OTEL_EXPORT_PORT, name: "OTel Export" },
+    ];
+
+    console.log("\n  Health checks:\n");
+    for (const { port, name } of ports) {
+        const healthResponse = http.post(
+            `${PROTOCOL}://${BASE_HOST}:${port}/greeter.v1.GreeterService/SayHello`,
+            JSON.stringify({ name: "healthcheck" }),
+            {
+                headers: {
+                    "Content-Type": "application/json",
+                    "Connect-Protocol-Version": "1",
+                },
+            },
+        );
+        if (healthResponse.status === 200) {
+            console.log(`   OK   ${name.padEnd(15)} - :${port}`);
+        } else {
+            console.error(`   FAIL ${name.padEnd(15)} - :${port} (status: ${healthResponse.status})`);
+            throw new Error(`Health check failed for ${name} on port ${port}. ` + "Did you start the server with OTEL_EXPORT_ENABLED=1?");
+        }
+    }
+
+    console.log("\n");
+}
+
+// ============================================================================
+// Teardown (runs once after test)
+// ============================================================================
+
+export function teardown(_data) {
+    console.log("\n  OTel OTLP Export Overhead Benchmark completed");
+    console.log("\n  Analysis:");
+    console.log("   - Compute overhead = with_otel_export(p95) - baseline_no_otel(p95)");
+    console.log("   - Compute relative = with_otel_export / baseline_no_otel");
+    console.log("   - If relative > 1.5x, investigate otlp-transformer version");
+    console.log("     (Connectum recommendations R1.2; see upstream #6221, #6390, #6570)\n");
+    console.log("   JSON summary (when running under docker compose):");
+    console.log("     examples/performance-test-server/k6/results/otel-export-overhead.json\n");
+}
diff --git a/performance-test-server/k6/results/.gitignore b/performance-test-server/k6/results/.gitignore
new file mode 100644
index 0000000..0827618
--- /dev/null
+++ b/performance-test-server/k6/results/.gitignore
@@ -0,0 +1,2 @@
+*.json
+!.gitignore
diff --git a/performance-test-server/otel-collector-config.yaml b/performance-test-server/otel-collector-config.yaml
new file mode 100644
index 0000000..df3e855
--- /dev/null
+++ b/performance-test-server/otel-collector-config.yaml
@@ -0,0 +1,55 @@
+# otel-collector-config.yaml
+#
+# OpenTelemetry Collector configuration used by the OTel export benchmark
+# scenario (port 8085 in performance-test-server).
+#
+# The scenario's goal is to measure server-side CPU cost of the stock OTel
+# export path (BatchSpanProcessor + @opentelemetry/otlp-transformer + OTLP gRPC
+# transport), NOT the cost of a downstream backend. So the collector accepts
+# telemetry, batches it, and discards it via the debug exporter — no
+# ClickHouse, no Prometheus, no network fan-out.
+#
+# If you want to visually inspect exported spans, raise the debug exporter
+# verbosity to "detailed" and re-run.
+
+receivers:
+  otlp:
+    protocols:
+      grpc:
+        endpoint: 0.0.0.0:4317
+      http:
+        endpoint: 0.0.0.0:4318
+
+processors:
+  batch:
+    timeout: 1s
+    send_batch_size: 1024
+
+exporters:
+  debug:
+    verbosity: basic
+    sampling_initial: 5
+    sampling_thereafter: 1000
+
+extensions:
+  health_check:
+    endpoint: 0.0.0.0:13133
+
+service:
+  extensions: [health_check]
+  telemetry:
+    logs:
+      level: warn
+  pipelines:
+    traces:
+      receivers: [otlp]
+      processors: [batch]
+      exporters: [debug]
+    metrics:
+      receivers: [otlp]
+      processors: [batch]
+      exporters: [debug]
+    logs:
+      receivers: [otlp]
+      processors: [batch]
+      exporters: [debug]
diff --git a/performance-test-server/pnpm-lock.yaml b/performance-test-server/pnpm-lock.yaml
index 15cb5a8..bfb9db8 100644
--- a/performance-test-server/pnpm-lock.yaml
+++ b/performance-test-server/pnpm-lock.yaml
@@ -17,14 +17,14 @@ importers:
         specifier: ^2.1.1
         version: 2.1.1(@bufbuild/protobuf@2.11.0)
       '@connectum/core':
-        specifier: 1.0.0-rc.7
-        version: 1.0.0-rc.7
+        specifier: 1.0.0-rc.10
+        version: 1.0.0-rc.10
       '@connectum/interceptors':
-        specifier: 1.0.0-rc.7
-        version: 1.0.0-rc.7(@bufbuild/protovalidate@1.1.1(@bufbuild/protobuf@2.11.0))
+        specifier: 1.0.0-rc.10
+        version: 1.0.0-rc.10(@bufbuild/protovalidate@1.1.1(@bufbuild/protobuf@2.11.0))
       '@connectum/otel':
-        specifier: 1.0.0-rc.7
-        version: 1.0.0-rc.7(@bufbuild/protobuf@2.11.0)
+        specifier: 1.0.0-rc.10
+        version: 1.0.0-rc.10(@bufbuild/protobuf@2.11.0)
     devDependencies:
       '@bufbuild/buf':
         specifier: ^1.65.0
@@ -138,17 +138,17 @@ packages:
       '@bufbuild/protovalidate': ^1.0.0
       '@connectrpc/connect': ^2.0.3
 
-  '@connectum/core@1.0.0-rc.7':
-    resolution: {integrity: sha512-1x5sthjO9yk88MTJKPwMv234/wXxisErtyvxHyD/cDJR6iyIrM31mQmp4MMgV3GlAIcNKqjSVUI3MrSbeYw6fg==}
-    engines: {node: '>=18.0.0'}
+  '@connectum/core@1.0.0-rc.10':
+    resolution: {integrity: sha512-IiF+wLI0f3hbMSEQefgG8M3c4Y9ZQ/wXhTcaT58EN7xfa9tdy/pr03yK0iuN5h7kNVPrIMK5nVnwRBAwWtwUOg==}
+    engines: {node: '>=20.0.0'}
 
-  '@connectum/interceptors@1.0.0-rc.7':
-    resolution: {integrity: sha512-D05/Otft7K56sEjmySW6+SeG/QsxbmGkkVbcxfUXib1BVUG0l0ZwKbQTG1NTE0+koqpWG4VWbywuJ8xpVfPK6A==}
-    engines: {node: '>=18.0.0'}
+  '@connectum/interceptors@1.0.0-rc.10':
+    resolution: {integrity: sha512-B9J7UC7W+oAU6vILBAta++lTRsPkhKizsAjUs6b9qeLDBFrjGSYOCtrHGnIlqJtdttkQL/R2G/t6Rtvkx8o+WA==}
+    engines: {node: '>=20.0.0'}
 
-  '@connectum/otel@1.0.0-rc.7':
-    resolution: {integrity: sha512-O14qZsL9LGbvy6LZkJV4s1ck5aazSmdWhJw7rAqPc6QIAUxyWJywgF9ynCctZhL4JTsqpyu3XBHZe4tzAT3tHA==}
-    engines: {node: '>=18.0.0'}
+  '@connectum/otel@1.0.0-rc.10':
+    resolution: {integrity: sha512-V/F8Rkakl2TVcxyddqplyPzEazSceM0mgZ1Htk1OzLBJHdXG7MFbPe2FNzYP6AV3kk9+V6f4OLgLZiF6GSAJrg==}
+    engines: {node: '>=20.0.0'}
 
   '@grpc/grpc-js@1.14.3':
     resolution: {integrity: sha512-Iq8QQQ/7X3Sac15oB6p0FmUg/klxQvXLeileoqrTRGJYLV+/9tubbr9ipz0GKHjmXVsgFPo/+W+2cA8eNcR+XA==}
@@ -500,7 +500,7 @@ snapshots:
       '@bufbuild/protovalidate': 1.1.1(@bufbuild/protobuf@2.11.0)
       '@connectrpc/connect': 2.1.1(@bufbuild/protobuf@2.11.0)
 
-  '@connectum/core@1.0.0-rc.7':
+  '@connectum/core@1.0.0-rc.10':
     dependencies:
       '@bufbuild/protobuf': 2.11.0
       '@connectrpc/connect': 2.1.1(@bufbuild/protobuf@2.11.0)
@@ -508,17 +508,17 @@ snapshots:
       env-var: 7.5.0
       zod: 4.3.6
 
-  '@connectum/interceptors@1.0.0-rc.7(@bufbuild/protovalidate@1.1.1(@bufbuild/protobuf@2.11.0))':
+  '@connectum/interceptors@1.0.0-rc.10(@bufbuild/protovalidate@1.1.1(@bufbuild/protobuf@2.11.0))':
     dependencies:
       '@bufbuild/protobuf': 2.11.0
       '@connectrpc/connect': 2.1.1(@bufbuild/protobuf@2.11.0)
       '@connectrpc/validate': 0.2.0(@bufbuild/protobuf@2.11.0)(@bufbuild/protovalidate@1.1.1(@bufbuild/protobuf@2.11.0))(@connectrpc/connect@2.1.1(@bufbuild/protobuf@2.11.0))
-      '@connectum/core': 1.0.0-rc.7
+      '@connectum/core': 1.0.0-rc.10
       cockatiel: 3.2.1
     transitivePeerDependencies:
       - '@bufbuild/protovalidate'
 
-  '@connectum/otel@1.0.0-rc.7(@bufbuild/protobuf@2.11.0)':
+  '@connectum/otel@1.0.0-rc.10(@bufbuild/protobuf@2.11.0)':
     dependencies:
       '@connectrpc/connect': 2.1.1(@bufbuild/protobuf@2.11.0)
       '@opentelemetry/api': 1.9.0
diff --git a/performance-test-server/src/index.ts b/performance-test-server/src/index.ts
index de0b2d2..405d831 100644
--- a/performance-test-server/src/index.ts
+++ b/performance-test-server/src/index.ts
@@ -3,14 +3,18 @@
  *
  * Dedicated server for k6 performance benchmarking.
  *
- * Runs 5 parallel servers with different interceptor configurations:
+ * Runs up to 6 parallel servers with different interceptor configurations:
  * - Port 8081: Baseline (no interceptors)
  * - Port 8082: Validation only
  * - Port 8083: Logger only
- * - Port 8084: OTel (tracing + metrics) only
- * - Port 8080: Full chain (all interceptors)
+ * - Port 8084: OTel (tracing + metrics) only (no-op exporter)
+ * - Port 8080: Full chain (all interceptors, no-op exporter)
+ * - Port 8085: OTel export — full chain + real OTLP exporter to a collector
+ *              (enabled only when OTEL_EXPORT_ENABLED=1, which assumes an OTel
+ *              collector is reachable; otherwise this port is skipped).
  *
- * This allows measuring the overhead of each interceptor individually.
+ * This allows measuring the overhead of each interceptor individually, plus
+ * the end-to-end overhead of real OTLP export on port 8085.
  *
  * Uses the new createServer() API with explicit lifecycle control.
  */
@@ -21,7 +25,7 @@ import {
     createDefaultInterceptors,
     createLoggerInterceptor,
 } from "@connectum/interceptors";
-import { createOtelInterceptor } from "@connectum/otel";
+import { createOtelInterceptor, initProvider, shutdownProvider } from "@connectum/otel";
 import { benchmarkServiceRoutes } from "./services/benchmarkService.ts";
 
 // Optional TLS: set TLS_DIR env var to enable HTTPS (required for HTTP/1.1 compatibility)
@@ -124,26 +128,93 @@ const fullChainOptions: CreateServerOptions = {
     ],
 };
 
+// ============================================================================
+// Configuration 6 (OPTIONAL): OTel export — full chain + real OTLP exporter
+// ============================================================================
+//
+// Enabled only when OTEL_EXPORT_ENABLED=1.
+//
+// Uses @connectum/otel provider with env-driven OTLP/gRPC exporter pointed at
+// a local OTel Collector. This measures the stock OTel-JS export path
+// (BatchSpanProcessor + @opentelemetry/otlp-transformer serialization +
+// @grpc/grpc-js wire), i.e. exactly what production users pay.
+//
+// The OTLP exporter and BatchSpanProcessor options are read from standard
+// OTEL_* env vars (see @connectum/otel config.ts):
+//   OTEL_SERVICE_NAME, OTEL_TRACES_EXPORTER, OTEL_METRICS_EXPORTER,
+//   OTEL_LOGS_EXPORTER, OTEL_EXPORTER_OTLP_ENDPOINT,
+//   OTEL_BSP_MAX_EXPORT_BATCH_SIZE, OTEL_BSP_MAX_QUEUE_SIZE,
+//   OTEL_BSP_SCHEDULE_DELAY, OTEL_BSP_EXPORT_TIMEOUT.
+
+const otelExportEnabled = process.env.OTEL_EXPORT_ENABLED === "1";
+
+const otelExportOptions: CreateServerOptions = {
+    services: [benchmarkServiceRoutes],
+    port: 8085,
+    host: "0.0.0.0",
+    tls: tlsConfig,
+    interceptors: [
+        ...createDefaultInterceptors({
+            errorHandler: {
+                logErrors: true,
+                includeStackTrace: true,
+            },
+            serializer: true,
+            validation: true,
+        }),
+        createLoggerInterceptor({
+            level: "error",
+            skipHealthCheck: true,
+        }),
+        createOtelInterceptor({
+            filter: ({ service }) => !service.includes("grpc.health"),
+        }),
+    ],
+};
+
 // ============================================================================
 // Start all servers
 // ============================================================================
 
-console.log("Starting 5 server configurations:\n");
+const serverCount = otelExportEnabled ? 6 : 5;
+console.log(`Starting ${serverCount} server configurations:\n`);
 
 if (tlsConfig) {
     console.log(`TLS enabled (certs from ${process.env.TLS_DIR})\n`);
 }
 
 try {
+    // Initialize OTel provider eagerly when the export scenario is enabled, so
+    // that the BatchSpanProcessor and exporters are set up before the first
+    // request reaches the interceptor on port 8085. Without this the provider
+    // would still auto-init lazily on first use, but eager init fails fast if
+    // the collector endpoint is misconfigured.
+    if (otelExportEnabled) {
+        console.log("OTEL_EXPORT_ENABLED=1 — initializing OTLP provider");
+        console.log(`  OTEL_SERVICE_NAME=${process.env.OTEL_SERVICE_NAME ?? "(unset)"}`);
+        console.log(`  OTEL_TRACES_EXPORTER=${process.env.OTEL_TRACES_EXPORTER ?? "(unset)"}`);
+        console.log(`  OTEL_METRICS_EXPORTER=${process.env.OTEL_METRICS_EXPORTER ?? "(unset)"}`);
+        console.log(`  OTEL_EXPORTER_OTLP_ENDPOINT=${process.env.OTEL_EXPORTER_OTLP_ENDPOINT ?? "(unset)"}\n`);
+        initProvider({
+            serviceName: process.env.OTEL_SERVICE_NAME ?? "performance-test-server",
+        });
+    }
+
     // createServer() is synchronous - creates unstarted server instances
-    const servers: Server[] = [
-        createServer(baselineOptions),
-        createServer(validationOptions),
-        createServer(loggerOptions),
-        createServer(otelOptions),
-        createServer(fullChainOptions),
+    const serverOptions: CreateServerOptions[] = [
+        baselineOptions,
+        validationOptions,
+        loggerOptions,
+        otelOptions,
+        fullChainOptions,
     ];
 
+    if (otelExportEnabled) {
+        serverOptions.push(otelExportOptions);
+    }
+
+    const servers: Server[] = serverOptions.map((opts) => createServer(opts));
+
     // start() is async - start all servers in parallel
     await Promise.all(servers.map((server) => server.start()));
 
@@ -153,13 +224,19 @@ try {
     console.log("8081 | Baseline (no interceptors)");
     console.log("8082 | Validation only");
     console.log("8083 | Logger only");
-    console.log("8084 | OTel (tracing + metrics) only");
-    console.log("8080 | Full chain (all interceptors)");
+    console.log("8084 | OTel (tracing + metrics) only (no-op exporter)");
+    console.log("8080 | Full chain (all interceptors, no-op exporter)");
+    if (otelExportEnabled) {
+        console.log("8085 | OTel export — full chain + real OTLP exporter");
+    }
 
     console.log("\nReady for k6 benchmarks!");
     console.log("\nRun benchmarks with:");
     console.log("  k6 run k6/basic-load.js");
     console.log("  k6 run k6/interceptor-overhead.js");
+    if (otelExportEnabled) {
+        console.log("  k6 run k6/otel-export-overhead.js");
+    }
 
     console.log("\nPress Ctrl+C to shutdown all servers\n");
 
@@ -172,6 +249,11 @@ try {
 
         await Promise.all(servers.map((server) => server.stop()));
 
+        if (otelExportEnabled) {
+            console.log("Flushing OTel provider...");
+            await shutdownProvider();
+        }
+
         console.log("All servers stopped");
         process.exit(0);
     };

From e5de9e6b633974541445604986c1e38ac8e18487 Mon Sep 17 00:00:00 2001
From: intech <decaf-comfy-groin@duck.com>
Date: Wed, 17 Jun 2026 13:48:50 +0400
Subject: [PATCH 2/2] fix(performance-test-server): address CodeRabbit review
 on otel-export scenario
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Apply all four CodeRabbit findings on the OTLP-export benchmark:

1. Profile command: name `server otel-collector k6-otel-export` explicitly so
   the profile-less `k6-interceptor-overhead` does not auto-start and contaminate
   the export measurement with concurrent CPU load.

2. Drop the misleading "throughput delta" claim from the otel-export scenario
   (README, docker-compose comment, k6 doc-comment + setup log). The paired loop
   hits both configs once per iteration, so request counts are identical by
   construction — only the latency delta is a real deliverable. Legitimate
   throughput references (basic-load SLA, backend-write disclaimer) are kept.

3. Document the 8085 readiness gate: the server healthcheck probes 8080 only,
   but all ports bind in one Promise.all and the k6 setup() health-checks 8085
   and aborts if not serving, so no silent partial-measurement race.

4. Remove the false-positive collector healthcheck (`otelcol-contrib components`
   exits 0 without opening a socket) and switch k6-otel-export to
   `service_started`. The contrib image is scratch-based (no shell/HTTP client)
   so readiness cannot be probed; correctness instead relies on
   BatchSpanProcessor buffering/retry over the ~5-minute steady-state run.
   Honest comments added; no "readiness fixed" overclaim.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 performance-test-server/README.md             | 10 ++++--
 performance-test-server/docker-compose.yml    | 33 ++++++++++++++-----
 .../k6/otel-export-overhead.js                |  3 +-
 .../otel-collector-config.yaml                |  4 +++
 4 files changed, 37 insertions(+), 13 deletions(-)

diff --git a/performance-test-server/README.md b/performance-test-server/README.md
index 4d1f3f8..142ef58 100644
--- a/performance-test-server/README.md
+++ b/performance-test-server/README.md
@@ -97,13 +97,19 @@ docker compose --profile load up k6-basic-load --build --abort-on-container-exit
 
 ### OTel OTLP Export Overhead
 
-Measures the p50/p95/p99 latency delta and throughput delta between the baseline (port 8081) and the full-chain-with-real-OTLP-exporter configuration (port 8085). Runs for ~5 minutes at 100 VUs:
+Measures the p50/p95/p99 latency delta between the baseline (port 8081) and the full-chain-with-real-OTLP-exporter configuration (port 8085). Runs for ~5 minutes at 100 VUs:
 
 ```bash
 OTEL_EXPORT_ENABLED=1 docker compose --profile otel-export up \
-  --build --abort-on-container-exit
+  server otel-collector k6-otel-export --build --abort-on-container-exit
 ```
 
+Naming the three services explicitly is deliberate: `k6-interceptor-overhead`
+has no profile, so a bare `docker compose --profile otel-export up` would start
+it too and run the interceptor benchmark concurrently, competing for CPU with
+the OTLP-export run and contaminating its measurement. Listing only the
+services this scenario needs keeps the run isolated.
+
 What this measures that the `k6-interceptor-overhead` scenario does *not*:
 
 - Real `BatchSpanProcessor` + `@opentelemetry/otlp-transformer` serialization cost per exported span
diff --git a/performance-test-server/docker-compose.yml b/performance-test-server/docker-compose.yml
index 20bc297..eca97f8 100644
--- a/performance-test-server/docker-compose.yml
+++ b/performance-test-server/docker-compose.yml
@@ -43,12 +43,16 @@ services:
     volumes:
       - ./otel-collector-config.yaml:/etc/otelcol-contrib/config.yaml:ro
     command: ["--config=/etc/otelcol-contrib/config.yaml"]
-    healthcheck:
-      test: ["CMD", "/otelcol-contrib", "components"]
-      interval: 5s
-      timeout: 3s
-      retries: 5
-      start_period: 5s
+    # No healthcheck: the contrib image is built FROM scratch — it has no shell,
+    # curl, or wget — so the OTLP listeners (:4317/:4318) and the health_check
+    # extension (:13133) cannot be probed from inside the container. The only
+    # in-image binary is the collector itself, and `otelcol-contrib components`
+    # exits 0 without ever opening a socket, which would be a false "healthy"
+    # signal. We do NOT claim to prove collector readiness here. Instead,
+    # k6-otel-export gates on `service_started` only, and correctness does not
+    # depend on the collector being up at t=0: the BatchSpanProcessor buffers and
+    # retries exports, and over the ~5-minute steady-state run any sub-second
+    # collector startup gap is negligible relative to total spans exported.
 
   k6-interceptor-overhead:
     image: grafana/k6:latest
@@ -75,9 +79,16 @@ services:
   # =========================================================================
   # OTel OTLP export overhead scenario (profile: otel-export)
   # =========================================================================
-  # Measures p50/p95/p99 latency delta and throughput delta between the
-  # baseline (port 8081) and full-chain-with-real-OTLP-exporter (port 8085).
+  # Measures the p50/p95/p99 latency delta between the baseline (port 8081)
+  # and full-chain-with-real-OTLP-exporter (port 8085).
   # Requires OTEL_EXPORT_ENABLED=1 on the server and a running otel-collector.
+  #
+  # Readiness note: the server healthcheck below probes port 8080 only. All
+  # ports (incl. 8085) are bound in a single Promise.all in src/index.ts, so
+  # 8080 being healthy means 8085 is almost certainly up too — and the k6
+  # script closes the remaining gap deterministically: its setup() health-checks
+  # 8081 AND 8085 and aborts the run if 8085 is not yet serving. So a brief
+  # bind-order race cannot produce silent, partially-measured results.
   k6-otel-export:
     image: grafana/k6:latest
     volumes:
@@ -92,6 +103,10 @@ services:
       K6_OUT: "json=/results/otel-export-overhead.json"
     depends_on:
       server: { condition: service_healthy }
-      otel-collector: { condition: service_healthy }
+      # service_started, not service_healthy: the scratch-based collector image
+      # cannot be health-probed (see the otel-collector comment above). The
+      # BatchSpanProcessor tolerates a not-yet-ready collector via buffering and
+      # retries, so a started container is a sufficient precondition here.
+      otel-collector: { condition: service_started }
     command: run /scripts/otel-export-overhead.js
     profiles: ["otel-export"]
diff --git a/performance-test-server/k6/otel-export-overhead.js b/performance-test-server/k6/otel-export-overhead.js
index 1c13f72..3548e8a 100644
--- a/performance-test-server/k6/otel-export-overhead.js
+++ b/performance-test-server/k6/otel-export-overhead.js
@@ -22,7 +22,6 @@
  *
  * Output:
  *   p50/p95/p99 latency per config
- *   Throughput (requests/sec) per config
  *   Export-overhead delta printed in teardown
  *   JSON summary written to /results/otel-export-overhead.json when run via
  *   docker compose (K6_OUT env var).
@@ -174,7 +173,7 @@ export function setup() {
     console.log("\n  Configurations to test:");
     console.log(`   1. Baseline (no interceptors, no OTel) - :${BASELINE_PORT}`);
     console.log(`   2. OTel export (full chain + real OTLP/gRPC)  - :${OTEL_EXPORT_PORT}`);
-    console.log("\n  Goal: measure p50/p95/p99 latency delta and throughput delta");
+    console.log("\n  Goal: measure the p50/p95/p99 latency delta");
     console.log("        caused by the stock @connectum/otel export path.");
 
     const ports = [
diff --git a/performance-test-server/otel-collector-config.yaml b/performance-test-server/otel-collector-config.yaml
index df3e855..eb89161 100644
--- a/performance-test-server/otel-collector-config.yaml
+++ b/performance-test-server/otel-collector-config.yaml
@@ -31,6 +31,10 @@ exporters:
     sampling_initial: 5
     sampling_thereafter: 1000
 
+# The health_check extension exposes readiness on :13133 for anyone running
+# this collector outside the benchmark. It is intentionally NOT wired to a
+# Docker healthcheck: the contrib image is built FROM scratch and has no shell
+# or HTTP client to probe the endpoint with (see docker-compose.yml).
 extensions:
   health_check:
     endpoint: 0.0.0.0:13133