microsoft · JacksonWeber · May 22, 2026 · May 22, 2026 · May 22, 2026 · May 22, 2026
diff --git a/.github/workflows/performance.yml b/.github/workflows/performance.yml
@@ -0,0 +1,140 @@
+name: Performance
+
+# Benchmark only pull requests that target main.
+on:
+  pull_request:
+    branches:
+      - main
+
+# One active perf run per workflow + ref: a newer run for the same ref
+# cancels the one still in progress, which frees runner capacity.
+concurrency:
+  group: "perf-${{ github.workflow }}-${{ github.ref }}"
+  cancel-in-progress: true
+
+# Token scope: read repository contents, write to pull requests.
+permissions:
+  contents: read
+  pull-requests: write
+
+jobs:
+  perf-regression:
+    runs-on: ubuntu-latest
+    # Budget covers two complete benchmark passes (candidate, then baseline)
+    # together with the npm installs and tarball packing. A single pass is
+    # bounded by samples * scenarios * (warmup + duration + per-child init).
+    timeout-minutes: 40
+    env:
+      NODE_VERSION: "22.x"
+      # Forwarded to runBenchmarks.mjs as --samples / --duration / --warmup.
+      PERF_SAMPLES: "3"
+      PERF_DURATION: "5"
+      PERF_WARMUP: "1"
+      # Percent; echoed by the final gating step when the comparison fails.
+      PERF_REGRESSION_THRESHOLD: "15"
+
+    steps:
+      # No later step pushes or fetches with git, so neither checkout keeps
+      # the token in .git/config, where the PR-controlled install and build
+      # scripts that run next could read it.
+      - name: Checkout PR (candidate)
+        uses: actions/checkout@v4
+        with:
+          path: pr
+          persist-credentials: false
+
+      # Tip of the PR's target branch, checked out side by side with the PR.
+      - name: Checkout base branch (baseline)
+        uses: actions/checkout@v4
+        with:
+          ref: ${{ github.event.pull_request.base.ref }}
+          path: baseline
+          persist-credentials: false
+
+      - uses: actions/setup-node@v4
+        with:
+          node-version: ${{ env.NODE_VERSION }}
+
+      # ---- Build & pack BOTH versions of the package up-front ----
+      # Each checkout gets its own throwaway self-signed cert (valid 1 day)
+      # written under test/certs before that checkout is installed and built.
+      - name: Generate dummy TLS certs (PR)
+        working-directory: pr
+        run: >-
+          openssl req -x509 -nodes -newkey rsa
+          -keyout ./test/certs/server-key.pem
+          -out ./test/certs/server-cert.pem
+          -days 1
+          -subj "/C=CL/ST=RM/L=OpenTelemetryTest/O=Root/OU=Test/CN=ca"
+
+      # The packed tarball is moved to the workspace root under a fixed name
+      # so later steps can install it without knowing the package version.
+      - name: Install + build + pack candidate
+        working-directory: pr
+        run: |
+          npm ci
+          npm run build
+          npm pack
+          mv applicationinsights-*.tgz "$GITHUB_WORKSPACE/candidate.tgz"
+
+      - name: Generate dummy TLS certs (baseline)
+        working-directory: baseline
+        run: >-
+          openssl req -x509 -nodes -newkey rsa
+          -keyout ./test/certs/server-key.pem
+          -out ./test/certs/server-cert.pem
+          -days 1
+          -subj "/C=CL/ST=RM/L=OpenTelemetryTest/O=Root/OU=Test/CN=ca"
+
+      - name: Install + build + pack baseline
+        working-directory: baseline
+        run: |
+          npm ci
+          npm run build
+          npm pack
+          mv applicationinsights-*.tgz "$GITHUB_WORKSPACE/baseline.tgz"
+
+      # ---- Use the PR's perf harness for BOTH runs (consistent code) ----
+      - name: Install perf harness deps (PR)
+        working-directory: pr/test/performanceTests
+        run: npm ci
+
+      # --no-save / --no-package-lock: the tarball is installed without
+      # rewriting the harness's package.json or lockfile.
+      - name: Install CANDIDATE applicationinsights into perf harness
+        working-directory: pr/test/performanceTests
+        run: npm install --no-save --no-package-lock "$GITHUB_WORKSPACE/candidate.tgz"
+
+      # The harness is compiled once, with the candidate installed; the
+      # baseline run further down reuses this build.
+      - name: Build perf harness (TS -> dist-esm)
+        working-directory: pr/test/performanceTests
+        run: npm run build
+
+      - name: Run candidate benchmarks
+        working-directory: pr/test/performanceTests
+        run: >-
+          node runBenchmarks.mjs
+          --out "$GITHUB_WORKSPACE/candidate.json"
+          --samples "$PERF_SAMPLES"
+          --duration "$PERF_DURATION"
+          --warmup "$PERF_WARMUP"
+
+      # Swap the baseline tarball into the same harness, then repeat the run
+      # with identical settings.
+      - name: Install BASELINE applicationinsights into perf harness
+        working-directory: pr/test/performanceTests
+        run: npm install --no-save --no-package-lock "$GITHUB_WORKSPACE/baseline.tgz"
+
+      - name: Run baseline benchmarks
+        working-directory: pr/test/performanceTests
+        run: >-
+          node runBenchmarks.mjs
+          --out "$GITHUB_WORKSPACE/baseline.json"
+          --samples "$PERF_SAMPLES"
+          --duration "$PERF_DURATION"
+          --warmup "$PERF_WARMUP"
+
+      # ---- Compare and publish ----
+      # comparePerf.mjs's exit status is recorded as the `exit` step output
+      # instead of failing this step; the final step of the job turns a
+      # non-zero status into a failure after the results have been published.
+      - name: Compare results
+        id: compare
+        working-directory: pr/test/performanceTests
+        run: |
+          status=0
+          node comparePerf.mjs \
+            "$GITHUB_WORKSPACE/baseline.json" \
+            "$GITHUB_WORKSPACE/candidate.json" \
+            "$GITHUB_WORKSPACE/perf-comparison.md" || status=$?
+          echo "exit=${status}" >> "$GITHUB_OUTPUT"
+
+      # Runs even when an earlier step failed (always()), so whatever result
+      # files exist at that point are still uploaded.
+      - name: Upload raw results
+        if: always()
+        uses: actions/upload-artifact@v4
+        with:
+          name: perf-results
+          path: |
+            baseline.json
+            candidate.json
+            perf-comparison.md
+
+      # Only attempted for PRs whose head branch lives in this repository;
+      # continue-on-error keeps a failed comment from failing the job.
+      - name: Comment on PR (best-effort)
+        if: always() && github.event.pull_request.head.repo.full_name == github.repository
+        uses: marocchino/sticky-pull-request-comment@v2
+        continue-on-error: true
+        with:
+          header: perf-regression
+          path: perf-comparison.md
+
+      # No always() here, so this step is skipped once an earlier step has
+      # failed; otherwise a non-zero comparePerf.mjs status fails the job.
+      - name: Fail job on gating regression
+        if: ${{ steps.compare.outputs.exit != '0' }}
+        run: |
+          >&2 echo "Performance regression beyond ${PERF_REGRESSION_THRESHOLD}% detected."
+          exit 1
diff --git a/test/performanceTests/README.md b/test/performanceTests/README.md
@@ -1,10 +1,46 @@
-### Guide
+### Performance Tests
 
-1. Copy the `sample.env` file and name it as `.env`.
-2. Create an Application Insights resource and populate the `.env` file with connectionString.
-3. Run the tests as follows (parameters can be modified to as appropriate):
+The performance test harness measures throughput (ops/s) for hot-path APIs in
+this package and reports them against an upstream-OpenTelemetry-only baseline.
 
-- Tracking Dependencies (spans)
-  - `npm run perf-test:node -- TrackDependencyTest --warmup 1 --iterations 1 --parallel 2 --duration 15`
-- Tracking Traces (logs)
-  - `npm run perf-test:node -- TrackTraceTest --warmup 1 --iterations 1 --parallel 2 --duration 15`
+#### Manual run
+
+1. Copy `sample.env` to `.env` and set `APPLICATIONINSIGHTS_CONNECTION_STRING`.
+   Any well-formed connection string works: when it points at an unreachable
+   ingestion endpoint, the perf path never actually sends data.
+2. Run a single scenario via the existing harness:
+
+   - `npm run perf-test:node -- TrackDependencyTest --warmup 1 --iterations 1 --parallel 2 --duration 15`
+   - `npm run perf-test:node -- TrackTraceTest --warmup 1 --iterations 1 --parallel 2 --duration 15`
+   - `npm run perf-test:node -- AzureMonitorSpanTest --warmup 1 --iterations 1 --parallel 2 --duration 15`
+   - `npm run perf-test:node -- AzureMonitorLogTest --warmup 1 --iterations 1 --parallel 2 --duration 15`
+   - `npm run perf-test:node -- OtelSpanTest --warmup 1 --iterations 1 --parallel 2 --duration 15`
+   - `npm run perf-test:node -- OtelLogTest --warmup 1 --iterations 1 --parallel 2 --duration 15`
+
+3. Or run every scenario and produce a JSON summary suitable for comparison:
+
+   `npm run perf:benchmark -- --out results.json --samples 3 --duration 5`
+
+#### Scenario tiers
+
+| Scenario | Tier | What it measures |
+|---|---|---|
+| `TrackDependencyTest` | gating | `appInsights.defaultClient.trackDependency()` via the v2 shim |
+| `TrackTraceTest` | gating | `appInsights.defaultClient.trackTrace()` via the v2 shim |
+| `AzureMonitorSpanTest` | gating | `useAzureMonitor()` + `tracer.startSpan()` |
+| `AzureMonitorLogTest` | gating | `useAzureMonitor()` + `logger.emit()` |
+| `OtelSpanTest` | informational | Upstream `@opentelemetry/sdk-trace-base` only |
+| `OtelLogTest` | informational | Upstream `@opentelemetry/sdk-logs` only |
+
+Only **gating** scenarios block CI on regression. Upstream-OTel scenarios are
+reported as a reference for like-for-like comparison and are not owned by this
+repo, so they are never used for gate-fail decisions.
+
+#### Regression CI
+
+`.github/workflows/performance.yml` runs on every PR that targets `main`. It
+packs both the PR and the base branch as tarballs, installs each in turn under
+the PR's perf harness, runs the benchmark suite, and fails the job (blocking
+merge when set as a required check) if any gating scenario regresses beyond
+`PERF_REGRESSION_THRESHOLD` percent (default 15%). For PRs from branches of this
+repository (not forks), a sticky comment with the comparison table is posted.
diff --git a/test/performanceTests/bench.mjs b/test/performanceTests/bench.mjs
@@ -0,0 +1,109 @@
+#!/usr/bin/env node
+// Copyright (c) Microsoft Corporation.
+// Licensed under the MIT License.
+
+/*
+ * Measures throughput of a single scenario directly, without going through
+ * the @azure-tools/test-perf framework's worker pool. Running in a single
+ * process makes JSON output and result capture deterministic, and a fresh
+ * Node child per scenario keeps OpenTelemetry global state isolated.
+ *
+ * Usage:
+ *   node bench.mjs --scenario <Name> --duration <sec> --warmup <sec> --out <file>
+ */
+
+import { writeFileSync } from "node:fs";
+
+function parseArgs(argv) {
+    const a = { duration: 8, warmup: 2 };
+    for (let i = 0; i < argv.length; i++) {
+        const k = argv[i];
+        const v = () => argv[++i];
+        if (k === "--scenario") a.scenario = v();
+        else if (k === "--duration") a.duration = Number(v());
+        else if (k === "--warmup") a.warmup = Number(v());
+        else if (k === "--out") a.out = v();
+    }
+    if (!a.scenario || !a.out) {
+        console.error("Required: --scenario <Name> --out <file>");
+        process.exit(2);
+    }
+    return a;
+}
+
+// Maps each supported --scenario name to the compiled spec module that
+// main() dynamically imports for it. main() also uses the scenario name as
+// the export it looks up inside that module.
+const SCENARIO_MODULES = {
+    TrackDependencyTest: "./dist-esm/trackDependency.spec.js",
+    TrackTraceTest: "./dist-esm/trackTrace.spec.js",
+    AzureMonitorSpanTest: "./dist-esm/azureMonitorSpan.spec.js",
+    AzureMonitorLogTest: "./dist-esm/azureMonitorLog.spec.js",
+    OtelSpanTest: "./dist-esm/otelSpan.spec.js",
+    OtelLogTest: "./dist-esm/otelLog.spec.js",
+};
+
+async function runLoop(instance, durationMs) {
+    // Tight async loop. We rely on each .run() awaiting only synchronous-ish
+    // work (the scenarios under test do not perform real network I/O). The
+    // loop polls Date.now() infrequently (every BATCH iterations) to keep
+    // measurement overhead negligible.
+    const deadline = Date.now() + durationMs;
+    let ops = 0;
+    const BATCH = 256;
+    while (true) {
+        for (let i = 0; i < BATCH; i++) {
+            await instance.run();
+        }
+        ops += BATCH;
+        if (Date.now() >= deadline) break;
+    }
+    return ops;
+}
+
+async function main() {
+    const args = parseArgs(process.argv.slice(2));
+    const modulePath = SCENARIO_MODULES[args.scenario];
+    if (!modulePath) {
+        console.error(`Unknown scenario: ${args.scenario}`);
+        process.exit(2);
+    }
+    const mod = await import(modulePath);
+    const Cls = mod[args.scenario];
+    if (!Cls) {
+        console.error(`Module ${modulePath} does not export ${args.scenario}`);
+        process.exit(2);
+    }
+    const instance = new Cls();
+
+    // Warmup (not counted)
+    if (args.warmup > 0) {
+        await runLoop(instance, args.warmup * 1000);
+    }
+
+    const startWall = Date.now();
+    const ops = await runLoop(instance, args.duration * 1000);
+    const elapsedMs = Date.now() - startWall;
+    const opsPerSec = (ops / elapsedMs) * 1000;
+
+    writeFileSync(
+        args.out,
+        JSON.stringify(
+            {
+                scenario: args.scenario,
+                opsPerSec,
+                ops,
+                elapsedMs,
+                timestamp: new Date().toISOString(),
+            },
+            null,
+            2,
+        ),
+    );
+
+    console.log(
+        `[bench] ${args.scenario}: ${ops} ops in ${elapsedMs}ms => ${opsPerSec.toFixed(0)} ops/s`,
+    );
+}
+
+main().catch((err) => {
+    console.error(err);
+    process.exit(1);
+});