Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
50 changes: 25 additions & 25 deletions .github/workflows/benchmark.yml
Original file line number Diff line number Diff line change
Expand Up @@ -79,31 +79,31 @@ jobs:
const fs = require('fs');

/**
 * Load one benchmark results file.
 *
 * Diagnostics are written to stderr on purpose: this script's stdout is piped
 * through `tee` into /tmp/comparison.json, so anything printed with
 * console.log would end up in that file and break the JSON.parse calls in the
 * PR-comment steps.
 *
 * @param {string} path - Location of the JSON results file.
 * @param {string} label - Name used in diagnostics ('PR' or 'Main').
 * @returns {object|null} Parsed results, or null when the file is missing or
 *   does not contain valid JSON.
 */
function loadResults(path, label) {
  if (!fs.existsSync(path)) {
    console.error(`${label} benchmark results not found`);
    return null;
  }

  try {
    return JSON.parse(fs.readFileSync(path, 'utf8'));
  } catch (error) {
    // Treat an unreadable/corrupt file like a missing one so the run takes the
    // same "skip comparison" path instead of dying with a stack trace.
    console.error(`Failed to parse ${label} benchmark results: ${error.message}`);
    return null;
  }
}

const prResults = loadResults('/tmp/pr-results.json', 'PR');
const mainResults = loadResults('/tmp/main-results.json', 'Main');

// Exit early if we don't have both results
if (!prResults || !mainResults) {
  console.error('Missing benchmark results, skipping comparison');
  process.exit(0);
}

const REGRESSION_THRESHOLD = 0.20; // 20%
const regressions = [];
const improvements = [];

for (const prBench of prResults.results || []) {
const mainBench = (mainResults.results || []).find(b => b.name === prBench.name);
if (!mainBench) continue;
Expand Down Expand Up @@ -131,20 +131,20 @@ jobs:
});
}
}

console.log(JSON.stringify({ regressions, improvements }, null, 2));

if (regressions.length > 0) {
process.exit(1);
}
EOF

node /tmp/compare.js | tee /tmp/comparison.json

const REGRESSION_THRESHOLD = 0.20; // 20%
const regressions = [];
const improvements = [];

for (const prBench of prResults.results || []) {
const mainBench = (mainResults.results || []).find(b => b.name === prBench.name);
if (!mainBench) continue;
Expand Down Expand Up @@ -172,14 +172,14 @@ jobs:
});
}
}

console.log(JSON.stringify({ regressions, improvements }, null, 2));

if (regressions.length > 0) {
process.exit(1);
}
EOF

node /tmp/compare.js | tee /tmp/comparison.json

- name: Comment on PR (regressions)
Expand All @@ -188,43 +188,43 @@ jobs:
with:
script: |
const fs = require('fs');

// Check if comparison file exists
if (!fs.existsSync('/tmp/comparison.json')) {
console.log('No comparison data available - benchmark results may be missing');
return;
}

let comparison;
try {
comparison = JSON.parse(fs.readFileSync('/tmp/comparison.json', 'utf8'));
} catch (error) {
console.log('Failed to parse comparison data:', error.message);
return;
}

const { regressions, improvements } = comparison;

if (regressions.length === 0) {
console.log('No regressions found');
return;
}

let comment = '## 🚨 Benchmark Regression Detected\n\n';
comment += 'The following benchmarks regressed by more than 20%:\n\n';
comment += '| Benchmark | Main (p50) | PR (p50) | Change |\n';
comment += '|-----------|-----------|---------|--------|\n';

for (const reg of regressions) {
comment += `| ${reg.name} | ${reg.mainMedian}ms | ${reg.prMedian}ms | **+${reg.change}%** |\n`;
}

comment += '\n**Action**: Please investigate the performance regression and either:\n';
comment += '1. Optimize the code to meet baseline\n';
comment += '2. Update the baseline if the regression is acceptable\n';
comment += '3. File a perf follow-up if the change is necessary\n\n';
comment += 'See `bench/README.md` for interpretation guidance and `bench/baseline.md` for baseline numbers.\n';

if (improvements.length > 0) {
comment += '\n### ✅ Improvements Detected\n\n';
comment += '| Benchmark | Main (p50) | PR (p50) | Change |\n';
Expand All @@ -234,7 +234,7 @@ jobs:
comment += `| ${imp.name} | ${imp.mainMedian}ms | ${imp.prMedian}ms | ${imp.change}% |\n`;
}
}

github.rest.issues.createComment({
issue_number: context.issue.number,
owner: context.repo.owner,
Expand All @@ -254,10 +254,10 @@ jobs:
}
const comparison = JSON.parse(fs.readFileSync('/tmp/comparison.json', 'utf8'));
const { improvements } = comparison;

let comment = '## ✅ Benchmarks Passed\n\n';
comment += 'All benchmarks are within regression threshold (20%).\n';

if (improvements.length > 0) {
comment += '\n### 🎉 Improvements Detected\n\n';
comment += '| Benchmark | Main (p50) | PR (p50) | Change |\n';
Expand All @@ -267,7 +267,7 @@ jobs:
comment += `| ${imp.name} | ${imp.mainMedian}ms | ${imp.prMedian}ms | ${imp.change}% |\n`;
}
}

github.rest.issues.createComment({
issue_number: context.issue.number,
owner: context.repo.owner,
Expand Down
37 changes: 23 additions & 14 deletions BENCHMARK_IMPLEMENTATION.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,40 +6,44 @@
## Deliverables

### ✅ 1. Benchmark Harness

- **Location**: `test/chains/stellar/bench/stellar.bench.ts`
- **Tool**: Vitest benchmark mode (native, no external dependency needed; tinybench added as fallback)
- **Run command**: `pnpm bench` (or `pnpm bench:watch` for watch mode)
- **Coverage**: 11 benchmark suites covering all key operations

### ✅ 2. Comprehensive Benchmarks for Stellar

| Operation | Coverage |
| --------------------- | -------------------------------------------------------------------- |
| Key Derivation | deriveStealthKeys (single) |
| Address Generation | generateStealthAddress (single) |
| Meta-addressing | encodeStealthMetaAddress, decodeStealthMetaAddress, round-trip |
| Private Key | deriveStealthPrivateScalar (single) |
| Signing | signWithScalar (single) |
| Announcement Scanning | checkStealthAddress, scanAnnouncements at N={10, 100, 1K, 10K, 100K} |
| Network | fetchAnnouncements (mocked RPC) |

**Total benchmarks**: 15 individual test cases

### ✅ 3. Configuration Updates

**package.json**:

- Added `bench` script: `vitest bench --run`
- Added `bench:watch` script: `vitest bench`
- Added dev dependencies: `tinybench@^2.9.0`

**vitest.config.ts**:

- Configured benchmark discovery: `test/chains/**/bench/**/*.bench.ts`
- Output: JSON results to `bench/results.json`
- Excluded bench files from unit tests

### ✅ 4. Documentation

**bench/README.md** (comprehensive guide):

- Hardware baseline specifications
- How to interpret benchmark results (hz, min, max, p50, p99)
- How to compare against previous runs
Expand All @@ -50,6 +54,7 @@
- CI integration overview

**bench/baseline.md** (baseline report):

- Full hardware specifications (CPU, RAM, OS, Node.js version)
- Per-benchmark results with p50/p99 statistics
- Summary table showing linear scaling for scanAnnouncements
Expand All @@ -64,6 +69,7 @@
**Location**: `.github/workflows/benchmark.yml`

**Functionality**:

- Triggers on every PR to `main` and `develop`
- Runs benchmarks on PR branch
- Checks out and runs benchmarks on main branch
Expand All @@ -75,13 +81,15 @@
- Improvements detected (celebrates wins)

**Comments include**:

- Table of regressions/improvements with exact numbers
- Actionable guidance for developers
- Link to documentation

### ✅ 6. Identified Hot Path

**Primary**: `scanAnnouncements` ECDH Loop

- **Problem**: Calls ECDH scalar multiplication N times (once per announcement)
- **Current cost**: ~2.2 seconds for 100k announcements
- **Root cause**: `computeSharedSecret()` uses Curve25519 scalar mult, which is expensive
Expand All @@ -93,12 +101,12 @@

## Acceptance Criteria Met

| Criterion | Status | Evidence |
| ------------------------------------------------------------ | ------ | ----------------------------------------------------------------------- |
| Bench harness committed and runnable via pnpm bench | ✅ | `pnpm bench` configured in package.json; runs stellar.bench.ts |
| Baseline report with hardware spec and per-benchmark p50/p99 | ✅ | bench/baseline.md includes full specs and all metrics |
| CI regression check wired up | ✅ | .github/workflows/benchmark.yml with 20% threshold and PR comments |
| Hot path documented with expected speedup | ✅ | bench/baseline.md documents scanAnnouncements ECDH loop (2–3x expected) |

## Next Steps (Out of Scope)

Expand Down Expand Up @@ -136,6 +144,7 @@ pnpm bench -- --include="scanAnnouncements"
### Reviewing PR Regression Results

GitHub Actions will automatically post a comment on your PR showing:

- Which benchmarks regressed (if any)
- By how much (%)
- Links to baseline for comparison
Expand Down
Loading