From 5d98a4b53e305e9527c0eff3dea5d61c46bbbd3a Mon Sep 17 00:00:00 2001
From: Johnny Miller <163300+millerjp@users.noreply.github.com>
Date: Mon, 20 Apr 2026 20:33:17 +0200
Subject: [PATCH] test: add benchmarks, examples, and bench-regression guard
 (#15)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Complete the Phase 4 benchmark scaffolding:

  * syncmap_bench_test.go now has 32 benchmarks: one per public
    method (with hit/miss / loaded/stored variants where meaningful),
    a concurrent 90/10 read-write pattern, and five Overhead pairs
    comparing the generic wrapper against raw sync.Map.
  * example_test.go adds runnable godoc Example functions for all
    12 public symbols plus the two top-level CompareAnd* funcs.
    Range/Keys/Values/Map sort their output so // Output: blocks
    stay deterministic.
  * bench.txt seeds the committed baseline (darwin/arm64, count=5).
  * scripts/check-bench-regression.sh parses benchstat output and
    fails on time/op regressions >= 10% at p <= 0.05 or any positive
    allocs/op delta. Ported from axonops/mask with two fixes: skip
    geomean summary rows and exclude zero-delta matches so the guard
    is stable against itself.
  * Makefile: bench-regression target; ci.yml: benchstat-regression-
    guard job (mirrors mask, Dependabot skip, CAVEAT preserved);
    makefile-targets-guard expected list updated.

Coverage remains 100%. CONTRIBUTING §Performance baseline referenced
by the CI job is deferred to #18.
---
 .github/workflows/ci.yml          |  34 ++++
 .gitignore                        |   4 +
 Makefile                          |  12 ++
 bench.txt                         | 173 ++++++++++++++++++
 example_test.go                   | 219 ++++++++++++++++++++++
 scripts/check-bench-regression.sh | 109 +++++++++++
 syncmap_bench_test.go             | 290 +++++++++++++++++++++++++++++-
 7 files changed, 836 insertions(+), 5 deletions(-)
 create mode 100644 bench.txt
 create mode 100644 example_test.go
 create mode 100755 scripts/check-bench-regression.sh

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 42a609f..10b8cf5 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -47,6 +47,7 @@ jobs:
             fmt
             fmt-check
             bench
+            bench-regression
             coverage
             tidy
             tidy-check
@@ -324,3 +325,36 @@ jobs:
           distribution: goreleaser
           version: v2.7.0
           args: check
+
+  benchstat-regression-guard:
+    name: Benchmark regression guard
+    runs-on: ubuntu-latest
+    # CAVEAT: ubuntu-latest is a shared GitHub-hosted runner. Shared
+    # runners exhibit ±5-15% variance between runs for nanosecond-scale
+    # benchmarks, which can fire the guard on pure jitter. If this job
+    # flakes repeatedly, options are (a) move to a dedicated runner,
+    # (b) raise the time/op threshold (keeping allocs/op strict since
+    # allocation counts are deterministic), or (c) make the job
+    # advisory rather than blocking. See CONTRIBUTING.md §Performance
+    # baseline for the policy.
+    #
+    # Skip on Dependabot-authored PRs: dependency bumps cannot
+    # legitimately move in-tree benchmarks, so variance there is noise.
+    if: github.actor != 'dependabot[bot]'
+    steps:
+      - uses: actions/checkout@v6.0.2
+      - uses: actions/setup-go@v6.4.0
+        with:
+          go-version: "1.26"
+          cache: true
+      - name: Install benchstat
+        run: go install golang.org/x/perf/cmd/benchstat@v0.0.0-20260409210113-8e83ce0f7b1c
+      - name: Run make bench-regression
+        run: make bench-regression
+      - name: Attach benchstat report
+        if: always()
+        uses: actions/upload-artifact@v7.0.1
+        with:
+          name: benchstat-report
+          path: bench-regression.txt
+          if-no-files-found: ignore
diff --git a/.gitignore b/.gitignore
index bc090ec..ec03b64 100644
--- a/.gitignore
+++ b/.gitignore
@@ -20,6 +20,10 @@ profile.cov
 # GoReleaser snapshot output
 dist/
 
+# bench-regression intermediates (bench.txt is the committed baseline)
+bench-regression.txt
+current.txt
+
 # Dependency directories (remove the comment below to include it)
 # vendor/
 
diff --git a/Makefile b/Makefile
index 91f1d1f..a4c10b4 100644
--- a/Makefile
+++ b/Makefile
@@ -56,6 +56,18 @@ fmt-check: ## Fail if any Go file is unformatted
 bench: ## Run benchmarks
 	$(GO) test -bench=. -benchmem -run=^$$ $(PKG)
 
+.PHONY: bench-regression
+bench-regression: ## Compare this tree against bench.txt and fail on regression
+	@if ! command -v benchstat >/dev/null 2>&1; then \
+		echo "benchstat not installed. Install with: go install golang.org/x/perf/cmd/benchstat@latest"; \
+		exit 2; \
+	fi
+	@echo "Running benchmarks at count=5 (fresh samples for benchstat)..."
+	@$(GO) test -bench=. -benchmem -run=^$$ -count=5 $(PKG) > current.txt
+	@echo "Comparing against committed baseline (bench.txt) via benchstat..."
+	@benchstat bench.txt current.txt > bench-regression.txt; rc=$$?; cat bench-regression.txt; exit $$rc
+	@./scripts/check-bench-regression.sh bench-regression.txt
+
 .PHONY: coverage
 coverage: ## Generate coverage profile and HTML report for the library
 	$(GO) test -race -coverprofile=$(COVER_OUT) -covermode=atomic .
diff --git a/bench.txt b/bench.txt
new file mode 100644
index 0000000..0e21a48
--- /dev/null
+++ b/bench.txt
@@ -0,0 +1,173 @@
+# syncmap benchmark baseline (regenerated before every release).
+#
+# Scope: every public method, a concurrent read/write pattern, and
+# overhead pairs comparing the generic wrapper against raw sync.Map.
+# Regenerate with `go test -bench=. -benchmem -run='^$' -count=5 . > bench.txt`
+# (count=5 like the guard; plain `make bench` is one sample), strip trailing PASS/ok
+# lines and ANSI escapes, and commit in the same PR as any performance-affecting
+# change. CI consumes this via the benchstat-regression-guard job (see issue #15).
+
+goos: darwin
+goarch: arm64
+pkg: github.com/axonops/syncmap
+cpu: Apple M2
+BenchmarkCompareAndSwap-8                     	22158942	        54.29 ns/op	      56 B/op	       1 allocs/op
+BenchmarkCompareAndSwap-8                     	22218878	        54.20 ns/op	      56 B/op	       1 allocs/op
+BenchmarkCompareAndSwap-8                     	22137277	        54.59 ns/op	      56 B/op	       1 allocs/op
+BenchmarkCompareAndSwap-8                     	21985974	        54.20 ns/op	      56 B/op	       1 allocs/op
+BenchmarkCompareAndSwap-8                     	22150335	        54.24 ns/op	      56 B/op	       1 allocs/op
+BenchmarkCompareAndSwapMismatch-8             	49218400	        24.34 ns/op	       8 B/op	       0 allocs/op
+BenchmarkCompareAndSwapMismatch-8             	49145246	        24.64 ns/op	       7 B/op	       0 allocs/op
+BenchmarkCompareAndSwapMismatch-8             	47565253	        24.27 ns/op	       7 B/op	       0 allocs/op
+BenchmarkCompareAndSwapMismatch-8             	49890258	        24.20 ns/op	       7 B/op	       0 allocs/op
+BenchmarkCompareAndSwapMismatch-8             	49445368	        24.26 ns/op	       7 B/op	       0 allocs/op
+BenchmarkCompareAndDelete-8                   	24137097	        49.78 ns/op	      48 B/op	       1 allocs/op
+BenchmarkCompareAndDelete-8                   	21058264	        49.77 ns/op	      48 B/op	       1 allocs/op
+BenchmarkCompareAndDelete-8                   	23908270	        49.75 ns/op	      48 B/op	       1 allocs/op
+BenchmarkCompareAndDelete-8                   	24101305	        49.59 ns/op	      48 B/op	       1 allocs/op
+BenchmarkCompareAndDelete-8                   	24468219	        49.59 ns/op	      48 B/op	       1 allocs/op
+BenchmarkCompareAndSwapParallel-8             	37613427	        31.11 ns/op	      14 B/op	       1 allocs/op
+BenchmarkCompareAndSwapParallel-8             	37228059	        31.23 ns/op	      14 B/op	       1 allocs/op
+BenchmarkCompareAndSwapParallel-8             	38049387	        32.16 ns/op	      14 B/op	       1 allocs/op
+BenchmarkCompareAndSwapParallel-8             	37631169	        31.34 ns/op	      14 B/op	       1 allocs/op
+BenchmarkCompareAndSwapParallel-8             	38641639	        31.82 ns/op	      14 B/op	       1 allocs/op
+BenchmarkSwap-8                               	21828748	        56.33 ns/op	      72 B/op	       2 allocs/op
+BenchmarkSwap-8                               	21806089	        55.11 ns/op	      72 B/op	       2 allocs/op
+BenchmarkSwap-8                               	21893398	        55.02 ns/op	      72 B/op	       2 allocs/op
+BenchmarkSwap-8                               	21920394	        56.34 ns/op	      72 B/op	       2 allocs/op
+BenchmarkSwap-8                               	21486459	        54.98 ns/op	      72 B/op	       2 allocs/op
+BenchmarkSwapAbsent-8                         	 5224852	       342.5 ns/op	     118 B/op	       3 allocs/op
+BenchmarkSwapAbsent-8                         	 5442034	       340.5 ns/op	     118 B/op	       3 allocs/op
+BenchmarkSwapAbsent-8                         	 5219151	       340.6 ns/op	     118 B/op	       3 allocs/op
+BenchmarkSwapAbsent-8                         	 5361800	       342.1 ns/op	     118 B/op	       3 allocs/op
+BenchmarkSwapAbsent-8                         	 4436350	       344.7 ns/op	     120 B/op	       3 allocs/op
+BenchmarkSwapParallel-8                       	 6762307	       162.6 ns/op	      71 B/op	       2 allocs/op
+BenchmarkSwapParallel-8                       	 7012533	       160.1 ns/op	      71 B/op	       2 allocs/op
+BenchmarkSwapParallel-8                       	 6867154	       164.8 ns/op	      71 B/op	       2 allocs/op
+BenchmarkSwapParallel-8                       	 7465545	       171.6 ns/op	      71 B/op	       2 allocs/op
+BenchmarkSwapParallel-8                       	 7464746	       172.0 ns/op	      71 B/op	       2 allocs/op
+BenchmarkClear-8                              	19510965	        61.36 ns/op	     208 B/op	       2 allocs/op
+BenchmarkClear-8                              	19796684	        63.26 ns/op	     208 B/op	       2 allocs/op
+BenchmarkClear-8                              	19638326	        61.12 ns/op	     208 B/op	       2 allocs/op
+BenchmarkClear-8                              	18829362	        61.35 ns/op	     208 B/op	       2 allocs/op
+BenchmarkClear-8                              	19491501	        61.58 ns/op	     208 B/op	       2 allocs/op
+BenchmarkClearParallel-8                      	21743823	        48.07 ns/op	     208 B/op	       2 allocs/op
+BenchmarkClearParallel-8                      	26292484	        46.02 ns/op	     208 B/op	       2 allocs/op
+BenchmarkClearParallel-8                      	26541820	        45.56 ns/op	     208 B/op	       2 allocs/op
+BenchmarkClearParallel-8                      	26956977	        45.37 ns/op	     208 B/op	       2 allocs/op
+BenchmarkClearParallel-8                      	26921296	        47.73 ns/op	     208 B/op	       2 allocs/op
+BenchmarkLoad-8                               	100000000	        10.85 ns/op	       0 B/op	       0 allocs/op
+BenchmarkLoad-8                               	100000000	        10.83 ns/op	       0 B/op	       0 allocs/op
+BenchmarkLoad-8                               	100000000	        10.83 ns/op	       0 B/op	       0 allocs/op
+BenchmarkLoad-8                               	100000000	        10.82 ns/op	       0 B/op	       0 allocs/op
+BenchmarkLoad-8                               	100000000	        10.94 ns/op	       0 B/op	       0 allocs/op
+BenchmarkLoadMiss-8                           	173602654	         6.923 ns/op	       0 B/op	       0 allocs/op
+BenchmarkLoadMiss-8                           	173698752	         6.911 ns/op	       0 B/op	       0 allocs/op
+BenchmarkLoadMiss-8                           	169339515	         6.918 ns/op	       0 B/op	       0 allocs/op
+BenchmarkLoadMiss-8                           	173483085	         6.919 ns/op	       0 B/op	       0 allocs/op
+BenchmarkLoadMiss-8                           	173395839	         6.917 ns/op	       0 B/op	       0 allocs/op
+BenchmarkStore-8                              	28381094	        39.32 ns/op	      56 B/op	       1 allocs/op
+BenchmarkStore-8                              	30848791	        38.62 ns/op	      56 B/op	       1 allocs/op
+BenchmarkStore-8                              	30599883	        38.55 ns/op	      56 B/op	       1 allocs/op
+BenchmarkStore-8                              	31240473	        38.81 ns/op	      56 B/op	       1 allocs/op
+BenchmarkStore-8                              	30634288	        39.85 ns/op	      56 B/op	       1 allocs/op
+BenchmarkLoadOrStoreLoaded-8                  	48078686	        24.69 ns/op	      16 B/op	       1 allocs/op
+BenchmarkLoadOrStoreLoaded-8                  	48783130	        24.73 ns/op	      16 B/op	       1 allocs/op
+BenchmarkLoadOrStoreLoaded-8                  	47648433	        24.73 ns/op	      16 B/op	       1 allocs/op
+BenchmarkLoadOrStoreLoaded-8                  	48340881	        25.01 ns/op	      16 B/op	       1 allocs/op
+BenchmarkLoadOrStoreLoaded-8                  	48837812	        24.79 ns/op	      16 B/op	       1 allocs/op
+BenchmarkLoadOrStoreStored-8                  	 5067651	       342.1 ns/op	     119 B/op	       3 allocs/op
+BenchmarkLoadOrStoreStored-8                  	 5001940	       348.6 ns/op	     119 B/op	       3 allocs/op
+BenchmarkLoadOrStoreStored-8                  	 5101779	       348.8 ns/op	     119 B/op	       3 allocs/op
+BenchmarkLoadOrStoreStored-8                  	 5149410	       345.3 ns/op	     118 B/op	       3 allocs/op
+BenchmarkLoadOrStoreStored-8                  	 5164090	       347.3 ns/op	     118 B/op	       3 allocs/op
+BenchmarkLoadAndDelete-8                      	20900233	        55.83 ns/op	      64 B/op	       2 allocs/op
+BenchmarkLoadAndDelete-8                      	21533305	        56.12 ns/op	      64 B/op	       2 allocs/op
+BenchmarkLoadAndDelete-8                      	21571384	        56.91 ns/op	      64 B/op	       2 allocs/op
+BenchmarkLoadAndDelete-8                      	21563955	        56.49 ns/op	      64 B/op	       2 allocs/op
+BenchmarkLoadAndDelete-8                      	21737636	        55.68 ns/op	      64 B/op	       2 allocs/op
+BenchmarkDelete-8                             	21797424	        54.90 ns/op	      64 B/op	       2 allocs/op
+BenchmarkDelete-8                             	21753891	        55.84 ns/op	      64 B/op	       2 allocs/op
+BenchmarkDelete-8                             	21854091	        54.86 ns/op	      64 B/op	       2 allocs/op
+BenchmarkDelete-8                             	21955000	        54.72 ns/op	      64 B/op	       2 allocs/op
+BenchmarkDelete-8                             	22082349	        54.93 ns/op	      64 B/op	       2 allocs/op
+BenchmarkRange-8                              	  192530	      6248 ns/op	       0 B/op	       0 allocs/op
+BenchmarkRange-8                              	  211494	      6238 ns/op	       0 B/op	       0 allocs/op
+BenchmarkRange-8                              	  171541	      6358 ns/op	       0 B/op	       0 allocs/op
+BenchmarkRange-8                              	  160945	      6445 ns/op	       0 B/op	       0 allocs/op
+BenchmarkRange-8                              	  211269	      6282 ns/op	       0 B/op	       0 allocs/op
+BenchmarkLen-8                                	  253891	      4623 ns/op	       0 B/op	       0 allocs/op
+BenchmarkLen-8                                	  247600	      4778 ns/op	       0 B/op	       0 allocs/op
+BenchmarkLen-8                                	  247636	      5112 ns/op	       0 B/op	       0 allocs/op
+BenchmarkLen-8                                	  263568	      4958 ns/op	       0 B/op	       0 allocs/op
+BenchmarkLen-8                                	  241881	      4850 ns/op	       0 B/op	       0 allocs/op
+BenchmarkMap-8                                	   19855	     60843 ns/op	  109016 B/op	      22 allocs/op
+BenchmarkMap-8                                	   19030	     60999 ns/op	  109016 B/op	      22 allocs/op
+BenchmarkMap-8                                	   19720	     60457 ns/op	  109016 B/op	      22 allocs/op
+BenchmarkMap-8                                	   19653	     62267 ns/op	  109016 B/op	      22 allocs/op
+BenchmarkMap-8                                	   19653	     61157 ns/op	  109016 B/op	      22 allocs/op
+BenchmarkKeys-8                               	  112729	     11204 ns/op	   35184 B/op	      11 allocs/op
+BenchmarkKeys-8                               	  109974	     10930 ns/op	   35184 B/op	      11 allocs/op
+BenchmarkKeys-8                               	  110037	     11295 ns/op	   35184 B/op	      11 allocs/op
+BenchmarkKeys-8                               	  104438	     10920 ns/op	   35184 B/op	      11 allocs/op
+BenchmarkKeys-8                               	  112255	     10964 ns/op	   35184 B/op	      11 allocs/op
+BenchmarkValues-8                             	  144583	      8086 ns/op	   25208 B/op	      12 allocs/op
+BenchmarkValues-8                             	  144780	      8586 ns/op	   25208 B/op	      12 allocs/op
+BenchmarkValues-8                             	  138247	      8254 ns/op	   25208 B/op	      12 allocs/op
+BenchmarkValues-8                             	  144140	      8477 ns/op	   25208 B/op	      12 allocs/op
+BenchmarkValues-8                             	  145588	      8116 ns/op	   25208 B/op	      12 allocs/op
+BenchmarkConcurrentReadWrite-8                	24444170	        48.93 ns/op	       7 B/op	       0 allocs/op
+BenchmarkConcurrentReadWrite-8                	24392350	        48.44 ns/op	       7 B/op	       0 allocs/op
+BenchmarkConcurrentReadWrite-8                	24357836	        47.83 ns/op	       7 B/op	       0 allocs/op
+BenchmarkConcurrentReadWrite-8                	24418863	        48.74 ns/op	       7 B/op	       0 allocs/op
+BenchmarkConcurrentReadWrite-8                	24355281	        48.13 ns/op	       7 B/op	       0 allocs/op
+BenchmarkOverhead_LoadSyncMap-8               	100000000	        10.81 ns/op	       0 B/op	       0 allocs/op
+BenchmarkOverhead_LoadSyncMap-8               	100000000	        10.83 ns/op	       0 B/op	       0 allocs/op
+BenchmarkOverhead_LoadSyncMap-8               	100000000	        10.82 ns/op	       0 B/op	       0 allocs/op
+BenchmarkOverhead_LoadSyncMap-8               	100000000	        10.84 ns/op	       0 B/op	       0 allocs/op
+BenchmarkOverhead_LoadSyncMap-8               	100000000	        10.92 ns/op	       0 B/op	       0 allocs/op
+BenchmarkOverhead_LoadRawSyncMap-8            	100000000	        10.21 ns/op	       0 B/op	       0 allocs/op
+BenchmarkOverhead_LoadRawSyncMap-8            	100000000	        10.23 ns/op	       0 B/op	       0 allocs/op
+BenchmarkOverhead_LoadRawSyncMap-8            	100000000	        10.29 ns/op	       0 B/op	       0 allocs/op
+BenchmarkOverhead_LoadRawSyncMap-8            	100000000	        10.35 ns/op	       0 B/op	       0 allocs/op
+BenchmarkOverhead_LoadRawSyncMap-8            	100000000	        10.37 ns/op	       0 B/op	       0 allocs/op
+BenchmarkOverhead_StoreSyncMap-8              	30809289	        38.50 ns/op	      56 B/op	       1 allocs/op
+BenchmarkOverhead_StoreSyncMap-8              	30735932	        38.66 ns/op	      56 B/op	       1 allocs/op
+BenchmarkOverhead_StoreSyncMap-8              	30821126	        38.76 ns/op	      56 B/op	       1 allocs/op
+BenchmarkOverhead_StoreSyncMap-8              	31022046	        39.09 ns/op	      56 B/op	       1 allocs/op
+BenchmarkOverhead_StoreSyncMap-8              	31148907	        38.55 ns/op	      56 B/op	       1 allocs/op
+BenchmarkOverhead_StoreRawSyncMap-8           	31156692	        38.43 ns/op	      56 B/op	       1 allocs/op
+BenchmarkOverhead_StoreRawSyncMap-8           	30528330	        38.49 ns/op	      56 B/op	       1 allocs/op
+BenchmarkOverhead_StoreRawSyncMap-8           	30868828	        39.11 ns/op	      56 B/op	       1 allocs/op
+BenchmarkOverhead_StoreRawSyncMap-8           	30206568	        38.87 ns/op	      56 B/op	       1 allocs/op
+BenchmarkOverhead_StoreRawSyncMap-8           	31307001	        38.50 ns/op	      56 B/op	       1 allocs/op
+BenchmarkOverhead_LoadOrStoreSyncMap-8        	 5016001	       338.3 ns/op	     119 B/op	       3 allocs/op
+BenchmarkOverhead_LoadOrStoreSyncMap-8        	 5156434	       354.7 ns/op	     118 B/op	       3 allocs/op
+BenchmarkOverhead_LoadOrStoreSyncMap-8        	 5254610	       346.6 ns/op	     118 B/op	       3 allocs/op
+BenchmarkOverhead_LoadOrStoreSyncMap-8        	 5164503	       348.8 ns/op	     118 B/op	       3 allocs/op
+BenchmarkOverhead_LoadOrStoreSyncMap-8        	 5288906	       341.9 ns/op	     118 B/op	       3 allocs/op
+BenchmarkOverhead_LoadOrStoreRawSyncMap-8     	 5365010	       339.2 ns/op	     118 B/op	       3 allocs/op
+BenchmarkOverhead_LoadOrStoreRawSyncMap-8     	 4613268	       332.9 ns/op	     120 B/op	       3 allocs/op
+BenchmarkOverhead_LoadOrStoreRawSyncMap-8     	 5182401	       340.4 ns/op	     118 B/op	       3 allocs/op
+BenchmarkOverhead_LoadOrStoreRawSyncMap-8     	 5369550	       343.2 ns/op	     118 B/op	       3 allocs/op
+BenchmarkOverhead_LoadOrStoreRawSyncMap-8     	 5211404	       343.4 ns/op	     118 B/op	       3 allocs/op
+BenchmarkOverhead_DeleteSyncMap-8             	21141540	        54.69 ns/op	      64 B/op	       2 allocs/op
+BenchmarkOverhead_DeleteSyncMap-8             	22166258	        55.18 ns/op	      64 B/op	       2 allocs/op
+BenchmarkOverhead_DeleteSyncMap-8             	21524760	        54.67 ns/op	      64 B/op	       2 allocs/op
+BenchmarkOverhead_DeleteSyncMap-8             	21813571	        54.70 ns/op	      64 B/op	       2 allocs/op
+BenchmarkOverhead_DeleteSyncMap-8             	22073041	        54.71 ns/op	      64 B/op	       2 allocs/op
+BenchmarkOverhead_DeleteRawSyncMap-8          	22207108	        54.69 ns/op	      64 B/op	       2 allocs/op
+BenchmarkOverhead_DeleteRawSyncMap-8          	20638962	        55.74 ns/op	      64 B/op	       2 allocs/op
+BenchmarkOverhead_DeleteRawSyncMap-8          	21787629	        55.09 ns/op	      64 B/op	       2 allocs/op
+BenchmarkOverhead_DeleteRawSyncMap-8          	21884149	        54.72 ns/op	      64 B/op	       2 allocs/op
+BenchmarkOverhead_DeleteRawSyncMap-8          	21889653	        54.58 ns/op	      64 B/op	       2 allocs/op
+BenchmarkOverhead_LoadAndDeleteSyncMap-8      	21582248	        56.71 ns/op	      64 B/op	       2 allocs/op
+BenchmarkOverhead_LoadAndDeleteSyncMap-8      	21688395	        55.71 ns/op	      64 B/op	       2 allocs/op
+BenchmarkOverhead_LoadAndDeleteSyncMap-8      	21413929	        55.70 ns/op	      64 B/op	       2 allocs/op
+BenchmarkOverhead_LoadAndDeleteSyncMap-8      	21819470	        55.59 ns/op	      64 B/op	       2 allocs/op
+BenchmarkOverhead_LoadAndDeleteSyncMap-8      	21432812	        56.62 ns/op	      64 B/op	       2 allocs/op
+BenchmarkOverhead_LoadAndDeleteRawSyncMap-8   	21799008	        55.08 ns/op	      64 B/op	       2 allocs/op
+BenchmarkOverhead_LoadAndDeleteRawSyncMap-8   	21783591	        54.63 ns/op	      64 B/op	       2 allocs/op
+BenchmarkOverhead_LoadAndDeleteRawSyncMap-8   	22124878	        55.12 ns/op	      64 B/op	       2 allocs/op
+BenchmarkOverhead_LoadAndDeleteRawSyncMap-8   	19904444	        57.20 ns/op	      64 B/op	       2 allocs/op
+BenchmarkOverhead_LoadAndDeleteRawSyncMap-8   	21931810	        54.76 ns/op	      64 B/op	       2 allocs/op
diff --git a/example_test.go b/example_test.go
new file mode 100644
index 0000000..99a3bdc
--- /dev/null
+++ b/example_test.go
@@ -0,0 +1,219 @@
+// Copyright 2026 AxonOps Limited.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package syncmap_test
+
+import (
+	"fmt"
+	"sort"
+
+	"github.com/axonops/syncmap"
+)
+
+// Range, Keys, Values, and Map make no order guarantee. Every Example
+// that iterates over the map sorts its output before printing so the
+// // Output: blocks are deterministic under `go test`.
+
+func ExampleSyncMap() {
+	var m syncmap.SyncMap[string, int]
+
+	m.Store("hits", 1)
+	m.Store("misses", 0)
+
+	v, ok := m.Load("hits")
+	fmt.Println(v, ok)
+	// Output: 1 true
+}
+
+func ExampleSyncMap_Load() {
+	var m syncmap.SyncMap[string, int]
+	m.Store("answer", 42)
+
+	v, ok := m.Load("answer")
+	fmt.Println(v, ok)
+
+	v, ok = m.Load("missing")
+	fmt.Println(v, ok)
+	// Output:
+	// 42 true
+	// 0 false
+}
+
+func ExampleSyncMap_Store() {
+	var m syncmap.SyncMap[string, string]
+	m.Store("env", "prod")
+	m.Store("env", "staging") // overwrites
+
+	v, _ := m.Load("env")
+	fmt.Println(v)
+	// Output: staging
+}
+
+func ExampleSyncMap_LoadOrStore() {
+	var m syncmap.SyncMap[string, int]
+
+	v1, loaded1 := m.LoadOrStore("k", 1)
+	v2, loaded2 := m.LoadOrStore("k", 2)
+
+	fmt.Println(v1, loaded1)
+	fmt.Println(v2, loaded2)
+	// Output:
+	// 1 false
+	// 1 true
+}
+
+func ExampleSyncMap_LoadAndDelete() {
+	var m syncmap.SyncMap[string, int]
+	m.Store("k", 7)
+
+	v, loaded := m.LoadAndDelete("k")
+	fmt.Println(v, loaded)
+
+	v, loaded = m.LoadAndDelete("k")
+	fmt.Println(v, loaded)
+	// Output:
+	// 7 true
+	// 0 false
+}
+
+func ExampleSyncMap_Delete() {
+	var m syncmap.SyncMap[string, int]
+	m.Store("k", 1)
+	m.Delete("k")
+
+	_, ok := m.Load("k")
+	fmt.Println(ok)
+	// Output: false
+}
+
+func ExampleSyncMap_Swap() {
+	var m syncmap.SyncMap[string, int]
+
+	previous, loaded := m.Swap("k", 1)
+	fmt.Println(previous, loaded)
+
+	previous, loaded = m.Swap("k", 2)
+	fmt.Println(previous, loaded)
+	// Output:
+	// 0 false
+	// 1 true
+}
+
+func ExampleSyncMap_Clear() {
+	var m syncmap.SyncMap[string, int]
+	m.Store("a", 1)
+	m.Store("b", 2)
+
+	m.Clear()
+	fmt.Println(m.Len())
+	// Output: 0
+}
+
+func ExampleSyncMap_Range() {
+	var m syncmap.SyncMap[string, int]
+	m.Store("a", 1)
+	m.Store("b", 2)
+	m.Store("c", 3)
+
+	var keys []string
+	m.Range(func(k string, v int) bool {
+		keys = append(keys, fmt.Sprintf("%s=%d", k, v))
+		return true
+	})
+	sort.Strings(keys)
+	for _, entry := range keys {
+		fmt.Println(entry)
+	}
+	// Output:
+	// a=1
+	// b=2
+	// c=3
+}
+
+func ExampleSyncMap_Len() {
+	var m syncmap.SyncMap[string, int]
+	m.Store("a", 1)
+	m.Store("b", 2)
+	fmt.Println(m.Len())
+	// Output: 2
+}
+
+func ExampleSyncMap_Map() {
+	var m syncmap.SyncMap[string, int]
+	m.Store("a", 1)
+	m.Store("b", 2)
+
+	snap := m.Map()
+	keys := make([]string, 0, len(snap))
+	for k := range snap {
+		keys = append(keys, k)
+	}
+	sort.Strings(keys)
+	for _, k := range keys {
+		fmt.Printf("%s=%d\n", k, snap[k])
+	}
+	// Output:
+	// a=1
+	// b=2
+}
+
+func ExampleSyncMap_Keys() {
+	var m syncmap.SyncMap[string, int]
+	m.Store("a", 1)
+	m.Store("b", 2)
+
+	keys := m.Keys()
+	sort.Strings(keys)
+	fmt.Println(keys)
+	// Output: [a b]
+}
+
+func ExampleSyncMap_Values() {
+	var m syncmap.SyncMap[string, int]
+	m.Store("a", 1)
+	m.Store("b", 2)
+
+	values := m.Values()
+	sort.Ints(values)
+	fmt.Println(values)
+	// Output: [1 2]
+}
+
+func ExampleCompareAndSwap() {
+	var m syncmap.SyncMap[string, int]
+	m.Store("k", 1)
+
+	swapped := syncmap.CompareAndSwap(&m, "k", 1, 2)
+	fmt.Println(swapped)
+
+	swapped = syncmap.CompareAndSwap(&m, "k", 1, 3)
+	fmt.Println(swapped)
+	// Output:
+	// true
+	// false
+}
+
+func ExampleCompareAndDelete() {
+	var m syncmap.SyncMap[string, int]
+	m.Store("k", 1)
+
+	deleted := syncmap.CompareAndDelete(&m, "k", 2)
+	fmt.Println(deleted)
+
+	deleted = syncmap.CompareAndDelete(&m, "k", 1)
+	fmt.Println(deleted)
+	// Output:
+	// false
+	// true
+}
diff --git a/scripts/check-bench-regression.sh b/scripts/check-bench-regression.sh
new file mode 100755
index 0000000..df786e8
--- /dev/null
+++ b/scripts/check-bench-regression.sh
@@ -0,0 +1,109 @@
+#!/usr/bin/env bash
+# check-bench-regression.sh — parse a benchstat report and fail on regressions.
+#
+# Invoked by `make bench-regression` and by the benchstat-regression-guard CI
+# job. The input is benchstat's default human-readable report comparing
+# `bench.txt` (committed baseline) to `current.txt` (this run).
+#
+# Fail criteria (issue #15 AC):
+#   - Any `time/op` regression >= 10% at p <= 0.05
+#   - Any `allocs/op` regression > 0% (allocation increases are blocking
+#     regardless of statistical significance).
+#
+# Bytes-per-op is treated the same as time/op (10% + p<=0.05) since a bytes
+# increase without a commensurate allocs increase usually indicates a
+# structural change worth reviewing.
+#
+# Exit codes:
+#   0  no regressions above threshold
+#   1  one or more regressions found
+#   2  usage error (missing arg or file)
+
+set -euo pipefail
+
+if [[ $# -ne 1 ]]; then
+	echo "usage: $0 <benchstat-report>" >&2
+	exit 2
+fi
+report="$1"
+if [[ ! -r "$report" ]]; then
+	echo "cannot read $report" >&2
+	exit 2
+fi
+
+threshold_pct=10
+alpha="0.05"
+
+violations=0
+current_section=""
+
+# benchstat (golang.org/x/perf/cmd/benchstat v0.0.0-20240730-ish) emits
+# three tables, each introduced by a TWO-line header that looks like:
+#
+#     │ old.txt │  new.txt            │
+#     │  sec/op │  sec/op    vs base  │
+#
+# The metric name lives on the second header line between box-drawing
+# pipes. We detect it and set the current section. Any subsequent line
+# with a "+X.XX%" delta is a regression candidate for that metric.
+#
+# Sections identified:
+#   sec/op     — time per op (threshold: >= 10% at p <= 0.05)
+#   B/op       — bytes per op (same threshold as time)
+#   allocs/op  — allocation count per op (ANY increase fails)
+
+zero_base_re='[[:space:]][?][[:space:]]+[(]p='
+while IFS= read -r line; do
+	if [[ "$line" =~ [[:space:]](sec/op|B/op|allocs/op)[[:space:]] ]]; then
+		current_section="${BASH_REMATCH[1]}"
+		continue
+	fi
+
+	# Skip benchstat summary rows (geomean) — they carry footnote
+	# markers that can render as "+0.00%" even when nothing regressed.
+	if [[ "$line" =~ ^geomean ]]; then
+		continue
+	fi
+
+	# Regression candidates: a positive "+X.XX%" delta, or a bare "?" delta,
+	# which benchstat appears to print when the baseline is 0 (e.g. allocs/op
+	# 0 -> 1; TODO confirm format) and is scored as +100%. "+0.00%" is a no-op.
+	full_pct=""
+	if [[ "$line" =~ [+]([0-9]+\.[0-9]+)% ]]; then
+		full_pct="${BASH_REMATCH[1]}"
+	elif [[ "$line" =~ $zero_base_re ]]; then
+		full_pct="100.00"
+	fi
+	if [[ -n "$full_pct" ]] && awk -v p="$full_pct" 'BEGIN { exit !(p+0 > 0) }'; then
+		pct_int="${full_pct%%.*}"
+		# p-value from "(p=0.XXX ...)"; a row without one counts as p=1 (not significant).
+		p="1"
+		if [[ "$line" =~ p=([0-9]+\.[0-9]+) ]]; then
+			p="${BASH_REMATCH[1]}"
+		fi
+
+		flag_regression=0
+		case "$current_section" in
+			sec/op|B/op)
+				if (( 10#$pct_int >= threshold_pct )) && awk -v p="$p" -v a="$alpha" 'BEGIN {exit !(p+0 <= a+0)}'; then
+					flag_regression=1
+				fi
+				;;
+			allocs/op)
+				# Any measurable allocs/op increase is a regression.
+				flag_regression=1
+				;;
+		esac
+
+		if (( flag_regression == 1 )); then
+			echo "::error::regression ($current_section): $line"
+			violations=$((violations + 1))
+		fi
+	fi
+done < "$report"
+
+if (( violations > 0 )); then
+	echo "benchstat found $violations regression(s) above threshold (time/op or B/op >=${threshold_pct}% at p<=${alpha}, or any allocs/op increase)" >&2
+	exit 1
+fi
+echo "No regressions above threshold."
diff --git a/syncmap_bench_test.go b/syncmap_bench_test.go
index 2a690bb..9be71e1 100644
--- a/syncmap_bench_test.go
+++ b/syncmap_bench_test.go
@@ -15,16 +15,24 @@
 package syncmap_test
 
 import (
+	"strconv"
+	"sync"
+	"sync/atomic"
 	"testing"
 
 	"github.com/axonops/syncmap"
 )
 
-// This file seeds the benchmark suite with coverage for the functions
-// landing in issues #13 (Swap, Clear) and #14 (CompareAndSwap,
-// CompareAndDelete). A full benchmark set covering every public
-// method, plus a raw-sync.Map overhead comparison and a committed
-// bench.txt baseline, is owned by issue #15.
+// Benchmark suite for syncmap.
+//
+// Scope: every public method plus overhead pairs comparing the generic
+// wrapper against raw sync.Map. The committed bench.txt baseline is the
+// artefact this file produces; benchstat-regression-guard in CI compares
+// a fresh run against that baseline on every PR.
+//
+// Regenerate the baseline with `make bench > bench.txt` (strip the
+// trailing `PASS` / `ok` lines and ANSI escapes before committing) and
+// land the update in the same PR as any performance-affecting change.
 
 func BenchmarkCompareAndSwap(b *testing.B) {
 	b.ReportAllocs()
@@ -138,3 +146,275 @@ func BenchmarkClearParallel(b *testing.B) {
 		}
 	})
 }
+
+// -----------------------------------------------------------------------------
+// Per-method benchmarks — Load, Store, LoadOrStore, LoadAndDelete, Delete,
+// Range, Len, Map, Keys, Values.
+// -----------------------------------------------------------------------------
+
+func BenchmarkLoad(b *testing.B) {
+	var sm syncmap.SyncMap[string, int]
+	sm.Store("k", 42)
+	b.ReportAllocs()
+	b.ResetTimer()
+	for n := b.N; n > 0; n-- { // hit path: "k" was stored before the timer reset
+		_, _ = sm.Load("k")
+	}
+}
+
+func BenchmarkLoadMiss(b *testing.B) {
+	var empty syncmap.SyncMap[string, int]
+	b.ReportAllocs()
+	b.ResetTimer()
+	for n := b.N; n > 0; n-- { // miss path: nothing is ever stored in the map
+		_, _ = empty.Load("absent")
+	}
+}
+
+func BenchmarkStore(b *testing.B) {
+	var sm syncmap.SyncMap[string, int]
+	b.ReportAllocs()
+	b.ResetTimer()
+	for val := 0; val < b.N; val++ { // same key every pass, new value each time
+		sm.Store("k", val)
+	}
+}
+
+func BenchmarkLoadOrStoreLoaded(b *testing.B) {
+	var sm syncmap.SyncMap[string, int]
+	sm.Store("k", 42)
+	b.ReportAllocs()
+	b.ResetTimer()
+	for n := b.N; n > 0; n-- { // "k" already exists when each call is made
+		_, _ = sm.LoadOrStore("k", 0)
+	}
+}
+
+func BenchmarkLoadOrStoreStored(b *testing.B) {
+	var sm syncmap.SyncMap[int, int]
+	b.ReportAllocs()
+	b.ResetTimer()
+	for key := 0; key < b.N; key++ { // each pass uses a key not seen before in this run
+		_, _ = sm.LoadOrStore(key, key)
+	}
+}
+
+func BenchmarkLoadAndDelete(b *testing.B) {
+	var sm syncmap.SyncMap[int, int]
+	b.ReportAllocs()
+	b.ResetTimer()
+	for key := 0; key < b.N; key++ {
+		sm.Store(key, key) // timed as well: puts the entry in place for the call below
+		_, _ = sm.LoadAndDelete(key)
+	}
+}
+
+func BenchmarkDelete(b *testing.B) {
+	var sm syncmap.SyncMap[int, int]
+	b.ReportAllocs()
+	b.ResetTimer()
+	for key := 0; key < b.N; key++ {
+		sm.Store(key, key) // timed as well: puts the entry in place for the call below
+		sm.Delete(key)
+	}
+}
+
+// Fixed-size benchmarks for the O(n) helpers: each runs against a map of benchMapSize entries.
+
+const benchMapSize = 1000
+
+func seedMap(n int) *syncmap.SyncMap[string, int] {
+	seeded := new(syncmap.SyncMap[string, int])
+	for val := 0; val < n; val++ {
+		seeded.Store(strconv.Itoa(val), val) // key is the decimal text of the value
+	}
+	return seeded
+}
+
+func BenchmarkRange(b *testing.B) {
+	sm := seedMap(benchMapSize)
+	b.ReportAllocs()
+	b.ResetTimer()
+	for n := b.N; n > 0; n-- { // the callback always returns true
+		sm.Range(func(string, int) bool { return true })
+	}
+}
+
+func BenchmarkLen(b *testing.B) {
+	sm := seedMap(benchMapSize)
+	b.ReportAllocs()
+	b.ResetTimer()
+	for n := b.N; n > 0; n-- { // map contents stay fixed at benchMapSize entries
+		_ = sm.Len()
+	}
+}
+
+func BenchmarkMap(b *testing.B) {
+	sm := seedMap(benchMapSize)
+	b.ReportAllocs()
+	b.ResetTimer()
+	for n := b.N; n > 0; n-- { // result is discarded; only the call is measured
+		_ = sm.Map()
+	}
+}
+
+func BenchmarkKeys(b *testing.B) {
+	sm := seedMap(benchMapSize)
+	b.ReportAllocs()
+	b.ResetTimer()
+	for n := b.N; n > 0; n-- { // result is discarded; only the call is measured
+		_ = sm.Keys()
+	}
+}
+
+func BenchmarkValues(b *testing.B) {
+	sm := seedMap(benchMapSize)
+	b.ReportAllocs()
+	b.ResetTimer()
+	for n := b.N; n > 0; n-- { // result is discarded; only the call is measured
+		_ = sm.Values()
+	}
+}
+
+// -----------------------------------------------------------------------------
+// Concurrent access pattern — realistic 90% read / 10% write mix.
+// -----------------------------------------------------------------------------
+
+func BenchmarkConcurrentReadWrite(b *testing.B) {
+	b.ReportAllocs()
+	m := seedMap(benchMapSize)
+
+	// Build the key pool up front: calling strconv.Itoa inside the timed
+	// loop would allocate on every iteration and let the harness, not
+	// the map, dominate the allocs/op figure.
+	pool := make([]string, 0, benchMapSize)
+	for i := 0; i < benchMapSize; i++ {
+		pool = append(pool, strconv.Itoa(i))
+	}
+	var ops atomic.Int64 // one sequence shared by all workers: every 10th op overall is a Store
+
+	b.ResetTimer()
+	b.RunParallel(func(pb *testing.PB) {
+		for pb.Next() {
+			seq := ops.Add(1)
+			key := pool[int(seq%benchMapSize)]
+			if seq%10 != 0 {
+				_, _ = m.Load(key)
+				continue
+			}
+			m.Store(key, int(seq))
+		}
+	})
+}
+
+// -----------------------------------------------------------------------------
+// Overhead pairs vs raw sync.Map — measures the wrapper cost beyond stdlib.
+// Both sides perform the same operations with the same workload. For the
+// Delete and LoadAndDelete pairs, each iteration includes a Store so the
+// method under test has something to operate on; the pair compares the
+// generic wrapper's Store+Delete cost against the raw sync.Map's
+// Store+Delete cost — any delta is wrapper overhead, not the absolute
+// cost of the named operation.
+// -----------------------------------------------------------------------------
+
+func BenchmarkOverhead_LoadSyncMap(b *testing.B) {
+	var typed syncmap.SyncMap[string, int]
+	typed.Store("k", 42)
+	b.ReportAllocs()
+	b.ResetTimer()
+	for n := b.N; n > 0; n-- { // wrapper side of the Load pair
+		_, _ = typed.Load("k")
+	}
+}
+
+func BenchmarkOverhead_LoadRawSyncMap(b *testing.B) {
+	var raw sync.Map
+	raw.Store("k", 42)
+	b.ReportAllocs()
+	b.ResetTimer()
+	for n := b.N; n > 0; n-- {
+		if boxed, found := raw.Load("k"); found {
+			_ = boxed.(int) // raw side includes the type assertion in its timed work
+		}
+	}
+}
+
+func BenchmarkOverhead_StoreSyncMap(b *testing.B) {
+	var typed syncmap.SyncMap[string, int]
+	b.ReportAllocs()
+	b.ResetTimer()
+	for val := 0; val < b.N; val++ { // wrapper side of the Store pair
+		typed.Store("k", val)
+	}
+}
+
+func BenchmarkOverhead_StoreRawSyncMap(b *testing.B) {
+	var raw sync.Map
+	b.ReportAllocs()
+	b.ResetTimer()
+	for val := 0; val < b.N; val++ { // raw side of the Store pair: same key, same values
+		raw.Store("k", val)
+	}
+}
+
+func BenchmarkOverhead_LoadOrStoreSyncMap(b *testing.B) {
+	var typed syncmap.SyncMap[int, int]
+	b.ReportAllocs()
+	b.ResetTimer()
+	for key := 0; key < b.N; key++ { // wrapper side: a fresh key on every pass
+		_, _ = typed.LoadOrStore(key, key)
+	}
+}
+
+func BenchmarkOverhead_LoadOrStoreRawSyncMap(b *testing.B) {
+	var raw sync.Map
+	b.ReportAllocs()
+	b.ResetTimer()
+	for key := 0; key < b.N; key++ {
+		if actual, _ := raw.LoadOrStore(key, key); actual != nil {
+			_ = actual.(int) // raw side includes the type assertion in its timed work
+		}
+	}
+}
+
+func BenchmarkOverhead_DeleteSyncMap(b *testing.B) {
+	var typed syncmap.SyncMap[int, int]
+	b.ReportAllocs()
+	b.ResetTimer()
+	for key := 0; key < b.N; key++ {
+		typed.Store(key, key) // timed as well: the pair compares Store+Delete on both sides
+		typed.Delete(key)
+	}
+}
+
+func BenchmarkOverhead_DeleteRawSyncMap(b *testing.B) {
+	var raw sync.Map
+	b.ReportAllocs()
+	b.ResetTimer()
+	for key := 0; key < b.N; key++ {
+		raw.Store(key, key) // timed as well: the pair compares Store+Delete on both sides
+		raw.Delete(key)
+	}
+}
+
+func BenchmarkOverhead_LoadAndDeleteSyncMap(b *testing.B) {
+	var typed syncmap.SyncMap[int, int]
+	b.ReportAllocs()
+	b.ResetTimer()
+	for key := 0; key < b.N; key++ {
+		typed.Store(key, key) // timed as well: the pair compares Store+LoadAndDelete on both sides
+		_, _ = typed.LoadAndDelete(key)
+	}
+}
+
+func BenchmarkOverhead_LoadAndDeleteRawSyncMap(b *testing.B) {
+	var raw sync.Map
+	b.ReportAllocs()
+	b.ResetTimer()
+	for key := 0; key < b.N; key++ {
+		raw.Store(key, key)
+		if boxed, removed := raw.LoadAndDelete(key); removed {
+			_ = boxed.(int) // raw side includes the type assertion in its timed work
+		}
+	}
+}