From 1cc48fb071b8a9478d2a851086a4e5396e680555 Mon Sep 17 00:00:00 2001 From: "Alex Razumov (from Dev Box)" Date: Thu, 7 May 2026 16:57:11 -0700 Subject: [PATCH 01/30] Create two workflows --- .github/workflows/build-release.yml | 172 ++++++++++ .github/workflows/produce-build-stats.yml | 395 ++++++++++++++++++++++ 2 files changed, 567 insertions(+) create mode 100644 .github/workflows/build-release.yml create mode 100644 .github/workflows/produce-build-stats.yml diff --git a/.github/workflows/build-release.yml b/.github/workflows/build-release.yml new file mode 100644 index 000000000..c9cd2126d --- /dev/null +++ b/.github/workflows/build-release.yml @@ -0,0 +1,172 @@ +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT license. + +# Release build statistics workflow. +# +# Runs on every push to main to capture release build timings, binary sizes, +# and cargo-bloat analysis. The data is uploaded as structured JSON artifacts +# so the companion `produce-build-stats.yml` workflow can aggregate trends. + +on: + push: + branches: ["main"] + +name: Release Build Stats + +concurrency: + group: ${{ github.workflow }}-${{ github.sha }} + cancel-in-progress: true + +env: + CARGO_TERM_COLOR: always + +defaults: + run: + shell: bash + +permissions: + contents: read + +jobs: + build-release: + name: release build + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + + - name: Install Rust + run: rustup show + + - uses: Swatinem/rust-cache@v2 + with: + # Only cache registry/git — we cargo clean for cold build timings. + cache-targets: false + + - name: Install cargo-bloat + uses: taiki-e/install-action@v2 + with: + tool: cargo-bloat + + - name: Build release with timings + run: | + set -euxo pipefail + cargo clean + cargo build --workspace --release --locked --timings + + - name: Upload cargo-timing report + uses: actions/upload-artifact@v4 + with: + name: cargo-timing + path: | + target/cargo-timings/cargo-timing.html + retention-days: 90 + + - name: Parse and display build times + run: | + python3 - <<'PYEOF' + import re, json, sys + from pathlib import Path + + html_path = Path("target/cargo-timings/cargo-timing.html") + if not html_path.exists(): + print("::warning::cargo-timing.html not found") + sys.exit(0) + + html = html_path.read_text() + + # Extract wall-clock duration (seconds) + m = re.search(r'DURATION\s*=\s*(\d+(?:\.\d+)?)', html) + total_s = float(m.group(1)) if m else 0 + + # Extract human-readable total time from summary table + m2 = re.search(r'Total time:([^<]+)', html) + total_display = m2.group(1).strip() if m2 else f"{total_s:.1f}s" + + # Extract per-unit data from the embedded JSON + m = re.search(r'const UNIT_DATA\s*=\s*(\[.*?\]);', html, re.DOTALL) + if not m: + print("::warning::Could not parse UNIT_DATA from timing report") + sys.exit(0) + + units = json.loads(m.group(1)) + + # Sort by duration descending + units_sorted = sorted(units, key=lambda u: u.get("duration", 0), reverse=True) + + # Print markdown table to console + print(f"\n### Release Build Times (Total wall time: {total_display})\n") + print("| # | Crate | Version | Duration |") + print("|---|-------|---------|----------|") + for i, u in enumerate(units_sorted, 1): + name = u.get("name", "?") + version = u.get("version", "?") + duration = u.get("duration", 0) + print(f"| {i} | {name} | {version} | {duration:.1f}s |") + + # Write structured JSON artifact + stats = { + "total_wall_time_s": total_s, + "total_time_display": total_display, + "units": [ + { + "name": 
u["name"], + "version": u.get("version", ""), + "duration": u.get("duration", 0), + } + for u in units_sorted + ], + } + Path("build-times.json").write_text(json.dumps(stats, indent=2)) + PYEOF + + - name: Log binary sizes + run: | + set +x + echo "" + echo "### Release Binary Sizes" + echo "" + echo "| Binary | Size (bytes) | Size |" + echo "|--------|-------------|------|" + + python3 - <<'PYEOF' + import json, os + from pathlib import Path + + binaries = [] + release_dir = Path("target/release") + for p in sorted(release_dir.iterdir()): + if not p.is_file(): + continue + # On Linux, check executable bit and skip non-ELF extensions + if p.suffix in (".d", ".rlib", ".rmeta", ".o", ".dwp"): + continue + if not os.access(p, os.X_OK): + continue + size = p.stat().st_size + if size < 1024: + continue # skip tiny files (build scripts, etc.) + if size > 1048576: + human = f"{size / 1048576:.1f} MiB" + elif size > 1024: + human = f"{size / 1024:.1f} KiB" + else: + human = f"{size} B" + print(f"| {p.name} | {size} | {human} |") + binaries.append({"name": p.name, "bytes": size}) + + Path("binary-sizes.json").write_text(json.dumps(binaries, indent=2)) + PYEOF + + - name: Run cargo bloat + run: | + cargo bloat --release --package diskann-benchmark -n 100 | tee cargo-bloat.txt + + - name: Upload build stats + uses: actions/upload-artifact@v4 + with: + name: build-stats + path: | + build-times.json + binary-sizes.json + cargo-bloat.txt + retention-days: 90 diff --git a/.github/workflows/produce-build-stats.yml b/.github/workflows/produce-build-stats.yml new file mode 100644 index 000000000..63c5a3ea8 --- /dev/null +++ b/.github/workflows/produce-build-stats.yml @@ -0,0 +1,395 @@ +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT license. + +# Aggregates release build statistics from the last 30 days of +# `build-release.yml` runs and produces an HTML dashboard with trend charts. +# +# Intended for weekly scrum review. + +on: + schedule: + - cron: "0 8 * * *" + workflow_dispatch: + +name: Produce Build Stats Report + +defaults: + run: + shell: bash + +permissions: + contents: read + actions: read + +env: + GH_TOKEN: ${{ github.token }} + +jobs: + report: + name: generate report + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + + - name: Collect artifacts from recent runs + run: | + set -euo pipefail + mkdir -p collected + + # List successful runs of build-release.yml from the last 30 days + SINCE=$(date -u -d '30 days ago' '+%Y-%m-%dT%H:%M:%SZ') + + echo "Fetching runs since $SINCE ..." + + gh api --paginate \ + "repos/${{ github.repository }}/actions/workflows/build-release.yml/runs?status=success&created=>=$SINCE&per_page=100" \ + --jq '.workflow_runs[] | [.id, .created_at, .head_sha] | @tsv' \ + > runs.tsv || true + + if [ ! -s runs.tsv ]; then + echo "::warning::No successful build-release runs found in the last 30 days" + echo '[]' > collected/all_runs.json + exit 0 + fi + + echo "Found $(wc -l < runs.tsv) runs" + + # For each run, download the build-stats artifact and build metadata + echo '[]' > collected/all_runs.json + + while IFS=$'\t' read -r run_id created_at head_sha; do + echo "Processing run $run_id ($created_at) ..." + ARTIFACT_DIR="collected/$run_id" + mkdir -p "$ARTIFACT_DIR" + + # Download build-stats artifact + if ! 
gh run download "$run_id" \ + --repo "${{ github.repository }}" \ + --name build-stats \ + --dir "$ARTIFACT_DIR/build-stats" 2>/dev/null; then + echo " ::warning::Could not download build-stats for run $run_id (artifact may have expired)" + rm -rf "$ARTIFACT_DIR" + continue + fi + echo " Downloaded build-stats for run $run_id" + + # Build a metadata record and append to collected data + python3 -c " + import json, sys + from pathlib import Path + + base = Path(sys.argv[1]) / 'build-stats' + record = { + 'run_id': int(sys.argv[2]), + 'created_at': sys.argv[3], + 'head_sha': sys.argv[4], + } + + bt = base / 'build-times.json' + if bt.exists(): + record['build_times'] = json.loads(bt.read_text()) + + bs = base / 'binary-sizes.json' + if bs.exists(): + record['binary_sizes'] = json.loads(bs.read_text()) + + cb = base / 'cargo-bloat.txt' + if cb.exists(): + record['cargo_bloat'] = cb.read_text() + + all_file = Path('collected/all_runs.json') + data = json.loads(all_file.read_text()) + data.append(record) + all_file.write_text(json.dumps(data)) + " "$ARTIFACT_DIR" "$run_id" "$created_at" "$head_sha" + + done < runs.tsv + + COUNT=$(python3 -c "import json; print(len(json.loads(open('collected/all_runs.json').read())))") + echo "Collected data for $COUNT runs" + + - name: Generate HTML report + run: | + python3 - <<'PYEOF' + import json + from pathlib import Path + from datetime import datetime + + data = json.loads(Path("collected/all_runs.json").read_text()) + + # Sort by date ascending + data.sort(key=lambda r: r.get("created_at", "")) + + # --- Prepare chart data --- + + dates = [] + total_build_times = [] + # crate_times: { crate_name: [time_per_run...] } + crate_times = {} + total_binary_sizes = [] + # per_binary: { binary_name: [size_per_run...] } + per_binary = {} + + for run in data: + dt_str = run.get("created_at", "") + dates.append(dt_str[:10] if dt_str else "?") + + bt = run.get("build_times", {}) + total_build_times.append(bt.get("total_wall_time_s", 0)) + + # Per-crate build times + units = bt.get("units", []) + seen_crates = set() + for u in units: + name = u.get("name", "") + if name not in crate_times: + crate_times[name] = [None] * (len(dates) - 1) + crate_times[name].append(u.get("duration", 0)) + seen_crates.add(name) + for name in crate_times: + if name not in seen_crates: + crate_times[name].append(None) + + # Binary sizes + bs = run.get("binary_sizes", []) + run_total_size = sum(b.get("bytes", 0) for b in bs) + total_binary_sizes.append(run_total_size) + + seen_bins = set() + for b in bs: + bname = b.get("name", "") + if bname not in per_binary: + per_binary[bname] = [None] * (len(dates) - 1) + per_binary[bname].append(b.get("bytes", 0)) + seen_bins.add(bname) + for bname in per_binary: + if bname not in seen_bins: + per_binary[bname].append(None) + + # Filter to top 15 crates by average duration + def avg_non_none(lst): + vals = [v for v in lst if v is not None] + return sum(vals) / len(vals) if vals else 0 + + top_crates = sorted(crate_times.keys(), key=lambda c: avg_non_none(crate_times[c]), reverse=True)[:15] + + # Latest cargo bloat + latest_bloat = "" + for run in reversed(data): + if run.get("cargo_bloat"): + latest_bloat = run["cargo_bloat"] + break + + # --- Color palette --- + COLORS = [ + "#4e79a7", "#f28e2b", "#e15759", "#76b7b2", "#59a14f", + "#edc948", "#b07aa1", "#ff9da7", "#9c755f", "#bab0ac", + "#86bcb6", "#8cd17d", "#b6992d", "#499894", "#d37295", + ] + + def js_array(lst): + return json.dumps(lst) + + def js_datasets_crates(): + datasets = [] + for i, 
name in enumerate(top_crates):
+                  color = COLORS[i % len(COLORS)]
+                  datasets.append({
+                      "label": name,
+                      "data": crate_times[name],
+                      "borderColor": color,
+                      "backgroundColor": color + "33",
+                      "tension": 0.3,
+                      "spanGaps": True,
+                  })
+              return json.dumps(datasets)
+
+          def js_datasets_binaries():
+              datasets = []
+              for i, name in enumerate(sorted(per_binary.keys())):
+                  color = COLORS[i % len(COLORS)]
+                  datasets.append({
+                      "label": name,
+                      "data": [b / 1048576 if b is not None else None for b in per_binary[name]],
+                      "borderColor": color,
+                      "backgroundColor": color + "33",
+                      "tension": 0.3,
+                      "spanGaps": True,
+                  })
+              return json.dumps(datasets)
+
+          # --- Generate HTML ---
+          now = datetime.utcnow().strftime("%Y-%m-%d %H:%M UTC")
+
+          html = f"""<!DOCTYPE html>
+          <html>
+          <head>
+          <meta charset="utf-8">
+          <title>DiskANN Release Build Stats</title>
+          <script src="https://cdn.jsdelivr.net/npm/chart.js"></script>
+          </head>
+          <body>
+          <h1>DiskANN Release Build Statistics</h1>
+
+          <p>Generated: {now} — Last 30 days ({len(data)} runs)</p>
+
+          <h2>Total Build Time Trend</h2>
+          <canvas id="totalBuildTime"></canvas>
+
+          <h2>Build Time by Crate (Top 15)</h2>
+          <canvas id="crateBuildTime"></canvas>
+
+          <h2>Total Binary Size Trend</h2>
+          <canvas id="totalBinarySize"></canvas>
+
+          <h2>Binary Size per Binary</h2>
+          <canvas id="perBinarySize"></canvas>
+
+          <h2>Latest Cargo Bloat (diskann-benchmark)</h2>
+          <pre>{latest_bloat if latest_bloat else "No cargo bloat data available."}</pre>
+
+          <h2>Latest Build Details</h2>
+          """
+
+          # Add latest run details table
+          if data:
+              latest = data[-1]
+              bt = latest.get("build_times", {})
+              units = bt.get("units", [])
+              html += f"""
+          <p>Run: {latest.get('created_at', '?')} —
+          Commit: {latest.get('head_sha', '?')[:12]} —
+          Total wall time: {bt.get('total_time_display', '?')}</p>
+          <table>
+          <tr><th>#</th><th>Crate</th><th>Version</th><th>Duration</th></tr>
+          """
+              for i, u in enumerate(units, 1):
+                  html += f"<tr><td>{i}</td><td>{u.get('name','?')}</td><td>{u.get('version','?')}</td><td>{u.get('duration',0):.1f}s</td></tr>\n"
+              html += "</table>\n"
+
+              bs = latest.get("binary_sizes", [])
+              if bs:
+                  html += """
+          <h2>Binary Sizes</h2>
+          <table>
+          <tr><th>Binary</th><th>Size (bytes)</th><th>Size</th></tr>
+          """
+                  for b in bs:
+                      size = b.get("bytes", 0)
+                      human = f"{size / 1048576:.1f} MiB" if size > 1048576 else f"{size / 1024:.1f} KiB"
+                      html += f"<tr><td>{b.get('name','?')}</td><td>{size:,}</td><td>{human}</td></tr>
\n" + + html += f""" + + + + """ + + Path("build-stats-report.html").write_text(html) + print(f"Report generated: build-stats-report.html ({len(data)} runs)") + PYEOF + + - name: Upload report + uses: actions/upload-artifact@v4 + with: + name: build-stats-report + path: build-stats-report.html + retention-days: 90 From 9cda4e6f380467e020c479b47b368eeeab7f3f95 Mon Sep 17 00:00:00 2001 From: "Alex Razumov (from Dev Box)" Date: Thu, 7 May 2026 17:00:01 -0700 Subject: [PATCH 02/30] Add workflow dispatch --- .github/workflows/build-release.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/build-release.yml b/.github/workflows/build-release.yml index c9cd2126d..fe66974df 100644 --- a/.github/workflows/build-release.yml +++ b/.github/workflows/build-release.yml @@ -10,6 +10,7 @@ on: push: branches: ["main"] + workflow_dispatch: name: Release Build Stats From 9d4301d80ec452945b2f91fde7a6c04a72caf0a2 Mon Sep 17 00:00:00 2001 From: "Alex Razumov (from Dev Box)" Date: Thu, 7 May 2026 17:08:37 -0700 Subject: [PATCH 03/30] Enable on push --- .github/workflows/build-release.yml | 4 +++- .github/workflows/produce-build-stats.yml | 3 +++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/.github/workflows/build-release.yml b/.github/workflows/build-release.yml index fe66974df..d7d07ca42 100644 --- a/.github/workflows/build-release.yml +++ b/.github/workflows/build-release.yml @@ -9,7 +9,9 @@ on: push: - branches: ["main"] + branches: + - "main" + - "u/arrayka/release_builds_ci" # temporary: remove before merge workflow_dispatch: name: Release Build Stats diff --git a/.github/workflows/produce-build-stats.yml b/.github/workflows/produce-build-stats.yml index 63c5a3ea8..8425fbd0b 100644 --- a/.github/workflows/produce-build-stats.yml +++ b/.github/workflows/produce-build-stats.yml @@ -7,6 +7,9 @@ # Intended for weekly scrum review. on: + push: + branches: + - "u/arrayka/release_builds_ci" # temporary: remove before merge schedule: - cron: "0 8 * * *" workflow_dispatch: From f2baa290bdc2ec3038fa16551a621e80265a1b41 Mon Sep 17 00:00:00 2001 From: "Alex Razumov (from Dev Box)" Date: Thu, 7 May 2026 18:02:40 -0700 Subject: [PATCH 04/30] Extract build stats HTML --- .github/scripts/build-stats-report.html | 176 +++++++++++++++ .github/workflows/produce-build-stats.yml | 264 ++++------------------ 2 files changed, 221 insertions(+), 219 deletions(-) create mode 100644 .github/scripts/build-stats-report.html diff --git a/.github/scripts/build-stats-report.html b/.github/scripts/build-stats-report.html new file mode 100644 index 000000000..61bf2c9c6 --- /dev/null +++ b/.github/scripts/build-stats-report.html @@ -0,0 +1,176 @@ + + + + + DiskANN Release Build Stats + + + + + +

+    <h1>DiskANN Release Build Statistics</h1>
+
+    <p id="meta"></p>
+
+    <h2>Total Build Time Trend</h2>
+    <canvas id="totalBuildTime"></canvas>
+
+    <h2>Build Time by Crate (Top 15)</h2>
+    <canvas id="crateBuildTime"></canvas>
+
+    <h2>Total Binary Size Trend</h2>
+    <canvas id="totalBinarySize"></canvas>
+
+    <h2>Binary Size per Binary</h2>
+    <canvas id="perBinarySize"></canvas>
+
+    <h2>Latest Cargo Bloat (diskann-benchmark)</h2>
+    <pre id="cargo-bloat">
+    </pre>
+
+    <h2>Latest Build Details</h2>
+    <div id="latest-details"></div>
+ + + + diff --git a/.github/workflows/produce-build-stats.yml b/.github/workflows/produce-build-stats.yml index 8425fbd0b..f0cc254c9 100644 --- a/.github/workflows/produce-build-stats.yml +++ b/.github/workflows/produce-build-stats.yml @@ -111,26 +111,21 @@ jobs: COUNT=$(python3 -c "import json; print(len(json.loads(open('collected/all_runs.json').read())))") echo "Collected data for $COUNT runs" - - name: Generate HTML report + - name: Generate data file for report run: | + mkdir -p report python3 - <<'PYEOF' import json from pathlib import Path from datetime import datetime data = json.loads(Path("collected/all_runs.json").read_text()) - - # Sort by date ascending data.sort(key=lambda r: r.get("created_at", "")) - # --- Prepare chart data --- - dates = [] total_build_times = [] - # crate_times: { crate_name: [time_per_run...] } crate_times = {} total_binary_sizes = [] - # per_binary: { binary_name: [size_per_run...] } per_binary = {} for run in data: @@ -140,7 +135,6 @@ jobs: bt = run.get("build_times", {}) total_build_times.append(bt.get("total_wall_time_s", 0)) - # Per-crate build times units = bt.get("units", []) seen_crates = set() for u in units: @@ -153,10 +147,8 @@ jobs: if name not in seen_crates: crate_times[name].append(None) - # Binary sizes bs = run.get("binary_sizes", []) - run_total_size = sum(b.get("bytes", 0) for b in bs) - total_binary_sizes.append(run_total_size) + total_binary_sizes.append(sum(b.get("bytes", 0) for b in bs)) seen_bins = set() for b in bs: @@ -169,12 +161,12 @@ jobs: if bname not in seen_bins: per_binary[bname].append(None) - # Filter to top 15 crates by average duration - def avg_non_none(lst): + # Top 15 crates by average duration + def avg(lst): vals = [v for v in lst if v is not None] return sum(vals) / len(vals) if vals else 0 - top_crates = sorted(crate_times.keys(), key=lambda c: avg_non_none(crate_times[c]), reverse=True)[:15] + top_crates = sorted(crate_times.keys(), key=lambda c: avg(crate_times[c]), reverse=True)[:15] # Latest cargo bloat latest_bloat = "" @@ -183,216 +175,50 @@ jobs: latest_bloat = run["cargo_bloat"] break - # --- Color palette --- - COLORS = [ - "#4e79a7", "#f28e2b", "#e15759", "#76b7b2", "#59a14f", - "#edc948", "#b07aa1", "#ff9da7", "#9c755f", "#bab0ac", - "#86bcb6", "#8cd17d", "#b6992d", "#499894", "#d37295", - ] - - def js_array(lst): - return json.dumps(lst) - - def js_datasets_crates(): - datasets = [] - for i, name in enumerate(top_crates): - color = COLORS[i % len(COLORS)] - datasets.append({ - "label": name, - "data": crate_times[name], - "borderColor": color, - "backgroundColor": color + "33", - "tension": 0.3, - "spanGaps": True, - }) - return json.dumps(datasets) - - def js_datasets_binaries(): - datasets = [] - for i, name in enumerate(sorted(per_binary.keys())): - color = COLORS[i % len(COLORS)] - datasets.append({ - "label": name, - "data": [b / 1048576 if b is not None else None for b in per_binary[name]], - "borderColor": color, - "backgroundColor": color + "33", - "tension": 0.3, - "spanGaps": True, - }) - return json.dumps(datasets) - - # --- Generate HTML --- - now = datetime.utcnow().strftime("%Y-%m-%d %H:%M UTC") - - html = f""" - - - - DiskANN Release Build Stats - - - - -

-          <h1>DiskANN Release Build Statistics</h1>
-
-          <p>Generated: {now} — Last 30 days ({len(data)} runs)</p>
-
-          <h2>Total Build Time Trend</h2>
-          <canvas id="totalBuildTime"></canvas>
-
-          <h2>Build Time by Crate (Top 15)</h2>
-          <canvas id="crateBuildTime"></canvas>
-
-          <h2>Total Binary Size Trend</h2>
-          <canvas id="totalBinarySize"></canvas>
-
-          <h2>Binary Size per Binary</h2>
-          <canvas id="perBinarySize"></canvas>
-
-          <h2>Latest Cargo Bloat (diskann-benchmark)</h2>
-          <pre>{latest_bloat if latest_bloat else "No cargo bloat data available."}</pre>
-
-          <h2>Latest Build Details</h2>
-          """
-
-          # Add latest run details table
-          if data:
-              latest = data[-1]
-              bt = latest.get("build_times", {})
-              units = bt.get("units", [])
-              html += f"""
-          <p>Run: {latest.get('created_at', '?')} —
-          Commit: {latest.get('head_sha', '?')[:12]} —
-          Total wall time: {bt.get('total_time_display', '?')}</p>
-          <table>
-          <tr><th>#</th><th>Crate</th><th>Version</th><th>Duration</th></tr>
-          """
-              for i, u in enumerate(units, 1):
-                  html += f"<tr><td>{i}</td><td>{u.get('name','?')}</td><td>{u.get('version','?')}</td><td>{u.get('duration',0):.1f}s</td></tr>\n"
-              html += "</table>\n"
-
-              bs = latest.get("binary_sizes", [])
-              if bs:
-                  html += """
-          <h2>Binary Sizes</h2>
-          <table>
-          <tr><th>Binary</th><th>Size (bytes)</th><th>Size</th></tr>
-          """
-                  for b in bs:
-                      size = b.get("bytes", 0)
-                      human = f"{size / 1048576:.1f} MiB" if size > 1048576 else f"{size / 1024:.1f} KiB"
-                      html += f"<tr><td>{b.get('name','?')}</td><td>{size:,}</td><td>{human}</td></tr>
\n" - - html += f""" - - - - """ - - Path("build-stats-report.html").write_text(html) - print(f"Report generated: build-stats-report.html ({len(data)} runs)") + last = data[-1] + bt = last.get("build_times", {}) + latest_run = { + "created_at": last.get("created_at", "?"), + "head_sha": last.get("head_sha", "?")[:12], + "total_time_display": bt.get("total_time_display", "?"), + "units": bt.get("units", []), + "binary_sizes": last.get("binary_sizes", []), + } + + # Assemble the data object + build_data = { + "generated": datetime.utcnow().strftime("%Y-%m-%d %H:%M UTC"), + "dates": dates, + "total_build_times": total_build_times, + "total_binary_sizes_mib": [s / 1048576 for s in total_binary_sizes], + "crate_datasets": [ + {"label": name, "data": crate_times[name]} + for name in top_crates + ], + "binary_datasets": [ + {"label": name, "data": [b / 1048576 if b is not None else None for b in per_binary[name]]} + for name in sorted(per_binary.keys()) + ], + "latest_cargo_bloat": latest_bloat, + "latest_run": latest_run, + } + + # Write as a JS file that assigns to a global constant + js_content = f"const BUILD_DATA = {json.dumps(build_data, indent=2)};\n" + Path("report/build-stats-data.js").write_text(js_content) + print(f"Generated build-stats-data.js ({len(data)} runs)") PYEOF + - name: Assemble report + run: | + cp .github/scripts/build-stats-report.html report/build-stats-report.html + - name: Upload report uses: actions/upload-artifact@v4 with: name: build-stats-report - path: build-stats-report.html + path: report/ retention-days: 90 From b25c600c8078b4ff845c253ca1804dfe804811cb Mon Sep 17 00:00:00 2001 From: "Alex Razumov (from Dev Box)" Date: Thu, 7 May 2026 18:07:59 -0700 Subject: [PATCH 05/30] Extract scripts --- .github/scripts/generate-stats-data.py | 100 +++++++++++++ .github/scripts/parse-build-stats.py | 67 +++++++++ .github/workflows/build-release.yml | 115 +-------------- .github/workflows/produce-build-stats.yml | 168 +++------------------- 4 files changed, 190 insertions(+), 260 deletions(-) create mode 100644 .github/scripts/generate-stats-data.py create mode 100644 .github/scripts/parse-build-stats.py diff --git a/.github/scripts/generate-stats-data.py b/.github/scripts/generate-stats-data.py new file mode 100644 index 000000000..c1b4338a9 --- /dev/null +++ b/.github/scripts/generate-stats-data.py @@ -0,0 +1,100 @@ +"""Aggregate build-stats artifacts into a JS data file for the HTML report.""" +import json +import sys +from datetime import datetime, timezone +from pathlib import Path + + +def main(): + collected_dir = Path(sys.argv[1]) if len(sys.argv) > 1 else Path("collected") + output_dir = Path(sys.argv[2]) if len(sys.argv) > 2 else Path("report") + output_dir.mkdir(parents=True, exist_ok=True) + + data = json.loads((collected_dir / "all_runs.json").read_text()) + data.sort(key=lambda r: r.get("created_at", "")) + + dates = [] + total_build_times = [] + crate_times: dict[str, list] = {} + total_binary_sizes = [] + per_binary: dict[str, list] = {} + + for run in data: + dt_str = run.get("created_at", "") + dates.append(dt_str[:10] if dt_str else "?") + + bt = run.get("build_times", {}) + total_build_times.append(bt.get("total_wall_time_s", 0)) + + # Per-crate build times + units = bt.get("units", []) + seen = set() + for u in units: + name = u.get("name", "") + if name not in crate_times: + crate_times[name] = [None] * (len(dates) - 1) + crate_times[name].append(u.get("duration", 0)) + seen.add(name) + for name in crate_times: + if name not in seen: + 
crate_times[name].append(None) + + # Binary sizes + bs = run.get("binary_sizes", []) + total_binary_sizes.append(sum(b.get("bytes", 0) for b in bs)) + + seen_bins = set() + for b in bs: + bname = b.get("name", "") + if bname not in per_binary: + per_binary[bname] = [None] * (len(dates) - 1) + per_binary[bname].append(b.get("bytes", 0)) + seen_bins.add(bname) + for bname in per_binary: + if bname not in seen_bins: + per_binary[bname].append(None) + + # Top 15 crates by average duration + def avg(lst): + vals = [v for v in lst if v is not None] + return sum(vals) / len(vals) if vals else 0 + + top_crates = sorted(crate_times.keys(), key=lambda c: avg(crate_times[c]), reverse=True)[:15] + + # Latest cargo bloat + latest_bloat = next((r["cargo_bloat"] for r in reversed(data) if r.get("cargo_bloat")), "") + + # Latest run details + latest_run = None + if data: + last = data[-1] + bt = last.get("build_times", {}) + latest_run = { + "created_at": last.get("created_at", "?"), + "head_sha": last.get("head_sha", "?")[:12], + "total_time_display": bt.get("total_time_display", "?"), + "units": bt.get("units", []), + "binary_sizes": last.get("binary_sizes", []), + } + + build_data = { + "generated": datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M UTC"), + "dates": dates, + "total_build_times": total_build_times, + "total_binary_sizes_mib": [s / 1048576 for s in total_binary_sizes], + "crate_datasets": [{"label": name, "data": crate_times[name]} for name in top_crates], + "binary_datasets": [ + {"label": name, "data": [b / 1048576 if b is not None else None for b in per_binary[name]]} + for name in sorted(per_binary.keys()) + ], + "latest_cargo_bloat": latest_bloat, + "latest_run": latest_run, + } + + js_path = output_dir / "build-stats-data.js" + js_path.write_text(f"const BUILD_DATA = {json.dumps(build_data, indent=2)};\n") + print(f"Generated {js_path} ({len(data)} runs)") + + +if __name__ == "__main__": + main() diff --git a/.github/scripts/parse-build-stats.py b/.github/scripts/parse-build-stats.py new file mode 100644 index 000000000..53aa08976 --- /dev/null +++ b/.github/scripts/parse-build-stats.py @@ -0,0 +1,67 @@ +"""Parse cargo-timing.html and target/release binaries into JSON artifacts.""" +import json +import os +import re +import sys +from pathlib import Path + +html_path = Path("target/cargo-timings/cargo-timing.html") +if not html_path.exists(): + print("::warning::cargo-timing.html not found") + sys.exit(0) + +html = html_path.read_text() + +# --- Build times --- +m = re.search(r"DURATION\s*=\s*(\d+(?:\.\d+)?)", html) +total_s = float(m.group(1)) if m else 0 + +m2 = re.search(r"Total time:([^<]+)", html) +total_display = m2.group(1).strip() if m2 else f"{total_s:.1f}s" + +m = re.search(r"const UNIT_DATA\s*=\s*(\[.*?\]);", html, re.DOTALL) +if not m: + print("::warning::Could not parse UNIT_DATA from timing report") + sys.exit(0) + +units = json.loads(m.group(1)) +units_sorted = sorted(units, key=lambda u: u.get("duration", 0), reverse=True) + +# Print markdown table +print(f"\n### Release Build Times (Total wall time: {total_display})\n") +print("| # | Crate | Version | Duration |") +print("|---|-------|---------|----------|") +for i, u in enumerate(units_sorted, 1): + print(f"| {i} | {u.get('name', '?')} | {u.get('version', '?')} | {u.get('duration', 0):.1f}s |") + +Path("build-times.json").write_text(json.dumps({ + "total_wall_time_s": total_s, + "total_time_display": total_display, + "units": [{"name": u["name"], "version": u.get("version", ""), "duration": u.get("duration", 0)} 
for u in units_sorted], +}, indent=2)) + +# --- Binary sizes --- +print("\n### Release Binary Sizes\n") +print("| Binary | Size (bytes) | Size |") +print("|--------|-------------|------|") + +binaries = [] +release_dir = Path("target/release") +for p in sorted(release_dir.iterdir()): + if not p.is_file(): + continue + if p.suffix in (".d", ".rlib", ".rmeta", ".o", ".dwp"): + continue + if not os.access(p, os.X_OK): + continue + size = p.stat().st_size + if size < 1024: + continue + if size > 1048576: + human = f"{size / 1048576:.1f} MiB" + else: + human = f"{size / 1024:.1f} KiB" + print(f"| {p.name} | {size} | {human} |") + binaries.append({"name": p.name, "bytes": size}) + +Path("binary-sizes.json").write_text(json.dumps(binaries, indent=2)) diff --git a/.github/workflows/build-release.yml b/.github/workflows/build-release.yml index d7d07ca42..2162ed2f0 100644 --- a/.github/workflows/build-release.yml +++ b/.github/workflows/build-release.yml @@ -42,7 +42,6 @@ jobs: - uses: Swatinem/rust-cache@v2 with: - # Only cache registry/git — we cargo clean for cold build timings. cache-targets: false - name: Install cargo-bloat @@ -51,124 +50,20 @@ jobs: tool: cargo-bloat - name: Build release with timings - run: | - set -euxo pipefail - cargo clean - cargo build --workspace --release --locked --timings + run: cargo clean && cargo build --workspace --release --locked --timings - - name: Upload cargo-timing report - uses: actions/upload-artifact@v4 - with: - name: cargo-timing - path: | - target/cargo-timings/cargo-timing.html - retention-days: 90 - - - name: Parse and display build times - run: | - python3 - <<'PYEOF' - import re, json, sys - from pathlib import Path - - html_path = Path("target/cargo-timings/cargo-timing.html") - if not html_path.exists(): - print("::warning::cargo-timing.html not found") - sys.exit(0) - - html = html_path.read_text() - - # Extract wall-clock duration (seconds) - m = re.search(r'DURATION\s*=\s*(\d+(?:\.\d+)?)', html) - total_s = float(m.group(1)) if m else 0 - - # Extract human-readable total time from summary table - m2 = re.search(r'Total time:([^<]+)', html) - total_display = m2.group(1).strip() if m2 else f"{total_s:.1f}s" - - # Extract per-unit data from the embedded JSON - m = re.search(r'const UNIT_DATA\s*=\s*(\[.*?\]);', html, re.DOTALL) - if not m: - print("::warning::Could not parse UNIT_DATA from timing report") - sys.exit(0) - - units = json.loads(m.group(1)) - - # Sort by duration descending - units_sorted = sorted(units, key=lambda u: u.get("duration", 0), reverse=True) - - # Print markdown table to console - print(f"\n### Release Build Times (Total wall time: {total_display})\n") - print("| # | Crate | Version | Duration |") - print("|---|-------|---------|----------|") - for i, u in enumerate(units_sorted, 1): - name = u.get("name", "?") - version = u.get("version", "?") - duration = u.get("duration", 0) - print(f"| {i} | {name} | {version} | {duration:.1f}s |") - - # Write structured JSON artifact - stats = { - "total_wall_time_s": total_s, - "total_time_display": total_display, - "units": [ - { - "name": u["name"], - "version": u.get("version", ""), - "duration": u.get("duration", 0), - } - for u in units_sorted - ], - } - Path("build-times.json").write_text(json.dumps(stats, indent=2)) - PYEOF - - - name: Log binary sizes - run: | - set +x - echo "" - echo "### Release Binary Sizes" - echo "" - echo "| Binary | Size (bytes) | Size |" - echo "|--------|-------------|------|" - - python3 - <<'PYEOF' - import json, os - from pathlib import Path - 
- binaries = [] - release_dir = Path("target/release") - for p in sorted(release_dir.iterdir()): - if not p.is_file(): - continue - # On Linux, check executable bit and skip non-ELF extensions - if p.suffix in (".d", ".rlib", ".rmeta", ".o", ".dwp"): - continue - if not os.access(p, os.X_OK): - continue - size = p.stat().st_size - if size < 1024: - continue # skip tiny files (build scripts, etc.) - if size > 1048576: - human = f"{size / 1048576:.1f} MiB" - elif size > 1024: - human = f"{size / 1024:.1f} KiB" - else: - human = f"{size} B" - print(f"| {p.name} | {size} | {human} |") - binaries.append({"name": p.name, "bytes": size}) - - Path("binary-sizes.json").write_text(json.dumps(binaries, indent=2)) - PYEOF + - name: Parse and display build stats + run: python3 .github/scripts/parse-build-stats.py - name: Run cargo bloat - run: | - cargo bloat --release --package diskann-benchmark -n 100 | tee cargo-bloat.txt + run: cargo bloat --release --package diskann-benchmark -n 100 | tee cargo-bloat.txt - name: Upload build stats uses: actions/upload-artifact@v4 with: name: build-stats path: | + target/cargo-timings/cargo-timing.html build-times.json binary-sizes.json cargo-bloat.txt diff --git a/.github/workflows/produce-build-stats.yml b/.github/workflows/produce-build-stats.yml index f0cc254c9..a5481383d 100644 --- a/.github/workflows/produce-build-stats.yml +++ b/.github/workflows/produce-build-stats.yml @@ -38,12 +38,8 @@ jobs: run: | set -euo pipefail mkdir -p collected - - # List successful runs of build-release.yml from the last 30 days SINCE=$(date -u -d '30 days ago' '+%Y-%m-%dT%H:%M:%SZ') - echo "Fetching runs since $SINCE ..." - gh api --paginate \ "repos/${{ github.repository }}/actions/workflows/build-release.yml/runs?status=success&created=>=$SINCE&per_page=100" \ --jq '.workflow_runs[] | [.id, .created_at, .head_sha] | @tsv' \ @@ -56,165 +52,36 @@ jobs: fi echo "Found $(wc -l < runs.tsv) runs" - - # For each run, download the build-stats artifact and build metadata echo '[]' > collected/all_runs.json while IFS=$'\t' read -r run_id created_at head_sha; do - echo "Processing run $run_id ($created_at) ..." - ARTIFACT_DIR="collected/$run_id" - mkdir -p "$ARTIFACT_DIR" - - # Download build-stats artifact - if ! gh run download "$run_id" \ - --repo "${{ github.repository }}" \ - --name build-stats \ - --dir "$ARTIFACT_DIR/build-stats" 2>/dev/null; then - echo " ::warning::Could not download build-stats for run $run_id (artifact may have expired)" - rm -rf "$ARTIFACT_DIR" + dir="collected/$run_id/build-stats" + if ! 
gh run download "$run_id" --repo "${{ github.repository }}" \ + --name build-stats --dir "$dir" 2>/dev/null; then + echo "::warning::Skipping run $run_id (artifact expired)" continue fi - echo " Downloaded build-stats for run $run_id" - - # Build a metadata record and append to collected data python3 -c " import json, sys from pathlib import Path - - base = Path(sys.argv[1]) / 'build-stats' - record = { - 'run_id': int(sys.argv[2]), - 'created_at': sys.argv[3], - 'head_sha': sys.argv[4], - } - - bt = base / 'build-times.json' - if bt.exists(): - record['build_times'] = json.loads(bt.read_text()) - - bs = base / 'binary-sizes.json' - if bs.exists(): - record['binary_sizes'] = json.loads(bs.read_text()) - - cb = base / 'cargo-bloat.txt' - if cb.exists(): - record['cargo_bloat'] = cb.read_text() - - all_file = Path('collected/all_runs.json') - data = json.loads(all_file.read_text()) + base = Path(sys.argv[1]) + record = {'run_id': int(sys.argv[2]), 'created_at': sys.argv[3], 'head_sha': sys.argv[4]} + for name, key in [('build-times.json','build_times'),('binary-sizes.json','binary_sizes'),('cargo-bloat.txt','cargo_bloat')]: + p = base / name + if p.exists(): + record[key] = json.loads(p.read_text()) if name.endswith('.json') else p.read_text() + all_f = Path('collected/all_runs.json') + data = json.loads(all_f.read_text()) data.append(record) - all_file.write_text(json.dumps(data)) - " "$ARTIFACT_DIR" "$run_id" "$created_at" "$head_sha" - + all_f.write_text(json.dumps(data)) + " "$dir" "$run_id" "$created_at" "$head_sha" done < runs.tsv - COUNT=$(python3 -c "import json; print(len(json.loads(open('collected/all_runs.json').read())))") - echo "Collected data for $COUNT runs" - - - name: Generate data file for report + - name: Generate report run: | mkdir -p report - python3 - <<'PYEOF' - import json - from pathlib import Path - from datetime import datetime - - data = json.loads(Path("collected/all_runs.json").read_text()) - data.sort(key=lambda r: r.get("created_at", "")) - - dates = [] - total_build_times = [] - crate_times = {} - total_binary_sizes = [] - per_binary = {} - - for run in data: - dt_str = run.get("created_at", "") - dates.append(dt_str[:10] if dt_str else "?") - - bt = run.get("build_times", {}) - total_build_times.append(bt.get("total_wall_time_s", 0)) - - units = bt.get("units", []) - seen_crates = set() - for u in units: - name = u.get("name", "") - if name not in crate_times: - crate_times[name] = [None] * (len(dates) - 1) - crate_times[name].append(u.get("duration", 0)) - seen_crates.add(name) - for name in crate_times: - if name not in seen_crates: - crate_times[name].append(None) - - bs = run.get("binary_sizes", []) - total_binary_sizes.append(sum(b.get("bytes", 0) for b in bs)) - - seen_bins = set() - for b in bs: - bname = b.get("name", "") - if bname not in per_binary: - per_binary[bname] = [None] * (len(dates) - 1) - per_binary[bname].append(b.get("bytes", 0)) - seen_bins.add(bname) - for bname in per_binary: - if bname not in seen_bins: - per_binary[bname].append(None) - - # Top 15 crates by average duration - def avg(lst): - vals = [v for v in lst if v is not None] - return sum(vals) / len(vals) if vals else 0 - - top_crates = sorted(crate_times.keys(), key=lambda c: avg(crate_times[c]), reverse=True)[:15] - - # Latest cargo bloat - latest_bloat = "" - for run in reversed(data): - if run.get("cargo_bloat"): - latest_bloat = run["cargo_bloat"] - break - - # Latest run details - latest_run = None - if data: - last = data[-1] - bt = last.get("build_times", {}) - 
latest_run = { - "created_at": last.get("created_at", "?"), - "head_sha": last.get("head_sha", "?")[:12], - "total_time_display": bt.get("total_time_display", "?"), - "units": bt.get("units", []), - "binary_sizes": last.get("binary_sizes", []), - } - - # Assemble the data object - build_data = { - "generated": datetime.utcnow().strftime("%Y-%m-%d %H:%M UTC"), - "dates": dates, - "total_build_times": total_build_times, - "total_binary_sizes_mib": [s / 1048576 for s in total_binary_sizes], - "crate_datasets": [ - {"label": name, "data": crate_times[name]} - for name in top_crates - ], - "binary_datasets": [ - {"label": name, "data": [b / 1048576 if b is not None else None for b in per_binary[name]]} - for name in sorted(per_binary.keys()) - ], - "latest_cargo_bloat": latest_bloat, - "latest_run": latest_run, - } - - # Write as a JS file that assigns to a global constant - js_content = f"const BUILD_DATA = {json.dumps(build_data, indent=2)};\n" - Path("report/build-stats-data.js").write_text(js_content) - print(f"Generated build-stats-data.js ({len(data)} runs)") - PYEOF - - - name: Assemble report - run: | - cp .github/scripts/build-stats-report.html report/build-stats-report.html + python3 .github/scripts/generate-stats-data.py collected report + cp .github/scripts/build-stats-report.html report/ - name: Upload report uses: actions/upload-artifact@v4 @@ -222,3 +89,4 @@ jobs: name: build-stats-report path: report/ retention-days: 90 + From 50e9d08e8a9d322b96eded2178d1d8c098341916 Mon Sep 17 00:00:00 2001 From: "Alex Razumov (from Dev Box)" Date: Thu, 7 May 2026 18:17:31 -0700 Subject: [PATCH 06/30] Improve scripts --- .github/scripts/generate-stats-data.py | 49 +++++++++++++++++++---- .github/workflows/produce-build-stats.yml | 26 ++---------- 2 files changed, 45 insertions(+), 30 deletions(-) diff --git a/.github/scripts/generate-stats-data.py b/.github/scripts/generate-stats-data.py index c1b4338a9..8739b363f 100644 --- a/.github/scripts/generate-stats-data.py +++ b/.github/scripts/generate-stats-data.py @@ -1,4 +1,11 @@ -"""Aggregate build-stats artifacts into a JS data file for the HTML report.""" +"""Aggregate build-stats artifacts into a JS data file for the HTML report. 
+ +Reads from: + collected/runs.tsv — tab-separated: run_id, created_at, head_sha + collected// — contains build-times.json, binary-sizes.json, cargo-bloat.txt + +Usage: python generate-stats-data.py +""" import json import sys from datetime import datetime, timezone @@ -10,8 +17,34 @@ def main(): output_dir = Path(sys.argv[2]) if len(sys.argv) > 2 else Path("report") output_dir.mkdir(parents=True, exist_ok=True) - data = json.loads((collected_dir / "all_runs.json").read_text()) - data.sort(key=lambda r: r.get("created_at", "")) + runs_tsv = collected_dir / "runs.tsv" + + # Parse runs.tsv and load per-run artifacts + runs = [] + for line in runs_tsv.read_text().strip().splitlines(): + parts = line.split("\t") + if len(parts) < 3: + continue + run_id, created_at, head_sha = parts[0], parts[1], parts[2] + run_dir = collected_dir / run_id + + bt_path = run_dir / "build-times.json" + bs_path = run_dir / "binary-sizes.json" + cb_path = run_dir / "cargo-bloat.txt" + + if not bt_path.exists(): + continue # skip runs without data + + runs.append({ + "run_id": run_id, + "created_at": created_at, + "head_sha": head_sha, + "build_times": json.loads(bt_path.read_text()) if bt_path.exists() else {}, + "binary_sizes": json.loads(bs_path.read_text()) if bs_path.exists() else [], + "cargo_bloat": cb_path.read_text() if cb_path.exists() else "", + }) + + runs.sort(key=lambda r: r["created_at"]) dates = [] total_build_times = [] @@ -19,7 +52,7 @@ def main(): total_binary_sizes = [] per_binary: dict[str, list] = {} - for run in data: + for run in runs: dt_str = run.get("created_at", "") dates.append(dt_str[:10] if dt_str else "?") @@ -62,12 +95,12 @@ def avg(lst): top_crates = sorted(crate_times.keys(), key=lambda c: avg(crate_times[c]), reverse=True)[:15] # Latest cargo bloat - latest_bloat = next((r["cargo_bloat"] for r in reversed(data) if r.get("cargo_bloat")), "") + latest_bloat = next((r["cargo_bloat"] for r in reversed(runs) if r.get("cargo_bloat")), "") # Latest run details latest_run = None - if data: - last = data[-1] + if runs: + last = runs[-1] bt = last.get("build_times", {}) latest_run = { "created_at": last.get("created_at", "?"), @@ -93,7 +126,7 @@ def avg(lst): js_path = output_dir / "build-stats-data.js" js_path.write_text(f"const BUILD_DATA = {json.dumps(build_data, indent=2)};\n") - print(f"Generated {js_path} ({len(data)} runs)") + print(f"Generated {js_path} ({len(runs)} runs)") if __name__ == "__main__": diff --git a/.github/workflows/produce-build-stats.yml b/.github/workflows/produce-build-stats.yml index a5481383d..f8e9e24b3 100644 --- a/.github/workflows/produce-build-stats.yml +++ b/.github/workflows/produce-build-stats.yml @@ -47,34 +47,16 @@ jobs: if [ ! -s runs.tsv ]; then echo "::warning::No successful build-release runs found in the last 30 days" - echo '[]' > collected/all_runs.json exit 0 fi echo "Found $(wc -l < runs.tsv) runs" - echo '[]' > collected/all_runs.json + cp runs.tsv collected/runs.tsv while IFS=$'\t' read -r run_id created_at head_sha; do - dir="collected/$run_id/build-stats" - if ! 
gh run download "$run_id" --repo "${{ github.repository }}" \ - --name build-stats --dir "$dir" 2>/dev/null; then - echo "::warning::Skipping run $run_id (artifact expired)" - continue - fi - python3 -c " - import json, sys - from pathlib import Path - base = Path(sys.argv[1]) - record = {'run_id': int(sys.argv[2]), 'created_at': sys.argv[3], 'head_sha': sys.argv[4]} - for name, key in [('build-times.json','build_times'),('binary-sizes.json','binary_sizes'),('cargo-bloat.txt','cargo_bloat')]: - p = base / name - if p.exists(): - record[key] = json.loads(p.read_text()) if name.endswith('.json') else p.read_text() - all_f = Path('collected/all_runs.json') - data = json.loads(all_f.read_text()) - data.append(record) - all_f.write_text(json.dumps(data)) - " "$dir" "$run_id" "$created_at" "$head_sha" + gh run download "$run_id" --repo "${{ github.repository }}" \ + --name build-stats --dir "collected/$run_id" 2>/dev/null \ + || echo "::warning::Skipping run $run_id (artifact expired)" done < runs.tsv - name: Generate report From 4858e4eaf151b84e3fe3db346b3fa03b1b6962b3 Mon Sep 17 00:00:00 2001 From: "Alex Razumov (from Dev Box)" Date: Thu, 7 May 2026 18:22:01 -0700 Subject: [PATCH 07/30] Switched template to Vue 3 --- .github/scripts/build-stats-report.html | 275 +++++++++++++----------- 1 file changed, 146 insertions(+), 129 deletions(-) diff --git a/.github/scripts/build-stats-report.html b/.github/scripts/build-stats-report.html index 61bf2c9c6..55a24c119 100644 --- a/.github/scripts/build-stats-report.html +++ b/.github/scripts/build-stats-report.html @@ -3,6 +3,7 @@ DiskANN Release Build Stats + +

     <h1>DiskANN Release Build Statistics</h1>

-    <p id="meta"></p>
+    <p>Generated: {{ data.generated }} — Last 30 days ({{ data.dates.length }} runs)</p>

     <h2>Total Build Time Trend</h2>
-    <canvas id="totalBuildTime"></canvas>
+    <canvas ref="totalBuildTime"></canvas>

     <h2>Build Time by Crate (Top 15)</h2>
-    <canvas id="crateBuildTime"></canvas>
+    <canvas ref="crateBuildTime"></canvas>

     <h2>Total Binary Size Trend</h2>
-    <canvas id="totalBinarySize"></canvas>
+    <canvas ref="totalBinarySize"></canvas>

     <h2>Binary Size per Binary</h2>
-    <canvas id="perBinarySize"></canvas>
+    <canvas ref="perBinarySize"></canvas>

     <h2>Latest Cargo Bloat (diskann-benchmark)</h2>
-    <pre id="cargo-bloat">
-    </pre>
+    <pre>{{ data.latest_cargo_bloat || 'No cargo bloat data available.' }}</pre>

     <h2>Latest Build Details</h2>
-    <div id="latest-details"></div>
- - + return { data, formatSize, totalBuildTime, crateBuildTime, totalBinarySize, perBinarySize }; + } +}).mount('#app'); + + From cc38f5bb5cb91f3db610a49c673ee1053cc43ea0 Mon Sep 17 00:00:00 2001 From: "Alex Razumov (from Dev Box)" Date: Thu, 7 May 2026 18:33:15 -0700 Subject: [PATCH 08/30] Latest Build Details (Top 20 Crates) --- .github/scripts/build-stats-report.html | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/scripts/build-stats-report.html b/.github/scripts/build-stats-report.html index 55a24c119..65caa5707 100644 --- a/.github/scripts/build-stats-report.html +++ b/.github/scripts/build-stats-report.html @@ -50,7 +50,7 @@

     <h2>Latest Cargo Bloat (diskann-benchmark)</h2>
     <pre>{{ data.latest_cargo_bloat || 'No cargo bloat data available.' }}</pre>