Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
196 changes: 196 additions & 0 deletions .github/reports/build-stats-report.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,196 @@
<!DOCTYPE html>
Comment thread
hildebrandmw marked this conversation as resolved.
<html lang="en">
Comment thread
arrayka marked this conversation as resolved.
<head>
<meta charset="utf-8">
<title>DiskANN Release Build Stats</title>
<script src="https://cdn.jsdelivr.net/npm/vue@3/dist/vue.global.prod.js"></script>
<script src="https://cdn.jsdelivr.net/npm/chart.js@4"></script>
Comment thread
arrayka marked this conversation as resolved.
<script src="build-stats-report.js"></script>
<style>
body {
font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif;
max-width: 1400px; margin: 0 auto; padding: 20px; background: #fafafa;
}
h1 { color: #1b1e24; border-bottom: 2px solid #4e79a7; padding-bottom: 8px; }
h2 { color: #333; margin-top: 40px; }
.chart-container {
background: white; border-radius: 8px; padding: 20px;
box-shadow: 0 1px 3px rgba(0,0,0,0.12); margin-bottom: 30px;
}
canvas { max-height: 400px; }
pre {
background: #1b1e24; color: #d5dde5; padding: 16px; border-radius: 8px;
overflow-x: auto; font-size: 13px; line-height: 1.4;
}
.meta { color: #666; font-size: 14px; }
table { border-collapse: collapse; width: 100%; margin: 10px 0; }
th, td { border: 1px solid #ddd; padding: 8px 12px; text-align: left; }
th { background: #1b1e24; color: #d5dde5; }
tr:nth-child(even) { background: #f2f2f2; }
</style>
</head>
<body>
<div id="app">
<h1>DiskANN Release Build Statistics</h1>
<p class="meta">Generated: {{ data.generated }} — Last 30 days ({{ data.dates.length }} runs)</p>

<h2>Total Build Time Trend</h2>
<div class="chart-container"><canvas ref="totalBuildTime"></canvas></div>

<h2>Build Time by Crate (Top 15)</h2>
<div class="chart-container"><canvas ref="crateBuildTime"></canvas></div>

<h2>Total Binary Size Trend</h2>
<div class="chart-container"><canvas ref="totalBinarySize"></canvas></div>

<h2>Binary Size per Binary</h2>
<div class="chart-container"><canvas ref="perBinarySize"></canvas></div>

<h2>Latest Cargo Bloat (diskann-benchmark)</h2>
<pre>{{ data.latest_cargo_bloat || 'No cargo bloat data available.' }}</pre>

<h2>Latest LLVM Lines (diskann-benchmark)</h2>
<pre>{{ data.latest_cargo_llvm_lines || 'No cargo llvm-lines data available.' }}</pre>

<template v-if="data.latest_run">
<h2>Latest Build Details (Top 20 Crates)</h2>
<p class="meta">
Run: {{ data.latest_run.created_at }} —
Commit: <code>{{ data.latest_run.head_sha }}</code> —
Total wall time: {{ data.latest_run.total_time_display }}
</p>

<table v-if="data.latest_run.units.length">
<thead><tr><th>#</th><th>Crate</th><th>Version</th><th>Duration</th></tr></thead>
<tbody>
<tr v-for="(u, i) in data.latest_run.units.slice(0, 20)" :key="i">
<td>{{ i + 1 }}</td>
<td>{{ u.name }}</td>
<td>{{ u.version }}</td>
<td>{{ u.duration.toFixed(1) }}s</td>
</tr>
</tbody>
</table>

<template v-if="data.latest_run.binary_sizes.length">
<h3>Binary Sizes</h3>
<table>
<thead><tr><th>Binary</th><th>Size (bytes)</th><th>Size</th></tr></thead>
<tbody>
<tr v-for="b in data.latest_run.binary_sizes" :key="b.name">
<td>{{ b.name }}</td>
<td>{{ b.bytes.toLocaleString() }}</td>
<td>{{ formatSize(b.bytes) }}</td>
</tr>
</tbody>
</table>
</template>
</template>
</div>

<script>
// Palette for the multi-series charts. Series pick a colour with
// `COLORS[i % COLORS.length]`, so charts with more than 15 series reuse colours.
const COLORS = [
'#4e79a7', '#f28e2b', '#e15759', '#76b7b2', '#59a14f',
'#edc948', '#b07aa1', '#ff9da7', '#9c755f', '#bab0ac',
'#86bcb6', '#8cd17d', '#b6992d', '#499894', '#d37295',
];

const { createApp, ref, onMounted } = Vue;

createApp({
setup() {
const data = BUILD_DATA;
const totalBuildTime = ref(null);
const crateBuildTime = ref(null);
const totalBinarySize = ref(null);
const perBinarySize = ref(null);

/**
 * Format a byte count as a human-readable size with one decimal place.
 *
 * Values of one mebibyte (1048576 bytes) or more are shown in MiB; anything
 * smaller is shown in KiB.
 *
 * @param {number} bytes - Size in bytes.
 * @returns {string} The size followed by ' MiB' or ' KiB', e.g. '1.5 KiB'.
 */
function formatSize(bytes) {
  const BYTES_PER_KIB = 1024;
  const BYTES_PER_MIB = 1048576;
  // `>=` so that exactly one mebibyte renders as '1.0 MiB' rather than
  // '1024.0 KiB'.
  if (bytes >= BYTES_PER_MIB) {
    return `${(bytes / BYTES_PER_MIB).toFixed(1)} MiB`;
  }
  return `${(bytes / BYTES_PER_KIB).toFixed(1)} KiB`;
}

onMounted(() => {
  // Build one line chart on the canvas held by `canvasRef`. Every chart on
  // the page shares the same x-axis labels (the run dates) and option shape;
  // only the datasets, title and y-axis label differ.
  const drawLineChart = (canvasRef, datasets, titleText, yAxisLabel) =>
    new Chart(canvasRef.value, {
      type: 'line',
      data: {
        labels: data.dates,
        datasets,
      },
      options: {
        responsive: true,
        plugins: { title: { display: true, text: titleText } },
        scales: { y: { beginAtZero: true, title: { display: true, text: yAxisLabel } } }
      }
    });

  // Single filled series drawn in one fixed colour ('33' is the fill alpha).
  const filledSeries = (label, values, color) => [{
    label,
    data: values,
    borderColor: color,
    backgroundColor: color + '33',
    fill: true, tension: 0.3,
  }];

  // Many series, each coloured from the shared palette (wrapping around).
  const paletteSeries = (seriesList) => seriesList.map((series, idx) => {
    const color = COLORS[idx % COLORS.length];
    return {
      ...series,
      borderColor: color,
      backgroundColor: color + '33',
      tension: 0.3, spanGaps: true,
    };
  });

  drawLineChart(
    totalBuildTime,
    filledSeries('Total Wall Time (s)', data.total_build_times, '#4e79a7'),
    'Total Release Build Time',
    'Seconds'
  );

  drawLineChart(
    crateBuildTime,
    paletteSeries(data.crate_datasets),
    'Build Time by Crate (Top 15)',
    'Seconds'
  );

  drawLineChart(
    totalBinarySize,
    filledSeries('Total Binary Size (MiB)', data.total_binary_sizes_mib, '#e15759'),
    'Total Binary Size',
    'MiB'
  );

  drawLineChart(
    perBinarySize,
    paletteSeries(data.binary_datasets),
    'Binary Size per Binary',
    'MiB'
  );
});

return { data, formatSize, totalBuildTime, crateBuildTime, totalBinarySize, perBinarySize };
}
}).mount('#app');
</script>
</body>
</html>

165 changes: 165 additions & 0 deletions .github/scripts/build-stats-report-data.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,165 @@
"""Aggregate build-stats artifacts into a JS data file for the HTML report.

Reads from:
collected/runs.tsv — tab-separated: run_id, created_at, head_sha
collected/<run_id>/ — contains target/cargo-timings/cargo-timing.html (required), and optionally build-stats-size.json, cargo-bloat.txt, cargo-llvm-lines.txt

Usage: python build-stats-report-data.py <collected_dir> <output_dir>
"""
import json
import re
import sys
from datetime import datetime, timezone
from pathlib import Path


def parse_cargo_timing(html_path: Path) -> dict:
    """Parse build times from a cargo-timing.html file.

    Args:
        html_path: Path to the timing report.

    Returns:
        An empty dict when ``html_path`` does not exist. Otherwise a dict with:
          * ``total_wall_time_s``: the number assigned to ``DURATION`` in the
            report, or 0 when it is not found.
          * ``total_time_display``: the text of the "Total time:" table cell,
            falling back to ``total_wall_time_s`` formatted as ``"<n.n>s"``.
          * ``units``: one ``{"name", "version", "duration"}`` dict per entry
            of the embedded ``UNIT_DATA`` array, sorted by duration
            (longest first); empty when the array is not found.

    Raises:
        json.JSONDecodeError: if the ``UNIT_DATA`` literal is not valid JSON.
    """
    if not html_path.exists():
        return {}

    # Decode explicitly as UTF-8 so the result does not depend on the
    # locale's default encoding of the machine running the script.
    html = html_path.read_text(encoding="utf-8")

    duration_match = re.search(r"DURATION\s*=\s*(\d+(?:\.\d+)?)", html)
    total_s = float(duration_match.group(1)) if duration_match else 0

    display_match = re.search(r"Total time:</td><td>([^<]+)</td>", html)
    total_display = display_match.group(1).strip() if display_match else f"{total_s:.1f}s"

    # UNIT_DATA is a JS array literal; it is parsed as JSON.
    units_match = re.search(r"const UNIT_DATA\s*=\s*(\[.*?\]);", html, re.DOTALL)
    units = json.loads(units_match.group(1)) if units_match else []
    units_sorted = sorted(units, key=lambda u: u.get("duration", 0), reverse=True)

    return {
        "total_wall_time_s": total_s,
        "total_time_display": total_display,
        "units": [
            {
                # .get() keeps a unit without a name from aborting the whole
                # report, matching how the other fields are read.
                "name": u.get("name", ""),
                "version": u.get("version", ""),
                "duration": u.get("duration", 0),
            }
            for u in units_sorted
        ],
    }


def _append_run_values(series: dict, run_values: dict, run_index: int) -> None:
    """Append one run's values to every named series, keeping them aligned.

    After the call every list in ``series`` has exactly ``run_index + 1``
    entries: names first seen in this run are back-filled with ``None`` for
    the earlier runs, and names absent from this run get ``None`` for it.
    """
    for name, value in run_values.items():
        if name not in series:
            series[name] = [None] * run_index
        series[name].append(value)
    for name, values in series.items():
        if name not in run_values:
            values.append(None)


def main():
    """Aggregate the collected per-run artifacts into build-stats-report.js.

    Reads ``<collected_dir>/runs.tsv`` (run_id, created_at, head_sha per
    line) and each run's artifacts, builds the time series used by the HTML
    report, and writes them as ``const BUILD_DATA = {...};`` to
    ``<output_dir>/build-stats-report.js``.

    Raises:
        SystemExit: if the two directory arguments are missing.
        ValueError: if a line of runs.tsv has fewer than three fields.
        FileNotFoundError: if a run has no cargo-timing.html.
    """
    if len(sys.argv) < 3:
        raise SystemExit("Usage: python build-stats-report-data.py <collected_dir> <output_dir>")

    collected_dir = Path(sys.argv[1])
    output_dir = Path(sys.argv[2])
    output_dir.mkdir(parents=True, exist_ok=True)

    runs_tsv = collected_dir / "runs.tsv"

    # Parse runs.tsv and load per-run artifacts
    runs = []
    for line in runs_tsv.read_text(encoding="utf-8").strip().splitlines():
        parts = line.split("\t")
        if len(parts) < 3:
            raise ValueError(f"Malformed line in runs.tsv: {line!r}")
        run_id, created_at, head_sha = parts[0], parts[1], parts[2]
        run_dir = collected_dir / run_id

        timing_path = run_dir / "target/cargo-timings/cargo-timing.html"
        bs_path = run_dir / "build-stats-size.json"
        cb_path = run_dir / "cargo-bloat.txt"
        ll_path = run_dir / "cargo-llvm-lines.txt"

        if not timing_path.exists():
            raise FileNotFoundError(f"Missing cargo-timing.html for run {run_id} in {run_dir}")

        runs.append({
            "run_id": run_id,
            "created_at": created_at,
            "head_sha": head_sha,
            "build_times": parse_cargo_timing(timing_path),
            "binary_sizes": json.loads(bs_path.read_text(encoding="utf-8")) if bs_path.exists() else [],
            "cargo_bloat": cb_path.read_text(encoding="utf-8") if cb_path.exists() else "",
            "cargo_llvm_lines": ll_path.read_text(encoding="utf-8") if ll_path.exists() else "",
        })

    # Chronological order; every series below is indexed by position in `runs`.
    runs.sort(key=lambda r: r["created_at"])

    dates = []
    total_build_times = []
    crate_times: dict[str, list] = {}
    total_binary_sizes = []
    per_binary: dict[str, list] = {}

    for run_index, run in enumerate(runs):
        dt_str = run.get("created_at", "")
        # Only the first 10 characters (the date part) are used as the label.
        dates.append(dt_str[:10] if dt_str else "?")

        bt = run.get("build_times", {})
        total_build_times.append(bt.get("total_wall_time_s", 0))

        # Per-crate build times. A run may list the same crate name more than
        # once; sum those entries so each series gains exactly one point per
        # run and stays aligned with `dates`.
        crate_totals: dict[str, float] = {}
        for u in bt.get("units", []):
            name = u.get("name", "")
            crate_totals[name] = crate_totals.get(name, 0) + u.get("duration", 0)
        _append_run_values(crate_times, crate_totals, run_index)

        # Binary sizes
        bs = run.get("binary_sizes", [])
        total_binary_sizes.append(sum(b.get("bytes", 0) for b in bs))

        binary_totals: dict[str, int] = {}
        for b in bs:
            bname = b.get("name", "")
            binary_totals[bname] = binary_totals.get(bname, 0) + b.get("bytes", 0)
        _append_run_values(per_binary, binary_totals, run_index)

    # Top 15 crates by average duration (runs where the crate is absent are
    # ignored rather than counted as zero).
    def avg(lst):
        vals = [v for v in lst if v is not None]
        return sum(vals) / len(vals) if vals else 0

    top_crates = sorted(crate_times.keys(), key=lambda c: avg(crate_times[c]), reverse=True)[:15]

    # Latest cargo bloat and llvm-lines: the most recent run that has any.
    latest_bloat = next((r["cargo_bloat"] for r in reversed(runs) if r.get("cargo_bloat")), "")
    latest_llvm_lines = next((r["cargo_llvm_lines"] for r in reversed(runs) if r.get("cargo_llvm_lines")), "")

    # Latest run details
    latest_run = None
    if runs:
        last = runs[-1]
        bt = last.get("build_times", {})
        latest_run = {
            "created_at": last.get("created_at", "?"),
            "head_sha": last.get("head_sha", "?")[:12],
            "total_time_display": bt.get("total_time_display", "?"),
            "units": bt.get("units", []),
            "binary_sizes": last.get("binary_sizes", []),
        }

    build_data = {
        "generated": datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M UTC"),
        "dates": dates,
        "total_build_times": total_build_times,
        "total_binary_sizes_mib": [s / 1048576 for s in total_binary_sizes],
        "crate_datasets": [{"label": name, "data": crate_times[name]} for name in top_crates],
        "binary_datasets": [
            {"label": name, "data": [b / 1048576 if b is not None else None for b in per_binary[name]]}
            for name in sorted(per_binary.keys())
        ],
        "latest_cargo_bloat": latest_bloat,
        "latest_cargo_llvm_lines": latest_llvm_lines,
        "latest_run": latest_run,
    }

    js_path = output_dir / "build-stats-report.js"
    js_path.write_text(f"const BUILD_DATA = {json.dumps(build_data, indent=2)};\n", encoding="utf-8")
    print(f"Generated {js_path} ({len(runs)} runs)")


if __name__ == "__main__":
main()
Loading
Loading