Skip to content

Commit d88a19b

Browse files
committed
adding provenance
1 parent 01251d1 commit d88a19b

2 files changed

Lines changed: 163 additions & 3 deletions

File tree

netlab/cli.py

Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,8 +16,10 @@
1616

1717
import argparse
1818
import concurrent.futures
19+
import hashlib
1920
import logging
2021
import os
22+
import platform
2123
import shutil
2224
import subprocess
2325
import sys
@@ -28,6 +30,8 @@
2830

2931
import yaml
3032

33+
from metrics.aggregate import write_json_atomic
34+
3135
from .log_config import configure_from_env, set_global_log_level
3236

3337

@@ -40,6 +44,53 @@ def ensure_dir(p: Path) -> None:
4044
p.mkdir(parents=True, exist_ok=True)
4145

4246

47+
def _create_run_provenance(
    masters: List[Path], seeds: List[int], scenarios_dir: Path
) -> Dict[str, object]:
    """Assemble a provenance record describing a netlab invocation.

    The record holds a UTC timestamp, the Python version string, the platform
    string, the seeds in sorted order, the scenarios directory as a string,
    and one entry per master config keyed by its file name.

    Collection is best-effort: a failing ``git rev-parse HEAD`` or an
    unreadable config is logged as a warning and stored in the record
    (``git_commit_error`` / ``hash_error``) rather than raised.

    Args:
        masters: Master config files to fingerprint (SHA-256 and byte size).
        seeds: Seeds used for the invocation; stored sorted.
        scenarios_dir: Directory the scenarios live in; stored via ``str()``.

    Returns:
        The provenance mapping described above.
    """
    # Filled in further down; referenced from the record now so that the key
    # order of the resulting mapping stays stable.
    config_records: Dict[str, Dict[str, object]] = {}
    record: Dict[str, object] = {
        "generated_at": datetime.now(timezone.utc).isoformat(),
        "python": sys.version,
        "platform": platform.platform(),
        "seeds": sorted(seeds),
        "topogen_configs": config_records,
        "scenarios_dir": str(scenarios_dir),
    }

    # Record the HEAD commit of whatever repository the process runs in.
    try:
        raw_head = subprocess.check_output(
            ["git", "rev-parse", "HEAD"], stderr=subprocess.DEVNULL
        )
        record["git_commit"] = raw_head.decode("utf-8").strip()
    except Exception as exc:
        logging.warning("Failed to retrieve git commit: %s", exc)
        record["git_commit_error"] = str(exc)

    # Fingerprint every master config; the path is recorded exactly as the
    # caller supplied it.
    for cfg_path in masters:
        try:
            payload = cfg_path.read_bytes()
            entry: Dict[str, object] = {
                "path": str(cfg_path),
                "sha256": hashlib.sha256(payload).hexdigest(),
                "size_bytes": len(payload),
            }
        except Exception as exc:
            logging.warning("Failed to hash config %s: %s", cfg_path, exc)
            entry = {
                "path": str(cfg_path),
                "hash_error": str(exc),
            }
        config_records[cfg_path.name] = entry

    return record
92+
93+
4394
def _detect_topogen_invoke() -> List[str]:
4495
if shutil.which("topogen"):
4596
return ["topogen"]
@@ -337,6 +388,15 @@ def _cmd_build(args: argparse.Namespace) -> None:
337388
if build_errors:
338389
die("One or more builds failed: " + "; ".join(build_errors))
339390

391+
# Create and save comprehensive provenance information for build
392+
build_provenance = _create_run_provenance(masters, [], scenarios_dir)
393+
build_provenance["command"] = "build"
394+
build_provenance["seeds"] = [] # No seeds for build command
395+
provenance_path = scenarios_dir / "_build_provenance.json"
396+
write_json_atomic(provenance_path, build_provenance)
397+
print(f"📋 Build provenance saved to: {provenance_path}")
398+
print("✅ All builds completed successfully")
399+
340400

341401
def _cmd_run(args: argparse.Namespace) -> None:
342402
masters_dir: Path = args.configs
@@ -471,6 +531,13 @@ def _cmd_run(args: argparse.Namespace) -> None:
471531
)
472532
print(f"⏱️ Overall ngraph run time: {ngraph_elapsed:.3f}s")
473533

534+
# Create and save comprehensive provenance information
535+
provenance = _create_run_provenance(masters, seeds, scenarios_dir)
536+
provenance["command"] = "run"
537+
provenance_path = scenarios_dir / "provenance.json"
538+
write_json_atomic(provenance_path, provenance)
539+
print(f"📋 Run provenance saved to: {provenance_path}")
540+
474541

475542
def main() -> None:
476543
# Initialize logging for NetLab; level can be overridden via NETLAB_LOG_LEVEL

netlab/metrics_cmd.py

Lines changed: 96 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88

99
from __future__ import annotations
1010

11+
import hashlib
1112
import io
1213
import json
1314
import logging
@@ -18,9 +19,9 @@
1819
import sys
1920
from contextlib import redirect_stdout
2021
from dataclasses import dataclass, field
21-
from datetime import UTC, datetime
22+
from datetime import datetime, timezone
2223
from pathlib import Path
23-
from typing import Dict, List, Optional, Tuple
24+
from typing import Any, Dict, List, Optional, Tuple
2425

2526
import matplotlib.pyplot as plt
2627
import numpy as np
@@ -795,7 +796,7 @@ def _availability_curve(
795796
write_csv_atomic(scen_dir / "network_stats_summary.csv", ns_df)
796797

797798
provenance = {
798-
"generated_at": datetime.now(UTC).isoformat(),
799+
"generated_at": datetime.now(timezone.utc).isoformat(),
799800
"python": sys.version,
800801
"platform": platform.platform(),
801802
}
@@ -911,6 +912,18 @@ def _availability_curve(
911912
print(text, end="")
912913
print(f"Wrote project CSV: {project_csv}")
913914

915+
# Create and save comprehensive provenance information for metrics run
916+
metrics_provenance = _create_metrics_provenance(root, out_root, files, only)
917+
918+
# Add scenarios and seeds analyzed
919+
for scenario_stem, seed_map in grouped.items():
920+
metrics_provenance["scenarios_analyzed"].append(scenario_stem)
921+
metrics_provenance["seeds_analyzed"][scenario_stem] = sorted(seed_map.keys())
922+
923+
provenance_path = out_root / "provenance.json"
924+
write_json_atomic(provenance_path, metrics_provenance)
925+
print(f"📋 Metrics provenance saved to: {provenance_path}")
926+
914927

915928
def print_summary_from_csv(
916929
root: Path, plots: bool = False, quiet: bool = False
@@ -1042,6 +1055,8 @@ def _plot_dist_abs(column: str, title: str, ylabel: str, fname: str) -> None:
10421055
"lat_fail_p99": "lat_fail_p99",
10431056
"USD_per_Gbit_offered": "USD_per_Gbit_offered",
10441057
"USD_per_Gbit_p999": "USD_per_Gbit_p999",
1058+
"Watt_per_Gbit_offered": "Watt_per_Gbit_offered",
1059+
"Watt_per_Gbit_p999": "Watt_per_Gbit_p999",
10451060
"capex_total": "capex_total",
10461061
"node_count": "node_count",
10471062
"link_count": "link_count",
@@ -1177,6 +1192,18 @@ def _plot_dist_norm(column: str, title: str, ylabel: str, fname: str) -> None:
11771192
ylabel="USD/Gbps",
11781193
fname="abs_USD_per_Gbit_p999.png",
11791194
)
1195+
_plot_dist_abs(
1196+
"Watt_per_Gbit_offered",
1197+
title="Power per Gbps (offered)",
1198+
ylabel="W/Gbps",
1199+
fname="abs_Watt_per_Gbit_offered.png",
1200+
)
1201+
_plot_dist_abs(
1202+
"Watt_per_Gbit_p999",
1203+
title="Power per Gbps at p99.9",
1204+
ylabel="W/Gbps",
1205+
fname="abs_Watt_per_Gbit_p999.png",
1206+
)
11801207
_plot_dist_abs(
11811208
"lat_fail_p99",
11821209
title="Latency p99 under failures (median across seeds)",
@@ -1217,9 +1244,75 @@ def _plot_dist_norm(column: str, title: str, ylabel: str, fname: str) -> None:
12171244
"ratio",
12181245
"norm_USD_per_Gbit_p999.png",
12191246
)
1247+
_plot_dist_norm(
1248+
"Watt_per_Gbit_offered_r",
1249+
"Power per Gbps (offered, relative)",
1250+
"ratio",
1251+
"norm_Watt_per_Gbit_offered.png",
1252+
)
1253+
_plot_dist_norm(
1254+
"Watt_per_Gbit_p999_r",
1255+
"Power per Gbps p99.9 (relative)",
1256+
"ratio",
1257+
"norm_Watt_per_Gbit_p999.png",
1258+
)
12201259
_plot_dist_norm(
12211260
"lat_fail_p99_r",
12221261
"Latency p99 under failures (relative)",
12231262
"ratio",
12241263
"norm_Latency_fail_p99.png",
12251264
)
1265+
1266+
1267+
def _create_metrics_provenance(
    root: Path, out_root: Path, files: List[Path], only: Optional[str] = None
) -> Dict[str, Any]:
    """Create comprehensive provenance information for a metrics run.

    Collection is best-effort: a failing ``git rev-parse HEAD`` or an
    unreadable source file is logged as a warning and recorded in the result
    (``git_commit_error`` / ``hash_error``) instead of being raised.

    Args:
        root: Directory the analysed source files were discovered under.
        out_root: Directory the metrics output is written to.
        files: Source files to fingerprint (SHA-256 and byte size).
        only: Optional comma-separated scenario filter; when non-empty it is
            split, stripped and stored under ``only_scenarios``.

    Returns:
        A mapping with timestamp, interpreter/platform information, the
        source and output roots, one ``source_files`` entry per file, and
        empty ``scenarios_analyzed`` / ``seeds_analyzed`` containers.
    """
    provenance: Dict[str, Any] = {
        "generated_at": datetime.now(timezone.utc).isoformat(),
        "python": sys.version,
        "platform": platform.platform(),
        "command": "metrics",
        "source_root": str(root),
        "output_root": str(out_root),
        "source_files": {},
        "scenarios_analyzed": [],
        "seeds_analyzed": {},
    }

    # Get git commit if available (best-effort: git may be missing or the
    # working directory may not be a repository).
    try:
        commit = (
            subprocess.check_output(
                ["git", "rev-parse", "HEAD"], stderr=subprocess.DEVNULL
            )
            .decode("utf-8")
            .strip()
        )
        provenance["git_commit"] = commit
    except Exception as e:
        logging.warning("Failed to retrieve git commit: %s", e)
        provenance["git_commit_error"] = str(e)

    # Add source file information with hashes
    for file_path in files:
        # Resolve the entry key once, outside the hashing ``try``:
        # ``relative_to`` raises ValueError for a file that is not under
        # ``root``, and re-evaluating it inside the error handler would
        # escape the handler and abort the whole run.
        try:
            rel_path = str(file_path.relative_to(root))
        except ValueError:
            rel_path = str(file_path)

        try:
            file_content = file_path.read_bytes()
            provenance["source_files"][rel_path] = {
                "path": rel_path,
                "sha256": hashlib.sha256(file_content).hexdigest(),
                "size_bytes": len(file_content),
            }
        except Exception as e:
            logging.warning("Failed to hash source file %s: %s", file_path, e)
            provenance["source_files"][rel_path] = {
                "path": rel_path,
                "hash_error": str(e),
            }

    # Add analysis scope information
    if only:
        provenance["only_scenarios"] = [s.strip() for s in only.split(",") if s.strip()]

    return provenance

0 commit comments

Comments
 (0)