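Add provenance records (git commit, SHA-256 input hashes) for the build, run, and metrics commands; plot Watt-per-Gbit metrics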

networmix · networmix · commit d88a19b95168 · 2025-08-26T12:00:37.000+01:00
diff --git a/netlab/cli.py b/netlab/cli.py
@@ -16,8 +16,10 @@
 
 import argparse
 import concurrent.futures
+import hashlib
 import logging
 import os
+import platform
 import shutil
 import subprocess
 import sys
@@ -28,6 +30,8 @@
 
 import yaml
 
+from metrics.aggregate import write_json_atomic
+
 from .log_config import configure_from_env, set_global_log_level
 
 
@@ -40,6 +44,53 @@ def ensure_dir(p: Path) -> None:
     p.mkdir(parents=True, exist_ok=True)
 
 
+def _create_run_provenance(
+    masters: List[Path], seeds: List[int], scenarios_dir: Path
+) -> Dict[str, object]:
+    """Build a best-effort provenance record for a netlab build or run.
+
+    Records a UTC timestamp, Python/platform details, the sorted seeds, the git
+    commit, and a SHA-256 and size for each master config. A failed git lookup
+    or unreadable config is logged and stored as an error string, not raised.
+
+    Returns a dict the caller may extend (e.g. with "command") before saving.
+    """
+    # Typed local: the nested map cannot be indexed through ``Dict[str, object]``.
+    configs: Dict[str, Dict[str, object]] = {}
+    provenance: Dict[str, object] = {
+        "generated_at": datetime.now(timezone.utc).isoformat(),
+        "python": sys.version,
+        "platform": platform.platform(),
+        "seeds": sorted(seeds),
+        "topogen_configs": configs,
+        "scenarios_dir": str(scenarios_dir),
+    }
+
+    # git runs in the current working directory; failure is recorded, not fatal.
+    try:
+        head = subprocess.check_output(
+            ["git", "rev-parse", "HEAD"], stderr=subprocess.DEVNULL
+        )
+        provenance["git_commit"] = head.decode("utf-8").strip()
+    except Exception as e:
+        logging.warning("Failed to retrieve git commit: %s", e)
+        provenance["git_commit_error"] = str(e)
+
+    # Entries are keyed by file name; "path" is the path exactly as supplied.
+    for master_yaml in masters:
+        entry: Dict[str, object] = {"path": str(master_yaml)}
+        try:
+            config_content = master_yaml.read_bytes()
+            entry["sha256"] = hashlib.sha256(config_content).hexdigest()
+            entry["size_bytes"] = len(config_content)
+        except Exception as e:
+            logging.warning("Failed to hash config %s: %s", master_yaml, e)
+            entry["hash_error"] = str(e)
+        configs[master_yaml.name] = entry
+
+    return provenance
+
+
 def _detect_topogen_invoke() -> List[str]:
     if shutil.which("topogen"):
         return ["topogen"]
@@ -337,6 +388,15 @@ def _cmd_build(args: argparse.Namespace) -> None:
     if build_errors:
         die("One or more builds failed: " + "; ".join(build_errors))
 
+    # Create and save comprehensive provenance information for build
+    build_provenance = _create_run_provenance(masters, [], scenarios_dir)
+    build_provenance["command"] = "build"
+    build_provenance["seeds"] = []  # No seeds for build command
+    provenance_path = scenarios_dir / "_build_provenance.json"
+    write_json_atomic(provenance_path, build_provenance)
+    print(f"📋 Build provenance saved to: {provenance_path}")
+    print("✅ All builds completed successfully")
+
 
 def _cmd_run(args: argparse.Namespace) -> None:
     masters_dir: Path = args.configs
@@ -471,6 +531,13 @@ def _cmd_run(args: argparse.Namespace) -> None:
     )
     print(f"⏱️ Overall ngraph run time: {ngraph_elapsed:.3f}s")
 
+    # Create and save comprehensive provenance information
+    provenance = _create_run_provenance(masters, seeds, scenarios_dir)
+    provenance["command"] = "run"
+    provenance_path = scenarios_dir / "provenance.json"
+    write_json_atomic(provenance_path, provenance)
+    print(f"📋 Run provenance saved to: {provenance_path}")
+
 
 def main() -> None:
     # Initialize logging for NetLab; level can be overridden via NETLAB_LOG_LEVEL
diff --git a/netlab/metrics_cmd.py b/netlab/metrics_cmd.py
@@ -8,6 +8,7 @@
 
 from __future__ import annotations
 
+import hashlib
 import io
 import json
 import logging
@@ -18,9 +19,9 @@
 import sys
 from contextlib import redirect_stdout
 from dataclasses import dataclass, field
-from datetime import UTC, datetime
+from datetime import datetime, timezone
 from pathlib import Path
-from typing import Dict, List, Optional, Tuple
+from typing import Any, Dict, List, Optional, Tuple
 
 import matplotlib.pyplot as plt
 import numpy as np
@@ -795,7 +796,7 @@ def _availability_curve(
         write_csv_atomic(scen_dir / "network_stats_summary.csv", ns_df)
 
         provenance = {
-            "generated_at": datetime.now(UTC).isoformat(),
+            "generated_at": datetime.now(timezone.utc).isoformat(),
             "python": sys.version,
             "platform": platform.platform(),
         }
@@ -911,6 +912,18 @@ def _availability_curve(
     print(text, end="")
     print(f"Wrote project CSV: {project_csv}")
 
+    # Create and save comprehensive provenance information for metrics run
+    metrics_provenance = _create_metrics_provenance(root, out_root, files, only)
+
+    # Add scenarios and seeds analyzed
+    for scenario_stem, seed_map in grouped.items():
+        metrics_provenance["scenarios_analyzed"].append(scenario_stem)
+        metrics_provenance["seeds_analyzed"][scenario_stem] = sorted(seed_map.keys())
+
+    provenance_path = out_root / "provenance.json"
+    write_json_atomic(provenance_path, metrics_provenance)
+    print(f"📋 Metrics provenance saved to: {provenance_path}")
+
 
 def print_summary_from_csv(
     root: Path, plots: bool = False, quiet: bool = False
@@ -1042,6 +1055,8 @@ def _plot_dist_abs(column: str, title: str, ylabel: str, fname: str) -> None:
                 "lat_fail_p99": "lat_fail_p99",
                 "USD_per_Gbit_offered": "USD_per_Gbit_offered",
                 "USD_per_Gbit_p999": "USD_per_Gbit_p999",
+                "Watt_per_Gbit_offered": "Watt_per_Gbit_offered",
+                "Watt_per_Gbit_p999": "Watt_per_Gbit_p999",
                 "capex_total": "capex_total",
                 "node_count": "node_count",
                 "link_count": "link_count",
@@ -1177,6 +1192,18 @@ def _plot_dist_norm(column: str, title: str, ylabel: str, fname: str) -> None:
             ylabel="USD/Gbps",
             fname="abs_USD_per_Gbit_p999.png",
         )
+        _plot_dist_abs(
+            "Watt_per_Gbit_offered",
+            title="Power per Gbps (offered)",
+            ylabel="W/Gbps",
+            fname="abs_Watt_per_Gbit_offered.png",
+        )
+        _plot_dist_abs(
+            "Watt_per_Gbit_p999",
+            title="Power per Gbps at p99.9",
+            ylabel="W/Gbps",
+            fname="abs_Watt_per_Gbit_p999.png",
+        )
         _plot_dist_abs(
             "lat_fail_p99",
             title="Latency p99 under failures (median across seeds)",
@@ -1217,9 +1244,75 @@ def _plot_dist_norm(column: str, title: str, ylabel: str, fname: str) -> None:
             "ratio",
             "norm_USD_per_Gbit_p999.png",
         )
+        _plot_dist_norm(
+            "Watt_per_Gbit_offered_r",
+            "Power per Gbps (offered, relative)",
+            "ratio",
+            "norm_Watt_per_Gbit_offered.png",
+        )
+        _plot_dist_norm(
+            "Watt_per_Gbit_p999_r",
+            "Power per Gbps p99.9 (relative)",
+            "ratio",
+            "norm_Watt_per_Gbit_p999.png",
+        )
         _plot_dist_norm(
             "lat_fail_p99_r",
             "Latency p99 under failures (relative)",
             "ratio",
             "norm_Latency_fail_p99.png",
         )
+
+
+def _create_metrics_provenance(
+    root: Path, out_root: Path, files: List[Path], only: Optional[str] = None
+) -> Dict[str, Any]:
+    """Build a best-effort provenance record for a metrics run.
+
+    Source files are keyed by their path relative to ``root``. ``only`` is the
+    comma-separated scenario filter, stored as a list when non-empty. The
+    ``scenarios_analyzed`` and ``seeds_analyzed`` entries start out empty.
+    """
+    provenance: Dict[str, Any] = {
+        "generated_at": datetime.now(timezone.utc).isoformat(),
+        "python": sys.version,
+        "platform": platform.platform(),
+        "command": "metrics",
+        "source_root": str(root),
+        "output_root": str(out_root),
+        "source_files": {},
+        "scenarios_analyzed": [],
+        "seeds_analyzed": {},
+    }
+
+    # git runs in the current working directory; failure is recorded, not fatal.
+    try:
+        head = subprocess.check_output(
+            ["git", "rev-parse", "HEAD"], stderr=subprocess.DEVNULL
+        )
+        provenance["git_commit"] = head.decode("utf-8").strip()
+    except Exception as e:
+        logging.warning("Failed to retrieve git commit: %s", e)
+        provenance["git_commit_error"] = str(e)
+
+    for file_path in files:
+        # relative_to() raises ValueError for a file outside ``root``; fall back
+        # to the path as given so one stray file cannot abort the whole record.
+        try:
+            rel_path = str(file_path.relative_to(root))
+        except ValueError:
+            rel_path = str(file_path)
+        entry: Dict[str, Any] = {"path": rel_path}
+        try:
+            file_content = file_path.read_bytes()
+            entry["sha256"] = hashlib.sha256(file_content).hexdigest()
+            entry["size_bytes"] = len(file_content)
+        except Exception as e:
+            logging.warning("Failed to hash source file %s: %s", file_path, e)
+            entry["hash_error"] = str(e)
+        provenance["source_files"][rel_path] = entry
+
+    if only:
+        provenance["only_scenarios"] = [s.strip() for s in only.split(",") if s.strip()]
+
+    return provenance