Skip to content

Commit ecaa30b

Browse files
Andrey Golovanov and claude committed
Fix occurrence_count handling, per-direction BAC, and iterops rewrite
- Fix all metric modules to expand deduplicated flow_results by occurrence_count before statistical computation. Without this, each unique failure pattern was weighted equally regardless of how many MC iterations produced it. - Extract shared utilities into metrics/common.py: expand_flow_results, canonical_dc, baseline_demand_map, get_tm_baseline_and_failures. Removes duplication across latency.py, matrixdump.py, sps.py. - Add per-direction BAC (per_flow field on BacResult). Computes full BAC statistics per demand flow using the same _compute_bac_stats helper as the aggregate. Reveals directional asymmetry under failure. - Rewrite iterops.py: remove dead iteration_metrics extraction (field never existed in ngraph), replace with occurrence_count-aware iteration counts and timing from metadata. - Add mini DC-BB verification scenario (10 nodes, 2 planes, dual LH paths) with 252 hand-calculated assertions covering BAC, latency, alpha, iterops, and pair matrices. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent ea43e33 commit ecaa30b

69 files changed

Lines changed: 82073 additions & 40780 deletions

File tree

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

metrics/bac.py

Lines changed: 155 additions & 75 deletions
Original file line number · Diff line number · Diff line change
@@ -1,6 +1,6 @@
11
from __future__ import annotations
22

3-
from dataclasses import dataclass
3+
from dataclasses import dataclass, field
44
from pathlib import Path
55
from typing import Dict, List, Optional, Tuple
66

@@ -9,25 +9,26 @@
99
import pandas as pd
1010
import seaborn as sns
1111

12+
from .common import expand_flow_results
13+
1214

1315
@dataclass
1416
class BacResult:
1517
step_name: str
1618
mode: str # 'placement' or 'maxflow'
1719
series: pd.Series # delivered per iteration
1820
failure_ids: List[str]
19-
offered: float # offered demand (if known; else = series.max())
21+
offered: float # baseline delivered bandwidth
2022
quantiles_abs: Dict[float, float]
21-
quantiles_pct: Dict[float, float] # normalized by offered (0..1), if offered>0
22-
availability_at_pct_of_offer: Dict[float, float] # {90: 0.97, 99.9: 0.88, ...}
23-
auc_normalized: float # mean(min(delivered/offered,1.0))
24-
# Bandwidth threshold at probability p: smallest t s.t. P(delivered >= t) >= p
25-
# Absolute units (Gbps) and normalized by offered (0..1)
23+
quantiles_pct: Dict[float, float] # normalized by offered (0..1)
24+
availability_at_pct_of_offer: Dict[float, float] # {90: 0.97, ...}
25+
auc_normalized: float # mean(min(delivered/offered, 1.0))
2626
bw_at_probability_abs: Dict[float, float]
2727
bw_at_probability_pct: Dict[float, float]
28+
per_flow: Dict[str, "BacResult"] = field(default_factory=dict)
2829

2930
def to_jsonable(self) -> dict:
30-
return {
31+
d = {
3132
"step_name": self.step_name,
3233
"mode": self.mode,
3334
"series": list(map(float, self.series.values)),
@@ -46,6 +47,9 @@ def to_jsonable(self) -> dict:
4647
str(k): float(v) for k, v in self.bw_at_probability_pct.items()
4748
},
4849
}
50+
if self.per_flow:
51+
d["per_flow"] = {k: v.to_jsonable() for k, v in self.per_flow.items()}
52+
return d
4953

5054

5155
def _get_step(results: dict, name: str) -> dict:
@@ -63,92 +67,167 @@ def _detect_mode(results: dict, step_name: str, mode: str) -> str:
6367
return "placement"
6468

6569

66-
def compute_bac(results: dict, step_name: str, mode: str = "auto") -> BacResult:
67-
mode = _detect_mode(results, step_name, mode)
68-
# Validate baseline metadata and ordering
69-
step_meta = results.get("steps", {}).get(step_name, {}).get("metadata", {}) or {}
70-
if bool(step_meta.get("baseline")) is not True:
71-
raise ValueError(
72-
f"{step_name}.metadata.baseline must be true and baseline must be included"
73-
)
74-
data = _get_step(results, step_name)
75-
flow_results = data.get("flow_results", [])
76-
if not isinstance(flow_results, list) or not flow_results:
77-
raise ValueError(f"No flow_results for step: {step_name}")
78-
first = flow_results[0]
79-
if str(first.get("failure_id", "")) != "baseline":
80-
raise ValueError(
81-
f"{step_name} baseline must be first (flow_results[0].failure_id == 'baseline')"
82-
)
70+
def _sum_delivered(iteration: dict) -> float:
    """Sum placed bandwidth across all flows in one iteration result.

    Skips records with a missing endpoint or a self-loop
    (source == destination); ``placed`` defaults to 0.0 when absent.
    """
    total = 0.0
    for rec in iteration.get("flows", []) or []:
        src, dst = rec.get("source", ""), rec.get("destination", "")
        if src and dst and src != dst:
            total += float(rec.get("placed", 0.0))
    return total


# Probability points used for the delivered-bandwidth quantiles.
_QUANTILE_PROBS = (0.50, 0.90, 0.95, 0.99, 0.999, 0.9999)
# Percent-of-offered thresholds for availability / bandwidth-at-probability.
_AVAIL_THRESHOLDS = (90.0, 95.0, 99.0, 99.9, 99.99)


def _compute_bac_stats(
    series: pd.Series, offered: float
) -> Tuple[
    Dict[float, float],  # quantiles_abs
    Dict[float, float],  # quantiles_pct
    Dict[float, float],  # availability_at_pct_of_offer
    float,  # auc_normalized
    Dict[float, float],  # bw_at_probability_abs
    Dict[float, float],  # bw_at_probability_pct
]:
    """Compute all BAC statistics from a delivered-bandwidth series.

    This is the single source of truth for BAC math. Used for both
    aggregate and per-flow computation. ``offered`` is the baseline
    (no-failure) delivered bandwidth; normalized quantities are
    fractions of it, capped at 1.0.
    """
    has_data = len(series) > 0
    normalizable = offered > 0

    # Absolute quantiles ("lower" interpolation picks an observed value).
    q_abs: Dict[float, float] = {}
    for prob in _QUANTILE_PROBS:
        q_abs[prob] = float(series.quantile(prob, interpolation="lower"))

    # Quantiles as fractions of offered, capped at 1.0 to absorb
    # numerical noise or iterations delivering more than the baseline.
    q_pct: Dict[float, float] = {}
    if normalizable:
        q_pct = {
            prob: min(
                float(series.quantile(prob, interpolation="lower") / offered), 1.0
            )
            for prob in _QUANTILE_PROBS
        }

    # Availability: fraction of iterations delivering at least pct% of offered.
    avail: Dict[float, float] = {}
    if normalizable and has_data:
        n_iters = float(len(series))
        for pct in _AVAIL_THRESHOLDS:
            cutoff = (pct / 100.0) * offered
            avail[pct] = float((series >= cutoff).sum()) / n_iters  # pyright: ignore[reportOperatorIssue]

    # Inverse availability: bandwidth sustained with probability p is the
    # lower-tail quantile at 1 - p/100.
    bw_abs: Dict[float, float] = {}
    bw_pct: Dict[float, float] = {}
    for pct in _AVAIL_THRESHOLDS:
        tail = max(0.0, 1.0 - (pct / 100.0))
        try:
            level = float(series.quantile(tail, interpolation="lower"))
        except Exception:
            level = float("nan")
        bw_abs[pct] = level
        bw_pct[pct] = float(level / offered) if normalizable else float("nan")

    # Area under the normalized BAC curve: mean delivered fraction with
    # each iteration capped at 1.0.
    auc_norm = 1.0
    if normalizable and has_data:
        auc_norm = float((series.astype(float) / offered).clip(upper=1.0).mean())

    return q_abs, q_pct, avail, auc_norm, bw_abs, bw_pct
135+
136+
137+
def _flow_label(flow_source: str) -> str:
    """Extract a readable directional label from a flow's source field.

    Flow source format: ``_src_<source_pattern>|<target_pattern>|<hash>``
    Returns label like ``abc1/rsw>xyz1/rsw``.
    """
    demand_id = flow_source.removeprefix("_src_").removeprefix("_snk_")
    src_pat, sep, remainder = demand_id.partition("|")
    if sep:
        # Patterns are anchored regexes; drop the ^/$ anchors for display.
        dst_pat = remainder.partition("|")[0]
        return f"{src_pat.strip('^$')}>{dst_pat.strip('^$')}"
    # No recognizable pattern: fall back to a truncated demand id.
    return demand_id[:30]
150+
151+
152+
def compute_bac(results: dict, step_name: str, mode: str = "auto") -> BacResult:
153+
mode = _detect_mode(results, step_name, mode)
154+
data = _get_step(results, step_name)
155+
156+
baseline = data.get("baseline")
157+
if not isinstance(baseline, dict):
158+
raise ValueError(f"{step_name}: data.baseline dict required")
159+
flow_results = data.get("flow_results", [])
160+
if not isinstance(flow_results, list) or not flow_results:
161+
raise ValueError(f"No flow_results for step: {step_name}")
162+
163+
# Baseline determines offered bandwidth
164+
offered = _sum_delivered(baseline)
165+
if not np.isfinite(offered) or offered <= 0:
166+
raise ValueError(f"{step_name}: baseline delivered must be finite and > 0")
167+
168+
# Expand deduplicated patterns by occurrence_count
169+
expanded = expand_flow_results(flow_results)
170+
171+
# ── Aggregate series ──
172+
delivered = [offered]
173+
fids: List[str] = ["baseline"]
174+
for idx, it in enumerate(expanded):
175+
delivered.append(_sum_delivered(it))
176+
fids.append(str(it.get("failure_id", f"it{idx}")))
177+
178+
s = pd.Series(delivered, dtype=float)
179+
s.index.name = "iteration"
180+
181+
q_abs, q_pct, avail, auc_norm, bw_abs, bw_pct = _compute_bac_stats(s, offered)
182+
183+
# ── Per-flow series ──
184+
# Build baseline per-flow map: source_field → (label, baseline_placed)
185+
flow_map: Dict[str, Tuple[str, float]] = {}
186+
for rec in baseline.get("flows", []) or []:
187+
src = rec.get("source", "")
188+
dst = rec.get("destination", "")
189+
if not src or not dst or src == dst:
190+
continue
191+
placed = float(rec.get("placed", 0.0))
192+
if placed <= 0:
193+
continue
194+
flow_map[src] = (_flow_label(src), placed)
195+
196+
per_flow: Dict[str, BacResult] = {}
197+
if len(flow_map) > 1:
198+
# Only compute per-flow when there are multiple flows to separate
199+
flow_series: Dict[str, List[float]] = {
200+
src: [bl_placed] for src, (_label, bl_placed) in flow_map.items()
201+
}
202+
203+
for it in expanded:
204+
it_flows = {f["source"]: f for f in it.get("flows", []) or []}
205+
for src, (_label, _bl_placed) in flow_map.items():
206+
if src in it_flows:
207+
flow_series[src].append(float(it_flows[src].get("placed", 0.0)))
208+
else:
209+
flow_series[src].append(0.0)
210+
211+
for src, (label, bl_placed) in flow_map.items():
212+
fs = pd.Series(flow_series[src], dtype=float)
213+
fs.index.name = "iteration"
214+
fq_abs, fq_pct, favail, fauc, fbw_abs, fbw_pct = _compute_bac_stats(
215+
fs, bl_placed
216+
)
217+
per_flow[label] = BacResult(
218+
step_name=step_name,
219+
mode=mode,
220+
series=fs,
221+
failure_ids=list(fids),
222+
offered=float(bl_placed),
223+
quantiles_abs=fq_abs,
224+
quantiles_pct=fq_pct,
225+
availability_at_pct_of_offer=favail,
226+
auc_normalized=fauc,
227+
bw_at_probability_abs=fbw_abs,
228+
bw_at_probability_pct=fbw_pct,
229+
)
230+
152231
return BacResult(
153232
step_name=step_name,
154233
mode=mode,
@@ -161,6 +240,7 @@ def compute_bac(results: dict, step_name: str, mode: str = "auto") -> BacResult:
161240
auc_normalized=auc_norm,
162241
bw_at_probability_abs=bw_abs,
163242
bw_at_probability_pct=bw_pct,
243+
per_flow=per_flow,
164244
)
165245

166246

metrics/common.py

Lines changed: 87 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,87 @@
1+
"""Shared utilities for metrics modules."""
2+
3+
from __future__ import annotations
4+
5+
from typing import Dict, List, Tuple
6+
7+
8+
def expand_flow_results(flow_results: list[dict]) -> list[dict]:
    """Expand deduplicated flow_results by occurrence_count.

    ngraph deduplicates identical failure patterns during Monte Carlo
    simulation. Each entry's ``occurrence_count`` indicates how many
    iterations produced that exact pattern. This function repeats each
    entry accordingly so that downstream statistical operations weight
    each iteration equally.

    Entries without ``occurrence_count`` default to 1 (backward
    compatible); malformed or non-positive counts are also treated as 1
    rather than aborting the whole analysis.

    Note: repeated entries are *shared references* to the same dict —
    callers must treat them as read-only.
    """
    expanded: list[dict] = []
    for it in flow_results:
        try:
            count = max(1, int(it.get("occurrence_count", 1)))
        except (TypeError, ValueError):
            count = 1  # malformed count: weight the pattern once
        expanded.extend([it] * count)
    return expanded
25+
26+
27+
def canonical_dc(endpoint: str) -> str:
    """Normalize endpoint to canonical DC-level path ``metro/dc``.

    Examples::

        'metro1/dc1' → 'metro1/dc1'
        'metro1/dc1/dc/dc' → 'metro1/dc1'
        'metro1/dc1/rack/node' → 'metro1/dc1'
    """
    if not endpoint:
        return endpoint
    metro, sep, rest = endpoint.partition("/")
    if not sep:
        # Single-component path: nothing to truncate.
        return endpoint
    dc = rest.split("/", 1)[0]
    return f"{metro}/{dc}"
42+
43+
44+
def baseline_demand_map(
    results: dict, step_name: str = "tm_placement"
) -> Dict[Tuple[str, str], float]:
    """Extract per-pair baseline demand from a placement step.

    Returns mapping ``(canonical_src, canonical_dst) -> demand``;
    pairs with zero or negative demand are excluded. Returns an empty
    mapping when the step has no baseline dict.
    """
    step = results.get("steps", {}).get(step_name, {}) or {}
    data = step.get("data", {}) or {}
    base = data.get("baseline")
    if not isinstance(base, dict):
        return {}

    demands: Dict[Tuple[str, str], float] = {}
    for rec in base.get("flows", []) or []:
        src = canonical_dc(rec.get("source", ""))
        dst = canonical_dc(rec.get("destination", ""))
        if not src or not dst or src == dst:
            continue
        try:
            value = float(rec.get("demand", 0.0))
        except Exception:
            # Unparseable demand: treat as absent.
            value = 0.0
        if value > 0.0:
            demands[(src, dst)] = value
    return demands
71+
72+
73+
def get_tm_baseline_and_failures(results: dict) -> Tuple[dict, List[dict]]:
    """Extract baseline dict and expanded failure list from tm_placement.

    The returned failure list is expanded by ``occurrence_count`` so each
    Monte Carlo iteration is represented as a separate entry.

    Raises:
        ValueError: if the baseline dict is missing or flow_results is
            not a list.
    """
    tm_step = results.get("steps", {}).get("tm_placement", {}) or {}
    tm_data = tm_step.get("data", {}) or {}

    baseline = tm_data.get("baseline")
    if not isinstance(baseline, dict):
        raise ValueError("tm_placement.data.baseline dict required")

    flow_results = tm_data.get("flow_results", []) or []
    if not isinstance(flow_results, list):
        raise ValueError("tm_placement.data.flow_results must be a list")

    return baseline, expand_flow_results(flow_results)

0 commit comments

Comments
 (0)