From 5105800d02f2da509159264a9c468239044a042e Mon Sep 17 00:00:00 2001
From: Andy Ayers <andya@microsoft.com>
Date: Sun, 28 Jun 2026 09:26:24 -0700
Subject: [PATCH 1/4] SPMI: per-context tpdiff examples

Add a "top method regressions / improvements" section to the tpdiff
markdown summary, mirroring how asmdiffs surfaces specific contexts.
The per-context data already lives in the details CSV; aggregate it
once and emit top-20 regression and improvement tables, ranked by
PDIFF %, for FullOpts and MinOpts. Contexts whose absolute instruction
delta is below 50 are dropped as noise.

Release JITs don't currently report `MethodFullName`, so the table also
shows the SPMI context number for fallback lookup via `mcs -dumpMap` or
`superpmi -c N`.

Fixes #85755.

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
---
 src/coreclr/scripts/superpmi.py | 111 +++++++++++++++++++++++++++++---
 1 file changed, 102 insertions(+), 9 deletions(-)

diff --git a/src/coreclr/scripts/superpmi.py b/src/coreclr/scripts/superpmi.py
index 47aadbd45f8e30..190940aff45dd8 100644
--- a/src/coreclr/scripts/superpmi.py
+++ b/src/coreclr/scripts/superpmi.py
@@ -2006,6 +2006,11 @@ def aggregate_diff_metrics(details_file):
     diffs_fields = ["Context", "Method full name", "Context size", "Base ActualCodeBytes", "Diff ActualCodeBytes", "Base PerfScore", "Diff PerfScore"]
     diffs = []
 
+    # Per-context throughput diffs (rows where PIN measured base != diff
+    # instruction count). Used by tpdiff to surface specific method examples.
+    tp_diffs_fields = ["Context", "Method full name", "MinOpts", "Base instructions", "Diff instructions"]
+    tp_diffs = []
+
     for row in read_csv(details_file):
         base_result = row["Base result"]
 
@@ -2044,6 +2049,9 @@ def aggregate_diff_metrics(details_file):
             base_dict["Diff executed instructions"] += base_insts
             diff_dict["Diff executed instructions"] += diff_insts
 
+            if base_insts != diff_insts:
+                tp_diffs.append({key: row[key] for key in tp_diffs_fields})
+
             base_perfscore = float(row["Base PerfScore"])
             diff_perfscore = float(row["Diff PerfScore"])
             base_dict["Diffed PerfScore"] += base_perfscore
@@ -2083,7 +2091,8 @@ def aggregate_diff_metrics(details_file):
 
     return ({"Overall": base_overall, "MinOpts": base_minopts, "FullOpts": base_fullopts},
             {"Overall": diff_overall, "MinOpts": diff_minopts, "FullOpts": diff_fullopts},
-            diffs)
+            diffs,
+            tp_diffs)
 
 
 class SuperPMIReplayAsmDiffs:
@@ -2323,7 +2332,7 @@ def replay_with_asm_diffs(self):
 
                 print_superpmi_error_result(return_code, self.coreclr_args)
 
-                (base_metrics, diff_metrics, diffs) = aggregate_diff_metrics(details_info_file)
+                (base_metrics, diff_metrics, diffs, _) = aggregate_diff_metrics(details_info_file)
                 print_superpmi_success_result(return_code, base_metrics, diff_metrics)
 
                 artifacts_base_name = create_artifacts_base_name(self.coreclr_args, mch_file)
@@ -3193,7 +3202,7 @@ def replay_with_throughput_diff(self):
 
                 print_superpmi_error_result(return_code, self.coreclr_args)
 
-                (base_metrics, diff_metrics, _) = aggregate_diff_metrics(details_info_file)
+                (base_metrics, diff_metrics, _, tp_per_context) = aggregate_diff_metrics(details_info_file)
                 print_superpmi_success_result(return_code, base_metrics, diff_metrics)
 
                 if base_metrics is not None and diff_metrics is not None:
@@ -3205,7 +3214,7 @@ def replay_with_throughput_diff(self):
                     if base_instructions != 0 and diff_instructions != 0:
                         delta_instructions = diff_instructions - base_instructions
                         logging.info("Total instructions executed delta: {} ({:.2%} of base)".format(delta_instructions, delta_instructions / base_instructions))
-                        tp_diffs.append((os.path.basename(mch_file), base_metrics, diff_metrics))
+                        tp_diffs.append((os.path.basename(mch_file), base_metrics, diff_metrics, tp_per_context))
                     else:
                         logging.warning("One compilation failed to produce any results")
                 else:
@@ -3258,6 +3267,9 @@ def replay_with_throughput_diff(self):
 
 def write_tpdiff_markdown_summary(write_fh, base_jit_build_string_decoded, diff_jit_build_string_decoded, base_jit_options, diff_jit_options, tp_diffs, include_details):
 
+    # Tolerate the legacy 3-tuple shape from older saved JSON summaries.
+    tp_diffs = [t if len(t) >= 4 else (t[0], t[1], t[2], []) for t in tp_diffs]
+
     def write_top_context_section():
         if not base_jit_build_string_decoded:
             write_fh.write("{} Could not decode base JIT build string".format(html_color("red", "Warning:")))
@@ -3291,12 +3303,12 @@ def is_significant_pct(base, diff):
     def is_significant(row, base, diff):
         return is_significant_pct(base[row]["Diff executed instructions"], diff[row]["Diff executed instructions"])
 
-    if any(is_significant(row, base, diff) for row in ["Overall", "MinOpts", "FullOpts"] for (_, base, diff) in tp_diffs):
+    if any(is_significant(row, base, diff) for row in ["Overall", "MinOpts", "FullOpts"] for (_, base, diff, _) in tp_diffs):
         def write_pivot_section(row):
-            if not any(is_significant(row, base, diff) for (_, base, diff) in tp_diffs):
+            if not any(is_significant(row, base, diff) for (_, base, diff, _) in tp_diffs):
                 return
 
-            pcts = [compute_pct(base_metrics[row]["Diff executed instructions"], diff_metrics[row]["Diff executed instructions"]) for (_, base_metrics, diff_metrics) in tp_diffs]
+            pcts = [compute_pct(base_metrics[row]["Diff executed instructions"], diff_metrics[row]["Diff executed instructions"]) for (_, base_metrics, diff_metrics, _) in tp_diffs]
             min_pct_str = format_pct(min(pcts))
             max_pct_str = format_pct(max(pcts))
             if min_pct_str == max_pct_str:
@@ -3307,7 +3319,7 @@ def write_pivot_section(row):
             with DetailsSection(write_fh, tp_summary):
                 write_fh.write("|Collection|PDIFF|\n")
                 write_fh.write("|---|--:|\n")
-                for mch_file, base, diff in tp_diffs:
+                for mch_file, base, diff, _ in tp_diffs:
                     base_instructions = base[row]["Diff executed instructions"]
                     diff_instructions = diff[row]["Diff executed instructions"]
 
@@ -3320,6 +3332,8 @@ def write_pivot_section(row):
         write_pivot_section("Overall")
         write_pivot_section("MinOpts")
         write_pivot_section("FullOpts")
+        if include_details:
+            write_tpdiff_context_examples(write_fh, tp_diffs)
     elif include_details:
         write_top_context_section()
         write_fh.write("No significant throughput differences found\n")
@@ -3330,7 +3344,7 @@ def write_pivot_section(row):
                 write_fh.write("{} contexts:\n\n".format(disp))
                 write_fh.write("|Collection|Base # instructions|Diff # instructions|PDIFF|\n")
                 write_fh.write("|---|--:|--:|--:|\n")
-                for mch_file, base, diff in tp_diffs:
+                for mch_file, base, diff, _ in tp_diffs:
                     base_instructions = base[row]["Diff executed instructions"]
                     diff_instructions = diff[row]["Diff executed instructions"]
                     write_fh.write("|{}|{:,d}|{:,d}|{}|\n".format(
@@ -3338,6 +3352,85 @@ def write_pivot_section(row):
                         compute_and_format_pct(base_instructions, diff_instructions)))
                 write_fh.write("\n")
 
+
+def write_tpdiff_context_examples(write_fh, tp_diffs):
+    """ Write top per-context throughput regression/improvement examples.
+
+    Args:
+        write_fh : file handle for file to output to
+        tp_diffs : list of (mch_file, base_metrics, diff_metrics, tp_per_context)
+                   where tp_per_context is a list of dicts with keys
+                   "Context", "Method full name", "MinOpts",
+                   "Base instructions", "Diff instructions".
+    """
+
+    # Flatten per-context rows; tag each with its originating collection.
+    flat = []
+    for (mch_file, _, _, tp_per_context) in tp_diffs:
+        for row in tp_per_context:
+            try:
+                base_insts = int(row["Base instructions"])
+                diff_insts = int(row["Diff instructions"])
+            except (KeyError, ValueError):
+                continue
+            if base_insts <= 0 or diff_insts == base_insts:
+                continue
+            pct = (diff_insts - base_insts) / base_insts * 100
+            flat.append({
+                "Collection": mch_file,
+                "Method full name": row.get("Method full name", ""),
+                "Context": row.get("Context", ""),
+                "MinOpts": row.get("MinOpts", "False") == "True",
+                "Base instructions": base_insts,
+                "Diff instructions": diff_insts,
+                "PDIFF pct": pct,
+            })
+
+    if not flat:
+        return
+
+    # Suppress tiny absolute deltas that show up as big percentages but are noise.
+    MIN_ABS_DELTA = 50
+    significant = [r for r in flat if abs(r["Diff instructions"] - r["Base instructions"]) >= MIN_ABS_DELTA]
+
+    if not significant:
+        return
+
+    def write_examples(title, rows):
+        if not rows:
+            return
+        with DetailsSection(write_fh, title):
+            write_fh.write("|Collection|Context|Method|Base|Diff|PDIFF|\n")
+            write_fh.write("|---|--:|---|--:|--:|--:|\n")
+            for r in rows:
+                # Release JITs don't report MethodFullName; fall back to context number.
+                method = r["Method full name"] or "<no name reported by JIT>"
+                write_fh.write("|{}|{}|{}|{:,d}|{:,d}|{}|\n".format(
+                    r["Collection"],
+                    r["Context"],
+                    method,
+                    r["Base instructions"],
+                    r["Diff instructions"],
+                    compute_and_format_pct(r["Base instructions"], r["Diff instructions"])))
+
+    TOP_N = 20
+
+    def split_and_emit(label, rows):
+        regressions = sorted([r for r in rows if r["PDIFF pct"] > 0],
+                             key=lambda r: r["PDIFF pct"], reverse=True)[:TOP_N]
+        improvements = sorted([r for r in rows if r["PDIFF pct"] < 0],
+                              key=lambda r: r["PDIFF pct"])[:TOP_N]
+        write_examples("Top method regressions ({}, by PDIFF %)".format(label), regressions)
+        write_examples("Top method improvements ({}, by PDIFF %)".format(label), improvements)
+
+    fullopts = [r for r in significant if not r["MinOpts"]]
+    minopts = [r for r in significant if r["MinOpts"]]
+
+    if fullopts:
+        split_and_emit("FullOpts", fullopts)
+    if minopts:
+        split_and_emit("MinOpts", minopts)
+
 ################################################################################
 # SuperPMI Metric Diff
 ################################################################################

From 62cd05e28c4378d306e3d04f7908f564708f0a6f Mon Sep 17 00:00:00 2001
From: Andy Ayers <andya@microsoft.com>
Date: Mon, 29 Jun 2026 07:46:41 -0700
Subject: [PATCH 2/4] PR review feedback

- Drop the legacy 3-tuple tolerance and the defensive try/except and
  .get() paths; callers always produce the current shape.
- Bound per-context tp_diff retention via four top-K heaps (200 each) so very
  large MCH files can't blow up memory.
- Escape '|' / HTML / newlines in markdown cells; drop the HTML-looking
  "<no name reported by JIT>" placeholder (use an empty cell instead).
---
 src/coreclr/scripts/superpmi.py | 60 +++++++++++++++++++++------------
 1 file changed, 38 insertions(+), 22 deletions(-)

diff --git a/src/coreclr/scripts/superpmi.py b/src/coreclr/scripts/superpmi.py
index 190940aff45dd8..22677f49824d9d 100644
--- a/src/coreclr/scripts/superpmi.py
+++ b/src/coreclr/scripts/superpmi.py
@@ -20,6 +20,7 @@
 import asyncio
 import csv
 import datetime
+import heapq
 import html
 import json
 import locale
@@ -2006,10 +2007,18 @@ def aggregate_diff_metrics(details_file):
     diffs_fields = ["Context", "Method full name", "Context size", "Base ActualCodeBytes", "Diff ActualCodeBytes", "Base PerfScore", "Diff PerfScore"]
     diffs = []
 
-    # Per-context throughput diffs (rows where PIN measured base != diff
-    # instruction count). Used by tpdiff to surface specific method examples.
+    # Per-context throughput diffs. Use bounded per-bucket heaps so very large
+    # MCH files don't blow up memory: four buckets (FullOpts|MinOpts) x
+    # (regression|improvement), each capped at TP_TOP_K. Each heap is a min-heap
+    # of (key, idx, row) where `key` is `pct` for regressions (keep largest)
+    # and `-pct` for improvements (keep most negative).
     tp_diffs_fields = ["Context", "Method full name", "MinOpts", "Base instructions", "Diff instructions"]
-    tp_diffs = []
+    TP_TOP_K = 200
+    tp_idx = 0
+    tp_heaps = {
+        ("FullOpts", "reg"): [], ("FullOpts", "imp"): [],
+        ("MinOpts",  "reg"): [], ("MinOpts",  "imp"): [],
+    }
 
     for row in read_csv(details_file):
         base_result = row["Base result"]
@@ -2049,8 +2058,18 @@ def aggregate_diff_metrics(details_file):
             base_dict["Diff executed instructions"] += base_insts
             diff_dict["Diff executed instructions"] += diff_insts
 
-            if base_insts != diff_insts:
-                tp_diffs.append({key: row[key] for key in tp_diffs_fields})
+            if base_insts > 0 and base_insts != diff_insts:
+                pct = (diff_insts - base_insts) / base_insts * 100
+                bucket = "MinOpts" if row["MinOpts"] == "True" else "FullOpts"
+                direction = "reg" if pct > 0 else "imp"
+                key = pct if direction == "reg" else -pct
+                tp_idx += 1
+                h = tp_heaps[(bucket, direction)]
+                entry = (key, tp_idx, {f: row[f] for f in tp_diffs_fields})
+                if len(h) < TP_TOP_K:
+                    heapq.heappush(h, entry)
+                elif key > h[0][0]:
+                    heapq.heapreplace(h, entry)
 
             base_perfscore = float(row["Base PerfScore"])
             diff_perfscore = float(row["Diff PerfScore"])
@@ -2089,6 +2108,8 @@ def aggregate_diff_metrics(details_file):
         else:
             d["Relative PerfScore Geomean (Diffs)"] = 1
 
+    tp_diffs = [record for h in tp_heaps.values() for (_, _, record) in h]
+
     return ({"Overall": base_overall, "MinOpts": base_minopts, "FullOpts": base_fullopts},
             {"Overall": diff_overall, "MinOpts": diff_minopts, "FullOpts": diff_fullopts},
             diffs,
@@ -3267,9 +3288,6 @@ def replay_with_throughput_diff(self):
 
 def write_tpdiff_markdown_summary(write_fh, base_jit_build_string_decoded, diff_jit_build_string_decoded, base_jit_options, diff_jit_options, tp_diffs, include_details):
 
-    # Tolerate the legacy 3-tuple shape from older saved JSON summaries.
-    tp_diffs = [t if len(t) >= 4 else (t[0], t[1], t[2], []) for t in tp_diffs]
-
     def write_top_context_section():
         if not base_jit_build_string_decoded:
             write_fh.write("{} Could not decode base JIT build string".format(html_color("red", "Warning:")))
@@ -3364,23 +3382,23 @@ def write_tpdiff_context_examples(write_fh, tp_diffs):
                    "Base instructions", "Diff instructions".
     """
 
+    # Escape values destined for a markdown table cell: '|' splits cells, '<>&'
+    # render as HTML on GitHub, and newlines break the row.
+    def md_cell(s):
+        return html.escape(s).replace("|", "&#124;").replace("\n", " ").replace("\r", "")
+
     # Flatten per-context rows; tag each with its originating collection.
     flat = []
     for (mch_file, _, _, tp_per_context) in tp_diffs:
         for row in tp_per_context:
-            try:
-                base_insts = int(row["Base instructions"])
-                diff_insts = int(row["Diff instructions"])
-            except (KeyError, ValueError):
-                continue
-            if base_insts <= 0 or diff_insts == base_insts:
-                continue
+            base_insts = int(row["Base instructions"])
+            diff_insts = int(row["Diff instructions"])
             pct = (diff_insts - base_insts) / base_insts * 100
             flat.append({
                 "Collection": mch_file,
-                "Method full name": row.get("Method full name", ""),
-                "Context": row.get("Context", ""),
-                "MinOpts": row.get("MinOpts", "False") == "True",
+                "Method full name": row["Method full name"],
+                "Context": row["Context"],
+                "MinOpts": row["MinOpts"] == "True",
                 "Base instructions": base_insts,
                 "Diff instructions": diff_insts,
                 "PDIFF pct": pct,
@@ -3403,12 +3421,10 @@ def write_examples(title, rows):
             write_fh.write("|Collection|Context|Method|Base|Diff|PDIFF|\n")
             write_fh.write("|---|--:|---|--:|--:|--:|\n")
             for r in rows:
-                # Release JITs don't report MethodFullName; fall back to context number.
-                method = r["Method full name"] or "<no name reported by JIT>"
                 write_fh.write("|{}|{}|{}|{:,d}|{:,d}|{}|\n".format(
-                    r["Collection"],
+                    md_cell(r["Collection"]),
                     r["Context"],
-                    method,
+                    md_cell(r["Method full name"]),
                     r["Base instructions"],
                     r["Diff instructions"],
                     compute_and_format_pct(r["Base instructions"], r["Diff instructions"])))

From 7e96da13006c156884f16fd576a66af99e27ebe8 Mon Sep 17 00:00:00 2001
From: Andy Ayers <andya@microsoft.com>
Date: Mon, 29 Jun 2026 07:51:16 -0700
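Subject: [PATCH 3/4] Drop the tp_diffs heap cap

Go back to collecting every differing context in a plain list instead of
the four bounded top-K heaps, and remove the `heapq` import. The
`base_insts > 0` guard added alongside the heaps is kept.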
---
 src/coreclr/scripts/superpmi.py | 29 ++++-------------------------
 1 file changed, 4 insertions(+), 25 deletions(-)

diff --git a/src/coreclr/scripts/superpmi.py b/src/coreclr/scripts/superpmi.py
index 22677f49824d9d..8dfc4a3858f421 100644
--- a/src/coreclr/scripts/superpmi.py
+++ b/src/coreclr/scripts/superpmi.py
@@ -20,7 +20,6 @@
 import asyncio
 import csv
 import datetime
-import heapq
 import html
 import json
 import locale
@@ -2007,18 +2006,10 @@ def aggregate_diff_metrics(details_file):
     diffs_fields = ["Context", "Method full name", "Context size", "Base ActualCodeBytes", "Diff ActualCodeBytes", "Base PerfScore", "Diff PerfScore"]
     diffs = []
 
-    # Per-context throughput diffs. Use bounded per-bucket heaps so very large
-    # MCH files don't blow up memory: four buckets (FullOpts|MinOpts) x
-    # (regression|improvement), each capped at TP_TOP_K. Each heap is a min-heap
-    # of (key, idx, row) where `key` is `pct` for regressions (keep largest)
-    # and `-pct` for improvements (keep most negative).
+    # Per-context throughput diffs (rows where PIN measured base != diff
+    # instruction count). Used by tpdiff to surface specific method examples.
     tp_diffs_fields = ["Context", "Method full name", "MinOpts", "Base instructions", "Diff instructions"]
-    TP_TOP_K = 200
-    tp_idx = 0
-    tp_heaps = {
-        ("FullOpts", "reg"): [], ("FullOpts", "imp"): [],
-        ("MinOpts",  "reg"): [], ("MinOpts",  "imp"): [],
-    }
+    tp_diffs = []
 
     for row in read_csv(details_file):
         base_result = row["Base result"]
@@ -2059,17 +2050,7 @@ def aggregate_diff_metrics(details_file):
             diff_dict["Diff executed instructions"] += diff_insts
 
             if base_insts > 0 and base_insts != diff_insts:
-                pct = (diff_insts - base_insts) / base_insts * 100
-                bucket = "MinOpts" if row["MinOpts"] == "True" else "FullOpts"
-                direction = "reg" if pct > 0 else "imp"
-                key = pct if direction == "reg" else -pct
-                tp_idx += 1
-                h = tp_heaps[(bucket, direction)]
-                entry = (key, tp_idx, {f: row[f] for f in tp_diffs_fields})
-                if len(h) < TP_TOP_K:
-                    heapq.heappush(h, entry)
-                elif key > h[0][0]:
-                    heapq.heapreplace(h, entry)
+                tp_diffs.append({f: row[f] for f in tp_diffs_fields})
 
             base_perfscore = float(row["Base PerfScore"])
             diff_perfscore = float(row["Diff PerfScore"])
@@ -2108,8 +2089,6 @@ def aggregate_diff_metrics(details_file):
         else:
             d["Relative PerfScore Geomean (Diffs)"] = 1
 
-    tp_diffs = [record for h in tp_heaps.values() for (_, _, record) in h]
-
     return ({"Overall": base_overall, "MinOpts": base_minopts, "FullOpts": base_fullopts},
             {"Overall": diff_overall, "MinOpts": diff_minopts, "FullOpts": diff_fullopts},
             diffs,

From be9196e1e7c2991a0e86ef66f3761549472ea0fe Mon Sep 17 00:00:00 2001
From: Andy Ayers <andya@microsoft.com>
Date: Mon, 29 Jun 2026 14:18:38 -0700
Subject: [PATCH 4/4] Drop the Method column from per-context examples

---
 src/coreclr/scripts/superpmi.py | 20 ++++++--------------
 1 file changed, 6 insertions(+), 14 deletions(-)

diff --git a/src/coreclr/scripts/superpmi.py b/src/coreclr/scripts/superpmi.py
index 8dfc4a3858f421..4e1c1d75014f2e 100644
--- a/src/coreclr/scripts/superpmi.py
+++ b/src/coreclr/scripts/superpmi.py
@@ -2008,7 +2008,7 @@ def aggregate_diff_metrics(details_file):
 
     # Per-context throughput diffs (rows where PIN measured base != diff
     # instruction count). Used by tpdiff to surface specific method examples.
-    tp_diffs_fields = ["Context", "Method full name", "MinOpts", "Base instructions", "Diff instructions"]
+    tp_diffs_fields = ["Context", "MinOpts", "Base instructions", "Diff instructions"]
     tp_diffs = []
 
     for row in read_csv(details_file):
@@ -3357,15 +3357,9 @@ def write_tpdiff_context_examples(write_fh, tp_diffs):
         write_fh : file handle for file to output to
         tp_diffs : list of (mch_file, base_metrics, diff_metrics, tp_per_context)
                    where tp_per_context is a list of dicts with keys
-                   "Context", "Method full name", "MinOpts",
-                   "Base instructions", "Diff instructions".
+                   "Context", "MinOpts", "Base instructions", "Diff instructions".
     """
 
-    # Escape values destined for a markdown table cell: '|' splits cells, '<>&'
-    # render as HTML on GitHub, and newlines break the row.
-    def md_cell(s):
-        return html.escape(s).replace("|", "&#124;").replace("\n", " ").replace("\r", "")
-
     # Flatten per-context rows; tag each with its originating collection.
     flat = []
     for (mch_file, _, _, tp_per_context) in tp_diffs:
@@ -3375,7 +3369,6 @@ def md_cell(s):
             pct = (diff_insts - base_insts) / base_insts * 100
             flat.append({
                 "Collection": mch_file,
-                "Method full name": row["Method full name"],
                 "Context": row["Context"],
                 "MinOpts": row["MinOpts"] == "True",
                 "Base instructions": base_insts,
@@ -3397,13 +3390,12 @@ def write_examples(title, rows):
         if not rows:
             return
         with DetailsSection(write_fh, title):
-            write_fh.write("|Collection|Context|Method|Base|Diff|PDIFF|\n")
-            write_fh.write("|---|--:|---|--:|--:|--:|\n")
+            write_fh.write("|Collection|Context|Base|Diff|PDIFF|\n")
+            write_fh.write("|---|--:|--:|--:|--:|\n")
             for r in rows:
-                write_fh.write("|{}|{}|{}|{:,d}|{:,d}|{}|\n".format(
-                    md_cell(r["Collection"]),
+                write_fh.write("|{}|{}|{:,d}|{:,d}|{}|\n".format(
+                    r["Collection"],
                     r["Context"],
-                    md_cell(r["Method full name"]),
                     r["Base instructions"],
                     r["Diff instructions"],
                     compute_and_format_pct(r["Base instructions"], r["Diff instructions"])))