From 5105800d02f2da509159264a9c468239044a042e Mon Sep 17 00:00:00 2001 From: Andy Ayers Date: Sun, 28 Jun 2026 09:26:24 -0700 Subject: [PATCH 1/4] SPMI: per-context tpdiff examples Add a "top method regressions / improvements" section to the tpdiff markdown summary, mirroring how asmdiffs surfaces specific contexts. The per-context data already lives in the details CSV; aggregate it once and emit FullOpts/MinOpts top-N tables by PDIFF %. Release JITs don't currently report `MethodFullName`, so the table also shows the SPMI context number for fallback lookup via `mcs -dumpMap` or `superpmi -c N`. Fixes #85755. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- src/coreclr/scripts/superpmi.py | 111 +++++++++++++++++++++++++++++--- 1 file changed, 102 insertions(+), 9 deletions(-) diff --git a/src/coreclr/scripts/superpmi.py b/src/coreclr/scripts/superpmi.py index 47aadbd45f8e30..190940aff45dd8 100644 --- a/src/coreclr/scripts/superpmi.py +++ b/src/coreclr/scripts/superpmi.py @@ -2006,6 +2006,11 @@ def aggregate_diff_metrics(details_file): diffs_fields = ["Context", "Method full name", "Context size", "Base ActualCodeBytes", "Diff ActualCodeBytes", "Base PerfScore", "Diff PerfScore"] diffs = [] + # Per-context throughput diffs (rows where PIN measured base != diff + # instruction count). Used by tpdiff to surface specific method examples. + tp_diffs_fields = ["Context", "Method full name", "MinOpts", "Base instructions", "Diff instructions"] + tp_diffs = [] + for row in read_csv(details_file): base_result = row["Base result"] @@ -2044,6 +2049,9 @@ def aggregate_diff_metrics(details_file): base_dict["Diff executed instructions"] += base_insts diff_dict["Diff executed instructions"] += diff_insts + if base_insts != diff_insts: + tp_diffs.append({key: row[key] for key in tp_diffs_fields}) + base_perfscore = float(row["Base PerfScore"]) diff_perfscore = float(row["Diff PerfScore"]) base_dict["Diffed PerfScore"] += base_perfscore @@ -2083,7 +2091,8 @@ def aggregate_diff_metrics(details_file): return ({"Overall": base_overall, "MinOpts": base_minopts, "FullOpts": base_fullopts}, {"Overall": diff_overall, "MinOpts": diff_minopts, "FullOpts": diff_fullopts}, - diffs) + diffs, + tp_diffs) class SuperPMIReplayAsmDiffs: @@ -2323,7 +2332,7 @@ def replay_with_asm_diffs(self): print_superpmi_error_result(return_code, self.coreclr_args) - (base_metrics, diff_metrics, diffs) = aggregate_diff_metrics(details_info_file) + (base_metrics, diff_metrics, diffs, _) = aggregate_diff_metrics(details_info_file) print_superpmi_success_result(return_code, base_metrics, diff_metrics) artifacts_base_name = create_artifacts_base_name(self.coreclr_args, mch_file) @@ -3193,7 +3202,7 @@ def replay_with_throughput_diff(self): print_superpmi_error_result(return_code, self.coreclr_args) - (base_metrics, diff_metrics, _) = aggregate_diff_metrics(details_info_file) + (base_metrics, diff_metrics, _, tp_per_context) = aggregate_diff_metrics(details_info_file) print_superpmi_success_result(return_code, base_metrics, diff_metrics) if base_metrics is not None and diff_metrics is not None: @@ -3205,7 +3214,7 @@ def replay_with_throughput_diff(self): if base_instructions != 0 and diff_instructions != 0: delta_instructions = diff_instructions - base_instructions logging.info("Total instructions executed delta: {} ({:.2%} of base)".format(delta_instructions, delta_instructions / base_instructions)) - tp_diffs.append((os.path.basename(mch_file), base_metrics, diff_metrics)) + tp_diffs.append((os.path.basename(mch_file), base_metrics, diff_metrics, tp_per_context)) else: logging.warning("One compilation failed to produce any results") else: @@ -3258,6 +3267,9 @@ def replay_with_throughput_diff(self): def write_tpdiff_markdown_summary(write_fh, base_jit_build_string_decoded, diff_jit_build_string_decoded, base_jit_options, diff_jit_options, tp_diffs, include_details): + # Tolerate the legacy 3-tuple shape from older saved JSON summaries. + tp_diffs = [t if len(t) >= 4 else (t[0], t[1], t[2], []) for t in tp_diffs] + def write_top_context_section(): if not base_jit_build_string_decoded: write_fh.write("{} Could not decode base JIT build string".format(html_color("red", "Warning:"))) @@ -3291,12 +3303,12 @@ def is_significant_pct(base, diff): def is_significant(row, base, diff): return is_significant_pct(base[row]["Diff executed instructions"], diff[row]["Diff executed instructions"]) - if any(is_significant(row, base, diff) for row in ["Overall", "MinOpts", "FullOpts"] for (_, base, diff) in tp_diffs): + if any(is_significant(row, base, diff) for row in ["Overall", "MinOpts", "FullOpts"] for (_, base, diff, _) in tp_diffs): def write_pivot_section(row): - if not any(is_significant(row, base, diff) for (_, base, diff) in tp_diffs): + if not any(is_significant(row, base, diff) for (_, base, diff, _) in tp_diffs): return - pcts = [compute_pct(base_metrics[row]["Diff executed instructions"], diff_metrics[row]["Diff executed instructions"]) for (_, base_metrics, diff_metrics) in tp_diffs] + pcts = [compute_pct(base_metrics[row]["Diff executed instructions"], diff_metrics[row]["Diff executed instructions"]) for (_, base_metrics, diff_metrics, _) in tp_diffs] min_pct_str = format_pct(min(pcts)) max_pct_str = format_pct(max(pcts)) if min_pct_str == max_pct_str: @@ -3307,7 +3319,7 @@ def write_pivot_section(row): with DetailsSection(write_fh, tp_summary): write_fh.write("|Collection|PDIFF|\n") write_fh.write("|---|--:|\n") - for mch_file, base, diff in tp_diffs: + for mch_file, base, diff, _ in tp_diffs: base_instructions = base[row]["Diff executed instructions"] diff_instructions = diff[row]["Diff executed instructions"] @@ -3320,6 +3332,8 @@ def write_pivot_section(row): write_pivot_section("Overall") write_pivot_section("MinOpts") write_pivot_section("FullOpts") + if include_details: + write_tpdiff_context_examples(write_fh, tp_diffs) elif include_details: write_top_context_section() write_fh.write("No significant throughput differences found\n") @@ -3330,7 +3344,7 @@ def write_pivot_section(row): write_fh.write("{} contexts:\n\n".format(disp)) write_fh.write("|Collection|Base # instructions|Diff # instructions|PDIFF|\n") write_fh.write("|---|--:|--:|--:|\n") - for mch_file, base, diff in tp_diffs: + for mch_file, base, diff, _ in tp_diffs: base_instructions = base[row]["Diff executed instructions"] diff_instructions = diff[row]["Diff executed instructions"] write_fh.write("|{}|{:,d}|{:,d}|{}|\n".format( @@ -3338,6 +3352,85 @@ def write_pivot_section(row): compute_and_format_pct(base_instructions, diff_instructions))) write_fh.write("\n") + +def write_tpdiff_context_examples(write_fh, tp_diffs): + """ Write top per-context throughput regression/improvement examples. + + Args: + write_fh : file handle for file to output to + tp_diffs : list of (mch_file, base_metrics, diff_metrics, tp_per_context) + where tp_per_context is a list of dicts with keys + "Context", "Method full name", "MinOpts", + "Base instructions", "Diff instructions". + """ + + # Flatten per-context rows; tag each with its originating collection. + flat = [] + for (mch_file, _, _, tp_per_context) in tp_diffs: + for row in tp_per_context: + try: + base_insts = int(row["Base instructions"]) + diff_insts = int(row["Diff instructions"]) + except (KeyError, ValueError): + continue + if base_insts <= 0 or diff_insts == base_insts: + continue + pct = (diff_insts - base_insts) / base_insts * 100 + flat.append({ + "Collection": mch_file, + "Method full name": row.get("Method full name", ""), + "Context": row.get("Context", ""), + "MinOpts": row.get("MinOpts", "False") == "True", + "Base instructions": base_insts, + "Diff instructions": diff_insts, + "PDIFF pct": pct, + }) + + if not flat: + return + + # Suppress tiny absolute deltas that show up as big percentages but are noise. + MIN_ABS_DELTA = 50 + significant = [r for r in flat if abs(r["Diff instructions"] - r["Base instructions"]) >= MIN_ABS_DELTA] + + if not significant: + return + + def write_examples(title, rows): + if not rows: + return + with DetailsSection(write_fh, title): + write_fh.write("|Collection|Context|Method|Base|Diff|PDIFF|\n") + write_fh.write("|---|--:|---|--:|--:|--:|\n") + for r in rows: + # Release JITs don't report MethodFullName; fall back to context number. + method = r["Method full name"] or "" + write_fh.write("|{}|{}|{}|{:,d}|{:,d}|{}|\n".format( + r["Collection"], + r["Context"], + method, + r["Base instructions"], + r["Diff instructions"], + compute_and_format_pct(r["Base instructions"], r["Diff instructions"]))) + + TOP_N = 20 + + def split_and_emit(label, rows): + regressions = sorted([r for r in rows if r["PDIFF pct"] > 0], + key=lambda r: r["PDIFF pct"], reverse=True)[:TOP_N] + improvements = sorted([r for r in rows if r["PDIFF pct"] < 0], + key=lambda r: r["PDIFF pct"])[:TOP_N] + write_examples("Top method regressions ({}, by PDIFF %)".format(label), regressions) + write_examples("Top method improvements ({}, by PDIFF %)".format(label), improvements) + + fullopts = [r for r in significant if not r["MinOpts"]] + minopts = [r for r in significant if r["MinOpts"]] + + if fullopts: + split_and_emit("FullOpts", fullopts) + if minopts: + split_and_emit("MinOpts", minopts) + ################################################################################ # SuperPMI Metric Diff ################################################################################ From 62cd05e28c4378d306e3d04f7908f564708f0a6f Mon Sep 17 00:00:00 2001 From: Andy Ayers Date: Mon, 29 Jun 2026 07:46:41 -0700 Subject: [PATCH 2/4] PR review feedback - Drop legacy 3-tuple tolerance and the try/except / .get defensive paths; callers always produce the current shape. - Bound per-context tp_diff retention via four top-K heaps (200 each) so very large MCH files can't blow up memory. - Escape '|' / HTML / newlines in markdown cells; drop the HTML-looking "" placeholder (use an empty cell instead). --- src/coreclr/scripts/superpmi.py | 60 +++++++++++++++++++++------------ 1 file changed, 38 insertions(+), 22 deletions(-) diff --git a/src/coreclr/scripts/superpmi.py b/src/coreclr/scripts/superpmi.py index 190940aff45dd8..22677f49824d9d 100644 --- a/src/coreclr/scripts/superpmi.py +++ b/src/coreclr/scripts/superpmi.py @@ -20,6 +20,7 @@ import asyncio import csv import datetime +import heapq import html import json import locale @@ -2006,10 +2007,18 @@ def aggregate_diff_metrics(details_file): diffs_fields = ["Context", "Method full name", "Context size", "Base ActualCodeBytes", "Diff ActualCodeBytes", "Base PerfScore", "Diff PerfScore"] diffs = [] - # Per-context throughput diffs (rows where PIN measured base != diff - # instruction count). Used by tpdiff to surface specific method examples. + # Per-context throughput diffs. Use bounded per-bucket heaps so very large + # MCH files don't blow up memory: four buckets (FullOpts|MinOpts) x + # (regression|improvement), each capped at TP_TOP_K. Each heap is a min-heap + # of (key, idx, row) where `key` is `pct` for regressions (keep largest) + # and `-pct` for improvements (keep most negative). tp_diffs_fields = ["Context", "Method full name", "MinOpts", "Base instructions", "Diff instructions"] - tp_diffs = [] + TP_TOP_K = 200 + tp_idx = 0 + tp_heaps = { + ("FullOpts", "reg"): [], ("FullOpts", "imp"): [], + ("MinOpts", "reg"): [], ("MinOpts", "imp"): [], + } for row in read_csv(details_file): base_result = row["Base result"] @@ -2049,8 +2058,18 @@ def aggregate_diff_metrics(details_file): base_dict["Diff executed instructions"] += base_insts diff_dict["Diff executed instructions"] += diff_insts - if base_insts != diff_insts: - tp_diffs.append({key: row[key] for key in tp_diffs_fields}) + if base_insts > 0 and base_insts != diff_insts: + pct = (diff_insts - base_insts) / base_insts * 100 + bucket = "MinOpts" if row["MinOpts"] == "True" else "FullOpts" + direction = "reg" if pct > 0 else "imp" + key = pct if direction == "reg" else -pct + tp_idx += 1 + h = tp_heaps[(bucket, direction)] + entry = (key, tp_idx, {f: row[f] for f in tp_diffs_fields}) + if len(h) < TP_TOP_K: + heapq.heappush(h, entry) + elif key > h[0][0]: + heapq.heapreplace(h, entry) base_perfscore = float(row["Base PerfScore"]) diff_perfscore = float(row["Diff PerfScore"]) @@ -2089,6 +2108,8 @@ def aggregate_diff_metrics(details_file): else: d["Relative PerfScore Geomean (Diffs)"] = 1 + tp_diffs = [record for h in tp_heaps.values() for (_, _, record) in h] + return ({"Overall": base_overall, "MinOpts": base_minopts, "FullOpts": base_fullopts}, {"Overall": diff_overall, "MinOpts": diff_minopts, "FullOpts": diff_fullopts}, diffs, @@ -3267,9 +3288,6 @@ def replay_with_throughput_diff(self): def write_tpdiff_markdown_summary(write_fh, base_jit_build_string_decoded, diff_jit_build_string_decoded, base_jit_options, diff_jit_options, tp_diffs, include_details): - # Tolerate the legacy 3-tuple shape from older saved JSON summaries. - tp_diffs = [t if len(t) >= 4 else (t[0], t[1], t[2], []) for t in tp_diffs] - def write_top_context_section(): if not base_jit_build_string_decoded: write_fh.write("{} Could not decode base JIT build string".format(html_color("red", "Warning:"))) @@ -3364,23 +3382,23 @@ def write_tpdiff_context_examples(write_fh, tp_diffs): "Base instructions", "Diff instructions". """ + # Escape values destined for a markdown table cell: '|' splits cells, '<>&' + # render as HTML on GitHub, and newlines break the row. + def md_cell(s): + return html.escape(s).replace("|", "|").replace("\n", " ").replace("\r", "") + # Flatten per-context rows; tag each with its originating collection. flat = [] for (mch_file, _, _, tp_per_context) in tp_diffs: for row in tp_per_context: - try: - base_insts = int(row["Base instructions"]) - diff_insts = int(row["Diff instructions"]) - except (KeyError, ValueError): - continue - if base_insts <= 0 or diff_insts == base_insts: - continue + base_insts = int(row["Base instructions"]) + diff_insts = int(row["Diff instructions"]) pct = (diff_insts - base_insts) / base_insts * 100 flat.append({ "Collection": mch_file, - "Method full name": row.get("Method full name", ""), - "Context": row.get("Context", ""), - "MinOpts": row.get("MinOpts", "False") == "True", + "Method full name": row["Method full name"], + "Context": row["Context"], + "MinOpts": row["MinOpts"] == "True", "Base instructions": base_insts, "Diff instructions": diff_insts, "PDIFF pct": pct, @@ -3403,12 +3421,10 @@ def write_examples(title, rows): write_fh.write("|Collection|Context|Method|Base|Diff|PDIFF|\n") write_fh.write("|---|--:|---|--:|--:|--:|\n") for r in rows: - # Release JITs don't report MethodFullName; fall back to context number. - method = r["Method full name"] or "" write_fh.write("|{}|{}|{}|{:,d}|{:,d}|{}|\n".format( - r["Collection"], + md_cell(r["Collection"]), r["Context"], - method, + md_cell(r["Method full name"]), r["Base instructions"], r["Diff instructions"], compute_and_format_pct(r["Base instructions"], r["Diff instructions"]))) From 7e96da13006c156884f16fd576a66af99e27ebe8 Mon Sep 17 00:00:00 2001 From: Andy Ayers Date: Mon, 29 Jun 2026 07:51:16 -0700 Subject: [PATCH 3/4] Drop the tp_diffs heap cap --- src/coreclr/scripts/superpmi.py | 29 ++++------------------------- 1 file changed, 4 insertions(+), 25 deletions(-) diff --git a/src/coreclr/scripts/superpmi.py b/src/coreclr/scripts/superpmi.py index 22677f49824d9d..8dfc4a3858f421 100644 --- a/src/coreclr/scripts/superpmi.py +++ b/src/coreclr/scripts/superpmi.py @@ -20,7 +20,6 @@ import asyncio import csv import datetime -import heapq import html import json import locale @@ -2007,18 +2006,10 @@ def aggregate_diff_metrics(details_file): diffs_fields = ["Context", "Method full name", "Context size", "Base ActualCodeBytes", "Diff ActualCodeBytes", "Base PerfScore", "Diff PerfScore"] diffs = [] - # Per-context throughput diffs. Use bounded per-bucket heaps so very large - # MCH files don't blow up memory: four buckets (FullOpts|MinOpts) x - # (regression|improvement), each capped at TP_TOP_K. Each heap is a min-heap - # of (key, idx, row) where `key` is `pct` for regressions (keep largest) - # and `-pct` for improvements (keep most negative). + # Per-context throughput diffs (rows where PIN measured base != diff + # instruction count). Used by tpdiff to surface specific method examples. tp_diffs_fields = ["Context", "Method full name", "MinOpts", "Base instructions", "Diff instructions"] - TP_TOP_K = 200 - tp_idx = 0 - tp_heaps = { - ("FullOpts", "reg"): [], ("FullOpts", "imp"): [], - ("MinOpts", "reg"): [], ("MinOpts", "imp"): [], - } + tp_diffs = [] for row in read_csv(details_file): base_result = row["Base result"] @@ -2059,17 +2050,7 @@ def aggregate_diff_metrics(details_file): diff_dict["Diff executed instructions"] += diff_insts if base_insts > 0 and base_insts != diff_insts: - pct = (diff_insts - base_insts) / base_insts * 100 - bucket = "MinOpts" if row["MinOpts"] == "True" else "FullOpts" - direction = "reg" if pct > 0 else "imp" - key = pct if direction == "reg" else -pct - tp_idx += 1 - h = tp_heaps[(bucket, direction)] - entry = (key, tp_idx, {f: row[f] for f in tp_diffs_fields}) - if len(h) < TP_TOP_K: - heapq.heappush(h, entry) - elif key > h[0][0]: - heapq.heapreplace(h, entry) + tp_diffs.append({f: row[f] for f in tp_diffs_fields}) base_perfscore = float(row["Base PerfScore"]) diff_perfscore = float(row["Diff PerfScore"]) @@ -2108,8 +2089,6 @@ def aggregate_diff_metrics(details_file): else: d["Relative PerfScore Geomean (Diffs)"] = 1 - tp_diffs = [record for h in tp_heaps.values() for (_, _, record) in h] - return ({"Overall": base_overall, "MinOpts": base_minopts, "FullOpts": base_fullopts}, {"Overall": diff_overall, "MinOpts": diff_minopts, "FullOpts": diff_fullopts}, diffs, From be9196e1e7c2991a0e86ef66f3761549472ea0fe Mon Sep 17 00:00:00 2001 From: Andy Ayers Date: Mon, 29 Jun 2026 14:18:38 -0700 Subject: [PATCH 4/4] Drop the Method column from per-context examples --- src/coreclr/scripts/superpmi.py | 20 ++++++-------------- 1 file changed, 6 insertions(+), 14 deletions(-) diff --git a/src/coreclr/scripts/superpmi.py b/src/coreclr/scripts/superpmi.py index 8dfc4a3858f421..4e1c1d75014f2e 100644 --- a/src/coreclr/scripts/superpmi.py +++ b/src/coreclr/scripts/superpmi.py @@ -2008,7 +2008,7 @@ def aggregate_diff_metrics(details_file): # Per-context throughput diffs (rows where PIN measured base != diff # instruction count). Used by tpdiff to surface specific method examples. - tp_diffs_fields = ["Context", "Method full name", "MinOpts", "Base instructions", "Diff instructions"] + tp_diffs_fields = ["Context", "MinOpts", "Base instructions", "Diff instructions"] tp_diffs = [] for row in read_csv(details_file): @@ -3357,15 +3357,9 @@ def write_tpdiff_context_examples(write_fh, tp_diffs): write_fh : file handle for file to output to tp_diffs : list of (mch_file, base_metrics, diff_metrics, tp_per_context) where tp_per_context is a list of dicts with keys - "Context", "Method full name", "MinOpts", - "Base instructions", "Diff instructions". + "Context", "MinOpts", "Base instructions", "Diff instructions". """ - # Escape values destined for a markdown table cell: '|' splits cells, '<>&' - # render as HTML on GitHub, and newlines break the row. - def md_cell(s): - return html.escape(s).replace("|", "|").replace("\n", " ").replace("\r", "") - # Flatten per-context rows; tag each with its originating collection. flat = [] for (mch_file, _, _, tp_per_context) in tp_diffs: @@ -3375,7 +3369,6 @@ def md_cell(s): pct = (diff_insts - base_insts) / base_insts * 100 flat.append({ "Collection": mch_file, - "Method full name": row["Method full name"], "Context": row["Context"], "MinOpts": row["MinOpts"] == "True", "Base instructions": base_insts, @@ -3397,13 +3390,12 @@ def write_examples(title, rows): if not rows: return with DetailsSection(write_fh, title): - write_fh.write("|Collection|Context|Method|Base|Diff|PDIFF|\n") - write_fh.write("|---|--:|---|--:|--:|--:|\n") + write_fh.write("|Collection|Context|Base|Diff|PDIFF|\n") + write_fh.write("|---|--:|--:|--:|--:|\n") for r in rows: - write_fh.write("|{}|{}|{}|{:,d}|{:,d}|{}|\n".format( - md_cell(r["Collection"]), + write_fh.write("|{}|{}|{:,d}|{:,d}|{}|\n".format( + r["Collection"], r["Context"], - md_cell(r["Method full name"]), r["Base instructions"], r["Diff instructions"], compute_and_format_pct(r["Base instructions"], r["Diff instructions"])))