From 3fe72582b1794339bd80f69fbbfb372237f09c67 Mon Sep 17 00:00:00 2001
From: Matt Frank <mfrank@nvidia.com>
Date: Wed, 27 May 2026 11:38:31 -0500
Subject: [PATCH 1/4] Add --jackknife flag to rcp_viewer

--jackknife GBS restricts output to the single real (non-interpolated)
RCP at the given global batch size, validating it against the full
measured set (so pruned-out batch sizes are still accepted), and also
prints the benchmark's submission_runs count.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
---
 .../visualization_scripts/rcp_viewer.py            | 14 ++++++++++++--
 1 file changed, 12 insertions(+), 2 deletions(-)

diff --git a/mlperf_logging/rcp_checker/visualization_scripts/rcp_viewer.py b/mlperf_logging/rcp_checker/visualization_scripts/rcp_viewer.py
index 28e999d..4447295 100755
--- a/mlperf_logging/rcp_checker/visualization_scripts/rcp_viewer.py
+++ b/mlperf_logging/rcp_checker/visualization_scripts/rcp_viewer.py
@@ -68,7 +68,10 @@ def main():
                     help='specify an RCP json file to use')
     parser.add_argument('--interpolate', action='store_true',
                         help='generate interpolated rcp min/mean for all batch sizes')
-    
+    parser.add_argument('--jackknife', type=int, metavar='GBS',
+                        help='restrict output to the single real (non-interpolated) RCP at this '
+                             'global batch size, and also print the benchmark submission_runs')
+
 
     args = parser.parse_args()
     rcp_pass_arg='pruned_rcps'
@@ -80,7 +83,14 @@ def main():
     if not args.no_header:
         print("BS,Mean,Min")
 
-    if not args.interpolate:
+    if args.jackknife is not None:
+        record = checker._find_rcp(args.jackknife, 'full_rcps')
+        if record is None:
+            sys.exit(f"Error: GBS {args.jackknife} is not a measured "
+                     f"(non-interpolated) RCP batch size for {args.benchmark}")
+        print_rcp_record(record)
+        print(f"submission_runs: {checker.submission_runs}")
+    elif not args.interpolate:
         data=checker._get_rcp_data(rcp_pass_arg)
         for key, record in data.items():
             print_rcp_record(record)

From 2060614c53a29471491c127efc5c7b715195722c Mon Sep 17 00:00:00 2001
From: Matt Frank <mfrank@nvidia.com>
Date: Wed, 27 May 2026 11:56:10 -0500
Subject: [PATCH 2/4] Add jackknife bootstrap histogram to rcp_viewer

When --jackknife is given, resample the reference convergence runs 1000
times. Each resample draws submission_runs values with replacement and
takes a trimmed mean, dropping the ceil(0.10 * submission_runs) smallest
and largest values before averaging. Print an ASCII histogram of the
resulting score distribution. Add --seed for reproducible output.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
---
 .../visualization_scripts/rcp_viewer.py       | 33 +++++++++++++++++++
 1 file changed, 33 insertions(+)

diff --git a/mlperf_logging/rcp_checker/visualization_scripts/rcp_viewer.py b/mlperf_logging/rcp_checker/visualization_scripts/rcp_viewer.py
index 4447295..8ecc0f5 100755
--- a/mlperf_logging/rcp_checker/visualization_scripts/rcp_viewer.py
+++ b/mlperf_logging/rcp_checker/visualization_scripts/rcp_viewer.py
@@ -7,6 +7,8 @@
 import sys
 import os
 import argparse
+import math
+import numpy as np
 
 #Add the project root directory (assumed to be 3 levels up) to sys.path
 sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "../../..")))
@@ -16,6 +18,31 @@
 def print_rcp_record(record):
     print(f"{record['BS']},{record['RCP Mean']},{record['Min Epochs']}")
 
+def jackknife_scores(samples, num_runs, iterations=1000, rng=None):
+    '''Bootstrap submission-sized trimmed-mean scores from the reference runs.
+
+    Draw num_runs values with replacement from samples, trim k=ceil(10%) from
+    each end, and take the mean. Repeat iterations times, returning the scores.
+    '''
+    rng = rng if rng is not None else np.random.default_rng()
+    arr = np.asarray(samples, dtype=float)
+    k = math.ceil(0.10 * num_runs)
+    if num_runs - 2 * k <= 0:
+        sys.exit(f"Error: trimming {k} from each end of {num_runs} runs leaves no samples")
+    scores = np.empty(iterations)
+    for i in range(iterations):
+        draw = np.sort(rng.choice(arr, size=num_runs, replace=True))
+        scores[i] = draw[k:num_runs - k].mean()
+    return scores
+
+def print_histogram(scores, bar_width=50):
+    '''Print an ASCII text-bar histogram of scores using numpy auto-binning.'''
+    counts, edges = np.histogram(scores, bins='auto')
+    max_count = counts.max() if len(counts) else 0
+    for i, c in enumerate(counts):
+        bar = '#' * (round(bar_width * c / max_count) if max_count else 0)
+        print(f"{edges[i]:.1f}-{edges[i+1]:.1f} | {bar} ({c})")
+
 # this should be a method of rcp_checker.RCP_Checker, but it's missing.
 # Instead we derived it from _find_min_rcp()
 def find_max_rcp(checker, rcp_pass_arg='pruned_rcps'):
@@ -71,6 +98,8 @@ def main():
     parser.add_argument('--jackknife', type=int, metavar='GBS',
                         help='restrict output to the single real (non-interpolated) RCP at this '
                              'global batch size, and also print the benchmark submission_runs')
+    parser.add_argument('--seed', type=int, default=None,
+                        help='seed the RNG for reproducible --jackknife output')
 
 
     args = parser.parse_args()
@@ -90,6 +119,10 @@ def main():
                      f"(non-interpolated) RCP batch size for {args.benchmark}")
         print_rcp_record(record)
         print(f"submission_runs: {checker.submission_runs}")
+        scores = jackknife_scores(record['Epochs to converge'],
+                                  checker.submission_runs,
+                                  rng=np.random.default_rng(args.seed))
+        print_histogram(scores)
     elif not args.interpolate:
         data=checker._get_rcp_data(rcp_pass_arg)
         for key, record in data.items():

From 30b76de421bd3371441d614a9aabc089ad67889e Mon Sep 17 00:00:00 2001
From: Matt Frank <mfrank@nvidia.com>
Date: Wed, 27 May 2026 12:00:12 -0500
Subject: [PATCH 3/4] Rename --jackknife to --bootstrap in rcp_viewer

The resampling draws with replacement, which is a bootstrap, not a
jackknife, so name it accurately. Rewrite the flag help to lead with its
real purpose (producing the score histogram) rather than the output
restriction, and increase the resample count from 1000 to 10000 for a
smoother distribution.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
---
 .../visualization_scripts/rcp_viewer.py        | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/mlperf_logging/rcp_checker/visualization_scripts/rcp_viewer.py b/mlperf_logging/rcp_checker/visualization_scripts/rcp_viewer.py
index 8ecc0f5..af17983 100755
--- a/mlperf_logging/rcp_checker/visualization_scripts/rcp_viewer.py
+++ b/mlperf_logging/rcp_checker/visualization_scripts/rcp_viewer.py
@@ -18,7 +18,7 @@
 def print_rcp_record(record):
     print(f"{record['BS']},{record['RCP Mean']},{record['Min Epochs']}")
 
-def jackknife_scores(samples, num_runs, iterations=1000, rng=None):
+def bootstrap_scores(samples, num_runs, iterations=10000, rng=None):
     '''Bootstrap submission-sized trimmed-mean scores from the reference runs.
 
     Draw num_runs values with replacement from samples, trim k=ceil(10%) from
@@ -95,11 +95,11 @@ def main():
                     help='specify an RCP json file to use')
     parser.add_argument('--interpolate', action='store_true',
                         help='generate interpolated rcp min/mean for all batch sizes')
-    parser.add_argument('--jackknife', type=int, metavar='GBS',
-                        help='restrict output to the single real (non-interpolated) RCP at this '
-                             'global batch size, and also print the benchmark submission_runs')
+    parser.add_argument('--bootstrap', type=int, metavar='GBS',
+                        help='print a histogram of bootstrapped, submission-sized trimmed-mean '
+                             'scores for the real (non-interpolated) RCP at the given global batch size (GBS)')
     parser.add_argument('--seed', type=int, default=None,
-                        help='seed the RNG for reproducible --jackknife output')
+                        help='seed the RNG for reproducible --bootstrap output')
 
 
     args = parser.parse_args()
@@ -112,14 +112,14 @@ def main():
     if not args.no_header:
         print("BS,Mean,Min")
 
-    if args.jackknife is not None:
-        record = checker._find_rcp(args.jackknife, 'full_rcps')
+    if args.bootstrap is not None:
+        record = checker._find_rcp(args.bootstrap, 'full_rcps')
         if record is None:
-            sys.exit(f"Error: GBS {args.jackknife} is not a measured "
+            sys.exit(f"Error: GBS {args.bootstrap} is not a measured "
                      f"(non-interpolated) RCP batch size for {args.benchmark}")
         print_rcp_record(record)
         print(f"submission_runs: {checker.submission_runs}")
-        scores = jackknife_scores(record['Epochs to converge'],
+        scores = bootstrap_scores(record['Epochs to converge'],
                                   checker.submission_runs,
                                   rng=np.random.default_rng(args.seed))
         print_histogram(scores)

From 4bd6c8c33172c0173376a328476efc8e542fb0d2 Mon Sep 17 00:00:00 2001
From: Matt Frank <mfrank@nvidia.com>
Date: Wed, 27 May 2026 12:14:55 -0500
Subject: [PATCH 4/4] Print max speedup and tail probability in --bootstrap

Add two summary lines to --bootstrap output: max_speedup (RCP mean / RCP
min, the largest score ratio achievable from lucky-fast convergence) and
P(score < min), the measured fraction of bootstrap scores falling below
the RCP min.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
---
 .../rcp_checker/visualization_scripts/rcp_viewer.py           | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/mlperf_logging/rcp_checker/visualization_scripts/rcp_viewer.py b/mlperf_logging/rcp_checker/visualization_scripts/rcp_viewer.py
index af17983..223bbb2 100755
--- a/mlperf_logging/rcp_checker/visualization_scripts/rcp_viewer.py
+++ b/mlperf_logging/rcp_checker/visualization_scripts/rcp_viewer.py
@@ -119,9 +119,13 @@ def main():
                      f"(non-interpolated) RCP batch size for {args.benchmark}")
         print_rcp_record(record)
         print(f"submission_runs: {checker.submission_runs}")
+        max_speedup = record['RCP Mean'] / record['Min Epochs']
+        print(f"max_speedup (mean/min): {max_speedup}")
         scores = bootstrap_scores(record['Epochs to converge'],
                                   checker.submission_runs,
                                   rng=np.random.default_rng(args.seed))
+        prob_below_min = np.mean(scores < record['Min Epochs'])
+        print(f"P(score < min): {prob_below_min}")
         print_histogram(scores)
     elif not args.interpolate:
         data=checker._get_rcp_data(rcp_pass_arg)