From 3fe72582b1794339bd80f69fbbfb372237f09c67 Mon Sep 17 00:00:00 2001 From: Matt Frank Date: Wed, 27 May 2026 11:38:31 -0500 Subject: [PATCH 1/4] Add --jackknife flag to rcp_viewer --jackknife GBS restricts output to the single real (non-interpolated) RCP at the given global batch size, validating it against the full measured set (so pruned-out batch sizes are still accepted), and also prints the benchmark's submission_runs count. Co-Authored-By: Claude Opus 4.7 --- .../visualization_scripts/rcp_viewer.py | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/mlperf_logging/rcp_checker/visualization_scripts/rcp_viewer.py b/mlperf_logging/rcp_checker/visualization_scripts/rcp_viewer.py index 28e999d..4447295 100755 --- a/mlperf_logging/rcp_checker/visualization_scripts/rcp_viewer.py +++ b/mlperf_logging/rcp_checker/visualization_scripts/rcp_viewer.py @@ -68,7 +68,10 @@ def main(): help='specify an RCP json file to use') parser.add_argument('--interpolate', action='store_true', help='generate interpolated rcp min/mean for all batch sizes') - + parser.add_argument('--jackknife', type=int, metavar='GBS', + help='restrict output to the single real (non-interpolated) RCP at this ' + 'global batch size, and also print the benchmark submission_runs') + args = parser.parse_args() rcp_pass_arg='pruned_rcps' @@ -80,7 +83,14 @@ def main(): if not args.no_header: print("BS,Mean,Min") - if not args.interpolate: + if args.jackknife is not None: + record = checker._find_rcp(args.jackknife, 'full_rcps') + if record is None: + sys.exit(f"Error: GBS {args.jackknife} is not a measured " + f"(non-interpolated) RCP batch size for {args.benchmark}") + print_rcp_record(record) + print(f"submission_runs: {checker.submission_runs}") + elif not args.interpolate: data=checker._get_rcp_data(rcp_pass_arg) for key, record in data.items(): print_rcp_record(record) From 2060614c53a29471491c127efc5c7b715195722c Mon Sep 17 00:00:00 2001 From: Matt Frank Date: Wed, 27 May 2026 11:56:10 -0500 Subject: [PATCH 2/4] Add jackknife bootstrap histogram to rcp_viewer When --jackknife is given, resample the reference convergence runs 1000 times (drawing submission_runs values with replacement), take a trimmed mean (trim ceil(10%) from each end), and print an ASCII histogram of the resulting score distribution. Add --seed for reproducible output. Co-Authored-By: Claude Opus 4.7 --- .../visualization_scripts/rcp_viewer.py | 33 +++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/mlperf_logging/rcp_checker/visualization_scripts/rcp_viewer.py b/mlperf_logging/rcp_checker/visualization_scripts/rcp_viewer.py index 4447295..8ecc0f5 100755 --- a/mlperf_logging/rcp_checker/visualization_scripts/rcp_viewer.py +++ b/mlperf_logging/rcp_checker/visualization_scripts/rcp_viewer.py @@ -7,6 +7,8 @@ import sys import os import argparse +import math +import numpy as np #Add the project root directory (assumed to be 3 levels up) to sys.path sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "../../.."))) @@ -16,6 +18,31 @@ def print_rcp_record(record): print(f"{record['BS']},{record['RCP Mean']},{record['Min Epochs']}") +def jackknife_scores(samples, num_runs, iterations=1000, rng=None): + '''Bootstrap submission-sized trimmed-mean scores from the reference runs. + + Draw num_runs values with replacement from samples, trim k=ceil(10%) from + each end, and take the mean. Repeat iterations times, returning the scores. + ''' + rng = rng if rng is not None else np.random.default_rng() + arr = np.asarray(samples, dtype=float) + k = math.ceil(0.10 * num_runs) + if num_runs - 2 * k <= 0: + sys.exit(f"Error: trimming {k} from each end of {num_runs} runs leaves no samples") + scores = np.empty(iterations) + for i in range(iterations): + draw = np.sort(rng.choice(arr, size=num_runs, replace=True)) + scores[i] = draw[k:num_runs - k].mean() + return scores + +def print_histogram(scores, bar_width=50): + '''Print an ASCII text-bar histogram of scores using numpy auto-binning.''' + counts, edges = np.histogram(scores, bins='auto') + max_count = counts.max() if len(counts) else 0 + for i, c in enumerate(counts): + bar = '#' * (round(bar_width * c / max_count) if max_count else 0) + print(f"{edges[i]:.1f}-{edges[i+1]:.1f} | {bar} ({c})") + # this should be a method of rcp_checker.RCP_Checker, but it's missing. # Instead we derived it from _find_min_rcp() def find_max_rcp(checker, rcp_pass_arg='pruned_rcps'): @@ -71,6 +98,8 @@ def main(): parser.add_argument('--jackknife', type=int, metavar='GBS', help='restrict output to the single real (non-interpolated) RCP at this ' 'global batch size, and also print the benchmark submission_runs') + parser.add_argument('--seed', type=int, default=None, + help='seed the RNG for reproducible --jackknife output') args = parser.parse_args() @@ -90,6 +119,10 @@ def main(): f"(non-interpolated) RCP batch size for {args.benchmark}") print_rcp_record(record) print(f"submission_runs: {checker.submission_runs}") + scores = jackknife_scores(record['Epochs to converge'], + checker.submission_runs, + rng=np.random.default_rng(args.seed)) + print_histogram(scores) elif not args.interpolate: data=checker._get_rcp_data(rcp_pass_arg) for key, record in data.items(): From 30b76de421bd3371441d614a9aabc089ad67889e Mon Sep 17 00:00:00 2001 From: Matt Frank Date: Wed, 27 May 2026 12:00:12 -0500 Subject: [PATCH 3/4] Rename --jackknife to --bootstrap in rcp_viewer The resampling draws with replacement, which is a bootstrap, not a jackknife, so name it accurately. Rewrite the flag help to lead with its real purpose (producing the score histogram) rather than the output restriction, and increase the resample count from 1000 to 10000 for a smoother distribution. Co-Authored-By: Claude Opus 4.7 --- .../visualization_scripts/rcp_viewer.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/mlperf_logging/rcp_checker/visualization_scripts/rcp_viewer.py b/mlperf_logging/rcp_checker/visualization_scripts/rcp_viewer.py index 8ecc0f5..af17983 100755 --- a/mlperf_logging/rcp_checker/visualization_scripts/rcp_viewer.py +++ b/mlperf_logging/rcp_checker/visualization_scripts/rcp_viewer.py @@ -18,7 +18,7 @@ def print_rcp_record(record): print(f"{record['BS']},{record['RCP Mean']},{record['Min Epochs']}") -def jackknife_scores(samples, num_runs, iterations=1000, rng=None): +def bootstrap_scores(samples, num_runs, iterations=10000, rng=None): '''Bootstrap submission-sized trimmed-mean scores from the reference runs. Draw num_runs values with replacement from samples, trim k=ceil(10%) from @@ -95,11 +95,11 @@ def main(): help='specify an RCP json file to use') parser.add_argument('--interpolate', action='store_true', help='generate interpolated rcp min/mean for all batch sizes') - parser.add_argument('--jackknife', type=int, metavar='GBS', - help='restrict output to the single real (non-interpolated) RCP at this ' - 'global batch size, and also print the benchmark submission_runs') + parser.add_argument('--bootstrap', type=int, metavar='GBS', + help='print a histogram of bootstrapped, submission-sized trimmed-mean ' + 'scores for the real (non-interpolated) RCP at the given global batch size (GBS)') parser.add_argument('--seed', type=int, default=None, - help='seed the RNG for reproducible --jackknife output') + help='seed the RNG for reproducible --bootstrap output') args = parser.parse_args() @@ -112,14 +112,14 @@ def main(): if not args.no_header: print("BS,Mean,Min") - if args.jackknife is not None: - record = checker._find_rcp(args.jackknife, 'full_rcps') + if args.bootstrap is not None: + record = checker._find_rcp(args.bootstrap, 'full_rcps') if record is None: - sys.exit(f"Error: GBS {args.jackknife} is not a measured " + sys.exit(f"Error: GBS {args.bootstrap} is not a measured " f"(non-interpolated) RCP batch size for {args.benchmark}") print_rcp_record(record) print(f"submission_runs: {checker.submission_runs}") - scores = jackknife_scores(record['Epochs to converge'], + scores = bootstrap_scores(record['Epochs to converge'], checker.submission_runs, rng=np.random.default_rng(args.seed)) print_histogram(scores) From 4bd6c8c33172c0173376a328476efc8e542fb0d2 Mon Sep 17 00:00:00 2001 From: Matt Frank Date: Wed, 27 May 2026 12:14:55 -0500 Subject: [PATCH 4/4] Print max speedup and tail probability in --bootstrap Add two summary lines to --bootstrap output: max_speedup (RCP mean / RCP min, the largest score ratio achievable from lucky-fast convergence) and P(score < min), the measured fraction of bootstrap scores falling below the RCP min. Co-Authored-By: Claude Opus 4.7 --- .../rcp_checker/visualization_scripts/rcp_viewer.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/mlperf_logging/rcp_checker/visualization_scripts/rcp_viewer.py b/mlperf_logging/rcp_checker/visualization_scripts/rcp_viewer.py index af17983..223bbb2 100755 --- a/mlperf_logging/rcp_checker/visualization_scripts/rcp_viewer.py +++ b/mlperf_logging/rcp_checker/visualization_scripts/rcp_viewer.py @@ -119,9 +119,13 @@ def main(): f"(non-interpolated) RCP batch size for {args.benchmark}") print_rcp_record(record) print(f"submission_runs: {checker.submission_runs}") + max_speedup = record['RCP Mean'] / record['Min Epochs'] + print(f"max_speedup (mean/min): {max_speedup}") scores = bootstrap_scores(record['Epochs to converge'], checker.submission_runs, rng=np.random.default_rng(args.seed)) + prob_below_min = np.mean(scores < record['Min Epochs']) + print(f"P(score < min): {prob_below_min}") print_histogram(scores) elif not args.interpolate: data=checker._get_rcp_data(rcp_pass_arg)