From 850f11bd0126f45aa32c5a9a8f3bc7b1ee4670e4 Mon Sep 17 00:00:00 2001
From: Wenyuan Chi <wychi@meta.com>
Date: Thu, 4 Jun 2026 13:59:30 -0700
Subject: [PATCH] Route benchmark subprocess through the PAR bootstrap (#143)

Summary:

The kernel benchmark subprocess in `benchmark.py` launched its child with bare `sys.executable`. Inside a Buck PAR, `sys.executable` is the statically linked `#native-main#` interpreter; re-executing it directly leaves it un-bootstrapped (no `LD_LIBRARY_PATH`/`PYTHONPATH`/`LD_PRELOAD` from the PAR `_bootstrap.sh`), so the benchmark child fails to load bundled shared libraries (e.g. `libevict-thrift-py3-extensions.so`). Every candidate then benchmarks as `inf`, and `opt_manager` reports "did not produce an improved kernel".

KA already ships the bootstrap hook: `setup_internal_environment()` sets `KERNEL_PROFILER_PYTHON` to the PAR `_bootstrap.sh` (which rebuilds the environment from `$0` before exec-ing the native main), but only `ncu_profiler.py` honored it. Route the benchmark child through the same hook, falling back to `sys.executable` when the variable is unset or empty (OSS / non-PAR).

Reviewed By: stashuk-olek

Differential Revision: D107483498
---
 .../opt_worker_component/benchmarking/benchmark.py       | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/triton_kernel_agent/opt_worker_component/benchmarking/benchmark.py b/triton_kernel_agent/opt_worker_component/benchmarking/benchmark.py
index 9f8314ac..46de3b34 100644
--- a/triton_kernel_agent/opt_worker_component/benchmarking/benchmark.py
+++ b/triton_kernel_agent/opt_worker_component/benchmarking/benchmark.py
@@ -20,6 +20,7 @@
 
 import json
 import logging
+import os
 import subprocess
 import sys
 import traceback
@@ -129,8 +130,14 @@ def benchmark_kernel(
                 results_json = self.artifacts_dir / "benchmark_results.json"
                 benchmark_script = Path(__file__).parent / "kernel_subprocess.py"
 
+                # Use KERNEL_PROFILER_PYTHON (the PAR bootstrap) when set, like
+                # ncu_profiler.py; bare sys.executable is un-bootstrapped in a PAR.
+                bench_python = (
+                    os.environ.get("KERNEL_PROFILER_PYTHON") or sys.executable
+                )
+
                 cmd = [
-                    sys.executable,
+                    bench_python,
                     str(benchmark_script),
                     "--problem",
                     str(problem_file),