Add an option to remove the generated repeat_* directories after each benchmark run.

lyglst · Orbax Authors · commit 6bed1eedba27 · 2026-04-09T14:17:12.000-07:00
PiperOrigin-RevId: 897288357
diff --git a/checkpoint/orbax/checkpoint/_src/testing/benchmarks/core/config_parsing.py b/checkpoint/orbax/checkpoint/_src/testing/benchmarks/core/config_parsing.py
@@ -107,6 +107,7 @@ def create_test_suite_from_config(
     config_path: str,
     output_dir: str | None = None,
     local_directory: str | None = None,
+    remove_repeated_dir: bool = False,
 ) -> core.TestSuite:
   """Creates a single TestSuite object from the benchmark configuration.
 
@@ -116,6 +117,8 @@ def create_test_suite_from_config(
       results will be stored in a temporary directory.
     local_directory: Optional local directory for benchmark results. This is
       used for ECM benchmarks.
+    remove_repeated_dir: Whether to remove the generated repeat_* directories
+      after execution.
 
   Returns:
     A TestSuite object containing all benchmarks generated from the config.
@@ -200,4 +203,5 @@ def create_test_suite_from_config(
       num_repeats=num_repeats,
       output_dir=output_dir,
       local_directory=local_directory,
+      remove_repeated_dir=remove_repeated_dir,
   )
diff --git a/checkpoint/orbax/checkpoint/_src/testing/benchmarks/core/core.py b/checkpoint/orbax/checkpoint/_src/testing/benchmarks/core/core.py
@@ -98,6 +98,8 @@ class TestResult:
   error: Exception | None = (
       None  # The error raised during the test run, if any.
   )
+  path: epath.Path | None = None
+  local_path: epath.Path | None = None
 
   def is_successful(self) -> bool:
     """Returns whether the test run was successful."""
@@ -203,6 +205,8 @@ def run(self, repeat_index: int | None = None) -> TestResult:
     )
     try:
       result = self.test_fn(context)
+      result.path = path
+      result.local_path = local_path
     except Exception as e:  # pylint: disable=broad-exception-caught
       # We catch all exceptions to ensure that any error during the test
       # execution is recorded in the TestResult.
@@ -220,7 +224,12 @@ def run(self, repeat_index: int | None = None) -> TestResult:
           e,
           exc_info=True,
       )
-      result = TestResult(metrics=metric_lib.Metrics(), error=e)
+      result = TestResult(
+          metrics=metric_lib.Metrics(),
+          error=e,
+          path=path,
+          local_path=local_path,
+      )
     result.metrics.name = name
 
     result.metrics.report()
@@ -401,13 +410,15 @@ def __init__(
       skip_incompatible_mesh_configs: bool = True,
       num_repeats: int = 1,
       local_directory: str | None = None,
+      remove_repeated_dir: bool = False,
   ):
     self._name = name
     self._benchmarks_generators = benchmarks_generators
     self._skip_incompatible_mesh_configs = skip_incompatible_mesh_configs
     self._num_repeats = num_repeats
     self._output_dir = output_dir
     self._local_directory = local_directory
+    self._remove_repeated_dir = remove_repeated_dir
     tensorboard_dir = None
     if output_dir:
       tensorboard_dir = epath.Path(output_dir) / "tensorboard"
@@ -459,6 +470,10 @@ def run(self) -> Sequence[TestResult]:
               checkpoint_config=benchmark.checkpoint_config,
               error=result.error,
           )
+          if self._remove_repeated_dir:
+            multihost.sync_global_processes("test_suite:repeat_cleanup")
+            self._remove_repeat_directory(result.path)
+            self._remove_repeat_directory(result.local_path)
 
     if not all_results:
       logging.warning("No benchmarks were run for this suite.")
diff --git a/checkpoint/orbax/checkpoint/_src/testing/benchmarks/run_benchmarks.py b/checkpoint/orbax/checkpoint/_src/testing/benchmarks/run_benchmarks.py
@@ -55,6 +55,11 @@
     False,
     'Enables HLO dumping to a subdirectory within --output_directory.',
 )
+_REMOVE_REPEATED_DIR = flags.DEFINE_bool(
+    'remove_repeated_dir',
+    False,
+    'Remove the generated repeat_* directories after execution.',
+)
 
 
 
@@ -126,7 +131,10 @@ def _configure_hlo_dump(output_directory: str):
 
 
 def _run_benchmarks(
-    config_file: str, output_directory: str, local_directory: str | None = None
+    config_file: str,
+    output_directory: str,
+    local_directory: str | None = None,
+    remove_repeated_dir: bool = False,
 ) -> None:
   """Runs Orbax checkpoint benchmarks based on a generator class and a config file.
 
@@ -135,6 +143,8 @@ def _run_benchmarks(
     output_directory: Directory to store benchmark results in.
     local_directory: Local directory for benchmark results. This is used for ECM
       benchmarks.
+    remove_repeated_dir: Whether to remove the generated repeat_* directories
+      after execution.
 
   Raises:
     RuntimeError: If any benchmark test fails.
@@ -155,6 +165,7 @@ def _run_benchmarks(
         config_file,
         output_dir=output_directory,
         local_directory=local_directory,
+        remove_repeated_dir=remove_repeated_dir,
     )
   except Exception as e:
     logging.error('Failed to create test suite from config: %s', e)
@@ -201,7 +212,10 @@ def main(argv: List[str]) -> None:
   logging.info('Set jax_enable_x64=True')
 
   _run_benchmarks(
-      _CONFIG_FILE.value, _OUTPUT_DIRECTORY.value, _LOCAL_DIRECTORY.value
+      _CONFIG_FILE.value,
+      _OUTPUT_DIRECTORY.value,
+      _LOCAL_DIRECTORY.value,
+      remove_repeated_dir=_REMOVE_REPEATED_DIR.value,
   )
 
   logging.info('run_benchmarks.py finished.')
diff --git a/checkpoint/orbax/checkpoint/_src/testing/benchmarks/run_benchmarks_pytorch.py b/checkpoint/orbax/checkpoint/_src/testing/benchmarks/run_benchmarks_pytorch.py
@@ -39,6 +39,11 @@
     'Output directory for benchmark results.',
     required=True,
 )
+_REMOVE_REPEATED_DIR = flags.DEFINE_bool(
+    'remove_repeated_dir',
+    False,
+    'Remove the generated repeat_* directories after execution.',
+)
 
 
 
@@ -73,7 +78,9 @@ def _init_torch_distributed() -> None:
         raise
 
 
-def _run_benchmarks(config_file: str, output_directory: str) -> None:
+def _run_benchmarks(
+    config_file: str, output_directory: str, remove_repeated_dir: bool = False
+) -> None:
   """Runs the benchmarks."""
   logging.info('Running benchmarks from config: %s', config_file)
   logging.info('Output directory: %s', output_directory)
@@ -89,7 +96,9 @@ def _run_benchmarks(config_file: str, output_directory: str) -> None:
 
   try:
     test_suite = config_parsing.create_test_suite_from_config(
-        config_file, output_dir=output_directory
+        config_file,
+        output_dir=output_directory,
+        remove_repeated_dir=remove_repeated_dir,
     )
   except Exception as e:
     logging.error('Failed to create test suite from config: %s', e)
@@ -119,7 +128,11 @@ def main(argv: Sequence[str]) -> None:
 
   logging.info('run_benchmarks_pytorch.py started.')
   _init_torch_distributed()
-  _run_benchmarks(_CONFIG_FILE.value, _OUTPUT_DIRECTORY.value)
+  _run_benchmarks(
+      _CONFIG_FILE.value,
+      _OUTPUT_DIRECTORY.value,
+      remove_repeated_dir=_REMOVE_REPEATED_DIR.value,
+  )
   logging.info('run_benchmarks_pytorch.py finished.')