Add commit=False to BestOfN for RL training rollouts

congwang-mk · congwang-mk · commit 5913eae2288b · 2026-03-04T15:13:16.000-08:00
Signed-off-by: Cong Wang <cwang@multikernel.io>
diff --git a/README.md b/README.md
@@ -153,6 +153,27 @@ candidates = [make_test(c.message.content) for c in resp.choices]
 outcome = BestOfN(candidates, scores=logprob_scores)(ws)
 ```
 
+#### RL training rollouts
+
+Pass ``commit=False`` to collect scores from all candidates without
+modifying the workspace. Every branch runs to completion and is then
+aborted, so the base stays pristine for the next batch. This gives you
+cheap, isolated rollout environments for policy-gradient methods like GRPO.
+
+```python
+from branching import Workspace, BestOfN
+
+ws = Workspace("/mnt/workspace")
+
+for prompt in training_batch:
+    candidates = [make_candidate(prompt) for _ in range(N)]
+    outcome = BestOfN(candidates, commit=False)(ws)
+
+    # All N results available -- extract (success, score) for training
+    rewards = [(r.success, r.score) for r in outcome.all_results]
+    trainer.step(prompt, rewards)
+```
+
 ### Reflexion (retry with feedback)
 
 Run a task, and if it fails, generate a critique and feed it back into the
diff --git a/src/branching/agent/patterns.py b/src/branching/agent/patterns.py
@@ -25,6 +25,11 @@ class BestOfN:
     finishing. The main thread picks the winner based on score, then
     signals each thread to commit (winner) or abort (losers).
 
+    Pass ``commit=False`` to abort all branches and return results
+    without modifying the workspace. Useful for RL training rollouts
+    where you need scores from every candidate but don't want to
+    commit any of them.
+
     Each candidate callable receives (path,) and returns ``bool`` or
     ``(success: bool, score: float)``.  A bare ``bool`` defaults to
     score 1.0/0.0 unless overridden by *scores* or *evaluate*.
@@ -50,13 +55,15 @@ def __init__(
         timeout: float | None = None,
         resource_limits: ResourceLimits | None = None,
         group_limits: ResourceLimits | None = None,
+        commit: bool = True,
     ):
         self._candidates = list(candidates)
         self._scores = list(scores) if scores is not None else None
         self._evaluate = evaluate
         self._timeout = timeout
         self._resource_limits = resource_limits
         self._group_limits = group_limits
+        self._commit = commit
 
     def _score(self, ret, path, index):
         """Parse candidate return and apply optional evaluator."""
@@ -173,7 +180,7 @@ def _on_scope(sp: Path, _i: int = index) -> None:
                     best_score = r.score
                     best_idx = i
 
-            if best_idx is not None:
+            if best_idx is not None and self._commit:
                 decisions[best_idx] = "commit"
 
             # Kill still-running tasks (only useful when timeout left
@@ -189,7 +196,7 @@ def _on_scope(sp: Path, _i: int = index) -> None:
             for f in futures:
                 f.result()
 
-        committed = best_idx is not None
+        committed = best_idx is not None and self._commit
         winner = results[best_idx] if best_idx is not None else None
         all_results = [
             r if r is not None else SpeculationResult(branch_index=i, success=False)
diff --git a/tests/test_speculate.py b/tests/test_speculate.py
@@ -185,6 +185,23 @@ def test_commits_exactly_one(self):
         assert len(MockFSBackend._commits) == 1
         assert len(MockFSBackend._aborts) == 2
 
+    def test_commit_false_aborts_all(self):
+        """commit=False aborts all branches and returns results."""
+        ws = _make_workspace()
+
+        candidates = [
+            lambda p, s=s: (True, float(s)) for s in range(3)
+        ]
+
+        outcome = BestOfN(candidates, commit=False)(ws)
+        assert not outcome.committed
+        assert outcome.winner.branch_index == 2  # best still identified
+        assert outcome.winner.score == 2.0
+        assert len(outcome.all_results) == 3
+        # All aborted, none committed
+        assert len(MockFSBackend._commits) == 0
+        assert len(MockFSBackend._aborts) == 3
+
     def test_runs_in_parallel(self):
         """Verify candidates actually run concurrently."""
         import time