Add Tournament pattern: pairwise elimination bracket

congwang-mk · congwang-mk · commit 820baece15dc · 2026-02-13T17:40:25.000-08:00
Signed-off-by: Cong Wang <cwang@multikernel.io>
diff --git a/README.md b/README.md
@@ -39,7 +39,7 @@ raises, everything is rolled back - the workspace is untouched.
 
 ## Agent patterns
 
-BranchContext ships with four high-level patterns that cover the most common
+BranchContext ships with five high-level patterns that cover the most common
 agent workflows. Each is a callable class: instantiate with config, call with
 a workspace.
 
@@ -142,6 +142,30 @@ outcome = TreeOfThoughts(
 )(ws)
 ```
 
+### Tournament (pairwise elimination)
+
+Generate N candidates in parallel, then narrow to one through pairwise
+elimination via a judge function. The convergent dual of Tree of Thoughts:
+starts wide, narrows to one.
+
+Use when you have a reliable pairwise comparator but no absolute scoring
+function: patch selection where an LLM judge picks the better diff,
+A/B-style evaluation where candidates are compared head-to-head, or
+any setting where relative ranking is easier than absolute scoring.
+
+```python
+from branching import Tournament
+
+def generate_patch(path: Path, index: int) -> bool:
+    return run_agent(workdir=path, seed=index)
+
+def judge(path_a: Path, path_b: Path) -> int:
+    # 0 = a wins, 1 = b wins
+    return llm_compare(path_a / "diff.patch", path_b / "diff.patch")
+
+outcome = Tournament(generate_patch, n=8, judge=judge)(ws)
+```
+
 ## Lower-level usage
 
 The patterns above are built on two lower-level primitives you can use
@@ -289,6 +313,7 @@ All patterns: instantiate with config, call with a `Workspace`, get a
 | **`BestOfN`** | `(task, n=3, *, timeout=None)` | Run N copies; commit highest-scoring success |
 | **`Reflexion`** | `(task, max_retries=3, *, critique=None)` | Retry with critique feedback loop |
 | **`TreeOfThoughts`** | `(strategies, *, evaluate=None, expand=None, max_depth=1, timeout=None)` | Parallel strategy tree with optional depth expansion |
+| **`Tournament`** | `(task, n=4, *, judge, timeout=None)` | Generate N candidates; pairwise elimination picks winner |
 
 ### Result types
 
diff --git a/src/branching/__init__.py b/src/branching/__init__.py
@@ -50,6 +50,7 @@
     "BestOfN",
     "Reflexion",
     "TreeOfThoughts",
+    "Tournament",
     # Results
     "SpeculationResult",
     "SpeculationOutcome",
@@ -82,6 +83,7 @@
     "BestOfN": ".agent.patterns",
     "Reflexion": ".agent.patterns",
     "TreeOfThoughts": ".agent.patterns",
+    "Tournament": ".agent.patterns",
     # Results
     "SpeculationResult": ".agent.result",
     "SpeculationOutcome": ".agent.result",
diff --git a/src/branching/agent/patterns.py b/src/branching/agent/patterns.py
@@ -418,3 +418,150 @@ def _multi_level(self, workspace: Workspace) -> SpeculationOutcome:
             all_results=all_results,
             committed=True,
         )
+
+
+class Tournament:
+    """Pairwise elimination bracket: generate N candidates, compare
+    pairwise via a judge function, commit the final winner.
+
+    The convergent dual of TreeOfThoughts: starts wide, narrows to one.
+
+    Each candidate runs on its own thread inside its own workspace branch
+    and keeps that branch open until the bracket has been decided, so the
+    judge can inspect both contenders' branch directories. Only candidates
+    whose task returned a truthy value enter the bracket.
+
+    Example:
+        outcome = Tournament(task, n=4, judge=judge)(ws)
+        # Commits the bracket winner
+    """
+
+    def __init__(
+        self,
+        task: Callable[[Path, int], bool],
+        n: int = 4,
+        *,
+        judge: Callable[[Path, Path], int],
+        timeout: float | None = None,
+    ):
+        """
+        Args:
+            task: Callable(branch_path, candidate_index) → success.
+                  Produces output in the branch directory.
+            n: Number of candidates to generate.
+            judge: Callable(path_a, path_b) → 0 (a wins) or 1 (b wins).
+                   Compares two candidates' branches during elimination.
+                   Any truthy return value is treated as "b wins".
+            timeout: Overall timeout in seconds for the generation phase.
+                     Candidates that have not finished by the deadline are
+                     left out of the bracket; the call still waits for
+                     their threads to exit before returning.
+        """
+        self._task = task
+        self._n = n
+        self._judge = judge
+        self._timeout = timeout
+
+    @staticmethod
+    def _run_bracket(
+        survivors: list[int],
+        branch_paths: list[Path],
+        judge: Callable[[Path, Path], int],
+    ) -> int:
+        """Single-elimination bracket. Returns the winning candidate index.
+
+        Args:
+            survivors: Candidate indices entering the bracket, in seeding
+                       order. Must be non-empty.
+            branch_paths: Branch directory per candidate, indexed by
+                          candidate index.
+            judge: Pairwise comparator; a truthy result means the second
+                   contender advances.
+        """
+        while len(survivors) > 1:
+            next_round: list[int] = []
+            # Adjacent survivors are paired: (0, 1), (2, 3), ...
+            for i in range(0, len(survivors) - 1, 2):
+                a, b = survivors[i], survivors[i + 1]
+                pick = judge(branch_paths[a], branch_paths[b])
+                next_round.append(b if pick else a)
+            # Odd candidate gets a bye
+            if len(survivors) % 2 == 1:
+                next_round.append(survivors[-1])
+            survivors = next_round
+        return survivors[0]
+
+    def __call__(self, workspace: Workspace) -> SpeculationOutcome:
+        """Run the tournament against ``workspace``.
+
+        Returns:
+            A SpeculationOutcome whose ``winner`` is the bracket winner's
+            result (``None`` when no candidate succeeded), whose
+            ``all_results`` holds one entry per candidate, and whose
+            ``committed`` flag is true when a winner was selected.
+
+        Raises:
+            Whatever the judge raises. In that case every candidate branch
+            is aborted before the exception propagates.
+        """
+        n = self._n
+        results: list[Optional[SpeculationResult]] = [None] * n
+        branch_paths: list[Optional[Path]] = [None] * n
+        # task_done[i]: candidate i finished (or failed) its task.
+        # decision_ready[i]: the bracket is decided; candidate i may now
+        # commit or abort according to decisions[i].
+        task_done = [threading.Event() for _ in range(n)]
+        decision_ready = [threading.Event() for _ in range(n)]
+        decisions = ["abort"] * n
+
+        def _run_candidate(index: int) -> None:
+            result = SpeculationResult(branch_index=index, success=False)
+            try:
+                with workspace.branch(
+                    f"tournament_{index}", on_success=None, on_error=None
+                ) as b:
+                    result.branch_path = b.path
+                    branch_paths[index] = b.path
+                    try:
+                        success = self._task(b.path, index)
+                        result.success = bool(success)
+                        result.return_value = success
+                    except Exception as e:
+                        result.exception = e
+
+                    results[index] = result
+                    task_done[index].set()
+
+                    # Keep the branch open until the bracket is decided so
+                    # the judge can still read this candidate's directory.
+                    decision_ready[index].wait()
+
+                    if decisions[index] == "commit":
+                        b.commit()
+                    else:
+                        b.abort()
+
+            except Exception as e:
+                # Branch creation, commit or abort failed. Record the error
+                # and make sure the coordinator is not left waiting.
+                # NOTE(review): a failure while committing the winner is
+                # recorded here but the outcome still reports committed.
+                result.exception = e
+                results[index] = result
+                task_done[index].set()
+
+        winner_idx: Optional[int] = None
+
+        with ThreadPoolExecutor(max_workers=n) as pool:
+            try:
+                futures = [pool.submit(_run_candidate, i) for i in range(n)]
+
+                # Wait for all tasks to finish, sharing one overall deadline
+                deadline = (
+                    time.monotonic() + self._timeout
+                    if self._timeout is not None
+                    else None
+                )
+                for ev in task_done:
+                    remaining = (
+                        max(0, deadline - time.monotonic())
+                        if deadline is not None
+                        else None
+                    )
+                    ev.wait(timeout=remaining)
+
+                # Filter to successful survivors; candidates that missed
+                # the deadline still have no result and are excluded.
+                survivors = [
+                    i for i, r in enumerate(results)
+                    if r is not None and r.success
+                ]
+
+                if len(survivors) == 1:
+                    winner_idx = survivors[0]
+                elif len(survivors) > 1:
+                    winner_idx = self._run_bracket(
+                        survivors, branch_paths, self._judge
+                    )
+
+                if winner_idx is not None:
+                    decisions[winner_idx] = "commit"
+            finally:
+                # Always release the workers. If the judge (or anything
+                # above) raised, they would otherwise block on
+                # decision_ready forever and the executor's shutdown on
+                # exit from the with-block would never return. On that
+                # path every decision is still "abort".
+                for ev in decision_ready:
+                    ev.set()
+
+            for f in futures:
+                f.result()
+
+        committed = winner_idx is not None
+        winner = results[winner_idx] if winner_idx is not None else None
+        all_results = [
+            r if r is not None else SpeculationResult(branch_index=i, success=False)
+            for i, r in enumerate(results)
+        ]
+
+        return SpeculationOutcome(
+            winner=winner,
+            all_results=all_results,
+            committed=committed,
+        )
diff --git a/tests/test_speculate.py b/tests/test_speculate.py
@@ -8,7 +8,7 @@
 from branching.core.base import FSBackend
 from branching.core.workspace import Workspace
 from branching.agent.speculate import Speculate
-from branching.agent.patterns import BestOfN, Reflexion, TreeOfThoughts
+from branching.agent.patterns import BestOfN, Reflexion, TreeOfThoughts, Tournament
 from branching.agent.result import SpeculationResult, SpeculationOutcome
 
 
@@ -381,6 +381,123 @@ def good(path):
         assert outcome.all_results[0].exception is not None
 
 
+class TestTournament:
+    """Behavioural tests for the Tournament pattern."""
+
+    def test_basic_bracket(self):
+        """Four candidates and a judge that always prefers the second
+        contender: candidate 3 takes the bracket."""
+        workspace = _make_workspace()
+
+        result = Tournament(
+            lambda path, index: True,
+            n=4,
+            judge=lambda path_a, path_b: 1,  # b always advances
+        )(workspace)
+
+        # Round 1: 0v1 -> 1 and 2v3 -> 3; final: 1v3 -> 3
+        assert result.committed
+        assert result.winner is not None
+        assert result.winner.branch_index == 3
+        assert len(result.all_results) == 4
+
+    def test_all_fail(self):
+        """When every task fails there is nothing to judge or commit."""
+        workspace = _make_workspace()
+
+        def forbidden_judge(path_a, path_b):
+            raise AssertionError("judge should not be called")
+
+        result = Tournament(
+            lambda path, index: False, n=3, judge=forbidden_judge
+        )(workspace)
+
+        assert not result.committed
+        assert result.winner is None
+        assert len(result.all_results) == 3
+
+    def test_single_survivor(self):
+        """A lone successful candidate wins without consulting the judge."""
+        workspace = _make_workspace()
+        invocations = []
+
+        def counting_judge(path_a, path_b):
+            invocations.append(1)
+            return 0
+
+        result = Tournament(
+            lambda path, index: index == 2, n=4, judge=counting_judge
+        )(workspace)
+
+        assert result.committed
+        assert result.winner.branch_index == 2
+        assert not invocations
+
+    def test_odd_candidates(self):
+        """With three candidates the last one sits out round 1 on a bye."""
+        workspace = _make_workspace()
+
+        result = Tournament(
+            lambda path, index: True,
+            n=3,
+            judge=lambda path_a, path_b: 0,  # a always advances
+        )(workspace)
+
+        assert result.committed
+        # Round 1: 0v1 -> 0, candidate 2 gets the bye; final: 0v2 -> 0
+        assert result.winner.branch_index == 0
+
+    def test_commits_exactly_one(self):
+        """The winner's branch is committed and every other one aborted."""
+        workspace = _make_workspace()
+
+        result = Tournament(
+            lambda path, index: True,
+            n=4,
+            judge=lambda path_a, path_b: 0,
+        )(workspace)
+
+        assert result.committed
+        assert len(MockFSBackend._commits) == 1
+        assert len(MockFSBackend._aborts) == 3
+
+    def test_runs_in_parallel(self):
+        """Candidates must overlap in time rather than run back to back."""
+        import time
+
+        workspace = _make_workspace()
+        started = time.monotonic()
+
+        def slow_task(path: Path, index: int) -> bool:
+            time.sleep(0.2)
+            return True
+
+        result = Tournament(
+            slow_task, n=3, judge=lambda path_a, path_b: 0
+        )(workspace)
+        duration = time.monotonic() - started
+
+        assert result.committed
+        # Three 0.2s tasks: ~0.2s when concurrent, ~0.6s when serial
+        assert duration < 0.5
+
+    def test_exception_in_candidate(self):
+        """A candidate whose task raises is eliminated; the rest compete."""
+        workspace = _make_workspace()
+
+        def flaky_task(path: Path, index: int) -> bool:
+            if index == 0:
+                raise RuntimeError("boom")
+            return True
+
+        result = Tournament(
+            flaky_task, n=3, judge=lambda path_a, path_b: 0
+        )(workspace)
+
+        assert result.committed
+        assert result.all_results[0].exception is not None
+        assert result.winner.branch_index != 0
+
+
 class TestSpeculationResult:
     def test_dataclass(self):
         r = SpeculationResult(branch_index=0, success=True, score=0.95)