@@ -121,6 +121,20 @@ def test_picks_highest_score(self):
121121 assert outcome .winner .score == 0.9
122122 assert len (outcome .all_results ) == 3
123123
def test_bool_candidates(self):
    """A candidate may return a plain bool instead of a (bool, score) pair.

    The first candidate reports failure and the second reports success, so
    the second one must be committed, carrying a score of 1.0.
    """
    workspace = _make_workspace()
    runner = BestOfN([lambda p: False, lambda p: True])

    result = runner(workspace)

    assert result.committed
    chosen = result.winner
    assert chosen.branch_index == 1
    assert chosen.score == 1.0
137+
124138 def test_skips_failures (self ):
125139 ws = _make_workspace ()
126140
@@ -136,7 +150,7 @@ def test_skips_failures(self):
136150 def test_all_fail (self ):
137151 ws = _make_workspace ()
138152
139- candidates = [lambda p : ( False , 0.0 ) for _ in range (3 )]
153+ candidates = [lambda p : False for _ in range (3 )]
140154
141155 outcome = BestOfN (candidates )(ws )
142156 assert not outcome .committed
@@ -177,16 +191,99 @@ def test_runs_in_parallel(self):
177191 ws = _make_workspace ()
178192 start = time .monotonic ()
179193
180- def slow (path : Path ) -> tuple [ bool , float ] :
194+ def slow (path : Path ) -> bool :
181195 time .sleep (0.2 )
182- return True , 1.0
196+ return True
183197
184198 outcome = BestOfN ([slow , slow , slow ])(ws )
185199 elapsed = time .monotonic () - start
186200 assert outcome .committed
187201 # 3 tasks @ 0.2s each; parallel should be ~0.2s, sequential ~0.6s
188202 assert elapsed < 0.5
189203
def test_scores_param(self):
    """With bool candidates, the ``scores`` argument decides the winner.

    Every candidate succeeds, so the one paired with the largest
    pre-computed score (index 1, -0.1) must be selected and must report
    that score.
    """
    workspace = _make_workspace()
    always_ok = [lambda p: True for _ in range(3)]
    precomputed = [-2.5, -0.1, -1.3]

    result = BestOfN(always_ok, scores=precomputed)(workspace)

    assert result.committed
    chosen = result.winner
    assert chosen.branch_index == 1  # largest pre-computed score
    assert chosen.score == -0.1
215+
def test_scores_ignored_for_tuple_return(self):
    """A score returned by the candidate itself beats the ``scores`` argument.

    The ``scores`` list favours index 0 (99.0 vs 1.0) while the candidates'
    own tuple scores favour index 1 (10.0 vs 5.0); index 1 must win.
    """
    workspace = _make_workspace()
    self_scoring = [
        lambda p: (True, 5.0),
        lambda p: (True, 10.0),
    ]
    runner = BestOfN(self_scoring, scores=[99.0, 1.0])

    result = runner(workspace)

    assert result.committed
    chosen = result.winner
    assert chosen.branch_index == 1  # the candidate's own 10.0 is the best
    assert chosen.score == 10.0
229+
def test_scores_skipped_for_failures(self):
    """A failed bool candidate cannot win on its pre-computed score.

    Index 0 fails even though it is paired with the larger score (99.0),
    so the successful index 1 must win with its own score of 0.5.
    """
    workspace = _make_workspace()
    runner = BestOfN([lambda p: False, lambda p: True], scores=[99.0, 0.5])

    result = runner(workspace)

    assert result.committed
    chosen = result.winner
    assert chosen.branch_index == 1
    assert chosen.score == 0.5
239+
def test_evaluate_callback(self):
    """External evaluate callback overrides candidate-supplied scores.

    Both candidates succeed and return their own scores (10.0 and 1.0).
    The ``evaluate`` callback returns the running number of calls, so the
    scores it hands out never match the candidates' own values.
    """
    ws = _make_workspace()

    candidates = [
        lambda p: (True, 10.0),  # candidate says 10
        lambda p: (True, 1.0),   # candidate says 1
    ]

    calls = []

    def evaluate(path):
        calls.append(path)
        return float(len(calls))  # 1.0 or 2.0, depending on call order

    outcome = BestOfN(candidates, evaluate=evaluate)(ws)
    assert outcome.committed
    assert len(calls) == 2
    # The order in which candidates are evaluated is not fixed, so the
    # winning index is not asserted. The largest value evaluate() returns
    # is always 2.0, though, and neither tuple score equals 2.0 -- so this
    # only holds when the callback's score replaced the candidate's own.
    assert outcome.winner.score == 2.0
257+
def test_evaluate_not_called_on_failure(self):
    """The ``evaluate`` callback runs once when only one candidate succeeds.

    One candidate fails and one succeeds, so exactly one evaluation is
    expected to be recorded.
    """
    workspace = _make_workspace()
    seen_paths = []

    def evaluate(path):
        # Record every invocation so the call count can be checked below.
        seen_paths.append(path)
        return 1.0

    runner = BestOfN([lambda p: False, lambda p: True], evaluate=evaluate)
    result = runner(workspace)

    assert result.committed
    assert len(seen_paths) == 1
271+
def test_evaluate_overrides_scores_param(self):
    """The ``evaluate`` callback takes priority over the ``scores`` argument.

    Both bool candidates succeed and are given pre-computed scores of
    100.0 and 1.0, yet the winner must report the constant 42.0 produced
    by ``evaluate``.
    """
    workspace = _make_workspace()
    always_ok = [lambda p: True, lambda p: True]
    runner = BestOfN(always_ok, scores=[100.0, 1.0], evaluate=lambda p: 42.0)

    result = runner(workspace)

    assert result.committed
    # Both candidates tie at 42.0, so only the score is checked, not the index.
    assert result.winner.score == 42.0
286+
190287
191288class TestReflexion :
192289 def test_succeeds_first_try (self ):
0 commit comments