Skip to content

Commit 44bc690

Browse files
committed
Fix timeout evaluations getting positive fitness
1 parent: 65cbbe8 · commit: 44bc690

5 files changed

Lines changed: 63 additions & 10 deletions

File tree

openevolve/evaluator.py

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -262,7 +262,11 @@ async def evaluate_program(
262262
"error_type": "timeout",
263263
}
264264

265-
return {"error": 0.0, "timeout": True}
265+
return {
266+
"combined_score": 0.0,
267+
"error": "Evaluation timed out",
268+
"timeout": True,
269+
}
266270

267271
except Exception as e:
268272
last_exception = e
@@ -400,7 +404,12 @@ async def run_stage1():
400404
except asyncio.TimeoutError:
401405
logger.warning(f"Stage 1 evaluation timed out after {self.config.timeout}s")
402406
return EvaluationResult(
403-
metrics={"stage1_passed": 0.0, "error": 0.0, "timeout": True},
407+
metrics={
408+
"combined_score": 0.0,
409+
"stage1_passed": 0.0,
410+
"error": "Stage 1 evaluation timed out",
411+
"timeout": True,
412+
},
404413
artifacts={
405414
"failure_stage": "stage1",
406415
"timeout": True,
@@ -447,7 +456,9 @@ async def run_stage2():
447456
"failure_stage": "stage2",
448457
}
449458
)
459+
stage1_eval_result.metrics["combined_score"] = 0.0
450460
stage1_eval_result.metrics["stage2_passed"] = 0.0
461+
stage1_eval_result.metrics["error"] = "Stage 2 evaluation timed out"
451462
stage1_eval_result.metrics["timeout"] = True
452463
return stage1_eval_result
453464
except Exception as e:
@@ -509,7 +520,9 @@ async def run_stage3():
509520
"failure_stage": "stage3",
510521
}
511522
)
523+
merged_result.metrics["combined_score"] = 0.0
512524
merged_result.metrics["stage3_passed"] = 0.0
525+
merged_result.metrics["error"] = "Stage 3 evaluation timed out"
513526
merged_result.metrics["timeout"] = True
514527
return merged_result
515528
except Exception as e:

openevolve/utils/async_utils.py

Lines changed: 14 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -42,14 +42,19 @@ async def run_with_timeout(
4242
coro: Coroutine function to run
4343
timeout: Timeout in seconds
4444
*args: Arguments to pass to the coroutine
45-
timeout_error_value: Value to return on timeout (default: {"error": 0.0, "timeout": True})
45+
timeout_error_value: Value to return on timeout
46+
(default: {"combined_score": 0.0, "error": "Evaluation timed out", "timeout": True})
4647
**kwargs: Keyword arguments to pass to the coroutine
4748
4849
Returns:
4950
Result of the coroutine or timeout_error_value on timeout
5051
"""
5152
if timeout_error_value is None:
52-
timeout_error_value = {"error": 0.0, "timeout": True}
53+
timeout_error_value = {
54+
"combined_score": 0.0,
55+
"error": "Evaluation timed out",
56+
"timeout": True,
57+
}
5358

5459
try:
5560
return await asyncio.wait_for(coro(*args, **kwargs), timeout=timeout)
@@ -68,14 +73,19 @@ async def run_sync_with_timeout(
6873
func: Synchronous function to run
6974
timeout: Timeout in seconds
7075
*args: Arguments to pass to the function
71-
timeout_error_value: Value to return on timeout (default: {"error": 0.0, "timeout": True})
76+
timeout_error_value: Value to return on timeout
77+
(default: {"combined_score": 0.0, "error": "Evaluation timed out", "timeout": True})
7278
**kwargs: Keyword arguments to pass to the function
7379
7480
Returns:
7581
Result of the function or timeout_error_value on timeout
7682
"""
7783
if timeout_error_value is None:
78-
timeout_error_value = {"error": 0.0, "timeout": True}
84+
timeout_error_value = {
85+
"combined_score": 0.0,
86+
"error": "Evaluation timed out",
87+
"timeout": True,
88+
}
7989

8090
try:
8191
loop = asyncio.get_event_loop()

openevolve/utils/metrics_utils.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ def safe_numeric_average(metrics: Dict[str, Any]) -> float:
2121

2222
numeric_values = []
2323
for value in metrics.values():
24-
if isinstance(value, (int, float)):
24+
if isinstance(value, (int, float)) and not isinstance(value, bool):
2525
try:
2626
# Convert to float and check if it's a valid number
2727
float_val = float(value)
@@ -53,7 +53,7 @@ def safe_numeric_sum(metrics: Dict[str, Any]) -> float:
5353

5454
numeric_sum = 0.0
5555
for value in metrics.values():
56-
if isinstance(value, (int, float)):
56+
if isinstance(value, (int, float)) and not isinstance(value, bool):
5757
try:
5858
# Convert to float and check if it's a valid number
5959
float_val = float(value)
@@ -99,7 +99,7 @@ def get_fitness_score(
9999
for key, value in metrics.items():
100100
# Exclude MAP feature dimensions from fitness calculation
101101
if key not in feature_dimensions:
102-
if isinstance(value, (int, float)):
102+
if isinstance(value, (int, float)) and not isinstance(value, bool):
103103
try:
104104
float_val = float(value)
105105
if not (float_val != float_val): # Check for NaN

tests/test_evaluator_timeout.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -162,8 +162,10 @@ async def run_test():
162162
self.assertLess(elapsed_time, 5)
163163

164164
# Should return timeout result
165+
self.assertIn("combined_score", result)
166+
self.assertEqual(result["combined_score"], 0.0)
165167
self.assertIn("error", result)
166-
self.assertEqual(result["error"], 0.0)
168+
self.assertEqual(result["error"], "Evaluation timed out")
167169
self.assertIn("timeout", result)
168170
self.assertTrue(result["timeout"])
169171

tests/test_metrics_utils.py

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
import unittest
2+
3+
from openevolve.utils.metrics_utils import get_fitness_score, safe_numeric_average
4+
5+
6+
class TestMetricsUtils(unittest.TestCase):
7+
def test_safe_numeric_average_excludes_boolean_values(self):
8+
metrics = {
9+
"combined_score": 0.0,
10+
"timeout": True,
11+
"stage1_passed": False,
12+
"latency_ms": 2.0,
13+
}
14+
15+
self.assertEqual(safe_numeric_average(metrics), 1.0)
16+
17+
def test_get_fitness_score_excludes_boolean_values_without_combined_score(self):
18+
metrics = {
19+
"error": 0.0,
20+
"timeout": True,
21+
"ranking_passed": False,
22+
}
23+
24+
self.assertEqual(get_fitness_score(metrics), 0.0)
25+
26+
27+
if __name__ == "__main__":
28+
unittest.main()

0 commit comments

Comments
 (0)