Commit bd650b7

updated tests

1 parent 5af3568 commit bd650b7

1 file changed: 55 additions & 14 deletions

tests/test_handlers_llm_futures.py
@@ -6,11 +6,13 @@
 """
 
 import time
+from collections.abc import Callable
 from concurrent.futures import Future
 from inspect import BoundArguments
 from typing import Any, override
 
-from effectful.handlers.futures import ThreadPoolFuturesInterpretation
+import effectful.handlers.futures as futures
+from effectful.handlers.futures import Executor, ThreadPoolFuturesInterpretation
 from effectful.handlers.llm import Template
 from effectful.handlers.llm.providers import OpenAIAPIProvider
 from effectful.ops.semantics import handler
@@ -28,34 +30,73 @@ def __init__(self, response, delay: float = 0.05, mapping={}):
 
     @override
     def _openai_api_call[T](
-        self, template: Any, args: BoundArguments, retty: type[T]
+        self, template: Template, args: BoundArguments, retty: type[T]
     ) -> T:
         self.calls.append((template, args.args, retty))
         time.sleep(self.delay)
-
-        return self.mapping.get((template, tuple(args.args)), self.response)
+        return self.mapping.get(template, {}).get(tuple(args.args), self.response)
 
 
 @Template.define
-def hiaku(topic: str) -> Future[str]:
+def hiaku(topic: str) -> str:
     """Return a hiaku about {topic}."""
     raise NotHandled
 
 
-# synchronous template for comparison
-@Template.define
-def hiaku_s(topic: str) -> str:
-    """Return a hiaku about {topic}."""
-    raise NotImplementedError
-
-
 def test_future_return_type_decodes_inner_type():
-    """Test that Future[int] templates correctly decode to int."""
+    """Test that llm templates correctly decode to the inner type, even wrapped in a future."""
     ref_hiaku = "apples to oranges, oranges to pears, I don't know what a hiaku is"
     mock_provider = SlowMockLLMProvider(ref_hiaku, delay=0.001)
 
     with handler(ThreadPoolFuturesInterpretation()), handler(mock_provider):
-        future = hiaku("apples")
+        future = Executor.submit(hiaku, "apples")
         assert isinstance(future, Future)
         result = future.result()
         assert result == ref_hiaku
+
+
+@Template.define
+def generate_program(task: str) -> Callable[[int], int]:
+    """Generate a Python program that {task}."""
+    raise NotHandled
+
+
+def test_concurrent_program_generation():
+    """Simulate concurrent LLM calls to generate Python programs and pick the best one."""
+    # Mock responses for different approaches to the same task
+    responses = {
+        generate_program: {
+            ("implement fibonacci algorithm 0",): "def fib(n: int) -> int: return n",
+            (
+                "implement fibonacci algorithm 1",
+            ): "def fib(n: int) -> int: return n * fib(n - 1)",
+            (
+                "implement fibonacci algorithm 2",
+            ): "def fib(n: int) -> int: return fib(n - 2) + fib(n - 1) if n > 1 else 0",
+        }
+    }
+
+    mock_provider = SlowMockLLMProvider(
+        response="print('Default')", delay=0.01, mapping=responses
+    )
+
+    user_request: str = "implement fibonacci algorithm"
+
+    with handler(ThreadPoolFuturesInterpretation()), handler(mock_provider):
+        # Launch multiple LLM calls concurrently
+        tasks = [
+            Executor.submit(generate_program, user_request + f" {i}")
+            for i in range(3)
+        ]
+
+        # Collect all results as they finish
+        results_as_completed = (f.result() for f in futures.as_completed(tasks))
+
+        valid_results = [(result, len(result)) for result in results_as_completed]
+
+        # Pick the "best" result (here: the longest program, as a naive heuristic)
+        best_program = max(valid_results, key=lambda pair: pair[1])[0]
+
+        # Assertions
+        assert len(valid_results) == 3
+        assert best_program in set(responses[generate_program].values())
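
Note on the new test: test_concurrent_program_generation exercises the standard concurrent.futures fan-out/fan-in pattern — submit several calls, harvest them with as_completed, then rank the results. Below is a minimal sketch of the same pattern using only the Python standard library; generate_program here is a hypothetical plain function standing in for the effectful Template/Executor machinery, purely for illustration.

from concurrent.futures import ThreadPoolExecutor, as_completed

def generate_program(task: str) -> str:
    # Hypothetical stand-in for the mocked LLM call in the test.
    return f"def solve(): ...  # {task}"

with ThreadPoolExecutor(max_workers=3) as pool:
    # Fan out: one future per task variant, all running concurrently.
    futs = [
        pool.submit(generate_program, f"implement fibonacci algorithm {i}")
        for i in range(3)
    ]
    # Fan in: results arrive in completion order, not submission order.
    results = [f.result() for f in as_completed(futs)]

# Same naive heuristic as the test: the longest program wins.
best = max(results, key=len)
print(best)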
