Skip to content

Commit 52074a1

Browse files
committed
Replace magic numbers with named constants and configurable LLM timeout
- Extract SESSION_TTL, SESSION_GC_INTERVAL, SSE_KEEPALIVE_INTERVAL in main.py
- Add LLM_TIMEOUT_MS env var (default 60000) to Config with settings_db override
- Pass configurable timeout to all LLM providers instead of hardcoded 60s
1 parent 67149f5 commit 52074a1

7 files changed

Lines changed: 27 additions & 10 deletions

File tree

app/assistant/service.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -140,23 +140,27 @@ def _detect_db_type(db_url: str) -> str:
140140
def _build_provider(config: Config) -> LLMProvider:
141141
provider = config.llm_provider.lower()
142142
base_url = config.llm_base_url or _PROVIDER_BASE_URLS.get(provider)
143+
timeout = config.llm_timeout_ms / 1000
143144

144145
if provider == "anthropic":
145146
return AnthropicProvider(
146147
api_key=config.llm_api_key,
147148
model=config.llm_model,
148149
base_url=base_url,
150+
timeout=timeout,
149151
)
150152

151153
if provider == "ollama":
152154
return OllamaProvider(
153155
model=config.llm_model,
154156
base_url=base_url,
157+
timeout=timeout,
155158
)
156159

157160
# All others: OpenAI-compatible /v1/chat/completions
158161
return ChatCompletionsProvider(
159162
api_key=config.llm_api_key,
160163
model=config.llm_model,
161164
base_url=base_url,
165+
timeout=timeout,
162166
)

app/config.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ class Config:
1818
llm_model: str
1919
llm_base_url: str | None
2020
openai_api_mode: str
21+
llm_timeout_ms: int
2122
chat_history_enabled: bool
2223
chat_history_limit: int
2324

@@ -44,6 +45,7 @@ def load(cls) -> "Config":
4445
llm_model = os.getenv("LLM_MODEL", "gpt-5.4-mini")
4546
llm_base_url: str | None = os.getenv("LLM_BASE_URL") or None
4647
openai_api_mode = os.getenv("OPENAI_API_MODE", "chat").lower()
48+
llm_timeout_ms = int(os.getenv("LLM_TIMEOUT_MS", "60000"))
4749
chat_history_enabled = True
4850
chat_history_limit = 10
4951

@@ -74,6 +76,9 @@ def load(cls) -> "Config":
7476
v = settings_db.get_app_setting("enable_explanations")
7577
if v:
7678
enable_explanations = v.lower() == "true"
79+
v = settings_db.get_app_setting("llm_timeout_ms")
80+
if v:
81+
llm_timeout_ms = int(v)
7782
v = settings_db.get_app_setting("chat_history_enabled")
7883
if v:
7984
chat_history_enabled = v.lower() == "true"
@@ -108,6 +113,7 @@ def load(cls) -> "Config":
108113
llm_model=llm_model,
109114
llm_base_url=llm_base_url,
110115
openai_api_mode=openai_api_mode,
116+
llm_timeout_ms=llm_timeout_ms,
111117
chat_history_enabled=chat_history_enabled,
112118
chat_history_limit=chat_history_limit,
113119
)

app/llm/providers/anthropic.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,10 +14,11 @@
1414

1515

1616
class AnthropicProvider(LLMProvider):
17-
def __init__(self, api_key: str, model: str, base_url: str | None) -> None:
17+
def __init__(self, api_key: str, model: str, base_url: str | None, timeout: float = 60) -> None:
1818
self.api_key = api_key
1919
self.model = model
2020
self.base_url = (base_url or "https://api.anthropic.com").rstrip("/")
21+
self.timeout = timeout
2122

2223
async def generate(self, messages: List[Dict[str, Any]]) -> Dict[str, Any]:
2324
if not self.api_key:
@@ -47,7 +48,7 @@ async def generate(self, messages: List[Dict[str, Any]]) -> Dict[str, Any]:
4748
}
4849

4950
url = f"{self.base_url}/v1/messages"
50-
async with httpx.AsyncClient(timeout=60) as client:
51+
async with httpx.AsyncClient(timeout=self.timeout) as client:
5152
response = await client.post(url, headers=headers, json=payload)
5253
response.raise_for_status()
5354
data = response.json()

app/llm/providers/chat_completions.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,10 +12,11 @@
1212

1313

1414
class ChatCompletionsProvider(LLMProvider):
15-
def __init__(self, api_key: str, model: str, base_url: str | None) -> None:
15+
def __init__(self, api_key: str, model: str, base_url: str | None, timeout: float = 60) -> None:
1616
self.api_key = api_key
1717
self.model = model
1818
self.base_url = (base_url or "https://api.openai.com").rstrip("/")
19+
self.timeout = timeout
1920

2021
async def generate(self, messages: List[Dict[str, Any]]) -> Dict[str, Any]:
2122
url = f"{self.base_url}/v1/chat/completions"
@@ -28,7 +29,7 @@ async def generate(self, messages: List[Dict[str, Any]]) -> Dict[str, Any]:
2829
if self.api_key:
2930
headers["Authorization"] = f"Bearer {self.api_key}"
3031

31-
async with httpx.AsyncClient(timeout=60) as client:
32+
async with httpx.AsyncClient(timeout=self.timeout) as client:
3233
response = await client.post(url, headers=headers, json=payload)
3334
response.raise_for_status()
3435
data = response.json()

app/llm/providers/ollama.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,9 +6,10 @@
66

77

88
class OllamaProvider(LLMProvider):
9-
def __init__(self, model: str, base_url: str | None) -> None:
9+
def __init__(self, model: str, base_url: str | None, timeout: float = 60) -> None:
1010
self.model = model
1111
self.base_url = (base_url or "http://localhost:11434").rstrip("/")
12+
self.timeout = timeout
1213

1314
async def generate(self, messages: List[Dict[str, Any]]) -> Dict[str, Any]:
1415
url = f"{self.base_url}/api/chat"
@@ -18,7 +19,7 @@ async def generate(self, messages: List[Dict[str, Any]]) -> Dict[str, Any]:
1819
"stream": False,
1920
"options": {"temperature": 0.1},
2021
}
21-
async with httpx.AsyncClient(timeout=60) as client:
22+
async with httpx.AsyncClient(timeout=self.timeout) as client:
2223
response = await client.post(url, json=payload)
2324
response.raise_for_status()
2425
data = response.json()

app/main.py

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -35,9 +35,12 @@ def _build_app_state() -> tuple[Config, ToolRegistry]:
3535

3636
SUPPORTED_MCP_VERSIONS = {"2025-11-25", "2025-06-18", "2025-03-26"}
3737

38+
SESSION_TTL = 600 # seconds before idle session expires
39+
SESSION_GC_INTERVAL = 60 # seconds between garbage collection sweeps
40+
SSE_KEEPALIVE_INTERVAL = 15 # seconds between SSE keepalive pings
41+
3842
_sessions: Dict[str, Tuple[asyncio.Queue[str], float]] = {}
3943
_sessions_lock = asyncio.Lock()
40-
_session_ttl_seconds = 600
4144

4245

4346
def reload_config() -> None:
@@ -84,13 +87,13 @@ async def _enqueue(session_id: str, payload: Dict[str, Any]) -> bool:
8487

8588
async def _gc_sessions() -> None:
8689
while True:
87-
await asyncio.sleep(60)
90+
await asyncio.sleep(SESSION_GC_INTERVAL)
8891
now = time.time()
8992
async with _sessions_lock:
9093
expired = [
9194
session_id
9295
for session_id, (_, last_seen) in _sessions.items()
93-
if now - last_seen > _session_ttl_seconds
96+
if now - last_seen > SESSION_TTL
9497
]
9598
for session_id in expired:
9699
_sessions.pop(session_id, None)
@@ -296,7 +299,7 @@ async def event_stream():
296299
try:
297300
while True:
298301
try:
299-
message = await asyncio.wait_for(queue.get(), timeout=15)
302+
message = await asyncio.wait_for(queue.get(), timeout=SSE_KEEPALIVE_INTERVAL)
300303
yield f"event: message\ndata: {message}\n\n"
301304
except asyncio.TimeoutError:
302305
yield ": keepalive\n\n"

tests/test_mcp_api.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -244,6 +244,7 @@ async def test_ui_routes_use_latest_runtime_config(
244244
llm_model="gpt-5.4-mini",
245245
llm_base_url=None,
246246
openai_api_mode="chat",
247+
llm_timeout_ms=60000,
247248
chat_history_enabled=True,
248249
chat_history_limit=10,
249250
)

0 commit comments

Comments (0)