matdev83
diff --git a/src/connectors/anthropic.py b/src/connectors/anthropic.py
Lines changed: 1 addition & 13 deletions
diff --git a/src/connectors/openai.py b/src/connectors/openai.py
Lines changed: 167 additions & 123 deletions
@@ -26,7 +26,6 @@
 from src.core.common.exceptions import (
     AuthenticationError,
     ConfigurationError,
-    InvalidRequestError,
     ServiceUnavailableError,
 )
 from src.core.config.app_config import AppConfig
@@ -544,18 +543,7 @@ async def chat_completions(  # type: ignore[override]
         through :class:`ConnectorChatCompletionsRequest`; legacy positional call shapes
         are not supported at this boundary.
         """
-        if not isinstance(request, ConnectorChatCompletionsRequest):
-            raise InvalidRequestError(
-                message=(
-                    "AnthropicBackend.chat_completions requires ConnectorChatCompletionsRequest. "
-                    "Legacy request_data/processed_messages/effective_model invocation is not supported."
-                ),
-                details={
-                    "received_type": type(request).__name__,
-                    "connector": "anthropic",
-                },
-            )
-        return await self._chat_completions_canonical(request)
+        return await self._chat_completions_canonical(request)
 
     # -----------------------------------------------------------
     # Payload helpers
 
@@ -192,18 +192,21 @@ def _build_quota_exhaustion_stream_chunk(
     message = _extract_insufficient_quota_message(body) or (
         "Upstream quota was exhausted."
     )
-    error_payload = {
+    error_payload: dict[str, Any] = {
         "id": f"chatcmpl-error-{int(time.time())}",
         "object": "chat.completion.chunk",
         "created": int(time.time()),
         "model": model,
         "choices": [{"index": 0, "delta": {}, "finish_reason": "error"}],
-        "error": {
-            "message": message,
-            "type": "quota_exceeded",
-            "code": 503,
-            "status_code": 503,
-        },
+        "error": cast(
+            dict[str, Any],
+            {
+                "message": message,
+                "type": "quota_exceeded",
+                "code": 503,
+                "status_code": 503,
+            },
+        ),
     }
     error_body = error_payload["error"]
     if error_details and isinstance(error_body, dict):
@@ -237,11 +240,11 @@ def _attach_http_error_details(
     }
 
 
-def _raise_for_httpx_request_error(
-    exc: httpx.RequestError,
-    *,
-    url: str,
-    log_extra: dict[str, str] | None,
+def _raise_for_httpx_request_error(
+    exc: httpx.RequestError,
+    *,
+    url: str,
+    log_extra: dict[str, str] | None,
 ) -> NoReturn:
     """Map httpx transport errors to domain errors (read timeout vs connect vs other)."""
 
@@ -288,27 +291,41 @@ def _raise_for_httpx_request_error(
             status_code=504,
         ) from exc
 
+    if isinstance(exc, httpx.ReadError):
+        if logger.isEnabledFor(logging.WARNING):
+            logger.warning(
+                "Upstream read error (connection lost mid-stream): %s: %s",
+                url,
+                exc,
+                extra=log_extra,
+            )
+        raise BackendError(
+            message=f"Upstream read error: connection lost during read ({exc!s})",
+            details={"url": url, "reason": "read_error"},
+            status_code=502,
+        ) from exc
+
     logger.error(
         "Request failed to %s: %s",
         url,
         exc,
         exc_info=True,
         extra=log_extra if log_extra else None,
     )
-    raise ServiceUnavailableError(
-        message=f"Could not connect to backend ({exc!s})",
-        details={"url": url},
-    ) from exc
-
-
-def _is_retryable_http2_stream_termination(exc: httpx.RequestError) -> bool:
-    if not isinstance(exc, httpx.RemoteProtocolError):
-        return False
-    message = str(exc)
-    return "ConnectionTerminated" in message and "ErrorCodes.NO_ERROR" in message
-
-
-class OpenAIConnector(LLMBackend):
+    raise ServiceUnavailableError(
+        message=f"Could not connect to backend ({exc!s})",
+        details={"url": url},
+    ) from exc
+
+
+def _is_retryable_http2_stream_termination(exc: httpx.RequestError) -> bool:
+    if not isinstance(exc, httpx.RemoteProtocolError):
+        return False
+    message = str(exc)
+    return "ConnectionTerminated" in message and "ErrorCodes.NO_ERROR" in message
+
+
+class OpenAIConnector(LLMBackend):
     """Minimal OpenAI-compatible connector used by OpenRouterBackend in tests.
 
     It supports an optional `headers_override` kwarg and treats streaming
@@ -363,51 +380,51 @@ def __init__(
             getattr(self.config, "disable_health_checks", False)
         )
 
-        # Enable health checks only when neither config nor env disable them
-        self._health_check_enabled = not (
-            disable_health_checks_env or disable_health_checks_config
-        )
-
-    async def _send_request_with_retry(
-        self,
-        *,
-        build_request: Callable[[], httpx.Request],
-        stream: bool,
-        capture: HttpxBoundaryCaptureContext,
-        url: str,
-        log_extra: dict[str, str] | None,
-    ) -> httpx.Response:
-        request = build_request()
-        try:
-            return await self._capture_http_client.send(
-                request,
-                stream=stream,
-                capture=capture,
-            )
-        except httpx.RequestError as exc:
-            if _is_retryable_http2_stream_termination(exc):
-                logger.warning(
-                    "Transient upstream HTTP/2 termination for %s; retrying once",
-                    url,
-                    extra=log_extra if log_extra else None,
-                )
-                retry_request = build_request()
-                try:
-                    return await self._capture_http_client.send(
-                        retry_request,
-                        stream=stream,
-                        capture=capture,
-                    )
-                except httpx.RequestError as retry_exc:
-                    _raise_for_httpx_request_error(
-                        retry_exc,
-                        url=url,
-                        log_extra=log_extra,
-                    )
-            _raise_for_httpx_request_error(exc, url=url, log_extra=log_extra)
-
-    @property
-    def api_base_url(self) -> str:
+        # Enable health checks only when neither config nor env disable them
+        self._health_check_enabled = not (
+            disable_health_checks_env or disable_health_checks_config
+        )
+
+    async def _send_request_with_retry(
+        self,
+        *,
+        build_request: Callable[[], httpx.Request],
+        stream: bool,
+        capture: HttpxBoundaryCaptureContext,
+        url: str,
+        log_extra: dict[str, str] | None,
+    ) -> httpx.Response:
+        request = build_request()
+        try:
+            return await self._capture_http_client.send(
+                request,
+                stream=stream,
+                capture=capture,
+            )
+        except httpx.RequestError as exc:
+            if _is_retryable_http2_stream_termination(exc):
+                logger.warning(
+                    "Transient upstream HTTP/2 termination for %s; retrying once",
+                    url,
+                    extra=log_extra if log_extra else None,
+                )
+                retry_request = build_request()
+                try:
+                    return await self._capture_http_client.send(
+                        retry_request,
+                        stream=stream,
+                        capture=capture,
+                    )
+                except httpx.RequestError as retry_exc:
+                    _raise_for_httpx_request_error(
+                        retry_exc,
+                        url=url,
+                        log_extra=log_extra,
+                    )
+            _raise_for_httpx_request_error(exc, url=url, log_extra=log_extra)
+
+    @property
+    def api_base_url(self) -> str:
         """Return the API base URL."""
         return self._api_base_url
 
@@ -1181,19 +1198,19 @@ async def _handle_non_streaming_response(
 
         guarded_headers = self._apply_loop_guard_to_outbound_headers(headers)
         log_extra = self._get_log_extra(context)
-        response = await self._send_request_with_retry(
-            build_request=lambda: self.client.build_request(
-                "POST", url, json=payload, headers=guarded_headers
-            ),
-            stream=False,
-            capture=self._http_boundary_capture(
-                model=str(payload.get("model") or "unknown"),
-                context=context,
-            ),
-            url=url,
-            log_extra=log_extra if log_extra else None,
-        )
-        self.update_quota_headers(response.headers)
+        response = await self._send_request_with_retry(
+            build_request=lambda: self.client.build_request(
+                "POST", url, json=payload, headers=guarded_headers
+            ),
+            stream=False,
+            capture=self._http_boundary_capture(
+                model=str(payload.get("model") or "unknown"),
+                context=context,
+            ),
+            url=url,
+            log_extra=log_extra if log_extra else None,
+        )
+        self.update_quota_headers(response.headers)
 
         if int(response.status_code) >= 400:
             # For backwards compatibility with existing error handlers, still use HTTPException here.
@@ -1305,19 +1322,19 @@ async def _handle_streaming_response(
 
         guarded_headers = self._apply_loop_guard_to_outbound_headers(headers)
 
-        response = await self._send_request_with_retry(
-            build_request=lambda: self.client.build_request(
-                "POST", url, json=payload, headers=guarded_headers
-            ),
-            stream=True,
-            capture=self._http_boundary_capture(
-                model=str(payload.get("model") or "unknown"),
-                context=context,
-            ),
-            url=url,
-            log_extra=log_extra if log_extra else None,
-        )
-        self.update_quota_headers(response.headers)
+        response = await self._send_request_with_retry(
+            build_request=lambda: self.client.build_request(
+                "POST", url, json=payload, headers=guarded_headers
+            ),
+            stream=True,
+            capture=self._http_boundary_capture(
+                model=str(payload.get("model") or "unknown"),
+                context=context,
+            ),
+            url=url,
+            log_extra=log_extra if log_extra else None,
+        )
+        self.update_quota_headers(response.headers)
 
         status_code = (
             int(response.status_code) if hasattr(response, "status_code") else 200
@@ -1607,6 +1624,23 @@ async def iter_sse_messages() -> AsyncGenerator[str, None]:
                             details={"url": url, "reason": "read_timeout"},
                             status_code=504,
                         ) from exc
+                    except httpx.ReadError as exc:
+                        if buffer:
+                            yield buffer
+                            buffer = ""
+                        if logger.isEnabledFor(logging.WARNING):
+                            logger.warning(
+                                "Streaming read error during SSE for %s",
+                                url,
+                                extra=log_extra if log_extra else None,
+                            )
+                        raise BackendError(
+                            message=(
+                                f"Upstream read error: connection lost during streaming ({exc!s})"
+                            ),
+                            details={"url": url, "reason": "read_error"},
+                            status_code=502,
+                        ) from exc
                     except httpx.RequestError as exc:
                         if buffer:
                             yield buffer
@@ -2042,19 +2076,19 @@ async def _handle_responses_non_streaming_response(
 
         guarded_headers = self._apply_loop_guard_to_outbound_headers(headers)
 
-        response = await self._send_request_with_retry(
-            build_request=lambda: self.client.build_request(
-                "POST", url, json=payload, headers=guarded_headers
-            ),
-            stream=False,
-            capture=self._http_boundary_capture(
-                model=str(payload.get("model") or "unknown"),
-                context=context,
-            ),
-            url=url,
-            log_extra=None,
-        )
-        self.update_quota_headers(response.headers)
+        response = await self._send_request_with_retry(
+            build_request=lambda: self.client.build_request(
+                "POST", url, json=payload, headers=guarded_headers
+            ),
+            stream=False,
+            capture=self._http_boundary_capture(
+                model=str(payload.get("model") or "unknown"),
+                context=context,
+            ),
+            url=url,
+            log_extra=None,
+        )
+        self.update_quota_headers(response.headers)
 
         if int(response.status_code) >= 400:
             try:
@@ -2251,19 +2285,19 @@ async def stream_completion(
         payload["stream"] = True
 
         # Build and send request
-        response = await self._send_request_with_retry(
-            build_request=lambda: self.client.build_request(
-                "POST", url, json=payload, headers=guarded_headers
-            ),
-            stream=True,
-            capture=self._http_boundary_capture(
-                model=str(effective_model),
-                context=connector_context,
-            ),
-            url=url,
-            log_extra=None,
-        )
-        self.update_quota_headers(response.headers)
+        response = await self._send_request_with_retry(
+            build_request=lambda: self.client.build_request(
+                "POST", url, json=payload, headers=guarded_headers
+            ),
+            stream=True,
+            capture=self._http_boundary_capture(
+                model=str(effective_model),
+                context=connector_context,
+            ),
+            url=url,
+            log_extra=None,
+        )
+        self.update_quota_headers(response.headers)
 
         status_code = (
             int(response.status_code) if hasattr(response, "status_code") else 200
@@ -2402,6 +2436,16 @@ async def stream_completion(
                 details={"url": url, "reason": "read_timeout"},
                 status_code=504,
             ) from exc
+        except httpx.ReadError as exc:
+            if logger.isEnabledFor(logging.WARNING):
+                logger.warning("Streaming read error for %s", url)
+            raise BackendError(
+                message=(
+                    f"Upstream read error: connection lost during streaming ({exc!s})"
+                ),
+                details={"url": url, "reason": "read_error"},
+                status_code=502,
+            ) from exc
         except httpx.RequestError as exc:
             raise ServiceUnavailableError(
                 message=f"Streaming connection interrupted ({exc!s})"