Skip to content

Commit 93b8105

Browse files
author
Mateusz
committed
fix: retry and classify remote protocol disconnects
Retry one-shot OpenAI-compatible requests when httpx reports transient "Server disconnected" remote protocol errors so ZAI coding-plan traffic can recover without unnecessary failover. Map unrecovered RemoteProtocolError cases to BackendError(502) and lock behavior with targeted retry/error-mapping tests. Made-with: Cursor
1 parent 550de82 commit 93b8105

3 files changed

Lines changed: 94 additions & 13 deletions

File tree

src/connectors/openai.py

Lines changed: 18 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -305,6 +305,20 @@ def _raise_for_httpx_request_error(
305305
status_code=502,
306306
) from exc
307307

308+
if isinstance(exc, httpx.RemoteProtocolError):
309+
if logger.isEnabledFor(logging.WARNING):
310+
logger.warning(
311+
"Upstream protocol error (remote disconnect): %s: %s",
312+
url,
313+
exc,
314+
extra=log_extra,
315+
)
316+
raise BackendError(
317+
message=f"Upstream protocol error: remote server disconnected ({exc!s})",
318+
details={"url": url, "reason": "remote_protocol_error"},
319+
status_code=502,
320+
) from exc
321+
308322
logger.error(
309323
"Request failed to %s: %s",
310324
url,
@@ -322,7 +336,10 @@ def _is_retryable_http2_stream_termination(exc: httpx.RequestError) -> bool:
322336
if not isinstance(exc, httpx.RemoteProtocolError):
323337
return False
324338
message = str(exc)
325-
return "ConnectionTerminated" in message and "ErrorCodes.NO_ERROR" in message
339+
lowered = message.lower()
340+
if "ConnectionTerminated" in message and "ErrorCodes.NO_ERROR" in message:
341+
return True
342+
return "server disconnected" in lowered
326343

327344

328345
class OpenAIConnector(LLMBackend):

tests/unit/connectors/test_openai_canonical.py

Lines changed: 53 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -600,11 +600,59 @@ async def test_stream_completion_retries_once_on_http2_no_error_termination(
600600
chunk async for chunk in openai_connector.stream_completion(request)
601601
]
602602

603-
assert chunks == [
604-
'data: {"choices":[{"delta":{"content":"ok"}}]}\n\n',
605-
"data: [DONE]\n\n",
606-
]
607-
assert openai_connector._capture_http_client.send.await_count == 2
603+
assert chunks == [
604+
'data: {"choices":[{"delta":{"content":"ok"}}]}\n\n',
605+
"data: [DONE]\n\n",
606+
]
607+
assert openai_connector._capture_http_client.send.await_count == 2
608+
609+
@pytest.mark.asyncio
610+
async def test_stream_completion_retries_once_on_server_disconnected(
611+
self, openai_connector
612+
):
613+
request = CanonicalChatRequest(
614+
model="gpt-4",
615+
messages=[ChatMessage(role="user", content="Hello")],
616+
max_tokens=100,
617+
stream=True,
618+
)
619+
http_request = httpx.Request(
620+
"POST",
621+
"https://api.openai.com/v1/chat/completions",
622+
json={"model": "gpt-4", "stream": True},
623+
)
624+
streamed_response = MagicMock()
625+
streamed_response.status_code = 200
626+
streamed_response.headers = {"content-type": "text/event-stream"}
627+
streamed_response.aiter_bytes = lambda: _aiter_bytes(
628+
b'data: {"choices":[{"delta":{"content":"ok"}}]}\n\n',
629+
b"data: [DONE]\n\n",
630+
)
631+
streamed_response.aclose = AsyncMock()
632+
633+
openai_connector.client.build_request.return_value = http_request
634+
with patch.object(
635+
openai_connector,
636+
"_prepare_payload",
637+
new_callable=AsyncMock,
638+
return_value={"model": "gpt-4", "messages": []},
639+
):
640+
openai_connector._capture_http_client.send = AsyncMock(
641+
side_effect=[
642+
httpx.RemoteProtocolError("Server disconnected"),
643+
streamed_response,
644+
]
645+
)
646+
647+
chunks = [
648+
chunk async for chunk in openai_connector.stream_completion(request)
649+
]
650+
651+
assert chunks == [
652+
'data: {"choices":[{"delta":{"content":"ok"}}]}\n\n',
653+
"data: [DONE]\n\n",
654+
]
655+
assert openai_connector._capture_http_client.send.await_count == 2
608656

609657

610658
class TestOpenAIPayloadCleaning:

tests/unit/openai_connector_tests/test_read_error_handling.py

Lines changed: 23 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -113,6 +113,24 @@ def test_unknown_error_maps_to_service_unavailable_503(self) -> None:
113113
assert ctx.value.status_code == 503
114114
assert "Could not connect to backend" in ctx.value.message
115115

116+
def test_remote_protocol_error_maps_to_backend_error_502(self) -> None:
117+
"""httpx.RemoteProtocolError should map to BackendError(502)."""
118+
exc = httpx.RemoteProtocolError(
119+
"Server disconnected",
120+
request=httpx.Request("POST", "https://example.com/v1/chat/completions"),
121+
)
122+
123+
with pytest.raises(BackendError) as ctx:
124+
_raise_for_httpx_request_error(
125+
exc,
126+
url="https://example.com/v1/chat/completions",
127+
log_extra=None,
128+
)
129+
130+
assert ctx.value.status_code == 502
131+
assert ctx.value.details.get("reason") == "remote_protocol_error"
132+
assert "remote server disconnected" in ctx.value.message.lower()
133+
116134

117135
class TestRetryableHttp2StreamTermination:
118136
"""Test HTTP/2 stream termination retry detection."""
@@ -130,15 +148,14 @@ def test_read_error_is_not_retryable(self) -> None:
130148

131149
assert not _is_retryable_http2_stream_termination(exc)
132150

133-
def test_remote_protocol_error_with_no_error_is_retryable(self) -> None:
134-
"""httpx.RemoteProtocolError with NO_ERROR termination is retryable."""
151+
def test_remote_protocol_error_with_server_disconnected_is_retryable(self) -> None:
152+
"""httpx.RemoteProtocolError with server disconnected message is retryable."""
135153
exc = httpx.RemoteProtocolError(
136-
"Server disconnected without sending a response.",
154+
"Server disconnected",
137155
request=httpx.Request("POST", "https://example.com/v1/chat/completions"),
138156
)
139157

140-
# This is NOT retryable because it doesn't have ConnectionTerminated/NO_ERROR
141-
assert not _is_retryable_http2_stream_termination(exc)
158+
assert _is_retryable_http2_stream_termination(exc)
142159

143160
def test_remote_protocol_error_with_graceful_termination_is_retryable(self) -> None:
144161
"""httpx.RemoteProtocolError with graceful HTTP/2 termination is retryable."""
@@ -147,5 +164,4 @@ def test_remote_protocol_error_with_graceful_termination_is_retryable(self) -> N
147164
"Server disconnected without sending a response. ConnectionTerminated",
148165
request=httpx.Request("POST", "https://example.com/v1/chat/completions"),
149166
)
150-
# Note: This doesn't match the full pattern because NO_ERROR is missing
151-
assert not _is_retryable_http2_stream_termination(exc)
167+
assert _is_retryable_http2_stream_termination(exc)

0 commit comments

Comments
 (0)