22
33from __future__ import annotations
44
5+ import random
56import time
67from abc import ABC , abstractmethod
78from typing import TYPE_CHECKING , Any
1213from ._exceptions import ConnectionError , TimeoutError
1314from ._version import get_version
1415
16+ # Retry constants (OpenAI-style exponential backoff with jitter)
17+ INITIAL_RETRY_DELAY = 0.5 # seconds; base backoff delay, doubled for each successive attempt
18+ MAX_RETRY_DELAY = 8.0 # seconds; cap applied to the backoff delay before jitter
19+ MAX_RETRY_AFTER = 60 # seconds; a Retry-After value above this is ignored and backoff is used instead
20+ RETRYABLE_STATUS_CODES = (429 , 502 , 503 , 504 ) # rate limit (429) and server errors (502, 503, 504)
21+
1522if TYPE_CHECKING :
1623 from collections .abc import Mapping
1724
2431 HTTP2_AVAILABLE = False
2532
2633
def _calculate_retry_delay(
    attempt: int,
    max_retries: int,
    response_headers: Mapping[str, str] | None = None,
) -> float:
    """Return how many seconds to wait before the next retry.

    Follows the OpenAI pattern:

    1. If the response carries a ``Retry-After`` header that resolves to a
       wait in the range ``(0, MAX_RETRY_AFTER]`` seconds, that wait is
       returned unchanged. Both forms allowed by HTTP are understood: a
       number of seconds and an HTTP-date.
    2. Otherwise the delay is ``INITIAL_RETRY_DELAY * 2 ** attempt`` capped
       at ``MAX_RETRY_DELAY``, multiplied by a random factor in
       ``(0.75, 1.0]`` so concurrent clients do not retry in lockstep.

    Args:
        attempt: Zero-based index of the attempt that just failed.
        max_retries: Configured retry limit. Accepted for interface
            compatibility; it does not influence the computed delay.
        response_headers: Headers of the failed response, if there was one.

    Returns:
        The delay in seconds.
    """
    # Imported locally so this helper does not rely on a module-level import.
    from email.utils import mktime_tz, parsedate_tz

    if response_headers:
        retry_after = response_headers.get("retry-after") or response_headers.get(
            "Retry-After"
        )
        if not retry_after:
            # Header names are case-insensitive, but a plain dict is not:
            # fall back to a scan so spellings like "RETRY-AFTER" are found.
            for name, value in response_headers.items():
                if isinstance(name, str) and name.lower() == "retry-after":
                    retry_after = value
                    break

        if retry_after:
            retry_after_seconds = None
            try:
                retry_after_seconds = float(retry_after)
            except (TypeError, ValueError):
                # Not a number of seconds; try the HTTP-date form and turn
                # it into a wait relative to the current time.
                try:
                    parsed_date = parsedate_tz(str(retry_after))
                    if parsed_date is not None:
                        retry_after_seconds = mktime_tz(parsed_date) - time.time()
                except (TypeError, ValueError, OverflowError):
                    retry_after_seconds = None

            # Zero, negative, non-finite or overly long waits are ignored and
            # the regular backoff below is used instead.
            if (
                retry_after_seconds is not None
                and 0 < retry_after_seconds <= MAX_RETRY_AFTER
            ):
                return retry_after_seconds

    # Exponential backoff with jitter. The exponent is clamped so that
    # 2.0 ** exponent can never overflow a float.
    retries_done = min(attempt, 1000)
    sleep_seconds = min(INITIAL_RETRY_DELAY * (2.0**retries_done), MAX_RETRY_DELAY)
    jitter = 1 - 0.25 * random.random()
    return sleep_seconds * jitter
63+
64+
2765class HTTPClient (ABC ):
2866 """Abstract base class for HTTP clients."""
2967
@@ -137,12 +175,15 @@ def request(
137175 params = filtered_params if filtered_params else None ,
138176 json = json ,
139177 )
140- # Retry on server errors (502, 503, 504)
178+ # Retry on rate limits (429) and server errors (502, 503, 504)
141179 if (
142- response .status_code in ( 502 , 503 , 504 )
180+ response .status_code in RETRYABLE_STATUS_CODES
143181 and attempt < self ._max_retries
144182 ):
145- time .sleep (2 ** attempt * 0.5 )
183+ delay = _calculate_retry_delay (
184+ attempt , self ._max_retries , response .headers
185+ )
186+ time .sleep (delay )
146187 continue
147188 return response .content , response .status_code , response .headers
148189
@@ -155,7 +196,8 @@ def request(
155196
156197 # Exponential backoff before retry
157198 if attempt < self ._max_retries :
158- time .sleep (2 ** attempt * 0.1 )
199+ delay = _calculate_retry_delay (attempt , self ._max_retries )
200+ time .sleep (delay )
159201
160202 raise last_exception or ConnectionError ("Request failed after retries" )
161203
@@ -229,12 +271,15 @@ async def request(
229271 params = filtered_params if filtered_params else None ,
230272 json = json ,
231273 )
232- # Retry on server errors (502, 503, 504)
274+ # Retry on rate limits (429) and server errors (502, 503, 504)
233275 if (
234- response .status_code in ( 502 , 503 , 504 )
276+ response .status_code in RETRYABLE_STATUS_CODES
235277 and attempt < self ._max_retries
236278 ):
237- await asyncio .sleep (2 ** attempt * 0.5 )
279+ delay = _calculate_retry_delay (
280+ attempt , self ._max_retries , response .headers
281+ )
282+ await asyncio .sleep (delay )
238283 continue
239284 return response .content , response .status_code , response .headers
240285
@@ -247,7 +292,8 @@ async def request(
247292
248293 # Exponential backoff before retry
249294 if attempt < self ._max_retries :
250- await asyncio .sleep (2 ** attempt * 0.1 )
295+ delay = _calculate_retry_delay (attempt , self ._max_retries )
296+ await asyncio .sleep (delay )
251297
252298 raise last_exception or ConnectionError ("Request failed after retries" )
253299
0 commit comments