|
2 | 2 |
|
3 | 3 | from __future__ import annotations |
4 | 4 |
|
| 5 | +import asyncio |
5 | 6 | import logging |
6 | 7 | from enum import StrEnum |
7 | 8 |
|
8 | 9 | import anthropic |
| 10 | +from anthropic import RateLimitError as _RateLimitError |
9 | 11 |
|
# Module-level logger, named after this module per stdlib convention.
logger = logging.getLogger(__name__)

# Retry configuration for rate-limit (429) errors.
# Retries use exponential backoff: RETRY_BASE_DELAY * 2**attempt,
# i.e. 2s, 4s, 8s with the defaults below.
MAX_RETRIES = 3
RETRY_BASE_DELAY = 2  # seconds; doubled on each successive retry attempt

# Maximum characters to send to Claude (leave room for prompt within 200K context)
MAX_CONTENT_CHARS = 1_500_000

# Maximum characters for chat context (smaller to leave room for conversation history
# and to reduce token usage for rate-limit-constrained plans)
MAX_CHAT_CONTEXT_CHARS = 200_000

# Default model for AI generation
DEFAULT_MODEL = "claude-sonnet-4-20250514"
@@ -133,22 +140,37 @@ async def generate_summary( |
133 | 140 | f"## File Contents\n{content}" |
134 | 141 | ) |
135 | 142 |
|
136 | | - try: |
137 | | - client = anthropic.AsyncAnthropic(api_key=api_key) |
138 | | - response = await client.messages.create( |
139 | | - model=DEFAULT_MODEL, |
140 | | - max_tokens=8192, |
141 | | - system=system_prompt, |
142 | | - messages=[{"role": "user", "content": user_content}], |
143 | | - ) |
144 | | - result = response.content[0].text |
145 | | - except Exception as exc: |
146 | | - logger.exception("Claude API call failed for summary_type=%s", summary_type.value) |
147 | | - msg = f"AI summary generation failed: {exc}" |
148 | | - raise RuntimeError(msg) from exc |
149 | | - |
150 | | - logger.info("Generated %s summary (%d chars)", summary_type.value, len(result)) |
151 | | - return result |
| 143 | + client = anthropic.AsyncAnthropic(api_key=api_key) |
| 144 | + last_exc: Exception | None = None |
| 145 | + |
| 146 | + for attempt in range(MAX_RETRIES): |
| 147 | + try: |
| 148 | + response = await client.messages.create( |
| 149 | + model=DEFAULT_MODEL, |
| 150 | + max_tokens=8192, |
| 151 | + system=system_prompt, |
| 152 | + messages=[{"role": "user", "content": user_content}], |
| 153 | + ) |
| 154 | + result = response.content[0].text |
| 155 | + logger.info("Generated %s summary (%d chars)", summary_type.value, len(result)) |
| 156 | + return result |
| 157 | + except _RateLimitError as exc: |
| 158 | + last_exc = exc |
| 159 | + delay = RETRY_BASE_DELAY * (2 ** attempt) |
| 160 | + logger.warning( |
| 161 | + "Rate limited (attempt %d/%d) for summary_type=%s, retrying in %ds", |
| 162 | + attempt + 1, MAX_RETRIES, summary_type.value, delay, |
| 163 | + ) |
| 164 | + if attempt < MAX_RETRIES - 1: |
| 165 | + await asyncio.sleep(delay) |
| 166 | + except Exception as exc: |
| 167 | + logger.exception("Claude API call failed for summary_type=%s", summary_type.value) |
| 168 | + msg = f"AI summary generation failed: {exc}" |
| 169 | + raise RuntimeError(msg) from exc |
| 170 | + |
| 171 | + # All retries exhausted for rate limiting |
| 172 | + msg = "Rate limit exceeded. Please wait a minute before trying again." |
| 173 | + raise RuntimeError(msg) from last_exc |
152 | 174 |
|
153 | 175 |
|
154 | 176 | async def generate_chat_response( |
@@ -216,19 +238,34 @@ async def generate_chat_response( |
216 | 238 | messages.append({"role": msg_item["role"], "content": msg_item["content"]}) |
217 | 239 | messages.append({"role": "user", "content": message}) |
218 | 240 |
|
219 | | - try: |
220 | | - client = anthropic.AsyncAnthropic(api_key=api_key) |
221 | | - response = await client.messages.create( |
222 | | - model=DEFAULT_MODEL, |
223 | | - max_tokens=4096, |
224 | | - system=system_prompt, |
225 | | - messages=messages, |
226 | | - ) |
227 | | - result = response.content[0].text |
228 | | - except Exception as exc: |
229 | | - logger.exception("Claude chat API call failed") |
230 | | - msg = f"AI chat failed: {exc}" |
231 | | - raise RuntimeError(msg) from exc |
232 | | - |
233 | | - logger.info("Generated chat response (%d chars)", len(result)) |
234 | | - return result |
| 241 | + client = anthropic.AsyncAnthropic(api_key=api_key) |
| 242 | + last_exc: Exception | None = None |
| 243 | + |
| 244 | + for attempt in range(MAX_RETRIES): |
| 245 | + try: |
| 246 | + response = await client.messages.create( |
| 247 | + model=DEFAULT_MODEL, |
| 248 | + max_tokens=4096, |
| 249 | + system=system_prompt, |
| 250 | + messages=messages, |
| 251 | + ) |
| 252 | + result = response.content[0].text |
| 253 | + logger.info("Generated chat response (%d chars)", len(result)) |
| 254 | + return result |
| 255 | + except _RateLimitError as exc: |
| 256 | + last_exc = exc |
| 257 | + delay = RETRY_BASE_DELAY * (2 ** attempt) |
| 258 | + logger.warning( |
| 259 | + "Rate limited (attempt %d/%d) for chat, retrying in %ds", |
| 260 | + attempt + 1, MAX_RETRIES, delay, |
| 261 | + ) |
| 262 | + if attempt < MAX_RETRIES - 1: |
| 263 | + await asyncio.sleep(delay) |
| 264 | + except Exception as exc: |
| 265 | + logger.exception("Claude chat API call failed") |
| 266 | + msg = f"AI chat failed: {exc}" |
| 267 | + raise RuntimeError(msg) from exc |
| 268 | + |
| 269 | + # All retries exhausted for rate limiting |
| 270 | + msg = "Rate limit exceeded. Please wait a minute before trying again." |
| 271 | + raise RuntimeError(msg) from last_exc |
0 commit comments