Skip to content

Commit 7312bdd

Browse files
shantanu patil and claude
authored and committed
Add rate limit retry logic and friendly error messages
- Backend: Retry up to 3 times with exponential backoff on 429 errors
- Backend: Reduce chat context to 200K chars to lower token usage
- Frontend: Show friendly amber-styled rate limit message instead of raw error
- Frontend: Detect 429 in SSE stream handler for clean error display

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
1 parent c094024 commit 7312bdd

2 files changed

Lines changed: 102 additions & 38 deletions

File tree

src/core/ai_summary.py

Lines changed: 71 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -2,18 +2,25 @@
22

33
from __future__ import annotations
44

5+
import asyncio
56
import logging
67
from enum import StrEnum
78

89
import anthropic
10+
from anthropic import RateLimitError as _RateLimitError
911

1012
logger = logging.getLogger(__name__)
1113

14+
# Retry configuration for rate-limit (429) errors
15+
MAX_RETRIES = 3
16+
RETRY_BASE_DELAY = 2 # seconds
17+
1218
# Maximum characters to send to Claude (leave room for prompt within 200K context)
1319
MAX_CONTENT_CHARS = 1_500_000
1420

15-
# Maximum characters for chat context (smaller to leave room for conversation history)
16-
MAX_CHAT_CONTEXT_CHARS = 500_000
21+
# Maximum characters for chat context (smaller to leave room for conversation history
22+
# and to reduce token usage for rate-limit-constrained plans)
23+
MAX_CHAT_CONTEXT_CHARS = 200_000
1724

1825
# Default model for AI generation
1926
DEFAULT_MODEL = "claude-sonnet-4-20250514"
@@ -133,22 +140,37 @@ async def generate_summary(
133140
f"## File Contents\n{content}"
134141
)
135142

136-
try:
137-
client = anthropic.AsyncAnthropic(api_key=api_key)
138-
response = await client.messages.create(
139-
model=DEFAULT_MODEL,
140-
max_tokens=8192,
141-
system=system_prompt,
142-
messages=[{"role": "user", "content": user_content}],
143-
)
144-
result = response.content[0].text
145-
except Exception as exc:
146-
logger.exception("Claude API call failed for summary_type=%s", summary_type.value)
147-
msg = f"AI summary generation failed: {exc}"
148-
raise RuntimeError(msg) from exc
149-
150-
logger.info("Generated %s summary (%d chars)", summary_type.value, len(result))
151-
return result
143+
client = anthropic.AsyncAnthropic(api_key=api_key)
144+
last_exc: Exception | None = None
145+
146+
for attempt in range(MAX_RETRIES):
147+
try:
148+
response = await client.messages.create(
149+
model=DEFAULT_MODEL,
150+
max_tokens=8192,
151+
system=system_prompt,
152+
messages=[{"role": "user", "content": user_content}],
153+
)
154+
result = response.content[0].text
155+
logger.info("Generated %s summary (%d chars)", summary_type.value, len(result))
156+
return result
157+
except _RateLimitError as exc:
158+
last_exc = exc
159+
delay = RETRY_BASE_DELAY * (2 ** attempt)
160+
logger.warning(
161+
"Rate limited (attempt %d/%d) for summary_type=%s, retrying in %ds",
162+
attempt + 1, MAX_RETRIES, summary_type.value, delay,
163+
)
164+
if attempt < MAX_RETRIES - 1:
165+
await asyncio.sleep(delay)
166+
except Exception as exc:
167+
logger.exception("Claude API call failed for summary_type=%s", summary_type.value)
168+
msg = f"AI summary generation failed: {exc}"
169+
raise RuntimeError(msg) from exc
170+
171+
# All retries exhausted for rate limiting
172+
msg = "Rate limit exceeded. Please wait a minute before trying again."
173+
raise RuntimeError(msg) from last_exc
152174

153175

154176
async def generate_chat_response(
@@ -216,19 +238,34 @@ async def generate_chat_response(
216238
messages.append({"role": msg_item["role"], "content": msg_item["content"]})
217239
messages.append({"role": "user", "content": message})
218240

219-
try:
220-
client = anthropic.AsyncAnthropic(api_key=api_key)
221-
response = await client.messages.create(
222-
model=DEFAULT_MODEL,
223-
max_tokens=4096,
224-
system=system_prompt,
225-
messages=messages,
226-
)
227-
result = response.content[0].text
228-
except Exception as exc:
229-
logger.exception("Claude chat API call failed")
230-
msg = f"AI chat failed: {exc}"
231-
raise RuntimeError(msg) from exc
232-
233-
logger.info("Generated chat response (%d chars)", len(result))
234-
return result
241+
client = anthropic.AsyncAnthropic(api_key=api_key)
242+
last_exc: Exception | None = None
243+
244+
for attempt in range(MAX_RETRIES):
245+
try:
246+
response = await client.messages.create(
247+
model=DEFAULT_MODEL,
248+
max_tokens=4096,
249+
system=system_prompt,
250+
messages=messages,
251+
)
252+
result = response.content[0].text
253+
logger.info("Generated chat response (%d chars)", len(result))
254+
return result
255+
except _RateLimitError as exc:
256+
last_exc = exc
257+
delay = RETRY_BASE_DELAY * (2 ** attempt)
258+
logger.warning(
259+
"Rate limited (attempt %d/%d) for chat, retrying in %ds",
260+
attempt + 1, MAX_RETRIES, delay,
261+
)
262+
if attempt < MAX_RETRIES - 1:
263+
await asyncio.sleep(delay)
264+
except Exception as exc:
265+
logger.exception("Claude chat API call failed")
266+
msg = f"AI chat failed: {exc}"
267+
raise RuntimeError(msg) from exc
268+
269+
# All retries exhausted for rate limiting
270+
msg = "Rate limit exceeded. Please wait a minute before trying again."
271+
raise RuntimeError(msg) from last_exc

static/js/main.js

Lines changed: 31 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1285,11 +1285,34 @@ function _appendChatError(message) {
12851285
const container = document.getElementById('chat-messages');
12861286
if (!container) { return; }
12871287

1288+
// Detect rate limit errors and show a friendly message
1289+
const isRateLimit = /rate.limit|429|too many requests|wait.*minute/i.test(message);
1290+
const friendlyMsg = isRateLimit
1291+
? 'Rate limit reached — the AI is temporarily busy. Please wait a moment and try again.'
1292+
: message;
1293+
12881294
const wrapper = document.createElement('div');
12891295
wrapper.className = 'chat-msg-error flex gap-3 animate-fade-in';
1290-
wrapper.innerHTML = '<div class="w-8 h-8 rounded-lg bg-red-100 border-[2px] border-red-400 flex items-center justify-center flex-shrink-0 text-xs font-bold text-red-600">!</div>'
1291-
+ '<div class="bg-red-50 border-[2px] border-red-300 rounded-lg p-3 max-w-[80%]">'
1292-
+ '<p class="text-sm text-red-700">' + _escapeHtml(message) + '</p></div>';
1296+
1297+
const retryBtn = isRateLimit
1298+
? '<button onclick="this.closest(\'.chat-msg-error\').remove()" '
1299+
+ 'class="mt-2 text-xs font-semibold text-amber-700 bg-amber-100 border border-amber-300 '
1300+
+ 'rounded-md px-3 py-1 hover:bg-amber-200 transition-colors cursor-pointer">'
1301+
+ 'Dismiss</button>'
1302+
: '';
1303+
1304+
const bgColor = isRateLimit ? 'bg-amber-50' : 'bg-red-50';
1305+
const borderColor = isRateLimit ? 'border-amber-300' : 'border-red-300';
1306+
const textColor = isRateLimit ? 'text-amber-800' : 'text-red-700';
1307+
const iconBg = isRateLimit ? 'bg-amber-100' : 'bg-red-100';
1308+
const iconBorder = isRateLimit ? 'border-amber-400' : 'border-red-400';
1309+
const iconColor = isRateLimit ? 'text-amber-600' : 'text-red-600';
1310+
const icon = isRateLimit ? '⏳' : '!';
1311+
1312+
wrapper.innerHTML = '<div class="w-8 h-8 rounded-lg ' + iconBg + ' border-[2px] ' + iconBorder + ' flex items-center justify-center flex-shrink-0 text-xs font-bold ' + iconColor + '">' + icon + '</div>'
1313+
+ '<div class="' + bgColor + ' border-[2px] ' + borderColor + ' rounded-lg p-3 max-w-[80%]">'
1314+
+ '<p class="text-sm ' + textColor + '">' + _escapeHtml(friendlyMsg) + '</p>'
1315+
+ retryBtn + '</div>';
12931316

12941317
container.appendChild(wrapper);
12951318
container.scrollTop = container.scrollHeight;
@@ -1332,7 +1355,11 @@ function _readSSEStream(url, body, onEvent, onError) {
13321355
if (!response.ok) {
13331356
let data;
13341357
try { data = await response.json(); } catch { data = {}; }
1335-
onEvent({ type: 'error', payload: { message: data.error || data.detail || 'Request failed' } });
1358+
let errMsg = data.error || data.detail || 'Request failed';
1359+
if (response.status === 429) {
1360+
errMsg = 'Rate limit exceeded. Please wait a moment before trying again.';
1361+
}
1362+
onEvent({ type: 'error', payload: { message: errMsg } });
13361363
return;
13371364
}
13381365

0 commit comments

Comments (0)