Skip to content

Commit 6ddba16

Browse files
Robbie1977 and claude
committed
fix: increase API timeout to 120s and add request throttling
The VFBquery API can be slow to respond for some terms. Changes: - Increase HTTP timeout from 30s to 120s - Reduce max retries from 3 to 2 (with longer backoff) - Add 0.5s delay between requests to avoid overwhelming the API - Add progress counter and batch summary (created/skipped/failed) Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
1 parent 5bee6c2 commit 6ddba16

1 file changed

Lines changed: 34 additions & 17 deletions

File tree

vfbterms.py

Lines changed: 34 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
import json
1010
import datetime
1111
import traceback
12+
import time
1213

1314
# Suppress the urllib3 warning about OpenSSL
1415
warnings.filterwarnings('ignore', category=Warning)
@@ -40,8 +41,8 @@ def create_session():
4041
"""Create a requests session with retry logic and connection pooling."""
4142
session = requests.Session()
4243
retry = Retry(
43-
total=3,
44-
backoff_factor=1,
44+
total=2,
45+
backoff_factor=2,
4546
status_forcelist=[500, 502, 503, 504],
4647
)
4748
adapter = HTTPAdapter(max_retries=retry, pool_connections=10, pool_maxsize=10)
@@ -56,7 +57,7 @@ def create_session():
5657
def fetch_term_info(term_id):
5758
"""Fetch term info from VFBquery API. Returns dict or None on error."""
5859
try:
59-
resp = session.get(API_BASE, params={"id": term_id}, timeout=30)
60+
resp = session.get(API_BASE, params={"id": term_id}, timeout=120)
6061
resp.raise_for_status()
6162
data = resp.json()
6263
if not data or not data.get("Id"):
@@ -495,30 +496,46 @@ def get_vfb_connect():
495496

496497
def save_terms(ids):
497498
"""Fetch and save term pages for a list of IDs."""
498-
for term_id in ids:
499+
total = len(ids)
500+
success_count = 0
501+
skip_count = 0
502+
fail_count = 0
503+
for i, term_id in enumerate(ids):
499504
try:
500505
filename = term_id + "_v" + str(version) + ".md"
501-
if not os.path.isfile(filename):
502-
print(f"Processing {term_id}...")
503-
term_data = fetch_term_info(term_id)
504-
if term_data is None:
505-
continue
506+
if os.path.isfile(filename):
507+
skip_count += 1
508+
continue
509+
510+
print(f"Processing {term_id} ({i+1}/{total})...")
511+
term_data = fetch_term_info(term_id)
512+
if term_data is None:
513+
fail_count += 1
514+
continue
506515

507-
page_content = generate_page(term_data)
516+
page_content = generate_page(term_data)
508517

509-
with open(filename, "w", encoding="utf-8") as f:
510-
f.write(page_content)
518+
with open(filename, "w", encoding="utf-8") as f:
519+
f.write(page_content)
511520

512-
# Clean up previous version
513-
old_filename = term_id + "_v" + str(version - 1) + ".md"
514-
if os.path.isfile(old_filename):
515-
os.remove(old_filename)
516-
print(f'Removed: {old_filename}')
521+
success_count += 1
522+
523+
# Clean up previous version
524+
old_filename = term_id + "_v" + str(version - 1) + ".md"
525+
if os.path.isfile(old_filename):
526+
os.remove(old_filename)
527+
print(f'Removed: {old_filename}')
528+
529+
# Brief pause to avoid overwhelming the API
530+
time.sleep(0.5)
517531

518532
except Exception as e:
533+
fail_count += 1
519534
print(f"ERROR processing {term_id}: {str(e)}")
520535
print(traceback.format_exc())
521536

537+
print(f"\nBatch complete: {success_count} created, {skip_count} skipped (existing), {fail_count} failed out of {total} total")
538+
522539
# ─── Testing ─────────────────────────────────────────────────────────────────
523540

524541
def test_term_page(term_id, term_type="class"):

0 commit comments

Comments (0)