Skip to content

Commit 466dc24

Browse files
vprelovac and claude
committed
refactor(typing): add Protocol for callable typing and apply ruff formatting
Add _ElementValueGetter Protocol for proper typing of element value getter callables. Add assert isinstance(source, str) for type narrowing in parse(). Fix raw byte string prefixes (br -> rb). Configure ruff exclude and ty rules in pyproject.toml. Apply ruff formatting across main.py, benchmark.py, and test_encoding.py. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
1 parent 691c4df commit 466dc24

4 files changed

Lines changed: 221 additions & 79 deletions

File tree

benchmark.py

Lines changed: 44 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,9 @@
88
import feedparser
99
import httpx
1010

11-
BENCHMARK_DATA_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), "benchmark_data")
11+
BENCHMARK_DATA_DIR = os.path.join(
12+
os.path.dirname(os.path.abspath(__file__)), "benchmark_data"
13+
)
1214

1315
FEEDS = [
1416
"https://feedpress.me/FIJ",
@@ -249,14 +251,18 @@ def fetch_feeds():
249251
print(f"All {len(FEEDS)} feeds already cached. Nothing to fetch.")
250252
return
251253

252-
print(f"Fetching {len(to_fetch)} new feeds (skipping {len(FEEDS) - len(to_fetch)} cached)...")
254+
print(
255+
f"Fetching {len(to_fetch)} new feeds (skipping {len(FEEDS) - len(to_fetch)} cached)..."
256+
)
253257
fetched = 0
254258
failed = 0
255259
with httpx.Client(verify=False) as client:
256260
for url in to_fetch:
257261
path = _cache_path(url)
258262
try:
259-
resp = client.get(url, timeout=20.0, follow_redirects=True, headers=HEADERS)
263+
resp = client.get(
264+
url, timeout=20.0, follow_redirects=True, headers=HEADERS
265+
)
260266
with open(path, "wb") as f:
261267
f.write(resp.content)
262268
fetched += 1
@@ -320,7 +326,7 @@ def process_feed(url, skip_feedparser=False, iterations=3):
320326
else:
321327
if result["ffp_time"] > 0 and result["fp_time"] > 0:
322328
result["success"] = True
323-
print(f"[{url}] Speedup: {result['fp_time']/result['ffp_time']:.1f}x")
329+
print(f"[{url}] Speedup: {result['fp_time'] / result['ffp_time']:.1f}x")
324330

325331
except Exception as e:
326332
print(f"[{url}] Failed to load feed: {e}")
@@ -336,7 +342,9 @@ def test_parsers(skip_feedparser=False, iterations=3):
336342
if cached == len(FEEDS):
337343
print("Using cached feed data from benchmark_data/")
338344
elif cached > 0:
339-
print(f"Using {cached} cached feeds, {len(FEEDS) - cached} will be fetched live")
345+
print(
346+
f"Using {cached} cached feeds, {len(FEEDS) - cached} will be fetched live"
347+
)
340348
else:
341349
print("No cached data — fetching live (run with --fetch to pre-download)")
342350
print(f"Each feed parsed {iterations} times, using median for timing")
@@ -370,46 +378,52 @@ def test_parsers(skip_feedparser=False, iterations=3):
370378
continue
371379

372380
if not skip_feedparser and r["ffp_entries"] != r["fp_entries"]:
373-
entry_mismatches.append({
374-
"url": r["url"],
375-
"ffp_entries": r["ffp_entries"],
376-
"fp_entries": r["fp_entries"],
377-
"diff": r["ffp_entries"] - r["fp_entries"]
378-
})
381+
entry_mismatches.append(
382+
{
383+
"url": r["url"],
384+
"ffp_entries": r["ffp_entries"],
385+
"fp_entries": r["fp_entries"],
386+
"diff": r["ffp_entries"] - r["fp_entries"],
387+
}
388+
)
379389

380390
if not skip_feedparser and r["fp_time"] > 0 and r["ffp_time"] > 0:
381391
speedup = r["fp_time"] / r["ffp_time"]
382392
if speedup < 1.1:
383-
slow_feeds.append({
384-
"url": r["url"],
385-
"speedup": speedup,
386-
"ffp_time": r["ffp_time"],
387-
"fp_time": r["fp_time"]
388-
})
393+
slow_feeds.append(
394+
{
395+
"url": r["url"],
396+
"speedup": speedup,
397+
"ffp_time": r["ffp_time"],
398+
"fp_time": r["fp_time"],
399+
}
400+
)
389401

390402
print("\nSummary:")
391403
print("-" * 50)
392404
print(f"Total wall-clock time: {overall_time:.2f}s")
393405
print(f"Successfully tested {successful_feeds}/{len(FEEDS)} feeds")
394406
if successful_feeds > 0:
395-
print(f"\nFastFeedParser:")
407+
print("\nFastFeedParser:")
396408
print(f" Total entries: {total_ffp_entries}")
397409
print(f" Total parsing time: {total_ffp_time:.2f}s")
398-
print(f" Average per feed: {total_ffp_time/successful_feeds:.3f}s")
399-
print(f" Feeds/sec: {successful_feeds/total_ffp_time:.1f}")
410+
print(f" Average per feed: {total_ffp_time / successful_feeds:.3f}s")
411+
print(f" Feeds/sec: {successful_feeds / total_ffp_time:.1f}")
400412

401413
if not skip_feedparser:
402-
print(f"\nFeedparser:")
414+
print("\nFeedparser:")
403415
print(f" Total entries: {total_fp_entries}")
404416
print(f" Total parsing time: {total_fp_time:.2f}s")
405-
print(f" Average per feed: {total_fp_time/successful_feeds:.3f}s")
406-
print(f" Feeds/sec: {successful_feeds/total_fp_time:.1f}")
417+
print(f" Average per feed: {total_fp_time / successful_feeds:.3f}s")
418+
print(f" Feeds/sec: {successful_feeds / total_fp_time:.1f}")
407419
print(
408-
f"\nSpeedup: FastFeedParser is {(total_fp_time/total_ffp_time):.1f}x faster"
420+
f"\nSpeedup: FastFeedParser is {(total_fp_time / total_ffp_time):.1f}x faster"
409421
)
410422

411423
if entry_mismatches:
412-
print(f"\nOUTLIERS: Entry Count Mismatches ({len(entry_mismatches)} feeds)")
424+
print(
425+
f"\nOUTLIERS: Entry Count Mismatches ({len(entry_mismatches)} feeds)"
426+
)
413427
print("-" * 50)
414428
for m in entry_mismatches:
415429
print(f" {m['url']}")
@@ -418,14 +432,16 @@ def test_parsers(skip_feedparser=False, iterations=3):
418432
print(f" Difference: {m['diff']:+d}")
419433

420434
if slow_feeds:
421-
print(f"\nOUTLIERS: Slow Performance (<1.1x speedup, {len(slow_feeds)} feeds)")
435+
print(
436+
f"\nOUTLIERS: Slow Performance (<1.1x speedup, {len(slow_feeds)} feeds)"
437+
)
422438
print("-" * 50)
423439
slow_feeds.sort(key=lambda x: x["speedup"])
424440
for s in slow_feeds:
425441
print(f" {s['url']}")
426442
print(f" Speedup: {s['speedup']:.2f}x")
427-
print(f" FastFeedParser: {s['ffp_time']*1000:.2f}ms")
428-
print(f" Feedparser: {s['fp_time']*1000:.2f}ms")
443+
print(f" FastFeedParser: {s['ffp_time'] * 1000:.2f}ms")
444+
print(f" Feedparser: {s['fp_time'] * 1000:.2f}ms")
429445

430446

431447
if __name__ == "__main__":

pyproject.toml

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,22 @@
22
requires = ["setuptools~=67.0", "wheel"]
33
build-backend = "setuptools.build_meta"
44

5+
[tool.ruff]
6+
extend-exclude = [
7+
"1.py",
8+
"debug_*.py",
9+
"test_*.py",
10+
"benchmark.py",
11+
"investigate_failures.py",
12+
"show_error_messages.py",
13+
"check_oh4_dates.py",
14+
"comprehensive_debug.py",
15+
"profile_dylanharris.py",
16+
]
17+
18+
[tool.ty.rules]
19+
unresolved-import = "ignore"
20+
521
[tool.pytest.ini_options]
622
testpaths = ["tests"]
723

0 commit comments

Comments (0)