Commit ee3b02a

fix: preserve ingest results and feed dates
Address review feedback: keep ingest metrics and publication metadata accurate, and make the affected regression tests platform-safe so CI reflects real behavior.
1 parent: 4c73cce

9 files changed

Lines changed: 122 additions & 11 deletions

tests/adapters/test_cli_commands.py

Lines changed: 4 additions & 1 deletion

@@ -2,6 +2,7 @@
 
 import json
 from dataclasses import dataclass, field
+from pathlib import Path
 from typing import Any
 
 from click.testing import CliRunner

@@ -217,7 +218,9 @@ async def serve(self) -> None:
 
     assert result.exit_code == 0, result.output
    assert calls["create_app_runtime"] is None
-    assert calls["create_app_config"] == Config()
+    assert calls["create_app_config"] == Config(
+        database_path=str(Path(".data") / "vra.db")
+    )
     assert calls["uvicorn_host"] == "127.0.0.1"
     assert calls["uvicorn_port"] == 8080
     assert calls["uvicorn_log_level"] == "info"

tests/application/test_ingest_feed.py

Lines changed: 70 additions & 2 deletions

@@ -1,20 +1,22 @@
+from datetime import datetime, timezone
+
 import pytest
 
 from video_rss_aggregator.application.ports import FetchedFeed, FetchedFeedEntry
 from video_rss_aggregator.application.use_cases.ingest_feed import IngestFeed
+from video_rss_aggregator.domain.outcomes import Failure
 
 
 class FakeFeedSource:
     async def fetch(self, feed_url: str, max_items: int | None = None):
-        assert max_items == 1
         entries = (
             FetchedFeedEntry(source_url="https://example.com/1", title="One", guid="1"),
             FetchedFeedEntry(source_url="https://example.com/2", title="Two", guid="2"),
         )
         return FetchedFeed(
             title="Example Feed",
             site_url="https://example.com",
-            entries=entries[:max_items],
+            entries=entries[:max_items] if max_items is not None else entries,
         )
 
 

@@ -37,9 +39,11 @@ async def save_feed_item(self, feed_url: str, entry: FetchedFeedEntry) -> None:
 class FakeProcessSource:
     def __init__(self) -> None:
         self.calls: list[tuple[str, str | None]] = []
+        self.results: dict[str, object] = {}
 
     async def execute(self, source_url: str, title: str | None):
         self.calls.append((source_url, title))
+        return self.results.get(source_url)
 
 
 @pytest.fixture

@@ -144,3 +148,67 @@ async def test_ingest_feed_skips_entries_without_source_url() -> None:
         )
     ]
     assert process_source.calls == [("https://example.com/valid", None)]
+
+
+@pytest.mark.anyio
+async def test_ingest_feed_counts_only_non_failure_results_as_processed() -> None:
+    feeds = FakeFeedRepository()
+    videos = FakeVideoRepository()
+    process_source = FakeProcessSource()
+    process_source.results = {
+        "https://example.com/2": Failure(
+            source_url="https://example.com/2", reason="download failed"
+        )
+    }
+    use_case = IngestFeed(
+        feed_source=FakeFeedSource(),
+        feeds=feeds,
+        videos=videos,
+        process_source=process_source,
+    )
+
+    report = await use_case.execute(
+        "https://example.com/feed.xml", process=True, max_items=2
+    )
+
+    assert report.item_count == 2
+    assert report.processed_count == 1
+
+
+class FakeFeedSourceWithPublishedEntries:
+    async def fetch(self, feed_url: str, max_items: int | None = None):
+        return FetchedFeed(
+            title="Published Feed",
+            site_url="https://example.com",
+            entries=(
+                FetchedFeedEntry(
+                    source_url="https://example.com/published",
+                    title="Published item",
+                    guid="published-guid",
+                    published_at=datetime(2024, 1, 2, 3, 4, tzinfo=timezone.utc),
+                ),
+            ),
+        )
+
+
+@pytest.mark.anyio
+async def test_ingest_feed_preserves_publication_timestamps() -> None:
+    feeds = FakeFeedRepository()
+    videos = FakeVideoRepository()
+    process_source = FakeProcessSource()
+    use_case = IngestFeed(
+        feed_source=FakeFeedSourceWithPublishedEntries(),
+        feeds=feeds,
+        videos=videos,
+        process_source=process_source,
+    )
+
+    await use_case.execute("https://example.com/feed.xml", process=False)
+
+    saved_feed = feeds.saved[0][1]
+    saved_entry = videos.saved[0][1]
+
+    assert saved_feed.entries[0].published_at == datetime(
+        2024, 1, 2, 3, 4, tzinfo=timezone.utc
+    )
+    assert saved_entry.published_at == datetime(2024, 1, 2, 3, 4, tzinfo=timezone.utc)
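
The timestamp test reads feeds.saved[0][1] and videos.saved[0][1], so it relies on the existing fakes recording every save as a (feed_url, payload) tuple. A minimal sketch of that recording pattern as the test assumes it (the class name here is illustrative; the file's actual FakeFeedRepository and FakeVideoRepository are outside this diff):

    class RecordingVideoRepository:
        def __init__(self) -> None:
            # Each call is captured as (feed_url, entry) so assertions can
            # inspect exactly what would have been persisted.
            self.saved: list[tuple[str, object]] = []

        async def save_feed_item(self, feed_url: str, entry: object) -> None:
            self.saved.append((feed_url, entry))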

tests/infrastructure/test_feed_source.py

Lines changed: 4 additions & 0 deletions

@@ -1,3 +1,5 @@
+from datetime import datetime, timezone
+
 import pytest
 
 from video_rss_aggregator.application.ports import FetchedFeed, FetchedFeedEntry

@@ -39,6 +41,7 @@ async def test_http_feed_source_fetches_and_maps_entries() -> None:
         <item>
           <title>First</title>
           <guid>first-guid</guid>
+          <pubDate>Tue, 02 Jan 2024 03:04:05 GMT</pubDate>
           <enclosure url="https://cdn.example.com/video.mp4" type="video/mp4" />
         </item>
         <item>

@@ -64,6 +67,7 @@ async def test_http_feed_source_fetches_and_maps_entries() -> None:
             source_url="https://cdn.example.com/video.mp4",
             title="First",
             guid="first-guid",
+            published_at=datetime(2024, 1, 2, 3, 4, 5, tzinfo=timezone.utc),
         ),
     ),
 )
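
The expected published_at follows mechanically from the fixture's pubDate: the implementation (see feed_source.py below) parses the RFC 822 string with the stdlib and normalizes it to UTC. A quick check of that mapping:

    from email.utils import parsedate_to_datetime

    # A GMT pubDate parses straight to a timezone-aware UTC datetime.
    print(parsedate_to_datetime("Tue, 02 Jan 2024 03:04:05 GMT"))
    # -> 2024-01-02 03:04:05+00:00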

tests/infrastructure/test_media_service.py

Lines changed: 5 additions & 2 deletions

@@ -56,8 +56,11 @@ async def fake_prepare_media(**kwargs) -> LegacyPreparedMedia:
     assert prepared.source_url == "https://example.com/watch?v=1"
     assert prepared.title == "Feed title"
     assert prepared.transcript == "captured transcript"
-    assert prepared.media_path == "/tmp/downloaded.mp4"
-    assert prepared.frame_paths == ("/tmp/frame-1.jpg", "/tmp/frame-2.jpg")
+    assert Path(prepared.media_path) == Path("/tmp/downloaded.mp4")
+    assert tuple(Path(path) for path in prepared.frame_paths) == (
+        Path("/tmp/frame-1.jpg"),
+        Path("/tmp/frame-2.jpg"),
+    )
 
 
 @pytest.mark.anyio
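
The rewritten assertions compare Path objects rather than raw strings, which is what makes them platform-safe: Path equality normalizes separators under the path flavour's rules, while string equality does not. Illustrated with PureWindowsPath so the Windows behavior is reproducible from any OS:

    from pathlib import PureWindowsPath

    # On Windows the media service may hand back backslash paths; compared
    # as strings they differ, compared as paths they are equal.
    assert "\\tmp\\downloaded.mp4" != "/tmp/downloaded.mp4"
    assert PureWindowsPath("\\tmp\\downloaded.mp4") == PureWindowsPath("/tmp/downloaded.mp4")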

tests/infrastructure/test_sqlite_repositories.py

Lines changed: 7 additions & 2 deletions

@@ -143,6 +143,7 @@ async def test_sqlite_feed_adapters_persist_feed_and_video_metadata(tmp_path) ->
             source_url="https://example.com/watch?v=from-feed",
             title="Feed item",
             guid="guid-1",
+            published_at=datetime(2024, 1, 2, 3, 4, tzinfo=timezone.utc),
         ),
     )
 

@@ -151,12 +152,16 @@ async def test_sqlite_feed_adapters_persist_feed_and_video_metadata(tmp_path) ->
     ) as cur:
         feed_row = await cur.fetchone()
     async with db._conn.execute(
-        "SELECT title, guid FROM videos WHERE source_url = ?",
+        "SELECT title, guid, published_at FROM videos WHERE source_url = ?",
         ("https://example.com/watch?v=from-feed",),
     ) as cur:
         video_row = await cur.fetchone()
 
     await db.close()
 
     assert dict(feed_row) == {"title": "Feed title"}
-    assert dict(video_row) == {"title": "Feed item", "guid": "guid-1"}
+    assert dict(video_row) == {
+        "title": "Feed item",
+        "guid": "guid-1",
+        "published_at": "2024-01-02T03:04:00+00:00",
+    }
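
The expected published_at string is exactly what datetime.isoformat() produces for an aware UTC value, which is presumably how the SQLite layer serializes timestamps (the serialization code itself is not part of this diff):

    from datetime import datetime, timezone

    # An aware UTC datetime renders with a +00:00 offset, matching the
    # published_at column value asserted above.
    print(datetime(2024, 1, 2, 3, 4, tzinfo=timezone.utc).isoformat())
    # -> 2024-01-02T03:04:00+00:00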

video_rss_aggregator/application/ports.py

Lines changed: 4 additions & 1 deletion

@@ -1,9 +1,11 @@
 from __future__ import annotations
 
+from datetime import datetime
 from dataclasses import dataclass, field
 from typing import Protocol, Sequence
 
 from video_rss_aggregator.domain.models import PreparedMedia, SummaryResult
+from video_rss_aggregator.domain.outcomes import ProcessOutcome
 from video_rss_aggregator.domain.publication import PublicationRecord
 
 

@@ -22,6 +24,7 @@ class FetchedFeedEntry:
     source_url: str | None
     title: str | None = None
     guid: str | None = None
+    published_at: datetime | None = None
 
 
 @dataclass(frozen=True)

@@ -49,7 +52,7 @@ async def save_feed_item(self, feed_url: str, entry: FetchedFeedEntry) -> None:
 
 
 class SourceProcessor(Protocol):
-    async def execute(self, source_url: str, title: str | None): ...
+    async def execute(self, source_url: str, title: str | None) -> ProcessOutcome: ...
 
 
 class Summarizer(Protocol):
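
This diff imports ProcessOutcome and Failure from video_rss_aggregator.domain.outcomes without showing that module. From the way the tests build Failure(source_url=..., reason=...) and the use case branches on isinstance(result, Failure), a plausible shape is the sketch below (Success is a hypothetical stand-in; only Failure appears in this commit):

    from __future__ import annotations

    from dataclasses import dataclass


    @dataclass(frozen=True)
    class Failure:
        # Enough context to report which source failed and why.
        source_url: str
        reason: str


    @dataclass(frozen=True)
    class Success:
        # Hypothetical success variant, not shown in this commit.
        source_url: str


    ProcessOutcome = Success | Failure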

video_rss_aggregator/application/use_cases/ingest_feed.py

Lines changed: 5 additions & 2 deletions

@@ -11,6 +11,7 @@
     FeedVideoRepository,
     SourceProcessor,
 )
+from video_rss_aggregator.domain.outcomes import Failure
 
 
 @dataclass(frozen=True)

@@ -46,6 +47,7 @@ async def execute(
                     source_url=source_url,
                     title=entry.title,
                     guid=entry.guid,
+                    published_at=entry.published_at,
                 )
             )
 

@@ -64,10 +66,11 @@ async def execute(
             await self.videos.save_feed_item(feed_url, entry)
 
             if process:
-                await self.process_source.execute(
+                result = await self.process_source.execute(
                     cast(str, entry.source_url), entry.title
                 )
-                processed_count += 1
+                if not isinstance(result, Failure):
+                    processed_count += 1
 
         return IngestReport(
             feed_title=normalized_feed.title,
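
With this change, processed_count reflects outcomes rather than attempts: every entry still goes through process_source.execute, but only non-Failure results are counted. The counting rule in isolation, mirroring the regression test's setup (the "ok" string is a stand-in for any non-Failure outcome):

    from video_rss_aggregator.domain.outcomes import Failure

    outcomes = [
        "ok",  # stand-in for a successful ProcessOutcome
        Failure(source_url="https://example.com/2", reason="download failed"),
    ]
    # Mirrors the loop body above: attempts minus Failure results.
    processed = sum(1 for result in outcomes if not isinstance(result, Failure))
    print(processed)  # -> 1, matching report.processed_count in the test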

video_rss_aggregator/infrastructure/feed_source.py

Lines changed: 22 additions & 0 deletions

@@ -1,6 +1,8 @@
 from __future__ import annotations
 
 from dataclasses import dataclass
+from datetime import datetime, timezone
+from email.utils import parsedate_to_datetime
 from typing import Any
 
 import feedparser

@@ -24,9 +26,29 @@ def _map_entry(entry: Any) -> FetchedFeedEntry:
         source_url=_pick_source_url(entry),
         title=entry.get("title") or None,
         guid=entry.get("id") or None,
+        published_at=_pick_published_at(entry),
     )
 
 
+def _pick_published_at(entry: Any) -> datetime | None:
+    published = entry.get("published")
+    if published:
+        try:
+            parsed = parsedate_to_datetime(published)
+        except (TypeError, ValueError, IndexError, OverflowError):
+            parsed = None
+        if parsed is not None:
+            if parsed.tzinfo is None:
+                return parsed.replace(tzinfo=timezone.utc)
+            return parsed.astimezone(timezone.utc)
+
+    published_parsed = entry.get("published_parsed")
+    if published_parsed is None:
+        return None
+
+    return datetime(*published_parsed[:6], tzinfo=timezone.utc)
+
+
 @dataclass(frozen=True)
 class HttpFeedSource:
     client: httpx.AsyncClient
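
_pick_published_at prefers the entry's raw published string, parsed with the stdlib RFC 2822 parser and coerced to UTC, and falls back to feedparser's pre-parsed published_parsed struct_time, which feedparser already normalizes to UTC. Both branches in miniature (the struct_time literal is a hand-built stand-in for feedparser output):

    import time
    from datetime import datetime, timezone
    from email.utils import parsedate_to_datetime

    # Branch 1: RFC 2822 string, parsed timezone-aware and normalized to UTC.
    print(parsedate_to_datetime("Tue, 02 Jan 2024 03:04:05 GMT"))
    # -> 2024-01-02 03:04:05+00:00

    # Branch 2: fallback via the first six struct_time fields, tagged as UTC.
    published_parsed = time.struct_time((2024, 1, 2, 3, 4, 5, 1, 2, 0))
    print(datetime(*published_parsed[:6], tzinfo=timezone.utc))
    # -> 2024-01-02 03:04:05+00:00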

video_rss_aggregator/infrastructure/sqlite_repositories.py

Lines changed: 1 addition & 1 deletion

@@ -31,7 +31,7 @@ async def save_feed_item(self, feed_url: str, entry: FetchedFeedEntry) -> None:
         guid=entry.guid,
         title=entry.title,
         source_url=entry.source_url,
-        published_at=None,
+        published_at=entry.published_at,
     )
 
 