Commit ee3b02a

fix: preserve ingest results and feed dates
Address review feedback: keep ingest metrics and publication metadata accurate, and make the affected regression tests platform-safe so CI reflects real behavior.
1 parent: 4c73cce

9 files changed

Lines changed: 122 additions & 11 deletions

tests/adapters/test_cli_commands.py

Lines changed: 4 additions & 1 deletion

@@ -2,6 +2,7 @@
 
 import json
 from dataclasses import dataclass, field
+from pathlib import Path
 from typing import Any
 
 from click.testing import CliRunner

@@ -217,7 +218,9 @@ async def serve(self) -> None:
 
     assert result.exit_code == 0, result.output
    assert calls["create_app_runtime"] is None
-    assert calls["create_app_config"] == Config()
+    assert calls["create_app_config"] == Config(
+        database_path=str(Path(".data") / "vra.db")
+    )
     assert calls["uvicorn_host"] == "127.0.0.1"
     assert calls["uvicorn_port"] == 8080
     assert calls["uvicorn_log_level"] == "info"

tests/application/test_ingest_feed.py

Lines changed: 70 additions & 2 deletions

@@ -1,20 +1,22 @@
+from datetime import datetime, timezone
+
 import pytest
 
 from video_rss_aggregator.application.ports import FetchedFeed, FetchedFeedEntry
 from video_rss_aggregator.application.use_cases.ingest_feed import IngestFeed
+from video_rss_aggregator.domain.outcomes import Failure
 
 
 class FakeFeedSource:
     async def fetch(self, feed_url: str, max_items: int | None = None):
-        assert max_items == 1
         entries = (
             FetchedFeedEntry(source_url="https://example.com/1", title="One", guid="1"),
             FetchedFeedEntry(source_url="https://example.com/2", title="Two", guid="2"),
         )
         return FetchedFeed(
             title="Example Feed",
             site_url="https://example.com",
-            entries=entries[:max_items],
+            entries=entries[:max_items] if max_items is not None else entries,
         )
 
 

@@ -37,9 +39,11 @@ async def save_feed_item(self, feed_url: str, entry: FetchedFeedEntry) -> None:
 class FakeProcessSource:
     def __init__(self) -> None:
         self.calls: list[tuple[str, str | None]] = []
+        self.results: dict[str, object] = {}
 
     async def execute(self, source_url: str, title: str | None):
         self.calls.append((source_url, title))
+        return self.results.get(source_url)
 
 
 @pytest.fixture

@@ -144,3 +148,67 @@ async def test_ingest_feed_skips_entries_without_source_url() -> None:
         )
     ]
     assert process_source.calls == [("https://example.com/valid", None)]
+
+
+@pytest.mark.anyio
+async def test_ingest_feed_counts_only_non_failure_results_as_processed() -> None:
+    feeds = FakeFeedRepository()
+    videos = FakeVideoRepository()
+    process_source = FakeProcessSource()
+    process_source.results = {
+        "https://example.com/2": Failure(
+            source_url="https://example.com/2", reason="download failed"
+        )
+    }
+    use_case = IngestFeed(
+        feed_source=FakeFeedSource(),
+        feeds=feeds,
+        videos=videos,
+        process_source=process_source,
+    )
+
+    report = await use_case.execute(
+        "https://example.com/feed.xml", process=True, max_items=2
+    )
+
+    assert report.item_count == 2
+    assert report.processed_count == 1
+
+
+class FakeFeedSourceWithPublishedEntries:
+    async def fetch(self, feed_url: str, max_items: int | None = None):
+        return FetchedFeed(
+            title="Published Feed",
+            site_url="https://example.com",
+            entries=(
+                FetchedFeedEntry(
+                    source_url="https://example.com/published",
+                    title="Published item",
+                    guid="published-guid",
+                    published_at=datetime(2024, 1, 2, 3, 4, tzinfo=timezone.utc),
+                ),
+            ),
+        )
+
+
+@pytest.mark.anyio
+async def test_ingest_feed_preserves_publication_timestamps() -> None:
+    feeds = FakeFeedRepository()
+    videos = FakeVideoRepository()
+    process_source = FakeProcessSource()
+    use_case = IngestFeed(
+        feed_source=FakeFeedSourceWithPublishedEntries(),
+        feeds=feeds,
+        videos=videos,
+        process_source=process_source,
+    )
+
+    await use_case.execute("https://example.com/feed.xml", process=False)
+
+    saved_feed = feeds.saved[0][1]
+    saved_entry = videos.saved[0][1]
+
+    assert saved_feed.entries[0].published_at == datetime(
+        2024, 1, 2, 3, 4, tzinfo=timezone.utc
+    )
+    assert saved_entry.published_at == datetime(2024, 1, 2, 3, 4, tzinfo=timezone.utc)
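
The timestamp test reads feeds.saved[0][1] and videos.saved[0][1], so it relies on the existing fakes recording every save as a (feed_url, payload) tuple. A minimal sketch of that recording pattern as the test assumes it (the class name here is illustrative; the file's actual FakeFeedRepository and FakeVideoRepository are outside this diff):

    class RecordingVideoRepository:
        def __init__(self) -> None:
            # Each call is captured as (feed_url, entry) so assertions can
            # inspect exactly what would have been persisted.
            self.saved: list[tuple[str, object]] = []

        async def save_feed_item(self, feed_url: str, entry: object) -> None:
            self.saved.append((feed_url, entry))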

tests/infrastructure/test_feed_source.py

Lines changed: 4 additions & 0 deletions

@@ -1,3 +1,5 @@
+from datetime import datetime, timezone
+
 import pytest
 
 from video_rss_aggregator.application.ports import FetchedFeed, FetchedFeedEntry

@@ -39,6 +41,7 @@ async def test_http_feed_source_fetches_and_maps_entries() -> None:
         <item>
           <title>First</title>
           <guid>first-guid</guid>
+          <pubDate>Tue, 02 Jan 2024 03:04:05 GMT</pubDate>
           <enclosure url="https://cdn.example.com/video.mp4" type="video/mp4" />
         </item>
         <item>

@@ -64,6 +67,7 @@ async def test_http_feed_source_fetches_and_maps_entries() -> None:
             source_url="https://cdn.example.com/video.mp4",
             title="First",
             guid="first-guid",
+            published_at=datetime(2024, 1, 2, 3, 4, 5, tzinfo=timezone.utc),
         ),
     ),
 )
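
The expected published_at follows mechanically from the fixture's pubDate: the implementation (see feed_source.py below) parses the RFC 822 string with the stdlib and normalizes it to UTC. A quick check of that mapping:

    from email.utils import parsedate_to_datetime

    # A GMT pubDate parses straight to a timezone-aware UTC datetime.
    print(parsedate_to_datetime("Tue, 02 Jan 2024 03:04:05 GMT"))
    # -> 2024-01-02 03:04:05+00:00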

tests/infrastructure/test_media_service.py

Lines changed: 5 additions & 2 deletions

@@ -56,8 +56,11 @@ async def fake_prepare_media(**kwargs) -> LegacyPreparedMedia:
     assert prepared.source_url == "https://example.com/watch?v=1"
     assert prepared.title == "Feed title"
     assert prepared.transcript == "captured transcript"
-    assert prepared.media_path == "/tmp/downloaded.mp4"
-    assert prepared.frame_paths == ("/tmp/frame-1.jpg", "/tmp/frame-2.jpg")
+    assert Path(prepared.media_path) == Path("/tmp/downloaded.mp4")
+    assert tuple(Path(path) for path in prepared.frame_paths) == (
+        Path("/tmp/frame-1.jpg"),
+        Path("/tmp/frame-2.jpg"),
+    )
 
 
 @pytest.mark.anyio
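
The rewritten assertions compare Path objects rather than raw strings, which is what makes them platform-safe: Path equality normalizes separators under the path flavour's rules, while string equality does not. Illustrated with PureWindowsPath so the Windows behavior is reproducible from any OS:

    from pathlib import PureWindowsPath

    # On Windows the media service may hand back backslash paths; compared
    # as strings they differ, compared as paths they are equal.
    assert "\\tmp\\downloaded.mp4" != "/tmp/downloaded.mp4"
    assert PureWindowsPath("\\tmp\\downloaded.mp4") == PureWindowsPath("/tmp/downloaded.mp4")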

tests/infrastructure/test_sqlite_repositories.py

Lines changed: 7 additions & 2 deletions

@@ -143,6 +143,7 @@ async def test_sqlite_feed_adapters_persist_feed_and_video_metadata(tmp_path) ->
             source_url="https://example.com/watch?v=from-feed",
             title="Feed item",
             guid="guid-1",
+            published_at=datetime(2024, 1, 2, 3, 4, tzinfo=timezone.utc),
         ),
     )
 

@@ -151,12 +152,16 @@ async def test_sqlite_feed_adapters_persist_feed_and_video_metadata(tmp_path) ->
     ) as cur:
         feed_row = await cur.fetchone()
     async with db._conn.execute(
-        "SELECT title, guid FROM videos WHERE source_url = ?",
+        "SELECT title, guid, published_at FROM videos WHERE source_url = ?",
         ("https://example.com/watch?v=from-feed",),
     ) as cur:
         video_row = await cur.fetchone()
 
     await db.close()
 
     assert dict(feed_row) == {"title": "Feed title"}
-    assert dict(video_row) == {"title": "Feed item", "guid": "guid-1"}
+    assert dict(video_row) == {
+        "title": "Feed item",
+        "guid": "guid-1",
+        "published_at": "2024-01-02T03:04:00+00:00",
+    }
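
The expected published_at string is exactly what datetime.isoformat() produces for an aware UTC value, which is presumably how the SQLite layer serializes timestamps (the serialization code itself is not part of this diff):

    from datetime import datetime, timezone

    # An aware UTC datetime renders with a +00:00 offset, matching the
    # published_at column value asserted above.
    print(datetime(2024, 1, 2, 3, 4, tzinfo=timezone.utc).isoformat())
    # -> 2024-01-02T03:04:00+00:00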

video_rss_aggregator/application/ports.py

Lines changed: 4 additions & 1 deletion

@@ -1,9 +1,11 @@
 from __future__ import annotations
 
+from datetime import datetime
 from dataclasses import dataclass, field
 from typing import Protocol, Sequence
 
 from video_rss_aggregator.domain.models import PreparedMedia, SummaryResult
+from video_rss_aggregator.domain.outcomes import ProcessOutcome
 from video_rss_aggregator.domain.publication import PublicationRecord
 
 

@@ -22,6 +24,7 @@ class FetchedFeedEntry:
     source_url: str | None
     title: str | None = None
     guid: str | None = None
+    published_at: datetime | None = None
 
 
 @dataclass(frozen=True)

@@ -49,7 +52,7 @@ async def save_feed_item(self, feed_url: str, entry: FetchedFeedEntry) -> None:
 
 
 class SourceProcessor(Protocol):
-    async def execute(self, source_url: str, title: str | None): ...
+    async def execute(self, source_url: str, title: str | None) -> ProcessOutcome: ...
 
 
 class Summarizer(Protocol):
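
This diff imports ProcessOutcome and Failure from video_rss_aggregator.domain.outcomes without showing that module. From the way the tests build Failure(source_url=..., reason=...) and the use case branches on isinstance(result, Failure), a plausible shape is the sketch below (Success is a hypothetical stand-in; only Failure appears in this commit):

    from __future__ import annotations

    from dataclasses import dataclass


    @dataclass(frozen=True)
    class Failure:
        # Enough context to report which source failed and why.
        source_url: str
        reason: str


    @dataclass(frozen=True)
    class Success:
        # Hypothetical success variant, not shown in this commit.
        source_url: str


    ProcessOutcome = Success | Failure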

video_rss_aggregator/application/use_cases/ingest_feed.py

Lines changed: 5 additions & 2 deletions

@@ -11,6 +11,7 @@
     FeedVideoRepository,
     SourceProcessor,
 )
+from video_rss_aggregator.domain.outcomes import Failure
 
 
 @dataclass(frozen=True)

@@ -46,6 +47,7 @@ async def execute(
                     source_url=source_url,
                     title=entry.title,
                     guid=entry.guid,
+                    published_at=entry.published_at,
                 )
             )
 

@@ -64,10 +66,11 @@ async def execute(
             await self.videos.save_feed_item(feed_url, entry)
 
             if process:
-                await self.process_source.execute(
+                result = await self.process_source.execute(
                     cast(str, entry.source_url), entry.title
                 )
-                processed_count += 1
+                if not isinstance(result, Failure):
+                    processed_count += 1
 
         return IngestReport(
             feed_title=normalized_feed.title,
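
With this change, processed_count reflects outcomes rather than attempts: every entry still goes through process_source.execute, but only non-Failure results are counted. The counting rule in isolation, mirroring the regression test's setup (the "ok" string is a stand-in for any non-Failure outcome):

    from video_rss_aggregator.domain.outcomes import Failure

    outcomes = [
        "ok",  # stand-in for a successful ProcessOutcome
        Failure(source_url="https://example.com/2", reason="download failed"),
    ]
    # Mirrors the loop body above: attempts minus Failure results.
    processed = sum(1 for result in outcomes if not isinstance(result, Failure))
    print(processed)  # -> 1, matching report.processed_count in the test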

video_rss_aggregator/infrastructure/feed_source.py

Lines changed: 22 additions & 0 deletions

@@ -1,6 +1,8 @@
 from __future__ import annotations
 
 from dataclasses import dataclass
+from datetime import datetime, timezone
+from email.utils import parsedate_to_datetime
 from typing import Any
 
 import feedparser

@@ -24,9 +26,29 @@ def _map_entry(entry: Any) -> FetchedFeedEntry:
         source_url=_pick_source_url(entry),
         title=entry.get("title") or None,
         guid=entry.get("id") or None,
+        published_at=_pick_published_at(entry),
     )
 
 
+def _pick_published_at(entry: Any) -> datetime | None:
+    published = entry.get("published")
+    if published:
+        try:
+            parsed = parsedate_to_datetime(published)
+        except (TypeError, ValueError, IndexError, OverflowError):
+            parsed = None
+        if parsed is not None:
+            if parsed.tzinfo is None:
+                return parsed.replace(tzinfo=timezone.utc)
+            return parsed.astimezone(timezone.utc)
+
+    published_parsed = entry.get("published_parsed")
+    if published_parsed is None:
+        return None
+
+    return datetime(*published_parsed[:6], tzinfo=timezone.utc)
+
+
 @dataclass(frozen=True)
 class HttpFeedSource:
     client: httpx.AsyncClient
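
_pick_published_at prefers the entry's raw published string, parsed with the stdlib RFC 2822 parser and coerced to UTC, and falls back to feedparser's pre-parsed published_parsed struct_time, which feedparser already normalizes to UTC. Both branches in miniature (the struct_time literal is a hand-built stand-in for feedparser output):

    import time
    from datetime import datetime, timezone
    from email.utils import parsedate_to_datetime

    # Branch 1: RFC 2822 string, parsed timezone-aware and normalized to UTC.
    print(parsedate_to_datetime("Tue, 02 Jan 2024 03:04:05 GMT"))
    # -> 2024-01-02 03:04:05+00:00

    # Branch 2: fallback via the first six struct_time fields, tagged as UTC.
    published_parsed = time.struct_time((2024, 1, 2, 3, 4, 5, 1, 2, 0))
    print(datetime(*published_parsed[:6], tzinfo=timezone.utc))
    # -> 2024-01-02 03:04:05+00:00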

video_rss_aggregator/infrastructure/sqlite_repositories.py

Lines changed: 1 addition & 1 deletion

@@ -31,7 +31,7 @@ async def save_feed_item(self, feed_url: str, entry: FetchedFeedEntry) -> None:
         guid=entry.guid,
         title=entry.title,
         source_url=entry.source_url,
-        published_at=None,
+        published_at=entry.published_at,
     )
 
 