Skip to content

Commit 0766bb1

Browse files
author
Vladimir Prelovac (aider)
committed
fix: ensure feed entries have stable IDs from id, guid, rdf:about, or fallback link
1 parent c0ec066 commit 0766bb1

1 file changed

Lines changed: 19 additions & 0 deletions

File tree

src/fastfeedparser/main.py

Lines changed: 19 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -353,6 +353,18 @@ def _parse_feed_entry(item: _Element, feed_type: _FeedType) -> FastFeedParserDic
353353
)
354354

355355
entry = FastFeedParserDict()
356+
# ------------------------------------------------------------------
357+
# 1) Collect a stable identifier for this entry.
358+
# Atom → <id>
359+
# RSS → <guid>
360+
# RDF → rdf:about attribute on the <item>
361+
# ------------------------------------------------------------------
362+
atom_id = _get_element_value(item, "{http://www.w3.org/2005/Atom}id")
363+
rss_guid = _get_element_value(item, "guid")
364+
rdf_about = item.get("{http://www.w3.org/1999/02/22-rdf-syntax-ns#}about")
365+
entry_id: Optional[str] = (atom_id or rss_guid or rdf_about)
366+
if entry_id:
367+
entry["id"] = entry_id.strip()
356368
get_field_value = _field_value_getter(item, feed_type)
357369
for field in fields:
358370
value = get_field_value(*field[1:])
@@ -414,6 +426,13 @@ def _parse_feed_entry(item: _Element, feed_type: _FeedType) -> FastFeedParserDic
414426
):
415427
entry["link"] = guid_text
416428

429+
# ------------------------------------------------------------------
430+
# 2) Give the entry an id whenever possible. If none of the dedicated
431+
# id sources were present, fall back to the chosen link.
432+
# ------------------------------------------------------------------
433+
if "id" not in entry and "link" in entry:
434+
entry["id"] = entry["link"]
435+
417436
content = None
418437
if feed_type == "rss":
419438
content = item.find("{http://purl.org/rss/1.0/modules/content/}encoded")

0 commit comments

Comments
 (0)