fix(configs): tighten 4 configs & add release & security advisory configs (#306)

gildesmarais · web-flow · commit 24d38d64e578 · 2026-03-28T12:03:40.000+01:00
* fix(configs): tighten 4 feed configs with stable selectors

* feat: add release and security advisory configs

* chore: drop unreliable avherald config

* chore: agents + style

* fix: move unstable changed configs to browserless strategy
diff --git a/AGENTS.md b/AGENTS.md
@@ -68,6 +68,13 @@ Recommended sequence:
 4. Confirm the title and URL live inside that boundary.
 5. Record the final URL if the page redirects by locale or renders a different surface than expected.
 
+If Chrome MCP is unavailable (`Transport closed` or page-lock errors), follow this recovery sequence:
+
+1. Kill stale Chrome MCP processes (`pkill -9 -f 'chrome-devtools-mcp|Chrome for Testing'`).
+2. Retry Chrome MCP once before continuing.
+3. If it is still unavailable, continue with `curl -I -L`, the runtime `html2rss feed` command, and HTML inspection in a temporary file.
+4. Explicitly report Chrome MCP outage in the final handoff.
+
 ## Browserless
 
 Use Browserless when:
@@ -158,6 +165,20 @@ bundle exec rspec --tag fetch --example 'example.com/feed.yml' spec/html2rss/con
 - the chosen surface is too noisy or too dynamic
 - the candidate should be downgraded or dropped
 
+7. Cross-runtime mismatch check (required when core feed works but fetch specs fail):
+
+- confirm canonical URL with redirect tracing:
+
+```bash
+curl -I -L -s https://example.com | sed -n '1,20p'
+```
+
+- compare behavior in both runtimes:
+  - core repo (`../html2rss`) via `html2rss feed`
+  - configs repo fetch lane (`bundle exec rspec --tag fetch --example ...`)
+- if selectors are valid in core but the fetch lane still returns zero items, treat this as a request-strategy/runtime mismatch, not selector success.
+- in that case: prefer Browserless-backed verification if available; otherwise mark as downgraded/deferred with evidence.
+
 ## Runtime Debugging
 
 Use the core CLI as the authority for single-config debugging. The quickest loop is:
@@ -170,6 +191,13 @@ Use the core CLI as the authority for single-config debugging. The quickest loop
 
 If Browserless works but Faraday does not, keep the config narrow and classify it as Browserless-backed instead of trying to rescue it with brittle tweaks.
 
+Additional high-value checks:
+
+- Always normalize `channel.url` to the final canonical host/path (`www` vs non-`www`, retired legacy paths).
+- Prefer selectors anchored to content links (`h3 a`, `a[href*='/article/']`) over container-only selectors.
+- Remove optional fields first when quality drops (`categories`, synthetic IDs, weak descriptions) before adding selector complexity.
+- Set `enhance: false` early if enhancement starts pulling nav/hero/market widgets.
+
 ## Auto-Source
 
 Use `auto` for reconnaissance, not as proof that a config is ready.
@@ -211,3 +239,5 @@ When finishing config work, report:
 - dropped or deferred candidates and why
 - commands actually run
 - residual risks, especially selector drift, localization dependence, or Browserless dependence
+- whether Chrome MCP was available during validation
+- whether focused fetch specs matched core runtime behavior
diff --git a/lib/html2rss/configs/deraktionaer.de/meistgelesen.yml b/lib/html2rss/configs/deraktionaer.de/meistgelesen.yml
@@ -1,21 +1,17 @@
 # yaml-language-server: $schema=https://raw.githubusercontent.com/html2rss/html2rss/refs/heads/master/schema/html2rss-config.schema.json
 channel:
   title: "deraktionaer.de: meistgelesen"
-  url: https://deraktionaer.de/
+  url: https://www.deraktionaer.de/
   time_zone: Europe/Berlin
   ttl: 360
   language: de
+strategy: browserless
 selectors:
   items:
-    selector: "#most-viewed ol > li"
+    selector: "section#top-articles article.top-article a.top-article-content[href^='/artikel/']"
+    # `enhance` is an option of the items selector, so it is set here rather than at the top level.
+    enhance: false
   title:
-    selector: "> a"
+    extractor: "text"
   url:
-    selector: "> a"
     extractor: "href"
-  isin:
-    selector: ".stock-info"
-    extractor: attribute
-    attribute: "data-quote"
-  categories:
-    - isin
diff --git a/lib/html2rss/configs/elastic.co/elasticsearch-release-notes.yml b/lib/html2rss/configs/elastic.co/elasticsearch-release-notes.yml
@@ -0,0 +1,17 @@
+# yaml-language-server: $schema=https://raw.githubusercontent.com/html2rss/html2rss/refs/heads/master/schema/html2rss-config.schema.json
+# Elasticsearch release notes, fetched with the browserless strategy.
+strategy: browserless
+channel:
+  url: https://www.elastic.co/docs/release-notes/elasticsearch
+  ttl: 360
+  time_zone: UTC
+  language: en
+selectors:
+  items:
+    enhance: false
+    # In-page anchors whose href starts with "#elasticsearch-" and ends with "-release-notes".
+    selector: 'a[href^="#elasticsearch-"][href$="-release-notes"]'
+  url:
+    extractor: href
+  title:
+    extractor: text
diff --git a/lib/html2rss/configs/go.dev/release-history.yml b/lib/html2rss/configs/go.dev/release-history.yml
@@ -1,14 +1,17 @@
 # yaml-language-server: $schema=https://raw.githubusercontent.com/html2rss/html2rss/refs/heads/master/schema/html2rss-config.schema.json
----
+# Go release history, fetched with the browserless strategy.
 channel:
-  url: https://avherald.com/
+  url: https://go.dev/doc/devel/release
   language: en
-  ttl: 120
   time_zone: UTC
+  ttl: 360
+strategy: browserless
 selectors:
   items:
-    selector: "table table a"
+    enhance: false
+    # Links whose href starts with "/doc/go1.".
+    selector: 'a[href^="/doc/go1."]'
   title:
-    selector: span
+    extractor: text
   url:
     extractor: href
diff --git a/lib/html2rss/configs/grafana.com/whatsnew.yml b/lib/html2rss/configs/grafana.com/whatsnew.yml
@@ -0,0 +1,17 @@
+# yaml-language-server: $schema=https://raw.githubusercontent.com/html2rss/html2rss/refs/heads/master/schema/html2rss-config.schema.json
+# Grafana "What's new" pages, fetched with the browserless strategy.
+strategy: browserless
+channel:
+  url: https://grafana.com/docs/grafana/latest/whatsnew/
+  ttl: 360
+  time_zone: UTC
+  language: en
+selectors:
+  items:
+    enhance: false
+    # Links with class `docs__menu-a` whose href starts with the versioned "whats-new-in-v" path.
+    selector: 'a.docs__menu-a[href^="/docs/grafana/latest/whatsnew/whats-new-in-v"]'
+  url:
+    extractor: href
+  title:
+    extractor: text
diff --git a/lib/html2rss/configs/iaapa.org/news.yml b/lib/html2rss/configs/iaapa.org/news.yml
@@ -1,15 +1,16 @@
 # yaml-language-server: $schema=https://raw.githubusercontent.com/html2rss/html2rss/refs/heads/master/schema/html2rss-config.schema.json
 channel:
-  url: https://www.iaapa.org/news
+  url: https://iaapa.org/news-funworld
   time_zone: UTC
   ttl: 720
+strategy: browserless
 selectors:
   items:
     selector: ".views-row > article"
+    # `enhance` is an option of the items selector, so it is set here rather than at the top level.
+    enhance: false
   title:
     selector: h3
-  description:
-    selector: ".event-card__summary"
   url:
     selector: "a"
     extractor: "href"
diff --git a/lib/html2rss/configs/mozilla.org/security-advisories.yml b/lib/html2rss/configs/mozilla.org/security-advisories.yml
@@ -0,0 +1,18 @@
+# yaml-language-server: $schema=https://raw.githubusercontent.com/html2rss/html2rss/refs/heads/master/schema/html2rss-config.schema.json
+# Mozilla security advisories, fetched with the browserless strategy.
+strategy: browserless
+channel:
+  url: https://www.mozilla.org/en-US/security/advisories/
+  ttl: 360
+  time_zone: UTC
+  language: en
+selectors:
+  items:
+    enhance: false
+    selector: "main li"
+  # Title and url both target the item's link whose href contains "/security/advisories/mfsa".
+  url:
+    extractor: href
+    selector: 'a[href*="/security/advisories/mfsa"]'
+  title:
+    selector: 'a[href*="/security/advisories/mfsa"]'
diff --git a/lib/html2rss/configs/tourismusnetzwerk-brandenburg.de/aktuelle_nachrichten.yml b/lib/html2rss/configs/tourismusnetzwerk-brandenburg.de/aktuelle_nachrichten.yml
@@ -1,20 +1,17 @@
 # yaml-language-server: $schema=https://raw.githubusercontent.com/html2rss/html2rss/refs/heads/master/schema/html2rss-config.schema.json
 channel:
-  url: https://www.tourismusnetzwerk-brandenburg.de/nc/aktuelle-nachrichten/
+  url: https://tourismusnetzwerk-brandenburg.de/
   time_zone: Europe/Berlin
   ttl: 720
   language: de
+strategy: browserless
 selectors:
   items:
-    selector: "article.article"
+    selector: "article.node.article.wall-floating"
+    # `enhance` is an option of the items selector, so it is set here rather than at the top level.
+    enhance: false
   title:
-    selector: "h3"
+    selector: "h3.title a[rel='bookmark']"
   url:
-    selector: "a"
+    selector: "h3.title a[rel='bookmark']"
     extractor: "href"
-  topic:
-    selector: ".field--item"
-  categories:
-    - topic
-  description:
-    selector: "p"