From 9c50ae6f792fd86b2fe234028094e1d9e7c4ab73 Mon Sep 17 00:00:00 2001 From: Ben Balter Date: Mon, 15 Jun 2026 19:24:14 -0400 Subject: [PATCH] Fix lychee false positives on standard.site at:// link tags MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Every post emits the spec-required <link rel="site.standard.document" href="at://…"> tag (plus the homepage publication tag). lychee 0.24.2 can't parse the at:// scheme — the colons in did:plc:… read as an invalid port — so it throws a hard parse error that no --exclude, --scheme, --remap, or .lycheeignore rule can suppress (they all run after URL parsing). This produced 125+ false-positive errors per scheduled run, drowning real link rot. Strip these verification-infrastructure link tags from the throwaway link-check build (after lint-html, before lychee). Production output is unaffected: links.yml builds its own dist-astro and never deploys it. Remove the now-confirmed-dead ^at:// .lycheeignore rule. Refs #1884 Co-Authored-By: Claude Opus 4.8 (1M context) --- .github/workflows/links.yml | 15 +++++++++++++++ .lycheeignore | 9 +++++---- 2 files changed, 20 insertions(+), 4 deletions(-) diff --git a/.github/workflows/links.yml b/.github/workflows/links.yml index 1455f4188..6439901fa 100644 --- a/.github/workflows/links.yml +++ b/.github/workflows/links.yml @@ -46,6 +46,21 @@ jobs: - name: Validate HTML (html-validate) run: npm run lint-html + - name: Strip standard.site link tags before link check + # The spec-required <link rel="site.standard.document" href="at://…"> tags (one + # per post, plus the homepage publication tag) carry AT Protocol URIs. + # lychee can't parse the at:// scheme — the colons in did:plc:… read as an + # invalid port — so it emits a hard parse error that no --exclude, + # --scheme, --remap, or .lycheeignore rule can suppress (they all run + # after URL parsing). These tags are verification infrastructure, not + # content links, so we drop them from this throwaway build before + # checking. 
Production output is unaffected: links.yml builds its own + # dist-astro and never deploys it. Runs after lint-html so the real + # markup is validated first. + run: | + find dist-astro -name '*.html' -print0 \ + | xargs -0 sed -i -E 's#<link[^>]*rel="site\.standard\.(document|publication)"[^>]*>##g' + - name: Restore lychee cache uses: actions/cache@v5 with: diff --git a/.lycheeignore b/.lycheeignore index 7dd6fcb2e..fc65f9215 100644 --- a/.lycheeignore +++ b/.lycheeignore @@ -19,10 +19,11 @@ ^https?://127\.0\.0\.1 ^https?://example\.(com|org|net) -# AT Protocol URIs (at://did:plc:.../...) — emitted as standard.site -# rel="site.standard.document"/"site.standard.publication" link tags. The -# at:// scheme isn't resolvable over HTTP, so lychee can't (and shouldn't) check it. -^at:// +# Note: standard.site rel="site.standard.document"/"site.standard.publication" +# link tags carry at:// AT Protocol URIs that lychee can't parse (the colons in +# did:plc:… read as an invalid port), so it errors before any ignore rule applies. +# A regex here can't suppress them; the links.yml "Strip standard.site link tags" +# step removes those tags from the throwaway build before lychee runs instead. # Intentionally-broken demo links in posts that document 404 behavior. # Pattern matches both raw root-relative form and the file:// form lychee