diff --git a/.github/workflows/merge-data.yaml b/.github/workflows/merge-data.yaml index 1395e5850..63625d915 100644 --- a/.github/workflows/merge-data.yaml +++ b/.github/workflows/merge-data.yaml @@ -36,15 +36,20 @@ jobs: ref: data path: data-branch-check - # Use main's metadata/repos.yaml as the authoritative private-repo list so - # stale orphan entries on the data branch cannot block the privacy gate. + # Privacy gate: blocks promotion if a private repo's wiki page would leak onto main. + # Design: delta-based public-allowlist inversion β€” no GraphQL needed. + # - data's own repos.yaml is authoritative (data branch is the writer) + # - publicSlugs built from entries with explicit private===false via computeRepoSlug + # - pages already on main (default checkout = main) are grandfathered to avoid + # re-flagging verifiably-public repos that lack an explicit private===false field + # (e.g. marcusrbrown/copiloting: onboarding_status=lost-access, absent private) + # - dead/node-null orphans are tolerated: if already on main β†’ grandfathered; + # if genuinely new β†’ correctly flagged without any GraphQL call - name: πŸ”’ Block private wiki pages env: - GITHUB_TOKEN: ${{ steps.get-workflow-app-token.outputs.token }} + GRANDFATHER_WIKI_REPOS_DIR: ../knowledge/wiki/repos working-directory: data-branch-check - run: | - cp ../metadata/repos.yaml metadata/repos.yaml - node ../scripts/check-wiki-private-presence.ts + run: node ../scripts/check-wiki-private-presence.ts - name: πŸ”€ Open weekly data merge PR env: diff --git a/docs/brainstorms/2026-05-23-discord-presence-via-control-plane-events-requirements.md b/docs/brainstorms/2026-05-23-discord-presence-via-control-plane-events-requirements.md new file mode 100644 index 000000000..de421d961 --- /dev/null +++ b/docs/brainstorms/2026-05-23-discord-presence-via-control-plane-events-requirements.md @@ -0,0 +1,231 @@ +# Fro Bot Discord Presence via Control-Plane Events + +**Date:** 2026-05-23 +**Status:** Requirements (revised after document-review) +**Scope:** Standard +**This document:** control-plane side only. The gateway-side feature is tracked in [fro-bot/agent#671](https://github.com/fro-bot/agent/issues/671). + +## Feature + +The Fro Bot control plane (`fro-bot/.github`) POSTs structured event payloads to a webhook exposed by the Fro Bot gateway daemon (`fro-bot/agent`, deployed via `marcusrbrown/infra` to `gateway.fro.bot`). The gateway, which is already logged in to Discord as the **Fro Bot user identity**, posts a message in Fronomenal **as Fro Bot** for each event. This is the load-bearing constraint: posts must originate from the Fro Bot user account via discord.js, NOT from a Discord webhook URL (which would post as a webhook bot, not as Fro Bot). The character has presence in Fronomenal; the control plane is what gives that character things to say. + +## Scope Split + +| This repo (`fro-bot/.github`) β€” what this doc covers | The gateway (`fro-bot/agent`) β€” tracked in a separate issue | +|---|---| +| Detect events in workflows | HTTP server with `POST /v1/announce` endpoint | +| Build structured payload (event type, context, timestamp) | HMAC-SHA256 signature verification | +| Sign payload (HMAC-SHA256) | Replay protection (timestamp window) | +| POST to gateway webhook with retry-once | Templated rendering per event type (v1) | +| Resolve secrets, kill-switch, error handling | Post via existing discord.js client as Fro Bot identity | +| File the gateway issue with the contract spec | Channel routing config | + +The gateway issue (titled "Fro Bot presence webhook") is the artifact that captures the gateway-side build. This document does NOT design the gateway side β€” it specifies the contract the gateway must implement. + +## Problem + +The Fro Bot character has presence in Fronomenal Discord via `gateway.fro.bot` but no way to talk about what it's actually doing. The control plane fires events every day (surveys complete, invitations accepted, daily reconcile runs, wiki-lint findings) but they stay invisible. The sunset of the GitHub-issue-based operational log (PR #3368) removed the wrong channel β€” issues nobody read β€” without replacing it with the right one. Fro Bot should drop in-character messages in Fronomenal when notable things happen, posted from its own Discord identity, not from an opaque webhook bot. + +## Goals + +1. **Control plane posts AS Fro Bot** in Fronomenal Discord for notable control-plane events β€” via the gateway's discord.js client, not via a Discord webhook URL +2. **Build the announce contract** (payload shape, auth, retry policy) so the gateway-side issue has everything it needs to implement against +3. **Reuse the existing gateway deployment** β€” don't rearchitect or relocate the gateway daemon +4. **Stay forward-compatible** with v2 LLM composition: the v1 payload can carry pre-rendered text in a `rendered_text` field; gateway templates when that's absent + +## Non-Goals (v1) + +- LLM composition of message text β€” deferred to v2; v1 uses gateway-side templates +- High-risk privacy events (visibility transition, integrity alerts) β€” those stay on GitHub-issue surfaces +- Daily reconcile summary event and wiki-lint findings event β€” added as fast-followers once the architecture proves out with the two narrowed event types +- Two-way conversation in Discord (gateway already handles `@fro-bot` mentions; not extending here) +- `self_improvement_observed` event type β€” no current trigger; add when the trigger exists, not as reserved contract surface +- Multi-channel routing β€” single Fronomenal channel for all v1 drops +- Durable queue / outage buffering β€” best-effort delivery with single-retry only +- Replacing Bluesky (already gated off in PR #3368; revival is a separate decision) +- Gateway-side implementation details (channel ID config, embed templates, discord.js wiring, HMAC verification, replay cache) β€” covered by the gateway-side issue + +## Users + +- **Marcus** β€” primary audience; sees Fro Bot's activity reflected in Fronomenal in Fro Bot's voice +- **Fronomenal members** β€” secondary audience; experience Fro Bot as a character with rhythm and personality + +## Architecture (v1) + +### Component overview + +``` +fro-bot/.github (control plane) fro-bot/agent (gateway) +β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” +β”‚ Event-firing workflow β”‚ β”‚ POST /v1/announce β”‚ +β”‚ β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”‚ β”‚ β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”‚ +β”‚ β”‚ Event happens β”‚ β”‚ β”‚ β”‚ Verify HMAC β”‚ β”‚ +β”‚ β”‚ (survey-repo, β”‚ β”‚ β”‚ β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ β”‚ +β”‚ β”‚ poll-invitations) β”‚ β”‚ β”‚ ↓ β”‚ +β”‚ β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”¬β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ β”‚ β”‚ β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”‚ +β”‚ ↓ β”‚ POST β”‚ β”‚ Replay window β”‚ β”‚ +β”‚ β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”‚ ───→ β”‚ β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ β”‚ +β”‚ β”‚ Build payload + sign β”‚ β”‚ HTTPS β”‚ ↓ β”‚ +β”‚ β”‚ (HMAC-SHA256) β”‚ β”‚ β”‚ β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”‚ +β”‚ β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”¬β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ β”‚ β”‚ β”‚ Template render β”‚ β”‚ +β”‚ ↓ β”‚ β”‚ β”‚ (v1) OR use β”‚ β”‚ +β”‚ β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”‚ β”‚ β”‚ rendered_text β”‚ β”‚ +β”‚ β”‚ scripts/gateway- β”‚ β”‚ β”‚ β”‚ (v2 forward) β”‚ β”‚ +β”‚ β”‚ announce.ts β”‚ β”‚ β”‚ β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ β”‚ +β”‚ β”‚ POST + retry-once β”‚ β”‚ β”‚ ↓ β”‚ +β”‚ β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ β”‚ β”‚ β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”‚ +β”‚ β”‚ β”‚ β”‚ Post AS Fro Bot β”‚ β”‚ +β”‚ Kill switch (workflow var) β”‚ β”‚ β”‚ via discord.js β”‚ β”‚ +β”‚ bypasses POST when set β”‚ β”‚ β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ β”‚ +β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ + ↓ + Fronomenal Discord server + (channel ID from gateway config) +``` + +### What this repo builds + +1. **`scripts/gateway-announce.ts`** β€” new TypeScript module, runs under Node 24 native TS. Responsibilities: + - Read event type, structured context, and timestamp from env vars passed by the calling workflow step + - Read `GATEWAY_WEBHOOK_SECRET` and `GATEWAY_PRESENCE_URL` from env + - Build canonical JSON payload (field ordering deterministic; see Payload Contract below) + - Compute HMAC-SHA256 over the canonicalized payload bytes + - Honor kill-switch env var (when set, log and exit 0 without POSTing) + - POST with single retry on transient failure (network error or HTTP 5xx); on final failure, log structured stderr warning and exit 0 (do not fail the workflow) +2. **Composer step in event-firing workflows** β€” added to: + - `survey-repo.yaml` after a successful survey + wiki-commit, before the broadcast job + - `poll-invitations.yaml` when β‰₯1 public invitation accepted +3. **Kill-switch infrastructure** β€” repo variable `GATEWAY_ANNOUNCE_DISABLED` (boolean). When set, `gateway-announce.ts` logs `gateway-announce: kill switch active; skipping POST` to stderr and exits 0 +4. **Secrets contract documented in `metadata/README.md`** β€” `GATEWAY_WEBHOOK_SECRET`, `GATEWAY_PRESENCE_URL` listed alongside existing repo secrets + +### Payload Contract (v1) + +The control plane sends this exact shape to `POST /v1/announce`: + +```jsonc +{ + "v": 1, // schema version; bumped on breaking changes + "event_type": "survey_completed", // one of: survey_completed | invitation_accepted + "fired_at": "2026-05-23T19:30:00Z", // ISO 8601, control-plane wall clock; signed in the body + "context": { // event-specific structured data; see Event Types below + // event-specific keys + }, + "rendered_text": null // v1: always null. v2 forward: pre-composed in-character message text. Gateway falls back to its templates when null. +} +``` + +**Canonicalization for signing**: +- Encode as JSON with **lexicographically sorted keys at every level**, no whitespace +- UTF-8 byte encoding +- HMAC-SHA256 over the canonical bytes, hex-encoded (lowercase) +- Signature transmitted in `X-Gateway-Signature` header (HMAC hex) +- Timestamp transmitted in `X-Gateway-Timestamp` header AND in the body's `fired_at` field; gateway MUST verify they match + +**Replay window**: the gateway should reject requests where `|now - fired_at| > 5 minutes`. Strictly informational here; implementation is gateway-side. + +**Constant-time comparison**: HMAC verification on the gateway side MUST use constant-time comparison. Stated as a requirement on the gateway issue; not enforceable from the POST side. + +### v1 Event Types + +| Event | When it fires | Context fields | +|---|---|---| +| `invitation_accepted` | After `poll-invitations.yaml` accepts β‰₯1 public invitation. Single POST aggregating all invitations in the cycle | `{count: number, repos: [{owner, name}, ...]}` | +| `survey_completed` | After a successful `survey-repo.yaml` run that landed wiki content on `data`. One POST per survey | `{owner: string, repo: string, slug: string, wiki_pages_changed: number}` | + +Both event types include enough context for the gateway to render a template that names the specific repo or count, without needing to look anything up. + +### Fast-Follower Events (not in v1) + +These ship after v1 proves out the contract. Listed here so the gateway issue can stub embed colors / template slots: + +- `reconcile_notable` β€” daily reconcile cron, gated on notability (dispatched > 0, floored > 0, newRepos > 0, byChannel changed, or lostAccess > 0) +- `wiki_lint_findings` β€” weekly wiki-lint run with findings β‰₯ 1 (depends on the open wiki-lint follow-up plan landing first) + +### Outage Policy + +- Single retry on transient failure (network error, HTTP 5xx) with 5-second backoff +- On final failure: structured stderr log line carrying event type and HTTP status (no canonical identifiers in the failure message β€” repos in the context field stay in the body, which isn't logged on failure), then exit 0 +- Lost events are acceptable in v1; the control plane has its own audit trail (data-branch commits, metadata files, GitHub Actions run logs) +- No buffer, no queue, no replay + +### Kill Switch + +- Repo variable `GATEWAY_ANNOUNCE_DISABLED` β€” when set to any truthy value, `gateway-announce.ts` short-circuits before computing the HMAC or making the network call +- Used when the gateway is down for maintenance, or if v1 content turns out to be a problem and we need to mute the pipeline without editing workflows +- No fallback to webhook-based posting; the whole point is gateway-as-Fro-Bot, so silence is the correct behavior when the gateway is unavailable + +### Privacy Boundary + +Only `invitation_accepted` and `survey_completed` events ship in v1. Both carry public-repo identifiers exclusively: + +- `survey_completed` only fires after the workflow's privacy-recheck step confirms `isPrivate === false` (existing guardrail; see `docs/solutions/security-issues/survey-workflow-side-privacy-gate-2026-05-16.md`) +- `invitation_accepted` only includes public invitations (already gated in `scripts/handle-invitation.ts`) +- The fast-follower `reconcile_notable` will need an additional privacy review before shipping β€” the `byChannel` rollup could carry channel-classification telemetry that's meaningful only for public-discovery surfaces; that boundary is the fast-follower's problem to resolve + +## Functional Requirements + +- **R1**: `scripts/gateway-announce.ts` accepts event type, structured context, and timestamp from env vars and produces a canonicalized JSON payload matching the contract above +- **R2**: HMAC-SHA256 signature uses the `GATEWAY_WEBHOOK_SECRET` env var; signature is hex-encoded and transmitted in the `X-Gateway-Signature` header +- **R3**: The script POSTs to the URL in `GATEWAY_PRESENCE_URL` env var with `Content-Type: application/json`, `X-Gateway-Signature`, and `X-Gateway-Timestamp` headers +- **R4**: On HTTP 5xx or network error, the script retries exactly once after a 5-second backoff; on second failure, logs a structured stderr line and exits 0 +- **R5**: When the repo variable `GATEWAY_ANNOUNCE_DISABLED` is truthy, the script logs `gateway-announce: kill switch active; skipping POST` to stderr and exits 0 before any network call or HMAC computation +- **R6**: `survey-repo.yaml` invokes `gateway-announce.ts` in a new step after the `Record survey result` step, only when the recheck succeeded (same gate as the existing `Record survey result` step). Event type: `survey_completed`. Context: `{owner, repo, slug, wiki_pages_changed}` +- **R7**: `poll-invitations.yaml` invokes `gateway-announce.ts` when `steps.poll.outputs.public_invitations_accepted > 0`. Event type: `invitation_accepted`. Context: `{count, repos: [...]}` β€” repos populated from invitation handler output (will require a small change to `scripts/handle-invitation.ts` to emit the list) +- **R8**: `metadata/README.md` documents `GATEWAY_WEBHOOK_SECRET`, `GATEWAY_PRESENCE_URL`, and `GATEWAY_ANNOUNCE_DISABLED` in the existing secrets/vars table +- **R9**: The payload contract is forward-compatible with a `rendered_text` field that the v2 composer (deferred) will populate; v1 callers always send `null` +- **R10**: The gateway-side feature is filed as a GitHub issue on `fro-bot/agent` capturing the contract above (endpoint shape, auth, replay window, constant-time comparison requirement, channel routing, templated rendering scope, posting AS Fro Bot via discord.js) + +## Success Criteria + +- **SC1**: A real `survey-repo.yaml` run lands a templated message in the Fronomenal target channel, posted by the Fro Bot Discord user account (visible message author: Fro Bot), within 30 seconds of the survey completing +- **SC2**: A real `poll-invitations.yaml` cycle accepting β‰₯1 public invitation lands a templated message in the same channel, same authorship +- **SC3**: A `curl` against the gateway endpoint with a deliberately wrong HMAC signature returns 4xx and produces NO Discord post (gateway-side test; verified after gateway feature ships) +- **SC4**: Setting `GATEWAY_ANNOUNCE_DISABLED=true` causes the next `survey-repo.yaml` or `poll-invitations.yaml` run to log the kill-switch line and produce no gateway request +- **SC5**: Simulating gateway unavailability (e.g., pointing `GATEWAY_PRESENCE_URL` at a black hole) causes the workflow to log the structured warning and continue without failing the run + +## What This Replaces + +The legacy `discord-notify.ts` webhook posting (still present, no callers as of PR #3368). Once v1 ships and proves stable, `discord-notify.ts` can be retired β€” the gateway-via-Fro-Bot path supersedes it. That retirement is a separate PR, not part of v1. + +## What This Doesn't Replace + +- The gateway's `@fro-bot` mention handler (still serves reactive interactions) +- The `/fro-bot ping` slash command (still serves as heartbeat) +- The existing GitHub PR review activity (different surface, different audience) +- Bluesky posting (still gated off in PR #3368; revival is a separate decision) + +## Open Questions for Planning + +- **Q1**: Output of `scripts/handle-invitation.ts` currently exposes only the count of accepted public invitations via step output. To populate `context.repos`, the script needs to also expose the list of repo identifiers (owner/name pairs) β€” either as a stringified JSON step output or via a small artifact file. Planning resolves the exact mechanism. +- **Q2**: The `Record survey result` step computes `SURVEY_STATUS` from a multi-step truth table. The gateway-announce step needs the same success gate; planning decides whether to reuse the same env-var pattern or extract a shared step output. +- **Q3**: `GATEWAY_PRESENCE_URL` β€” repo variable or repo secret? URLs aren't secret in the security sense, but the gateway endpoint URL is rarely-rotated and not useful to attackers without the HMAC secret. Probably variable; planning confirms. +- **Q4**: Test strategy for `scripts/gateway-announce.ts` β€” unit-test the canonicalization, HMAC, retry, and kill-switch logic with mocked fetch. No live integration test in CI (gateway is production). +- **Q5**: Should `gateway-announce.ts` use Node's built-in `crypto` for HMAC and built-in `fetch` for the POST, or pull in a dep? Lean toward built-in (matches repo's zero-dep ethos for control-plane scripts). +- **Q6**: Versioning policy on the contract β€” `v: 1` in the payload is the simple knob. When v2 ships the composer, it bumps to `v: 2` and gateway supports both during transition? Or does v2 stay `v: 1` since `rendered_text` was always in the contract? Planning decides. + +## Out-of-Scope but Worth Flagging + +- The gateway issue should require the gateway-side implementation to log every accepted announce request with a redacted summary (event type, timestamp, response status) β€” gives operator-visible audit trail without echoing payload content. Captured in the gateway issue, not enforced here. +- Audience demand validation (product-lens review concern): not gating v1 because the feature is the point regardless. Worth a post-v1 check: are the drops landing well? If quality from templates is poor, the composer (v2) becomes higher-priority. + +## Related Work + +- **Sunset PR #3368** β€” removed the journal-entry pipeline. Did not replace the channel; v1 here is the replacement +- **Explorer research (2026-05-23)** β€” capability matrix on `fro-bot/agent` gateway + `marcusrbrown/infra` deployment (informs the gateway issue contents) +- **`fro-bot/agent#671`** β€” gateway-side build (filed 2026-05-23) +- **`marcusrbrown/infra`** β€” will need a deploy secret rollout (`GATEWAY_WEBHOOK_SECRET`) and possibly env var (`GATEWAY_PRESENCE_CHANNEL_ID`) once the gateway side lands +- **PR #3293 / `docs/solutions/security-issues/survey-workflow-side-privacy-gate-2026-05-16.md`** β€” the privacy gate on `survey-repo.yaml` is what makes `survey_completed` safe to broadcast +- **`scripts/handle-invitation.ts`** β€” needs a small change to expose the list of accepted repos (Q1 above) + +## Review Disposition + +This document was rewritten after a document-review pass surfaced 31 raw findings across 6 reviewers. Key disposition: + +- **"Composer is over-scoped for v1"** (4-reviewer convergence) β€” accepted; composer deferred to v2, v1 uses gateway-side templates +- **"Audience demand assumed"** (product-lens) β€” acknowledged but not gating: the feature is the point regardless of measurable demand +- **"HMAC underspec"** (3 reviewers) β€” resolved: canonicalization, header layout, replay window, constant-time comparison all specified above +- **"`self_improvement_observed` shouldn't be in v1"** β€” accepted; removed from contract +- **"Wiki-lint coupling"** β€” accepted; wiki_lint_findings demoted to fast-follower +- **"No DoS posture / 200-ack overstates / kill switch missing"** β€” kill switch added; DoS posture is gateway-side (covered in gateway issue); 200-ack semantics also gateway-side +- **"Discord-notify role contradiction"** β€” resolved: discord-notify is retired after v1 ships, the gateway-via-Fro-Bot path supersedes it +- **"Architecture overcommits to one chat bridge"** β€” accepted as a tradeoff; Fronomenal is the only target and a different bridge would be a different brainstorm diff --git a/docs/brainstorms/2026-06-01-data-branch-sole-writer-requirements.md b/docs/brainstorms/2026-06-01-data-branch-sole-writer-requirements.md new file mode 100644 index 000000000..d7152a5cf --- /dev/null +++ b/docs/brainstorms/2026-06-01-data-branch-sole-writer-requirements.md @@ -0,0 +1,117 @@ +# Make `data` the Sole Writer of `metadata/repos.yaml` + +**Date:** 2026-06-01 +**Status:** Requirements +**Scope:** Standard (architectural β€” state ownership + privacy-gate robustness) + +## Problem + +The weekly `data β†’ main` promotion (PR #3396) was born with a merge conflict in `metadata/repos.yaml`. Diagnosis confirmed the conflict is not a wiki problem β€” all 38 `knowledge/**` files auto-merge cleanly. The single conflicting file, `metadata/repos.yaml`, was mutated on **both** branches since the merge-base: + +- On `data`: 43 autonomous survey write-backs advancing `last_survey_at`/`last_survey_status` + new repo entries (all public). +- On `main`: PR #3394 *deleted* two redacted private-repo orphan entries. + +That both-sides mutation is the proximate cause. But it is itself a **workaround for a deeper problem**, revealed by the comment at `.github/workflows/merge-data.yaml:39-40`: + +> "Use main's metadata/repos.yaml as the authoritative private-repo list so stale orphan entries on the data branch cannot block the privacy gate." + +The real root cause: **`scripts/check-wiki-private-presence.ts` fails closed on *any* unresolvable private entry**, including `node-null` (the repo was deleted or the App lost access). A single dead private repo on `data` makes `resolveCanonicalSlugs` throw, which blocks every promotion. Two PRs were written to work around that brittleness: + +- **PR #3395** pointed the privacy gate at `main`'s `repos.yaml` (via `cp ../metadata/repos.yaml`) so curated-clean main data wouldn't carry orphans. +- **PR #3394** deleted the orphan entries from `main` to keep that copy clean. + +Together those two workarounds turned `repos.yaml` into a both-sides-mutated file and produced the #3396 conflict. There is one root problem (a brittle fail-closed gate), not two. + +## The Decision + +**Remove the second writer instead of splitting state or building merge-resolution machinery.** + +The privacy posture already redacts private repos (`name: `, never canonical identifiers), so a redacted private entry on `main` is β€” by design β€” not a leak. There is therefore no reason to curate private entries off `main`, and no reason for `main` to ever edit `repos.yaml` independently. Once `data` is the sole writer: + +- `main`'s `repos.yaml` is updated **only through the promotion PR**. The one exception is `bootstrapDataBranch` re-seeding `data` from `main` after a squash-merge deletes the `data` ref β€” but that copies `main`'s exact content onto a fresh `data`, so it introduces no divergence (data starts equal to main, then only data advances). The no-both-sides-mutation property holds as long as nothing edits `repos.yaml` on `main` outside the promotion PR. +- No independent `main` edits β‡’ no both-sides mutation β‡’ **the conflict class is eliminated**, with no file split and no "data wins" resolution logic. This property is a *policy* invariant (nothing should edit `repos.yaml` on `main` except the promotion PR), not a structural guarantee β€” see change #4 for the safety net that makes a violation loud, and Q5 for whether to enforce it in code. + +This is preferred over the two alternatives surfaced during diagnosis: + +- **Splitting `repos.yaml`** into data-state + main-policy files (Oracle fix #1) β€” unnecessary once `main` stops editing the file; adds a schema migration and a second file for no benefit. +- **Auto-resolving the conflict** in `merge-data-pr.ts` (Oracle fix #3) β€” unnecessary once there is no conflict to resolve. + +## Scope (the change set) + +### 1. Make the privacy gate tolerate `node-null` (the root fix) + +`scripts/check-wiki-private-presence.ts` β€” `resolveCanonicalSlugs`: + +- A `node-null` result means the repo is deleted or the App lost access. The current gate fails closed on it, which is the brittleness that triggered the workarounds. The fix must stop node-null from blocking promotion **without losing leak coverage**. +- **Critical coverage gap to close (reviewer-flagged P1):** the node-id-match sweep only catches files literally named `.md`. Wiki pages are named by canonical slug `--.md`. So naively "skip slug resolution, keep node-id sweep" would LOSE detection of a stale `--.md` page for a repo that was surveyed while public, then went private, then was deleted (now `node-null`, slug unresolvable via GraphQL). The node-id sweep does not cover that file. This is a privacy regression, not benign orphan cleanup. +- Therefore node-null handling MUST retain canonical-slug leak coverage. Candidate mechanisms (planning decides): (a) cache the last-known canonical slug on the entry when first resolved, so node-null entries still carry a slug to sweep; (b) treat node-null as warn-and-continue only when NO wiki page could plausibly match, else fail closed; (c) sweep every `knowledge/wiki/repos/*.md` filename against the node-null private set using any retained identifier. The point: a deleted private repo must not be able to leave a slug-named page behind undetected. +- Keep fail-closed behavior **only** for `subprocess-threw` (network / rate-limit / auth) β€” those are genuinely unverifiable and must block, with a retry on the next run. + +Net effect: stale/dead private orphans on `data` no longer *block* promotion, but their slug-named leak surface stays covered. Genuine transport uncertainty still fails closed. + +### 2. Revert the `cp` workaround in `merge-data.yaml` + +`.github/workflows/merge-data.yaml` β€” `πŸ”’ Block private wiki pages` step: + +- Remove `cp ../metadata/repos.yaml metadata/repos.yaml`. The gate reads `data`'s own `repos.yaml` again (the branch being promoted is the correct source for "what private entries does this promotion carry?"). +- Update the step comment to reflect that `data` is authoritative and the gate tolerates dead orphans. +- **Sequencing:** this revert is only safe *after* change #1 lands, because the `cp` exists precisely to dodge the fail-closed-on-orphan behavior. + +### 3. Stop curating private entries off `main` (policy + docs) + +- Document that redacted private entries (`name: `, `private: true`) are allowed on `main` and must not be deleted as "hygiene." PR #3394's deletion was an unnecessary workaround. +- No need to re-add the two already-deleted entries β€” `data` will re-introduce them through normal reconcile probing if those repos are still accessible; if they are dead, the now-tolerant gate handles them. +- Update `metadata/README.md` to state plainly: `metadata/repos.yaml` is written **only** on `data`; `main` never edits it outside the promotion PR. + +### 4. Safety net β€” fail loud on a born-conflicted promotion PR (Oracle fix #2) + +`scripts/merge-data-pr.ts`: + +- After find-or-create, check the resulting PR's mergeability. If it is `DIRTY`/`CONFLICTING`, do not let the workflow report a clean `success` β€” surface it (non-zero exit or a high-signal alert issue) so a conflicted promotion can never again accumulate silently behind a green check. +- This is defense-in-depth and is **not** scope creep: the sole-writer property (change #1-3) is a policy invariant, not a structural guarantee. A future emergency hand-edit on `main`, a bootstrap-recovery edge case, or any non-promotion writer can violate it. When that happens the conflict must be loud immediately rather than discovered weeks later (the exact failure mode that hid #3396 behind a 27/76 divergence). The reviewer tension here β€” "conflict should be impossible, so #4 is creep" vs "the premise isn't airtight, so #4 is required" β€” resolves in favor of keeping #4 precisely because the invariant is enforced by policy, not by structure. + +## Non-Goals + +- **No file split.** `metadata/repos.yaml` stays one file. (Revisit only if `main` ever genuinely needs to edit repo identity/policy independently of `data` β€” not foreseen.) +- **No bespoke YAML merge engine** in `merge-data-pr.ts`. Removing the second writer removes the need. +- **No change to the survey-side privacy gate** (`survey-repo.yaml` visibility recheck). That remains the primary gate; `check-wiki-private-presence.ts` is defense-in-depth. +- **No change to the redaction posture.** Private repos stay redacted as `name: `; this work does not relax that. +- **No `removeRepoEntry` helper.** It was only needed for the manual orphan-deletion path we are eliminating. + +## Success Criteria + +- **SC1**: A private repo entry on `data` whose `node_id` resolves to `node-null` (deleted/lost-access) does **not** block the weekly promotion; the gate skips its slug resolution but still runs the node-id-match sweep. +- **SC2**: A private entry whose resolution fails with a transport/auth error (`subprocess-threw`) **still** blocks the gate (fail-closed preserved). +- **SC3**: `merge-data.yaml` no longer copies `main`'s `repos.yaml`; the gate reads the `data` checkout and passes for a clean promotion. +- **SC4**: A simulated born-conflicted promotion PR causes `merge-data-pr.ts` to fail/alert rather than report success. +- **SC5**: Two consecutive weekly promotions complete with `repos.yaml` changing only on `data` (no `main`-side edits), producing no conflict. +- **SC6**: A redacted private entry present on `data` promotes to `main` without being stripped, and `check-wiki-private-presence.ts` reports no leak for it (redaction β‰  leak). + +## Privacy Boundary (residual-risk statement) + +The sole-writer model leaves redacted private entries (`name: `, `private: true`) on the public `main` branch. This is an explicit, auditable trust decision, not an oversight: + +- **What a bare `node_id` exposes:** that *a* private repo exists in fro-bot's access graph, plus a stable opaque identifier. GitHub node_ids are opaque, non-enumerable, and resolving one to `owner/repo` requires API access that itself gates on the repo's privacy. A reader without that access learns only "some private repo, identified by an opaque token, exists." +- **Residual correlation risk (accepted):** the node_id is stable across branches, commits, and workflow artifacts, so an observer could correlate the same private repo across those public surfaces over time, and infer lifecycle events (added/surveyed/removed dates). This is judged acceptable: it reveals existence and timing, never canonical identity or content. +- **What stays protected:** canonical `owner/repo`, repo name, and all wiki content. The redaction posture (change does not relax it) keeps those off `main`. +- **Defense-in-depth note:** the promotion privacy gate now reads `data`'s own `repos.yaml` (change #2). Since `data` is the sole writer and is only writable by the fro-bot identity, the gate trusts the same identity that produced the content. This is an accepted trust boundary for v1; an independently-anchored private-id source is deferred (see Q6) unless the threat model tightens. + +## Open Questions for Planning + +- **Q1**: `detectPrivateWikiLeaks` already runs both canonical-slug and node-id matches. Confirm the cleanest way to feed `node-null` entries into the node-id sweep while excluding them from slug resolution β€” likely `resolveCanonicalSlugs` returns resolved + a separate `nodeNullEntries` list, and `main()` passes both into the detector (resolved β†’ slug+node-id checks; node-null β†’ node-id check only). Planning settles the exact shape and the return type change. +- **Q2**: Test strategy for the gate change β€” unit-test `resolveCanonicalSlugs` classification (node-null tolerated, subprocess-threw throws) with mocked `execFileSync`, and `detectPrivateWikiLeaks` with node-null entries in the node-id sweep. No live GraphQL in CI. +- **Q3**: For the safety net (#4), does `merge-data-pr.ts` poll for mergeability (it can be `unknown` immediately after create β€” there is already a `waitForKnownMergeableState` helper) before deciding DIRTY? Reuse that helper. Decide fail-the-run vs. open-an-alert-issue (or both). +- **Q4**: Sequencing within a single PR vs. split PRs β€” changes #1 and #2 are coupled (revert is unsafe before the gate fix). Likely one PR for #1+#2+#3, a separate PR for #4 (independent). Planning confirms. +- **Q5**: `check-wiki-authority.ts` currently guards `metadata/*.yaml` by author identity. Should it additionally assert that a `repos.yaml` change on a PR to `main` only ever originates from the `data` promotion (intent-based guard, Oracle fix #5)? This is the code enforcement of the policy invariant that change #3 only documents β€” without it, change #3 is documentation-only and a future `main`-side edit can silently reintroduce the conflict class. Decide: bundle the guard into this work, or ship it as an explicit follow-up hardening item (not leave it as an open question). +- **Q6**: Independent privacy source of truth. The gate trusting `data`'s own `repos.yaml` (change #2) is an accepted v1 trust boundary. If the threat model ever requires the gate to validate against a source the promoted branch cannot itself edit, that is a separate design. Confirm v1 accepts the current boundary. +- **Q7**: Sole-writer commitment vs. reversibility. The model forecloses `main` editing `repos.yaml` independently. If `main`-side repo *policy* (distinct from survey *state*) is ever needed, the split becomes necessary after all. Planning should note the migration path back to split state ownership stays open (it does β€” the split is strictly additive later), so this is a low-cost, reversible commitment rather than a one-way door. + +## Related Work + +- **PR #3396** β€” the conflicted promotion that triggered this; resolved manually via two Fro Bot-identity agent dispatches (orphan removal + data-wins merge), merged at `37618e5`. +- **PR #3394** β€” deleted the two private orphans from `main`; this work reverts that *policy* (the entries themselves stay gone). +- **PR #3395** β€” pointed the gate at `main`'s `repos.yaml`; this work reverts that `cp`. +- **`scripts/check-wiki-private-presence.ts`** β€” the fail-closed gate at the center of the root cause. +- **`scripts/commit-metadata.ts:345`** β€” already refuses writes to `main`; reinforces that `data` is the only programmatic writer. +- **`docs/solutions/security-issues/survey-workflow-side-privacy-gate-2026-05-16.md`** β€” the primary (survey-side) privacy gate that makes `check-wiki-private-presence.ts` defense-in-depth. +- **Oracle diagnosis (2026-06-01)** β€” proposed file-split (fix #1) + fail-on-dirty (fix #2); this brainstorm adopts a simpler root fix that subsumes #1 and keeps #2 as the safety net. diff --git a/docs/plans/2026-06-01-001-fix-data-branch-sole-writer-plan.md b/docs/plans/2026-06-01-001-fix-data-branch-sole-writer-plan.md new file mode 100644 index 000000000..160506655 --- /dev/null +++ b/docs/plans/2026-06-01-001-fix-data-branch-sole-writer-plan.md @@ -0,0 +1,292 @@ +--- +title: "fix: Make data the sole writer of metadata/repos.yaml" +type: fix +status: active +date: 2026-06-01 +origin: docs/brainstorms/2026-06-01-data-branch-sole-writer-requirements.md +--- + +# fix: Make `data` the Sole Writer of `metadata/repos.yaml` + +## Overview + +The weekly `data β†’ main` promotion (PR #3396) was born conflicted in `metadata/repos.yaml`. Root cause: that file is mutated on both branches β€” autonomous survey write-backs on `data`, and privacy-hygiene deletions on `main` (PR #3394) β€” and the main-side edits are themselves workarounds for a brittle privacy gate that fails closed on `node-null` private entries. This plan eliminates the conflict class by removing the second writer: fix the gate so it no longer needs main-side curation, stop editing `repos.yaml` on `main`, and add a safety net that makes any future born-conflicted promotion loud instead of silent. + +## Problem Frame + +`scripts/check-wiki-private-presence.ts` (`resolveCanonicalSlugs`) throws if *any* private entry is unresolvable, including `node-null` (repo deleted / App lost access). A single dead private orphan on `data` blocked every promotion. Two PRs worked around that: PR #3395 pointed the gate at `main`'s `repos.yaml` (`cp ../metadata/repos.yaml`), and PR #3394 deleted orphans from `main` to keep that copy clean. Together they turned `repos.yaml` into a both-sides-mutated file and produced the #3396 conflict. One root problem (brittle fail-closed gate), not two. See origin: docs/brainstorms/2026-06-01-data-branch-sole-writer-requirements.md. + +## Requirements Trace + +- R1. A `node-null` private entry on `data` must not block the weekly promotion (origin SC1). +- R2. A `subprocess-threw` (transport/auth) resolution failure must still fail the gate closed (origin SC2). +- R3. Slug-named wiki-page leak coverage for `node-null` private repos must be preserved β€” no privacy regression (origin SC1, review P1). +- R4. `merge-data.yaml` must stop copying `main`'s `repos.yaml`; the gate reads the `data` checkout (origin SC3). +- R5. A born-conflicted promotion PR must fail/alert rather than report success (origin SC4). +- R6. Two consecutive promotions complete with `repos.yaml` changing only on `data`, producing no conflict (origin SC5). +- R7. A redacted private entry on `data` promotes to `main` without being stripped, and the gate reports no leak for it (origin SC6). + +## Scope Boundaries + +- No file split of `metadata/repos.yaml` (origin non-goal). Single-writer makes it unnecessary. +- No bespoke YAML merge engine in `merge-data-pr.ts`. +- No change to the survey-side privacy gate (`survey-repo.yaml` visibility recheck) β€” that remains the primary gate. +- No relaxation of the redaction posture: private repos stay `name: `, canonical identifiers never reach `main`. +- No `removeRepoEntry` helper β€” only needed for the manual orphan-deletion path being eliminated. + +### Deferred to Separate Tasks + +- Intent-based authority guard (a `repos.yaml` change on a `main` PR may only originate from the `data` promotion): decision in this plan (Unit 5) is to ship it as the code enforcement of the sole-writer invariant, OR defer as an explicit follow-up issue β€” resolved in Open Questions below. +- Auto-deletion of wiki pages for repos that go private: already owned by private-repo handling plan Unit 8 (ships as a check, operator deletes). Not duplicated here. +- Independent privacy source of truth (gate validating against a source the promoted branch can't edit): deferred unless the threat model tightens (origin Q6). + +## Context & Research + +### Relevant Code and Patterns + +- `scripts/check-wiki-private-presence.ts` β€” the gate. `resolveCanonicalSlugs` (lines 142-191) throws on any failure; `detectPrivateWikiLeaks` (lines 24-49) does canonical-slug + node-id matching; `FailureMode = 'subprocess-threw' | 'node-null'` (line 123) already classifies the two failure modes β€” the classification exists, only the throw-on-both behavior needs changing. +- `.github/workflows/merge-data.yaml` lines 39-47 β€” the `cp ../metadata/repos.yaml` workaround + gate invocation. +- `scripts/merge-data-pr.ts` β€” `waitForKnownMergeableState` (lines 276-298) already polls `mergeable_state` past `unknown`; reuse it for DIRTY detection. `mergeDataPr` returns a result object; `main()` (lines 702-705) prints it. +- `scripts/reconcile-repos.ts` β€” sole programmatic writer of survey-state fields on `data`; `bootstrapDataBranch` re-seeds `data` from `main` after squash-delete (copies main's exact content β†’ no divergence). +- `scripts/commit-metadata.ts` line 345 β€” already refuses writes to `main`, reinforcing data-as-only-programmatic-writer. +- `scripts/schemas.ts` line 67 β€” documents that canonical identifiers never reach `main` (the constraint that rules out caching slugs). + +### Institutional Learnings + +- `docs/solutions/security-issues/survey-workflow-side-privacy-gate-2026-05-16.md` β€” the survey-side gate that makes `check-wiki-private-presence.ts` defense-in-depth. +- `docs/solutions/security-issues/private-repo-dispatch-visibility-gate-2026-05-08.md` β€” node_id-only public text; redaction posture rationale. +- `docs/solutions/integration-issues/merge-data-pr-github-422-race-recovery-2026-05-02.md` β€” prior merge-data race handling; informs the safety-net error classification. + +### External References + +- None needed β€” fully local patterns. + +## Key Technical Decisions + +- **Node-null leak coverage via DELTA-based public-allowlist inversion (the core design move).** The current gate iterates *private* entries and tries to match their slugs against wiki filenames β€” which requires resolving the private repo's canonical slug (impossible for `node-null`, and storing it would itself leak per schema line 67). Invert it: flag any wiki page that is **new in this promotion** and does not map to a known-public entry. Specifically: + - Build `publicSlugs` from entries with explicit **`private === false`**, using `computeRepoSlug(owner, name)` from `scripts/wiki-slug.ts` (NOT raw `owner--name` β€” see the empirical finding below). + - Build `grandfatheredSlugs` = the stems of repo pages **already present on `main`** (the promotion base). A page already on `main` already passed the gate and is already public knowledge; the gate's job is to stop NEW private pages, not re-litigate existing ones. + - Flag a `knowledge/wiki/repos/*.md` page only when its stem is in NEITHER `publicSlugs` NOR `grandfatheredSlugs`. This catches a newly-promoted private/node-null/node_id page while leaving existing public pages alone. + - *Fail-safe predicate (review P1, both reviewers):* the public set is built from `private === false` ONLY. The schema makes `private` optional and treats absent as private-until-confirmed; `private !== true` would wrongly admit absent/legacy entries and invert the fail-safe. + - *EMPIRICAL FINDINGS that forced the delta design (verified against live `main`, 2026-06-01):* + 1. **Slug mismatch (P0 if ignored):** wiki filenames are produced by `computeRepoSlug` (dotsβ†’dashes, leading-dot stripped, underscoreβ†’dash), so entry `marcusrbrown/.dotfiles` β†’ page `marcusrbrown--dotfiles.md`. A raw `owner--name` allowlist flags 9 of 26 legit public pages. The allowlist MUST use `computeRepoSlug`. + 2. **Lost-access over-block (P0, the reason for the delta design):** `marcusrbrown/copiloting` is `onboarding_status: lost-access` with ABSENT `private` (legacy entry, can't be re-probed) but is verifiably PUBLIC. Under a strict `private === false` predicate its existing public page is flagged β€” blocking every promotion. Grandfathering pages already on `main` resolves this without weakening the fail-safe for new pages. + - *Rationale:* removes the dependency on resolving private identities entirely; the public entries are the source of truth for "what's allowed to have a page." + - *Boundary to resolve (see Open Questions):* a repos page that maps to NO tracked entry at all (neither public nor private) β€” e.g., a stale page from before tracking. Recommended: fail closed (flag it) β€” an unattributable page is exactly the leak signal. This must not over-flag legitimately public pages, so the public-allowlist must be built from the same `data` `repos.yaml` the promotion carries. +- **Tamper trust boundary (review P0 β€” reframed).** Adversarial review flagged that building the allowlist from `data`'s own `repos.yaml` lets a fake `private: false` entry whitelist a private page. This is real but **trust-equivalent to the existing gate**, not a regression: the current gate reads the *private* list from the same file, and a tamper that *deletes* a private entry already causes the same under-block. Both designs anchor on fro-bot being the sole writer of `data` (enforced by Unit 5's authority guard + the integrity check in `reconcile-repos.ts`). An independently-anchored privacy source is out of scope for v1 (origin Q6). Document this as an accepted, unchanged trust boundary β€” do not claim the inversion introduces it. +- **Keep `subprocess-threw` fail-closed; stop failing on `node-null`.** Transport/auth uncertainty is genuinely unverifiable β†’ must block + retry next run. `node-null` is now a non-event for coverage (the inversion doesn't need resolution), so it never blocks. +- **Gate reads `data`'s own `repos.yaml`.** Revert the `cp`. The branch being promoted is the correct source for "what entries does this promotion carry." Accepted trust boundary: `data` is written only by the fro-bot identity (origin Q6). +- **Safety net is policy-enforcement, not creep.** Sole-writer is a policy invariant, not structural. `merge-data-pr.ts` must detect a DIRTY/CONFLICTING result and fail/alert so a violated invariant is loud immediately. + +## Open Questions + +### Resolved During Planning + +- **Node-null slug coverage mechanism** β†’ public-allowlist inversion (above). Resolves origin Q1 without storing canonical slugs. +- **Can we cache the canonical slug on the entry?** β†’ No. Schema line 67: canonical identifiers never reach `main`. Inversion avoids needing it. +- **Does anything remove a wiki page when a repo goes private?** β†’ No removal path exists (wiki is additive-only; `wiki-ingest.ts` lines 791-814). Private-repo plan Unit 8 handles this as a check. This plan's gate is the promotion-time backstop. +- **Is `waitForKnownMergeableState` reusable for DIRTY detection?** β†’ Yes (lines 276-298). The safety net reuses it. + +### Deferred to Implementation + +- Exact return-shape change to `resolveCanonicalSlugs` / `detectPrivateWikiLeaks` to express the inversion (likely: `detectPrivateWikiLeaks` takes `publicSlugs: Set` + `wikiRepoFilenames` and flags non-members; `resolveCanonicalSlugs` may become unnecessary for the leak check, retained only if still used elsewhere). Settle when touching the code. +- Whether the safety net hard-fails the workflow, opens an alert issue, or both (origin Q3). Lean: both β€” non-zero exit AND a high-signal issue, mirroring the stale-divergence alert pattern already in `merge-data-pr.ts`. +- Whether Unit 5 (authority guard) bundles into this work or ships as a follow-up issue (origin Q5). + +## High-Level Technical Design + +> *This illustrates the intended approach and is directional guidance for review, not implementation specification. The implementing agent should treat it as context, not code to reproduce.* + +Current gate (private-driven, needs resolution): + + for each PRIVATE entry: + resolve canonical slug via GraphQL ← throws on node-null (BRITTLE) + match slug / node_id against wiki filenames β†’ leak + +Delta-based inverted gate (no private resolution, grandfathers existing pages): + + publicSlugs = { computeRepoSlug(e.owner, e.name) for e in repos if e.private === false } + grandfatheredSlugs = { stem(f) for f in main:knowledge/wiki/repos/*.md } # already-public, already-passed + + for each file in data:knowledge/wiki/repos/*.md: + stem = file without .md + if stem NOT in publicSlugs AND stem NOT in grandfatheredSlugs: + flag as leak ← a NEW page that maps to no public entry + + # fail closed if repos.yaml or main page-list is unreadable + # computeRepoSlug (not raw owner--name) and private===false (not !==true) are both load-bearing + +## Implementation Units + +- [x] **Unit 1: Invert the privacy gate to a public-allowlist model** + +**Goal:** Make `check-wiki-private-presence.ts` detect leaks by flagging wiki pages that don't map to a known-public entry, eliminating the need to resolve private slugs and making `node-null` a non-blocking non-event. + +**Requirements:** R1, R2, R3, R7 + +**Dependencies:** None + +**Files:** +- Modify: `scripts/check-wiki-private-presence.ts` +- Modify: `.github/workflows/merge-data.yaml` (surface `main`'s repo-page list to the gate β€” the non-`data` checkout already has `knowledge/wiki/repos/` on `main`; pass its path/list) +- Reference: `scripts/wiki-slug.ts` (`computeRepoSlug`) +- Test: `scripts/check-wiki-private-presence.test.ts` + +**Approach:** +- Build `publicSlugs: Set` from `repos.yaml` entries where **`private === false`** via `computeRepoSlug(owner, name)`. **Critical 1:** use `computeRepoSlug`, NOT raw `${owner}--${name}` β€” wiki filenames are sanitized (dots/underscoresβ†’dashes, leading-dot stripped); a raw allowlist over-flags ~9 legit pages (empirically verified). **Critical 2:** use `private === false`, NOT `private !== true` β€” the schema makes `private` optional and treats absent as private-until-confirmed (fail-safe); `private !== true` inverts that. +- Build `grandfatheredSlugs: Set` from the stems of `.md` files already present in `knowledge/wiki/repos/` on the promotion BASE (`main`). A page already on `main` is grandfathered (already public, already passed the gate). This resolves the `copiloting` lost-access over-block. +- Reframe `detectPrivateWikiLeaks` to flag any `data` `knowledge/wiki/repos/*.md` stem present in NEITHER `publicSlugs` NOR `grandfatheredSlugs`. Add an `unattributable-page` reason; retain existing taxonomy where applicable. +- Keep `subprocess-threw` fail-closed: if reading/parsing the `data` entries OR the `main` page list fails, block. `node-null` no longer participates in the throw path. +- Retain `resolveCanonicalSlugs` only if still referenced elsewhere; otherwise mark for removal (decide in-code). + +**Execution note:** Test-first β€” add the failing grandfather + slug-sanitization scenarios before changing the detector. + +**Patterns to follow:** existing `detectPrivateWikiLeaks` structure (lines 24-49); `computeRepoSlug` from `scripts/wiki-slug.ts`; existing fail-closed throw with per-entry mode hints (lines 179-188). + +**Test scenarios:** +- Happy path: all `data` repo pages map to `private === false` entries (via `computeRepoSlug`) β†’ no leak. +- **Slug-sanitization (empirical P0): entry `owner/.dotfiles` + page `owner--dotfiles.md` β†’ NO leak** (allowlist uses `computeRepoSlug`). Assert a raw-join allowlist WOULD have flagged it. +- **Grandfather (empirical P0): a page already on `main` for a `lost-access`/absent-`private` entry (`marcusrbrown--copiloting.md`) β†’ NO leak** (grandfathered). Assert that without grandfathering it WOULD flag. +- Edge case: a NEW page (on `data`, not `main`) for a private entry (node_id known) β†’ flagged. +- Edge case (original P1 gap): a NEW page for a `node-null` private repo β†’ flagged without GraphQL resolution. +- Edge case (fail-safe predicate): a NEW page for an entry with ABSENT/undefined `private` β†’ flagged. Assert `private !== true` is NOT the predicate. +- Edge case: a NEW page named `.md` β†’ flagged (not a public slug, not grandfathered). +- Error path: `repos.yaml` parse failure OR main-page-list read failure β†’ throws (fail closed); MUST NOT yield an empty set that silently over- or under-blocks. +- Happy path: redacted private entry present, no corresponding page β†’ no leak (R7). + +**Verification:** node-null private orphan no longer blocks; a NEW slug-named page for a private repo is still caught; the existing `copiloting` page does not over-block; transport failure still blocks. + +- [x] **Unit 2: Revert the `cp` workaround in `merge-data.yaml`** + +**Goal:** Gate reads `data`'s own `repos.yaml` instead of copying `main`'s. + +**Requirements:** R4 + +**Dependencies:** Unit 1 (revert is only safe once the gate tolerates node-null orphans) + +**Files:** +- Modify: `.github/workflows/merge-data.yaml` + +**Approach:** +- Remove `cp ../metadata/repos.yaml metadata/repos.yaml` from the `πŸ”’ Block private wiki pages` step. +- Update the step comment to state `data` is authoritative and the gate tolerates dead orphans. + +**Execution note:** none (workflow YAML). + +**Patterns to follow:** existing two-checkout structure in `merge-data.yaml` (lines 31-47). + +**Test scenarios:** Test expectation: none β€” workflow config change. Validate via `actionlint` and a manual/dispatch promotion run (covered by R6 verification). + +**Verification:** `merge-data.yaml` no longer references `cp`; the gate step runs against the `data` checkout. + +- [x] **Unit 3: Documentation β€” sole-writer model + privacy boundary** + +**Goal:** Document that `repos.yaml` is written only on `data`, that redacted private entries are allowed on `main`, and the residual-risk statement for a bare node_id. + +**Requirements:** R7 (policy), supports R6 + +**Dependencies:** None (can land with Unit 1-2) + +**Files:** +- Modify: `metadata/README.md` +- Modify: `knowledge/schema.md` (reinforce public-only / data-authoritative invariant if not already stated) + +**Approach:** +- State plainly: `metadata/repos.yaml` is written only on `data`; `main` never edits it outside the promotion PR. +- Document that redacted private entries (`name: `, `private: true`) are allowed on `main` and must not be deleted as hygiene. +- Include the residual-risk statement: a bare node_id exposes existence + timing, never canonical identity or content. + +**Patterns to follow:** existing `metadata/README.md` credential/authority tables; origin doc "Privacy Boundary" section. + +**Test scenarios:** Test expectation: none β€” documentation. Markdown lint must pass. + +**Verification:** README and schema reflect the sole-writer model; no claim that private entries must be curated off `main`. + +- [x] **Unit 4: Safety net β€” fail/alert on a born-conflicted promotion PR** + +**Goal:** `merge-data-pr.ts` surfaces a DIRTY/CONFLICTING promotion PR loudly instead of reporting clean success. + +**Requirements:** R5 + +**Dependencies:** None (independent of Unit 1-3) + +**Files:** +- Modify: `scripts/merge-data-pr.ts` +- Test: `scripts/merge-data-pr.test.ts` + +**Approach:** +- After find-or-create, reuse `waitForKnownMergeableState` to resolve `mergeable_state` past `unknown`. +- If the resolved state is `dirty`/`conflicting`, set a non-clean outcome: open (or reuse) a high-signal alert issue AND signal failure (non-zero exit from `main()` or an explicit `conflicted: true` flag the workflow checks). Decide both-vs-one per origin Q3 (lean both). +- Mirror the existing `maybeCreateStaleDivergenceAlert` dedup pattern so repeated runs don't spam issues. + +**Execution note:** Test-first β€” add the DIRTY-result scenario with a mocked Octokit before changing `mergeDataPr`. + +**Patterns to follow:** `maybeCreateStaleDivergenceAlert` (lines 335-384) for dedup + issue creation; `waitForKnownMergeableState` (lines 276-298); `MergeDataPrResult` shape (lines 28-35). + +**Test scenarios:** +- Happy path: clean mergeable PR β†’ result reports success, no alert. +- Error path: `mergeable_state: 'dirty'` β†’ alert issue created + non-clean signal. +- Edge case: `mergeable_state` stays `unknown` after retries β†’ existing behavior preserved (warn, no false alarm). +- Edge case: alert issue already open β†’ no duplicate (dedup). +- Integration: a DIRTY result sets the field the workflow step inspects to fail the run. + +**Verification:** a simulated born-conflicted PR causes a non-clean outcome + alert; a clean PR does not. + +- [x] **Unit 5: (Conditional) Intent-based authority guard for `repos.yaml` on `main`** + +**Goal:** Code-enforce that a `repos.yaml` change on a PR to `main` only originates from the `data` promotion β€” the enforcement of Unit 3's documented invariant. + +**Requirements:** Supports R6 (prevents future both-sides mutation) + +**Dependencies:** Unit 3 (documents the policy this enforces) + +**Files:** +- Modify: `scripts/check-wiki-authority.ts` +- Test: `scripts/check-wiki-authority.test.ts` + +**Approach:** +- Extend the existing author-identity guard so a `metadata/repos.yaml` modification on a non-promotion PR (head β‰  `data`) is rejected even when authored by a fro-bot identity. +- Allow the change when the PR head is `data` (the promotion path). + +**Execution note:** Test-first. + +**Patterns to follow:** existing `check-wiki-authority.ts` path-guard + author checks. + +**Test scenarios:** +- Happy path: `data β†’ main` promotion PR touching `repos.yaml` β†’ allowed. +- Error path: a non-`data` PR (even fro-bot-authored) touching `repos.yaml` β†’ blocked. +- Edge case: a non-`data` PR touching other `metadata/*.yaml` β†’ existing behavior unchanged. + +**Verification:** the guard blocks the exact both-sides-mutation path (#3394-style edit on `main`) that started this. + +> **Unit 5 gating:** This unit is the code enforcement of Unit 3's policy. Per origin Q5, the decision is to **include it** unless the user prefers a separate follow-up β€” without it, change #3 is documentation-only and a future `main`-side edit silently reintroduces the conflict class. Confirmed in the handoff question. + +## System-Wide Impact + +- **Interaction graph:** `merge-data.yaml` β†’ `check-wiki-private-presence.ts` (gate) + `merge-data-pr.ts` (PR open). `check-wiki-authority.ts` runs as a PR-time required check on `main`. +- **Error propagation:** gate `subprocess-threw` β†’ blocks promotion (fail closed). Safety-net DIRTY β†’ alert + fail. Both surface to the operator, not silent. +- **State lifecycle risks:** `bootstrapDataBranch` re-seeding `data` from `main` copies exact content (no divergence) β€” the sole-writer invariant holds across data-branch recreation. +- **API surface parity:** none β€” internal control-plane only. +- **Integration coverage:** a real promotion run (R6) is the cross-layer proof unit tests can't give; validate via manual `merge-data.yaml` dispatch after Units 1-2 land. +- **Unchanged invariants:** redaction posture (private = `name: `), `commitMetadata` main-write refusal, survey-side visibility gate, additive-only wiki contract β€” none change. + +## Risks & Dependencies + +| Risk | Mitigation | +|------|------------| +| Inversion over-flags a legitimately public page (public entry missing from `data`'s `repos.yaml`) | Build the allowlist from the same `data` `repos.yaml` the promotion carries; a missing public entry is itself a data-integrity signal worth surfacing. Test the unattributable-page boundary explicitly. | +| Unit 2 reverted before Unit 1 lands β†’ node-null orphan re-blocks promotion | Enforce dependency: Unit 2 sequenced after Unit 1 in the same PR. | +| Safety net (Unit 4) spams alert issues | Reuse `maybeCreateStaleDivergenceAlert` dedup pattern. | +| Unit 5 blocks a legitimate emergency `main` hotfix to `repos.yaml` | Document an explicit override path (PR title marker or temporary guard bypass) in Unit 3 docs. | + +## Documentation / Operational Notes + +- After Units 1-2 merge, validate with a manual `merge-data.yaml` dispatch (the data branch will have been recreated by then via the next autonomous run or bootstrap). +- The 2 private orphans deleted from `main` (PR #3394) are not re-added; reconcile re-probes them naturally, and the tolerant gate handles them if dead. + +## Sources & References + +- **Origin document:** [docs/brainstorms/2026-06-01-data-branch-sole-writer-requirements.md](docs/brainstorms/2026-06-01-data-branch-sole-writer-requirements.md) +- Related code: `scripts/check-wiki-private-presence.ts`, `scripts/merge-data-pr.ts`, `.github/workflows/merge-data.yaml`, `scripts/check-wiki-authority.ts` +- Related PRs: #3394 (orphan deletion, policy reverted here), #3395 (cp workaround, reverted here), #3396 (the conflicted promotion, merged manually) +- Related plan: `docs/plans/2026-05-05-002-feat-private-repo-handling-plan.md` (Unit 8 owns private-page-removal-as-check) diff --git a/knowledge/schema.md b/knowledge/schema.md index 5b7623439..c3163c63c 100644 --- a/knowledge/schema.md +++ b/knowledge/schema.md @@ -87,7 +87,7 @@ Operations: `ingest`, `query`, `lint`, `manual-edit`. ## Editing the wiki -The `knowledge/wiki//*.md` pages, `knowledge/index.md`, and `knowledge/log.md` are enforced as Fro-Bot-writable-only on `main`. A CI job (`Check Wiki Authority`, backed by `scripts/check-wiki-authority.ts`) fails any PR that modifies them unless authored by `fro-bot` or `fro-bot[bot]`. This keeps `main` aligned with `data`, which is the authoritative wiki source. +The `knowledge/wiki//*.md` pages, `knowledge/index.md`, and `knowledge/log.md` are enforced as Fro-Bot-writable-only on `main`. A CI job (`Check Wiki Authority`, backed by `scripts/check-wiki-authority.ts`) fails any PR that modifies them unless authored by `fro-bot` or `fro-bot[bot]`. This keeps `main` aligned with `data`, which is the authoritative wiki source. The same data-authoritative invariant applies to `metadata/repos.yaml`: it is written only on `data` and promoted to `main` β€” `main` is never the origin of edits to either the wiki content tree or `repos.yaml` outside of a promotion PR. For intentional manual edits (correcting a factual error the agent hasn't caught, for example), land the change on `data` and let the existing promotion flow land it on `main`: diff --git a/metadata/README.md b/metadata/README.md index c5e017e73..54e92de44 100644 --- a/metadata/README.md +++ b/metadata/README.md @@ -54,9 +54,9 @@ repos: Field notes: - `node_id` β€” GitHub GraphQL node ID for the repository. Used as the manual dispatch identifier: `gh workflow run survey-repo.yaml -f node_id=`. Also used as the privacy-safe identifier in `pending-review` issue bodies for private repos. -- `private` β€” whether the repository is private. Entries with `private: true` have `owner` and `name` redacted to `[REDACTED]` in public-facing contexts; the `node_id` is used as the subject identifier instead. +- `private` β€” whether the repository is private. Entries with `private: true` are stored redacted: `owner` and `name` are replaced with `` and canonical identifiers never reach `main`. The `node_id` is used as the subject identifier in issue bodies and workflow dispatch. Redacted entries appear on `main` as part of normal promotion β€” this is by design, not a privacy leak. They must not be deleted from `main` as hygiene (see [Sole-writer rule and privacy boundary](#sole-writer-rule-and-privacy-boundary) below). -Update convention: invitation handler, metadata workflow, and daily reconcile update this file programmatically on the `data` branch. +Sole-writer rule: `repos.yaml` is written exclusively on the `data` branch β€” by the invitation handler, daily reconcile, and survey workflows running under the fro-bot identity. `main` never edits this file directly. The sole path from `data` to `main` is the weekly `data β†’ main` promotion PR; manual hygiene edits to `repos.yaml` on a `main`-targeting feature branch are prohibited because they create a both-sides mutation that conflicts the promotion. If a private repo entry is deleted or access is lost, leave its redacted entry in `repos.yaml` as-is β€” the promotion privacy gate tolerates dead orphans (it grandfathers pages already present on `main` and blocks only newly-promoted unattributable pages). Onboarding status values: @@ -78,6 +78,10 @@ The channel is sticky after first write β€” neither reconcile nor any other writ `next_survey_eligible_at` is the ISO date at which an entry becomes eligible for re-survey, computed at survey-completion time as `last_survey_at + base_interval[channel] + jitter(owner, name, last_survey_at)`. `null` means never-surveyed (treat as immediately eligible). +### Sole-writer rule and privacy boundary + +Redacted private entries (`name: `, `private: true`) on the public `main` branch are an explicit, auditable trust decision, not an oversight. A bare `node_id` exposes only that _a_ private repo exists in fro-bot's access graph, plus a stable opaque identifier and lifecycle timing (added/surveyed dates). Resolving a `node_id` to `owner/repo` requires API access that itself gates on the repo's privacy; a reader without that access learns only that some private repo exists. Canonical `owner/repo`, repo name, and all wiki content stay off `main`. This is an accepted residual risk β€” existence and timing may be inferred, never identity or content. + ### `renovate.yaml` Auto-discovered list of fro-bot org repositories with Renovate workflows. Used by `dispatch-renovate.yaml` to determine which repos to dispatch `workflow_dispatch` events to. @@ -136,6 +140,8 @@ PAT split summary: All `metadata/*.yaml` files are enforced as Fro-Bot-writable-only on `main`. A CI job (`Check Wiki Authority`, backed by `scripts/check-wiki-authority.ts`) fails any PR that modifies them unless authored by `fro-bot` or `fro-bot[bot]`. This prevents `main` from drifting relative to `data`, which is the single authoritative source for metadata state. +`repos.yaml` carries an additional sole-writer invariant: changes to it on `main` must originate only from the `data` promotion branch. A direct edit to `repos.yaml` on a non-promotion branch is prohibited even if fro-bot-authored. Any exception requires an explicit override and is treated as an emergency measure, not routine workflow β€” the invariant exists precisely to prevent the both-sides mutation that causes promotion conflicts. + For intentional manual edits to any metadata file (including `allowlist.yaml`), land the change on `data` directly and let the existing promotion flow land it on `main`: ```bash diff --git a/scripts/check-wiki-authority.test.ts b/scripts/check-wiki-authority.test.ts index 160d4cf21..5691d45fd 100644 --- a/scripts/check-wiki-authority.test.ts +++ b/scripts/check-wiki-authority.test.ts @@ -1,6 +1,15 @@ -import {describe, expect, it} from 'vitest' +import {describe, expect, it, vi} from 'vitest' -import {checkWikiAuthority, formatBlockMessage} from './check-wiki-authority.ts' +import {checkWikiAuthority, fetchChangedFiles, formatBlockMessage} from './check-wiki-authority.ts' + +// Hoisted mock for execFileSync β€” must precede any import that might trigger the module. +const {mockExecFileSync} = vi.hoisted(() => ({ + mockExecFileSync: vi.fn(), +})) + +vi.mock('node:child_process', () => ({ + execFileSync: mockExecFileSync, +})) describe('checkWikiAuthority', () => { describe('author is an allowed Fro Bot identity', () => { @@ -8,7 +17,7 @@ describe('checkWikiAuthority', () => { // #given the App installation author touching metadata/repos.yaml // #when the guard evaluates the PR // #then the edit is allowed (fro-bot[bot] is the promotion-PR identity) - const result = checkWikiAuthority({author: 'fro-bot[bot]', files: ['metadata/repos.yaml']}) + const result = checkWikiAuthority({author: 'fro-bot[bot]', headRef: 'data', files: ['metadata/repos.yaml']}) expect(result).toEqual({ok: true}) }) @@ -18,6 +27,7 @@ describe('checkWikiAuthority', () => { // #then the edit is allowed (fro-bot and fro-bot[bot] are one operator) const result = checkWikiAuthority({ author: 'fro-bot', + headRef: 'data', files: ['knowledge/wiki/topics/home-assistant.md'], }) expect(result).toEqual({ok: true}) @@ -29,6 +39,7 @@ describe('checkWikiAuthority', () => { // #then every guarded path is allowed under the Fro Bot identity const result = checkWikiAuthority({ author: 'fro-bot[bot]', + headRef: 'data', files: [ 'knowledge/wiki/repos/marcusrbrown--x.md', 'metadata/allowlist.yaml', @@ -38,6 +49,62 @@ describe('checkWikiAuthority', () => { }) expect(result).toEqual({ok: true}) }) + + it('allows fro-bot from data branch editing metadata/repos.yaml (promotion path)', () => { + // #given fro-bot on the `data` branch touching metadata/repos.yaml + // #when the guard evaluates the PR + // #then the edit is allowed β€” this is the legitimate promotion path + const result = checkWikiAuthority({author: 'fro-bot', headRef: 'data', files: ['metadata/repos.yaml']}) + expect(result).toEqual({ok: true}) + }) + + it('blocks fro-bot editing metadata/repos.yaml from a non-data branch (both-sides mutation)', () => { + // #given fro-bot on a feature branch (not `data`) touching metadata/repos.yaml + // #when the guard evaluates the PR + // #then the edit is blocked β€” this is the prohibited both-sides mutation pattern (#3394) + const result = checkWikiAuthority({ + author: 'fro-bot', + headRef: 'fix/something', + files: ['metadata/repos.yaml'], + }) + expect(result).toEqual({ok: false, blockedFiles: ['metadata/repos.yaml']}) + }) + + it('blocks fro-bot[bot] editing metadata/repos.yaml from a non-data branch', () => { + // #given fro-bot[bot] on a non-data branch touching metadata/repos.yaml + // #when the guard evaluates the PR + // #then the edit is blocked β€” repos.yaml can only come from `data` + const result = checkWikiAuthority({ + author: 'fro-bot[bot]', + headRef: 'feature/something', + files: ['metadata/repos.yaml'], + }) + expect(result).toEqual({ok: false, blockedFiles: ['metadata/repos.yaml']}) + }) + + it('allows fro-bot editing metadata/allowlist.yaml from a non-data branch (only repos.yaml has head rule)', () => { + // #given fro-bot on a feature branch touching a metadata yaml that is NOT repos.yaml + // #when the guard evaluates the PR + // #then the edit is allowed β€” only repos.yaml carries the head-ref restriction + const result = checkWikiAuthority({ + author: 'fro-bot', + headRef: 'fix/something', + files: ['metadata/allowlist.yaml'], + }) + expect(result).toEqual({ok: true}) + }) + + it('allows fro-bot on data branch editing other guarded files (wiki, index, log)', () => { + // #given fro-bot on `data` touching wiki and knowledge files (not repos.yaml) + // #when the guard evaluates the PR + // #then the edit is allowed β€” unchanged behavior for other guarded files + const result = checkWikiAuthority({ + author: 'fro-bot', + headRef: 'data', + files: ['knowledge/wiki/topics/x.md', 'knowledge/index.md', 'knowledge/log.md'], + }) + expect(result).toEqual({ok: true}) + }) }) describe('author is not Fro Bot; only unguarded files touched', () => { @@ -45,7 +112,7 @@ describe('checkWikiAuthority', () => { // #given a human PR touching only application code and top-level docs // #when the guard evaluates the PR // #then the edit is allowed (no guarded paths present) - const result = checkWikiAuthority({author: 'marcusrbrown', files: ['README.md', 'src/foo.ts']}) + const result = checkWikiAuthority({author: 'marcusrbrown', headRef: 'main', files: ['README.md', 'src/foo.ts']}) expect(result).toEqual({ok: true}) }) @@ -53,7 +120,7 @@ describe('checkWikiAuthority', () => { // #given a PR whose file list is empty (edge case) // #when the guard evaluates the PR // #then the guard does not fire (nothing to check) - const result = checkWikiAuthority({author: 'marcusrbrown', files: []}) + const result = checkWikiAuthority({author: 'marcusrbrown', headRef: 'main', files: []}) expect(result).toEqual({ok: true}) }) @@ -61,7 +128,7 @@ describe('checkWikiAuthority', () => { // #given the Karpathy-style conventions doc edited by a human // #when the guard evaluates the PR // #then the edit is allowed (schema is intentionally outside the guard) - const result = checkWikiAuthority({author: 'marcusrbrown', files: ['knowledge/schema.md']}) + const result = checkWikiAuthority({author: 'marcusrbrown', headRef: 'main', files: ['knowledge/schema.md']}) expect(result).toEqual({ok: true}) }) @@ -69,7 +136,7 @@ describe('checkWikiAuthority', () => { // #given a human edit to the knowledge directory's README // #when the guard evaluates the PR // #then the edit is allowed (READMEs are human-editable) - const result = checkWikiAuthority({author: 'marcusrbrown', files: ['knowledge/README.md']}) + const result = checkWikiAuthority({author: 'marcusrbrown', headRef: 'main', files: ['knowledge/README.md']}) expect(result).toEqual({ok: true}) }) @@ -77,7 +144,7 @@ describe('checkWikiAuthority', () => { // #given a human edit to the wiki directory's README // #when the guard evaluates the PR // #then the edit is allowed (README is outside the auto-managed wiki content) - const result = checkWikiAuthority({author: 'marcusrbrown', files: ['knowledge/wiki/README.md']}) + const result = checkWikiAuthority({author: 'marcusrbrown', headRef: 'main', files: ['knowledge/wiki/README.md']}) expect(result).toEqual({ok: true}) }) @@ -85,7 +152,7 @@ describe('checkWikiAuthority', () => { // #given a human edit to the metadata directory's README // #when the guard evaluates the PR // #then the edit is allowed (only *.yaml files in metadata/ are guarded) - const result = checkWikiAuthority({author: 'marcusrbrown', files: ['metadata/README.md']}) + const result = checkWikiAuthority({author: 'marcusrbrown', headRef: 'main', files: ['metadata/README.md']}) expect(result).toEqual({ok: true}) }) }) @@ -95,7 +162,7 @@ describe('checkWikiAuthority', () => { // #given a human author touching an auto-managed metadata yaml // #when the guard evaluates the PR // #then the edit is blocked and the file is listed - const result = checkWikiAuthority({author: 'marcusrbrown', files: ['metadata/repos.yaml']}) + const result = checkWikiAuthority({author: 'marcusrbrown', headRef: 'main', files: ['metadata/repos.yaml']}) expect(result).toEqual({ok: false, blockedFiles: ['metadata/repos.yaml']}) }) @@ -105,6 +172,7 @@ describe('checkWikiAuthority', () => { // #then the edit is blocked const result = checkWikiAuthority({ author: 'marcusrbrown', + headRef: 'main', files: ['knowledge/wiki/topics/home-assistant.md'], }) expect(result).toEqual({ok: false, blockedFiles: ['knowledge/wiki/topics/home-assistant.md']}) @@ -114,7 +182,7 @@ describe('checkWikiAuthority', () => { // #given a human author touching the wiki catalog // #when the guard evaluates the PR // #then the edit is blocked - const result = checkWikiAuthority({author: 'marcusrbrown', files: ['knowledge/index.md']}) + const result = checkWikiAuthority({author: 'marcusrbrown', headRef: 'main', files: ['knowledge/index.md']}) expect(result).toEqual({ok: false, blockedFiles: ['knowledge/index.md']}) }) @@ -122,7 +190,7 @@ describe('checkWikiAuthority', () => { // #given a human author touching the append-only wiki log // #when the guard evaluates the PR // #then the edit is blocked - const result = checkWikiAuthority({author: 'marcusrbrown', files: ['knowledge/log.md']}) + const result = checkWikiAuthority({author: 'marcusrbrown', headRef: 'main', files: ['knowledge/log.md']}) expect(result).toEqual({ok: false, blockedFiles: ['knowledge/log.md']}) }) @@ -132,6 +200,7 @@ describe('checkWikiAuthority', () => { // #then blockedFiles contains only the guarded file, not the code file const result = checkWikiAuthority({ author: 'marcusrbrown', + headRef: 'main', files: ['src/foo.ts', 'metadata/repos.yaml'], }) expect(result).toEqual({ok: false, blockedFiles: ['metadata/repos.yaml']}) @@ -142,7 +211,7 @@ describe('checkWikiAuthority', () => { // #when the guard evaluates the PR // #then blockedFiles lists every guarded path in the original order const files = ['metadata/repos.yaml', 'knowledge/wiki/repos/x.md', 'knowledge/index.md', 'knowledge/log.md'] - const result = checkWikiAuthority({author: 'marcusrbrown', files}) + const result = checkWikiAuthority({author: 'marcusrbrown', headRef: 'main', files}) expect(result).toEqual({ok: false, blockedFiles: files}) }) @@ -152,6 +221,7 @@ describe('checkWikiAuthority', () => { // #then the edit is blocked β€” only fro-bot and fro-bot[bot] are authorized const result = checkWikiAuthority({ author: 'github-actions[bot]', + headRef: 'main', files: ['metadata/repos.yaml'], }) expect(result).toEqual({ok: false, blockedFiles: ['metadata/repos.yaml']}) @@ -161,10 +231,7 @@ describe('checkWikiAuthority', () => { // #given a random bot identity touching a guarded file // #when the guard evaluates the PR // #then the edit is blocked β€” the guard fails closed on unknown identities - const result = checkWikiAuthority({ - author: 'dependabot[bot]', - files: ['metadata/repos.yaml'], - }) + const result = checkWikiAuthority({author: 'dependabot[bot]', headRef: 'main', files: ['metadata/repos.yaml']}) expect(result).toEqual({ok: false, blockedFiles: ['metadata/repos.yaml']}) }) }) @@ -176,6 +243,7 @@ describe('checkWikiAuthority', () => { // #then the knowledge/wiki/** glob matches at any depth const result = checkWikiAuthority({ author: 'marcusrbrown', + headRef: 'main', files: ['knowledge/wiki/comparisons/x-vs-y.md'], }) expect(result).toEqual({ok: false, blockedFiles: ['knowledge/wiki/comparisons/x-vs-y.md']}) @@ -185,7 +253,7 @@ describe('checkWikiAuthority', () => { // #given a human author touching the allowlist // #when the guard evaluates the PR // #then the edit is blocked β€” even human-curated allowlist edits land via data branch - const result = checkWikiAuthority({author: 'marcusrbrown', files: ['metadata/allowlist.yaml']}) + const result = checkWikiAuthority({author: 'marcusrbrown', headRef: 'main', files: ['metadata/allowlist.yaml']}) expect(result).toEqual({ok: false, blockedFiles: ['metadata/allowlist.yaml']}) }) @@ -193,7 +261,7 @@ describe('checkWikiAuthority', () => { // #given a human author touching the renovate dispatch list // #when the guard evaluates the PR // #then the edit is blocked β€” renovate.yaml is rebuilt by the metadata workflow - const result = checkWikiAuthority({author: 'marcusrbrown', files: ['metadata/renovate.yaml']}) + const result = checkWikiAuthority({author: 'marcusrbrown', headRef: 'main', files: ['metadata/renovate.yaml']}) expect(result).toEqual({ok: false, blockedFiles: ['metadata/renovate.yaml']}) }) @@ -201,7 +269,7 @@ describe('checkWikiAuthority', () => { // #given a new yaml file added to metadata/ in the future // #when the guard evaluates the PR // #then the edit is blocked by default β€” new metadata files inherit the data-branch contract - const result = checkWikiAuthority({author: 'marcusrbrown', files: ['metadata/new-thing.yaml']}) + const result = checkWikiAuthority({author: 'marcusrbrown', headRef: 'main', files: ['metadata/new-thing.yaml']}) expect(result).toEqual({ok: false, blockedFiles: ['metadata/new-thing.yaml']}) }) @@ -209,7 +277,11 @@ describe('checkWikiAuthority', () => { // #given a human author touching auto-managed social cooldown state // #when the guard evaluates the PR // #then the edit is blocked β€” social-cooldowns.yaml is auto-managed - const result = checkWikiAuthority({author: 'marcusrbrown', files: ['metadata/social-cooldowns.yaml']}) + const result = checkWikiAuthority({ + author: 'marcusrbrown', + headRef: 'main', + files: ['metadata/social-cooldowns.yaml'], + }) expect(result).toEqual({ok: false, blockedFiles: ['metadata/social-cooldowns.yaml']}) }) @@ -217,7 +289,7 @@ describe('checkWikiAuthority', () => { // #given a hypothetical nested metadata file // #when the guard evaluates the PR // #then the edit is NOT blocked β€” if nested metadata is added later, the glob is revisited - const result = checkWikiAuthority({author: 'marcusrbrown', files: ['metadata/subdir/x.yaml']}) + const result = checkWikiAuthority({author: 'marcusrbrown', headRef: 'main', files: ['metadata/subdir/x.yaml']}) expect(result).toEqual({ok: true}) }) @@ -225,7 +297,7 @@ describe('checkWikiAuthority', () => { // #given a yaml file with the non-canonical .yml extension // #when the guard evaluates the PR // #then the edit is NOT blocked β€” the repo convention is *.yaml, and guard matches that literally - const result = checkWikiAuthority({author: 'marcusrbrown', files: ['metadata/repos.yml']}) + const result = checkWikiAuthority({author: 'marcusrbrown', headRef: 'main', files: ['metadata/repos.yml']}) expect(result).toEqual({ok: true}) }) @@ -235,6 +307,7 @@ describe('checkWikiAuthority', () => { // #then the edit is NOT blocked β€” anchored regexes only match the canonical prefixes const result = checkWikiAuthority({ author: 'marcusrbrown', + headRef: 'main', files: ['docs/knowledge/index.md', 'backup/metadata/repos.yaml', 'src/knowledge/wiki/x.md'], }) expect(result).toEqual({ok: true}) @@ -280,3 +353,51 @@ describe('formatBlockMessage', () => { expect(msg.length).toBeGreaterThan(50) }) }) + +describe('fetchChangedFiles (Fix #5 β€” paginated API)', () => { + it('calls gh api --paginate with the correct endpoint and returns filenames', () => { + // #given execFileSync returns a newline-separated list of filenames (as gh --paginate would) + mockExecFileSync.mockReturnValue('scripts/foo.ts\nmetadata/repos.yaml\nknowledge/index.md\n') + + // #when fetchChangedFiles is called with prNumber and fullName + const files = fetchChangedFiles(42, 'fro-bot/.github') + + // #then it uses gh api --paginate (not gh pr view) and returns all filenames + expect(mockExecFileSync).toHaveBeenCalledWith( + 'gh', + ['api', '--paginate', '/repos/fro-bot/.github/pulls/42/files', '--jq', '.[].filename'], + {encoding: 'utf8'}, + ) + expect(files).toEqual(['scripts/foo.ts', 'metadata/repos.yaml', 'knowledge/index.md']) + }) + + it('includes metadata/repos.yaml from a paginated response not on the first page (guard bypass prevented)', () => { + // #given execFileSync returns filenames where metadata/repos.yaml appears later + // (simulating it being beyond the first page of a large PR β€” --paginate merges it all) + const paginatedOutput = [ + 'scripts/a.ts', + 'scripts/b.ts', + // ... imagine 300 files above, then: + 'metadata/repos.yaml', + ].join('\n') + mockExecFileSync.mockReturnValue(`${paginatedOutput}\n`) + + const files = fetchChangedFiles(99, 'fro-bot/.github') + + // #then metadata/repos.yaml is present in the result (not dropped by single-page limit) + expect(files).toContain('metadata/repos.yaml') + + // #and a non-data-branch human author is correctly blocked + const guardResult = checkWikiAuthority({author: 'marcusrbrown', headRef: 'feature/x', files}) + expect(guardResult).toEqual({ok: false, blockedFiles: ['metadata/repos.yaml']}) + }) + + it('filters empty lines from the output', () => { + // #given output with trailing newline (common from shell tools) + mockExecFileSync.mockReturnValue('foo.ts\n\nbar.ts\n') + + const files = fetchChangedFiles(1, 'owner/repo') + + expect(files).toEqual(['foo.ts', 'bar.ts']) + }) +}) diff --git a/scripts/check-wiki-authority.ts b/scripts/check-wiki-authority.ts index 07e9e28e7..d07366ba7 100644 --- a/scripts/check-wiki-authority.ts +++ b/scripts/check-wiki-authority.ts @@ -34,6 +34,7 @@ const GUARDED_PATTERNS: readonly RegExp[] = [ export interface GuardInput { readonly author: string + readonly headRef: string readonly files: readonly string[] } @@ -53,6 +54,15 @@ export type GuardResult = {readonly ok: true} | {readonly ok: false; readonly bl */ export function checkWikiAuthority(input: GuardInput): GuardResult { if (FROBOT_AUTHORS.has(input.author)) { + // metadata/repos.yaml may only arrive via the `data` promotion branch. + // Any other head branch from a fro-bot identity is the prohibited both-sides mutation. + // The `headRef !== 'data'` bypass is safe to gate on a branch name only because a + // fro-bot identity never originates from a fork β€” fork PRs carry an external author and + // fall through to the GUARDED_PATTERNS check below, so a fork naming its branch `data` + // cannot reach this allow path. + if (input.files.includes('metadata/repos.yaml') && input.headRef !== 'data') { + return {ok: false, blockedFiles: ['metadata/repos.yaml']} + } return {ok: true} } const blockedFiles = input.files.filter(f => GUARDED_PATTERNS.some(p => p.test(f))) @@ -94,29 +104,48 @@ interface PullRequestEventPayload { readonly pull_request?: { readonly number?: number readonly user?: {readonly login?: string} | null + readonly head?: {readonly ref?: string} | null + readonly base?: {readonly repo?: {readonly full_name?: string} | null} | null } } -async function readPullRequestContext(eventPath: string): Promise<{prNumber: number; author: string}> { +async function readPullRequestContext( + eventPath: string, +): Promise<{prNumber: number; author: string; headRef: string; fullName: string | null}> { const raw = await readFile(eventPath, 'utf8') const parsed = JSON.parse(raw) as PullRequestEventPayload const prNumber = parsed.pull_request?.number const author = parsed.pull_request?.user?.login + const headRef = parsed.pull_request?.head?.ref if (typeof prNumber !== 'number' || typeof author !== 'string' || author === '') { throw new Error( `check-wiki-authority: event payload missing pull_request.number or pull_request.user.login (path=${eventPath})`, ) } - return {prNumber, author} + if (typeof headRef !== 'string' || headRef === '') { + throw new Error(`check-wiki-authority: event payload missing pull_request.head.ref (path=${eventPath})`) + } + const rawFullName = parsed.pull_request?.base?.repo?.full_name + const fullName = typeof rawFullName === 'string' && rawFullName.length > 0 ? rawFullName : null + return {prNumber, author, headRef, fullName} } -function fetchChangedFiles(prNumber: number): string[] { - // `gh pr view` with --json files returns paginated results. For PRs under GitHub's - // default per-file soft limit this single-page view is sufficient; if it ever proves - // insufficient, swap to `gh api --paginate /repos/{owner}/{repo}/pulls/{n}/files`. - const stdout = execFileSync('gh', ['pr', 'view', String(prNumber), '--json', 'files', '--jq', '.files[].path'], { - encoding: 'utf8', - }) +/** + * Fetch the complete list of changed files for a pull request using the paginated GitHub API. + * + * Uses `gh api --paginate` so files beyond GitHub's first-page soft limit are always included. + * `{fullName}` is the `owner/repo` string sourced from the event payload's + * `pull_request.base.repo.full_name` field; falls back to `gh repo view` when the payload + * does not carry it (e.g. re-triggered workflows). + * + * Exported for unit testing. + */ +export function fetchChangedFiles(prNumber: number, fullName: string): string[] { + const stdout = execFileSync( + 'gh', + ['api', '--paginate', `/repos/${fullName}/pulls/${prNumber}/files`, '--jq', '.[].filename'], + {encoding: 'utf8'}, + ) return stdout.split('\n').filter(line => line.length > 0) } @@ -129,9 +158,12 @@ async function main(): Promise { process.exit(1) } - const {prNumber, author} = await readPullRequestContext(eventPath) - const files = fetchChangedFiles(prNumber) - const result = checkWikiAuthority({author, files}) + const {prNumber, author, headRef, fullName: eventFullName} = await readPullRequestContext(eventPath) + const fullName = + eventFullName ?? + execFileSync('gh', ['repo', 'view', '--json', 'nameWithOwner', '--jq', '.nameWithOwner'], {encoding: 'utf8'}).trim() + const files = fetchChangedFiles(prNumber, fullName) + const result = checkWikiAuthority({author, headRef, files}) if (result.ok) { process.stdout.write(`check-wiki-authority: ok (author=${author}, files_checked=${files.length})\n`) diff --git a/scripts/check-wiki-private-presence.test.ts b/scripts/check-wiki-private-presence.test.ts index eb952a84f..0d6693869 100644 --- a/scripts/check-wiki-private-presence.test.ts +++ b/scripts/check-wiki-private-presence.test.ts @@ -1,368 +1,502 @@ -import {describe, expect, it, vi} from 'vitest' +import type {RepoEntry} from './schemas.ts' +import {describe, expect, it, vi} from 'vitest' import { + buildPublicSlugMap, + buildPublicSlugs, detectPrivateWikiLeaks, - isNotFoundSignal, loadWikiFilenames, - resolveCanonicalSlugs, + loadWikiPages, + requireGrandfatherDir, + type WikiPageSnapshot, } from './check-wiki-private-presence.ts' +import {computeRepoSlug} from './wiki-slug.ts' // Hoisted mocks β€” vitest transforms these to the top of the module at compile time, // so they take effect before any imports regardless of source order. -const {mockExecFileSync, mockReaddir} = vi.hoisted(() => { +const {mockReaddir, mockReadFile} = vi.hoisted(() => { return { - mockExecFileSync: vi.fn(), mockReaddir: vi.fn(), + mockReadFile: vi.fn(), } }) -vi.mock('node:child_process', () => ({execFileSync: mockExecFileSync})) vi.mock('node:fs/promises', () => ({ - readFile: vi.fn(), + readFile: mockReadFile, readdir: mockReaddir, })) +// --------------------------------------------------------------------------- +// Helpers +// --------------------------------------------------------------------------- + +/** Minimal WikiPageSnapshot for tests β€” content defaults to empty string. */ +function page(filename: string, hash: string, content = ''): WikiPageSnapshot { + const stem = filename.replace(/\.md$/i, '').toLowerCase() + return {filename, stem, hash, content} +} + +// --------------------------------------------------------------------------- +// detectPrivateWikiLeaks +// --------------------------------------------------------------------------- + describe('detectPrivateWikiLeaks', () => { describe('happy path β€” no leaks', () => { - it('returns empty array when privateEntries is empty', () => { - // #given no private entries at all + it('returns empty array when all data pages are in publicSlugMap (with attribution)', () => { + // #given data pages whose stems are all in publicSlugMap with correct attribution URLs // #when detection runs // #then no leaks are reported const result = detectPrivateWikiLeaks({ - privateEntries: [], - wikiRepoFilenames: ['marcusrbrown--poly.md', 'some-org--some-repo.md'], + dataWikiPages: [ + page('marcusrbrown--poly.md', 'hash1', 'url: https://github.com/marcusrbrown/poly'), + page('some-org--some-repo.md', 'hash2', 'url: https://github.com/some-org/some-repo'), + ], + publicSlugMap: new Map([ + ['marcusrbrown--poly', [{owner: 'marcusrbrown', name: 'poly', private: false} as unknown as RepoEntry]], + ['some-org--some-repo', [{owner: 'some-org', name: 'some-repo', private: false} as unknown as RepoEntry]], + ]), + grandfatherPages: [], }) expect(result).toEqual([]) }) - it('returns empty array when no wiki filenames match any private entry', () => { - // #given private entries whose slugs and node_ids do not appear in wiki filenames + it('returns empty array when data pages are empty', () => { + // #given no data wiki pages at all // #when detection runs // #then no leaks are reported const result = detectPrivateWikiLeaks({ - privateEntries: [ - {node_id: 'R_kgDOABCDEF', canonicalSlug: 'acme--secret-repo'}, - {node_id: 'R_kgDOXYZ123', canonicalSlug: 'acme--another-private'}, - ], - wikiRepoFilenames: ['marcusrbrown--poly.md', 'some-org--public-repo.md'], + dataWikiPages: [], + publicSlugMap: new Map([ + ['some-org--public', [{owner: 'some-org', name: 'public', private: false} as unknown as RepoEntry]], + ]), + grandfatherPages: [], }) expect(result).toEqual([]) }) - }) - describe('canonical slug matching', () => { - it('returns a leak when a wiki filename stem matches the canonicalSlug exactly', () => { - // #given a private entry whose canonicalSlug matches a wiki filename stem + it('returns empty array when a redacted private entry exists but no corresponding data page', () => { + // #given a private entry's slug is not in publicSlugMap and no corresponding wiki file // #when detection runs - // #then one leak is returned with reason canonical-slug-match + // #then no leak is reported (no page = no exposure) const result = detectPrivateWikiLeaks({ - privateEntries: [{node_id: 'R_kgDOABCDEF', canonicalSlug: 'marcusrbrown--poly'}], - wikiRepoFilenames: ['marcusrbrown--poly.md', 'other-repo.md'], + dataWikiPages: [page('some-org--public.md', 'h1', 'url: https://github.com/some-org/public')], + publicSlugMap: new Map([ + ['some-org--public', [{owner: 'some-org', name: 'public', private: false} as unknown as RepoEntry]], + ]), + grandfatherPages: [], }) - expect(result).toEqual([ - { - filename: 'marcusrbrown--poly.md', - reason: 'canonical-slug-match', - node_id: 'R_kgDOABCDEF', - }, - ]) + expect(result).toEqual([]) }) + }) - it('matches case-insensitively (filename MarcusRBrown--Poly.md matches slug marcusrbrown--poly)', () => { - // #given a wiki filename with mixed case that matches the lowercase slug + describe('Fix #2 β€” content-hash grandfathering (the core fix)', () => { + it('passes: unchanged grandfathered absent-private page (the copiloting case)', () => { + // #given a page with no publicSlugMap entry (absent private, e.g. lost-access) + // whose content-hash equals the corresponding page on main // #when detection runs - // #then the leak is detected despite case difference + // #then NO leak β€” content is identical to what is already on main const result = detectPrivateWikiLeaks({ - privateEntries: [{node_id: 'R_kgDOABCDEF', canonicalSlug: 'marcusrbrown--poly'}], - wikiRepoFilenames: ['MarcusRBrown--Poly.md'], + dataWikiPages: [page('marcusrbrown--copiloting.md', 'same-hash', '# copiloting content')], + publicSlugMap: new Map(), // not in public set + grandfatherPages: [page('marcusrbrown--copiloting.md', 'same-hash', '# copiloting content')], }) - expect(result).toEqual([ - { - filename: 'MarcusRBrown--Poly.md', - reason: 'canonical-slug-match', - node_id: 'R_kgDOABCDEF', - }, - ]) + expect(result).toEqual([]) }) - }) - describe('node_id matching (defensive)', () => { - it('returns a leak when a wiki filename stem matches the node_id', () => { - // #given a wiki filename whose stem is the node_id (defensive future-proofing) + it('blocks: modified grandfathered page whose entry is NOT private===false', () => { + // #given a page that exists on main (grandfathered) but its data-branch version has changed + // AND the entry is NOT private===false (absent/private) // #when detection runs - // #then one leak is returned with reason node-id-match + // #then BLOCKED β€” modified content without public attribution is a leak const result = detectPrivateWikiLeaks({ - privateEntries: [{node_id: 'R_kgDOABCDEF'}], - wikiRepoFilenames: ['R_kgDOABCDEF.md', 'other-repo.md'], + dataWikiPages: [page('marcusrbrown--copiloting.md', 'new-hash', '# updated content')], + publicSlugMap: new Map(), // not public + grandfatherPages: [page('marcusrbrown--copiloting.md', 'old-hash', '# original content')], }) - expect(result).toEqual([ - { - filename: 'R_kgDOABCDEF.md', - reason: 'node-id-match', - node_id: 'R_kgDOABCDEF', - }, - ]) + expect(result).toHaveLength(1) + expect(result[0]).toMatchObject({filename: 'marcusrbrown--copiloting.md', reason: 'unattributable-page'}) }) - it('applies only node_id matching when canonicalSlug is absent (resolution failed)', () => { - // #given a private entry with no canonicalSlug (GraphQL resolution failed) - // #when detection runs against wiki files that do not match the node_id - // #then no leaks are reported (slug matching is skipped) + it('passes: modified grandfathered page whose entry IS private===false (public repo)', () => { + // #given a page that exists on main AND in publicSlugMap (private===false) + // even though its hash has changed (content was updated) + // #when detection runs + // #then passes β€” it is a public repo, attribution URL present const result = detectPrivateWikiLeaks({ - privateEntries: [{node_id: 'R_kgDOABCDEF'}], - wikiRepoFilenames: ['marcusrbrown--poly.md'], + dataWikiPages: [page('acme--widget.md', 'new-hash', 'url: https://github.com/acme/widget updated content')], + publicSlugMap: new Map([ + ['acme--widget', [{owner: 'acme', name: 'widget', private: false} as unknown as RepoEntry]], + ]), + grandfatherPages: [page('acme--widget.md', 'old-hash', 'old content')], }) expect(result).toEqual([]) }) - it('detects node_id leak when canonicalSlug is absent and node_id matches', () => { - // #given a private entry with no canonicalSlug but node_id matches a wiki file + it('blocks: new page not in publicSlugMap and not grandfathered at all', () => { + // #given a brand-new data page with no matching public entry and no grandfather // #when detection runs - // #then the node_id-based leak is returned + // #then BLOCKED β€” unattributable const result = detectPrivateWikiLeaks({ - privateEntries: [{node_id: 'R_kgDOABCDEF'}], - wikiRepoFilenames: ['R_kgDOABCDEF.md'], + dataWikiPages: [page('acme--secret.md', 'h1', 'private content')], + publicSlugMap: new Map(), + grandfatherPages: [], }) expect(result).toHaveLength(1) - expect(result[0]).toMatchObject({reason: 'node-id-match', node_id: 'R_kgDOABCDEF'}) + expect(result[0]).toMatchObject({filename: 'acme--secret.md', reason: 'unattributable-page'}) }) }) - describe('both slug and node_id match different files for the same entry', () => { - it('returns two leaks when both canonicalSlug and node_id match different wiki files', () => { - // #given a private entry whose slug matches one file and node_id matches another + describe('Fix #1 β€” collision detection (ambiguous-public-slug)', () => { + it('blocks: stem maps to 2+ public entries (ambiguous slug)', () => { + // #given two public repos that sanitize to the same slug + // e.g. "acme/foo-bar" and "acme/foo_bar" both β†’ "acme--foo-bar" // #when detection runs - // #then two leaks are returned (one per match, no short-circuit) + // #then BLOCKED with ambiguous-public-slug (fail closed) const result = detectPrivateWikiLeaks({ - privateEntries: [{node_id: 'R_kgDOABCDEF', canonicalSlug: 'marcusrbrown--poly'}], - wikiRepoFilenames: ['marcusrbrown--poly.md', 'R_kgDOABCDEF.md'], + dataWikiPages: [page('acme--foo-bar.md', 'h1', 'url: https://github.com/acme/foo-bar')], + publicSlugMap: new Map([ + [ + 'acme--foo-bar', + [ + {owner: 'acme', name: 'foo-bar', private: false} as unknown as RepoEntry, + {owner: 'acme', name: 'foo_bar', private: false} as unknown as RepoEntry, + ], + ], + ]), + grandfatherPages: [], }) - expect(result).toHaveLength(2) - expect(result).toContainEqual({ - filename: 'marcusrbrown--poly.md', - reason: 'canonical-slug-match', - node_id: 'R_kgDOABCDEF', - }) - expect(result).toContainEqual({ - filename: 'R_kgDOABCDEF.md', - reason: 'node-id-match', - node_id: 'R_kgDOABCDEF', + expect(result).toHaveLength(1) + expect(result[0]).toMatchObject({filename: 'acme--foo-bar.md', reason: 'ambiguous-public-slug'}) + }) + + it('passes: stem maps to exactly 1 public entry (unambiguous)', () => { + // #given a page whose stem resolves to a single public entry + // #when detection runs + // #then passes (unambiguous, attribution satisfied) + const result = detectPrivateWikiLeaks({ + dataWikiPages: [page('acme--widget.md', 'h1', 'url: https://github.com/acme/widget')], + publicSlugMap: new Map([ + ['acme--widget', [{owner: 'acme', name: 'widget', private: false} as unknown as RepoEntry]], + ]), + grandfatherPages: [], }) + expect(result).toEqual([]) }) }) - describe('multiple private entries β€” partial leaks', () => { - it('returns leaks only for matching entries when some match and some do not', () => { - // #given two private entries, only one of which has a matching wiki file + describe('Fix #1 β€” attribution check (content must reference the public repo)', () => { + it('passes: page content contains expected GitHub URL for the matched public entry', () => { + // #given a page in publicSlugMap whose content references the expected URL // #when detection runs - // #then only the matching entry produces a leak + // #then passes const result = detectPrivateWikiLeaks({ - privateEntries: [ - {node_id: 'R_kgDOABCDEF', canonicalSlug: 'acme--secret'}, - {node_id: 'R_kgDOXYZ123', canonicalSlug: 'acme--also-secret'}, + dataWikiPages: [ + page( + 'marcusrbrown--dotfiles.md', + 'h1', + '---\ntitle: "marcusrbrown/.dotfiles"\nsources:\n - url: https://github.com/marcusrbrown/.dotfiles\n---', + ), ], - wikiRepoFilenames: ['acme--secret.md', 'public-repo.md'], - }) - expect(result).toHaveLength(1) - expect(result[0]).toMatchObject({ - filename: 'acme--secret.md', - reason: 'canonical-slug-match', - node_id: 'R_kgDOABCDEF', + publicSlugMap: new Map([ + [ + 'marcusrbrown--dotfiles', + [{owner: 'marcusrbrown', name: '.dotfiles', private: false} as unknown as RepoEntry], + ], + ]), + grandfatherPages: [], }) + expect(result).toEqual([]) }) - it('returns all leaks when multiple entries each match a wiki file β€” asserts per-leak shape', () => { - // #given two private entries both matching wiki files + it('blocks: page content does NOT contain expected GitHub URL (wrong repo frontmatter)', () => { + // #given a page whose stem matches a public slug but content references a DIFFERENT repo + // (slug collision via content spoofing) // #when detection runs - // #then both leaks are returned with correct filename, reason, and node_id (P3 #11 strengthened) + // #then BLOCKED β€” content attribution fails const result = detectPrivateWikiLeaks({ - privateEntries: [ - {node_id: 'R_kgDOABCDEF', canonicalSlug: 'acme--secret'}, - {node_id: 'R_kgDOXYZ123', canonicalSlug: 'acme--also-secret'}, + dataWikiPages: [ + page( + 'acme--widget.md', + 'h1', + '---\ntitle: "acme/other-thing"\nsources:\n - url: https://github.com/acme/other-thing\n---', + ), ], - wikiRepoFilenames: ['acme--secret.md', 'acme--also-secret.md'], + publicSlugMap: new Map([ + ['acme--widget', [{owner: 'acme', name: 'widget', private: false} as unknown as RepoEntry]], + ]), + grandfatherPages: [], + }) + expect(result).toHaveLength(1) + expect(result[0]).toMatchObject({filename: 'acme--widget.md', reason: 'unattributable-page'}) + }) + }) + + describe('slug sanitization (P0) β€” computeRepoSlug required', () => { + it('does NOT leak when publicSlugMap is built from computeRepoSlug for dotfiles entry', () => { + // #given entry {owner:'marcusrbrown', name:'.dotfiles', private:false} + // computeRepoSlug sanitizes the leading dot: .dotfiles β†’ dotfiles + // so the slug is 'marcusrbrown--dotfiles', matching the page stem + const slug = computeRepoSlug('marcusrbrown', '.dotfiles') + expect(slug).toBe('marcusrbrown--dotfiles') + + const result = detectPrivateWikiLeaks({ + dataWikiPages: [page('marcusrbrown--dotfiles.md', 'h1', 'url: https://github.com/marcusrbrown/.dotfiles')], + publicSlugMap: new Map([ + [slug, [{owner: 'marcusrbrown', name: '.dotfiles', private: false} as unknown as RepoEntry]], + ]), + grandfatherPages: [], }) - expect(result).toHaveLength(2) - expect(result).toContainEqual({ - filename: 'acme--secret.md', - reason: 'canonical-slug-match', - node_id: 'R_kgDOABCDEF', + expect(result).toEqual([]) + }) + }) + + describe('grandfather (P0) β€” pages already on main are safe when unchanged', () => { + it('does NOT flag marcusrbrown--copiloting.md when hash matches grandfather', () => { + // #given a page that is on main (grandfathered) with ABSENT private field + // #when detection runs with same hash + // #then NO leak β€” content-identical to what is already public on main + const result = detectPrivateWikiLeaks({ + dataWikiPages: [page('marcusrbrown--copiloting.md', 'copiloting-hash', '# content')], + publicSlugMap: new Map(), + grandfatherPages: [page('marcusrbrown--copiloting.md', 'copiloting-hash', '# content')], + }) + expect(result).toEqual([]) + }) + + it('DOES flag marcusrbrown--copiloting.md when grandfatherPages is EMPTY', () => { + // #given the same page but no grandfathering at all + // #when detection runs without grandfather pages + // #then flagged β€” grandfathering is what saves it + const result = detectPrivateWikiLeaks({ + dataWikiPages: [page('marcusrbrown--copiloting.md', 'h1', 'content')], + publicSlugMap: new Map(), + grandfatherPages: [], }) - expect(result).toContainEqual({ - filename: 'acme--also-secret.md', - reason: 'canonical-slug-match', - node_id: 'R_kgDOXYZ123', + expect(result).toHaveLength(1) + expect(result[0]).toMatchObject({filename: 'marcusrbrown--copiloting.md', reason: 'unattributable-page'}) + }) + + it('DOES flag marcusrbrown--copiloting.md when hash differs from grandfather', () => { + // #given a grandfathered page whose content was modified on the data branch + // #when detection runs + // #then flagged β€” content-identity grandfathering blocks modified private pages + const result = detectPrivateWikiLeaks({ + dataWikiPages: [page('marcusrbrown--copiloting.md', 'new-hash', 'modified content')], + publicSlugMap: new Map(), + grandfatherPages: [page('marcusrbrown--copiloting.md', 'old-hash', 'original content')], }) + expect(result).toHaveLength(1) + expect(result[0]).toMatchObject({filename: 'marcusrbrown--copiloting.md', reason: 'unattributable-page'}) }) }) -}) -describe('resolveCanonicalSlugs', () => { - it('returns resolved slugs for all entries when GraphQL succeeds', () => { - // #given GraphQL returns a valid nameWithOwner for each node_id - // #when resolveCanonicalSlugs is called - // #then each entry gets its canonicalSlug set - mockExecFileSync.mockReturnValue(JSON.stringify({data: {node: {nameWithOwner: 'acme/secret'}}})) - const result = resolveCanonicalSlugs([{node_id: 'R_kgDOABCDEF'}]) - expect(result.resolved).toEqual([{node_id: 'R_kgDOABCDEF', canonicalSlug: 'acme--secret'}]) - expect(Object.keys(result)).not.toContain('failures') - }) - - it('throws (fails closed) when GraphQL resolution fails for any entry', () => { - // #given GraphQL throws for a private entry - // #when resolveCanonicalSlugs is called - // #then it throws with the failing node_id listed (fail-closed: P1 #2) - mockExecFileSync.mockImplementation(() => { - throw new Error('gh: HTTP 401') + describe('new private page β€” flagged as leak', () => { + it('flags a page whose stem is in neither publicSlugMap nor grandfatherPages', () => { + // #given a data page for a private entry, not in either set + // #when detection runs + // #then flagged with reason unattributable-page + const result = detectPrivateWikiLeaks({ + dataWikiPages: [page('acme--secret-repo.md', 'h1', 'secret content')], + publicSlugMap: new Map([ + ['acme--public-repo', [{owner: 'acme', name: 'public-repo', private: false} as unknown as RepoEntry]], + ]), + grandfatherPages: [page('other--old-page.md', 'h2', 'other content')], + }) + expect(result).toHaveLength(1) + expect(result).toContainEqual({filename: 'acme--secret-repo.md', reason: 'unattributable-page'}) }) - expect(() => resolveCanonicalSlugs([{node_id: 'R_kgDOABCDEF'}])).toThrow(/R_kgDOABCDEF/) }) - it('throws listing all failing node_ids when multiple entries fail resolution', () => { - // #given GraphQL throws for two private entries - // #when resolveCanonicalSlugs is called - // #then the error message names both node_ids - mockExecFileSync.mockImplementation(() => { - throw new Error('gh: HTTP 401') + describe('node-null tolerance β€” no GraphQL call required', () => { + it('flags a page for a deleted repo (no public entry, not grandfathered) β€” pure function, no gh call', () => { + // #given a page whose repo is deleted (would have been node-null under old GraphQL approach) + // #when detection runs (detectPrivateWikiLeaks is PURE β€” no subprocess, no gh) + // #then flagged as unattributable-page (not in public set, not grandfathered) + // This test exercises the pure function directly β€” no mock needed, proving no GraphQL call is made. + const result = detectPrivateWikiLeaks({ + dataWikiPages: [page('deleted-org--deleted-repo.md', 'h1', 'content')], + publicSlugMap: new Map(), + grandfatherPages: [], + }) + expect(result).toHaveLength(1) + expect(result[0]).toMatchObject({filename: 'deleted-org--deleted-repo.md', reason: 'unattributable-page'}) }) - let err: unknown - try { - resolveCanonicalSlugs([{node_id: 'R_kgDOABCDEF'}, {node_id: 'R_kgDOXYZ123'}]) - } catch (error) { - err = error - } - expect(err).toBeInstanceOf(Error) - expect((err as Error).message).toMatch(/R_kgDOABCDEF/) - expect((err as Error).message).toMatch(/R_kgDOXYZ123/) - }) - - it('throws with node-null mode when GraphQL returns data.node: null (Test #1)', () => { - // #given GraphQL returns null for the node (repo deleted or App lost access) - // #when resolveCanonicalSlugs is called - // #then it throws identifying the failure as node-null (not subprocess-threw) - mockExecFileSync.mockReturnValue(JSON.stringify({data: {node: null}})) - expect(() => resolveCanonicalSlugs([{node_id: 'R_kgDOABCDEF'}])).toThrow(/node-null/) - }) - - it('classifies a non-zero gh exit carrying NOT_FOUND as node-null, not subprocess-threw', () => { - // #given gh api graphql exits non-zero but the captured stdout is a valid NOT_FOUND body - // (repo deleted or App lost access β€” data.node: null with a top-level errors array) - // #when resolveCanonicalSlugs is called - // #then the failure is classified node-null so operators investigate repo lifecycle, not transport - mockExecFileSync.mockImplementation(() => { - throw Object.assign(new Error('gh: Could not resolve to a node with the global id of "R_kgDOSVJgdw".'), { - status: 1, - stdout: JSON.stringify({ - data: {node: null}, - errors: [{type: 'NOT_FOUND', message: 'Could not resolve to a node with the global id'}], - }), - stderr: 'gh: Could not resolve to a node with the global id', + }) + + describe('fail-safe predicate β€” private === false, NOT private !== true', () => { + it('flags a page whose entry has ABSENT private field and is not grandfathered', () => { + // #given an entry with absent private (legacy/unprobed) + // Under the correct predicate (=== false), absent private is NOT in publicSlugMap β†’ page gets flagged + // Under wrong predicate (!== true), absent private WOULD be wrongly admitted β†’ leak slips through + + // buildPublicSlugMap with absent-private entry produces empty map + const resultWithAbsentPrivate = detectPrivateWikiLeaks({ + dataWikiPages: [page('acme--mystery.md', 'h1', 'content')], + publicSlugMap: new Map(), // absent private correctly excluded + grandfatherPages: [], }) + expect(resultWithAbsentPrivate).toHaveLength(1) + expect(resultWithAbsentPrivate[0]).toMatchObject({reason: 'unattributable-page'}) }) - expect(() => resolveCanonicalSlugs([{node_id: 'R_kgDOSVJgdw'}])).toThrow(/node-null/) - expect(() => resolveCanonicalSlugs([{node_id: 'R_kgDOSVJgdw'}])).not.toThrow(/subprocess-threw/) }) - it('still classifies a genuine transport error (HTTP 401, no body) as subprocess-threw', () => { - // #given gh throws with no NOT_FOUND signal in any captured stream - // #when resolveCanonicalSlugs is called - // #then the failure remains subprocess-threw (network/rate-limit/auth) - mockExecFileSync.mockImplementation(() => { - throw Object.assign(new Error('gh: HTTP 401'), {status: 1, stdout: '', stderr: 'gh: HTTP 401'}) + describe('node_id-named page β€” flagged', () => { + it('flags a page whose stem is a node_id (R_kgDO…) not in either set', () => { + // #given a data page named after a node_id (legacy naming or defensive case) + // #when detection runs + // #then flagged β€” node_id-named pages are not in publicSlugMap or grandfatherPages + const result = detectPrivateWikiLeaks({ + dataWikiPages: [page('R_kgDOABCDEF.md', 'h1', 'content')], + publicSlugMap: new Map(), + grandfatherPages: [], + }) + expect(result).toHaveLength(1) + expect(result[0]).toMatchObject({filename: 'R_kgDOABCDEF.md', reason: 'unattributable-page'}) }) - expect(() => resolveCanonicalSlugs([{node_id: 'R_kgDOABCDEF'}])).toThrow(/subprocess-threw/) }) - it('distinguishes subprocess-threw from node-null in the error message (NBC #2)', () => { - // #given one entry throws subprocess and another returns node: null - // #when resolveCanonicalSlugs is called - // #then the error message labels each failure with its mode - mockExecFileSync - .mockImplementationOnce(() => { - throw new Error('gh: HTTP 401') + describe('case-insensitive stem matching', () => { + it('matches case-insensitively (MarcusRBrown--Poly.md stem lowercased to match publicSlugMap)', () => { + // #given a wiki filename with mixed case + // #when detection runs + // #then matched case-insensitively against publicSlugMap + const result = detectPrivateWikiLeaks({ + dataWikiPages: [page('MarcusRBrown--Poly.md', 'h1', 'url: https://github.com/marcusrbrown/poly')], + publicSlugMap: new Map([ + ['marcusrbrown--poly', [{owner: 'marcusrbrown', name: 'poly', private: false} as unknown as RepoEntry]], + ]), + grandfatherPages: [], + }) + expect(result).toEqual([]) + }) + }) + + describe('multiple pages β€” partial leaks', () => { + it('flags only pages not covered by publicSlugMap or unchanged grandfatherPages', () => { + // #given a mix of public (with attribution), grandfathered-unchanged, and new-private pages + // #when detection runs + // #then only uncovered pages are flagged + const result = detectPrivateWikiLeaks({ + dataWikiPages: [ + page('acme--public.md', 'h1', 'url: https://github.com/acme/public'), // in publicSlugMap, attribution ok + page('marcusrbrown--copiloting.md', 'same-hash', '# copiloting'), // unchanged grandfather + page('acme--secret.md', 'h3', 'private content'), // neither β†’ leak + ], + publicSlugMap: new Map([ + ['acme--public', [{owner: 'acme', name: 'public', private: false} as unknown as RepoEntry]], + ]), + grandfatherPages: [page('marcusrbrown--copiloting.md', 'same-hash', '# copiloting')], }) - .mockReturnValueOnce(JSON.stringify({data: {node: null}})) - let err: unknown - try { - resolveCanonicalSlugs([{node_id: 'R_kgDOABCDEF'}, {node_id: 'R_kgDOXYZ123'}]) - } catch (error) { - err = error - } - expect(err).toBeInstanceOf(Error) - expect((err as Error).message).toMatch(/subprocess-threw/) - expect((err as Error).message).toMatch(/R_kgDOABCDEF/) - expect((err as Error).message).toMatch(/node-null/) - expect((err as Error).message).toMatch(/R_kgDOXYZ123/) - }) - - it('normalizes canonicalSlug to lowercase at storage (NBC #3)', () => { - // #given GitHub returns a mixed-case nameWithOwner - // #when resolveCanonicalSlugs is called - // #then the stored canonicalSlug is lowercased - mockExecFileSync.mockReturnValue(JSON.stringify({data: {node: {nameWithOwner: 'MarcusRBrown/Poly'}}})) - const result = resolveCanonicalSlugs([{node_id: 'R_kgDOABCDEF'}]) - expect(result.resolved[0]?.canonicalSlug).toBe('marcusrbrown--poly') - }) - - it.each([ - ['bfra-me/works', 'bfra-me--works'], - ['marcusrbrown/ha-config', 'marcusrbrown--ha-config'], - ['bfra-me/.github', 'bfra-me--.github'], - ['MarcusRBrown/Poly', 'marcusrbrown--poly'], - ])('converts nameWithOwner %s β†’ slug %s (Test #2)', (nameWithOwner, expectedSlug) => { - // #given GraphQL returns a nameWithOwner with various owner/repo shapes - // #when resolveCanonicalSlugs is called - // #then the canonicalSlug is correctly lowercased and slash-replaced - mockExecFileSync.mockReturnValue(JSON.stringify({data: {node: {nameWithOwner}}})) - const result = resolveCanonicalSlugs([{node_id: 'R_kgDOABCDEF'}]) - expect(result.resolved[0]?.canonicalSlug).toBe(expectedSlug) + expect(result).toHaveLength(1) + expect(result[0]).toMatchObject({filename: 'acme--secret.md', reason: 'unattributable-page'}) + }) }) }) -describe('isNotFoundSignal', () => { - it('returns false for empty input', () => { - expect(isNotFoundSignal('')).toBe(false) +// --------------------------------------------------------------------------- +// loadWikiPages +// --------------------------------------------------------------------------- + +describe('loadWikiPages', () => { + it('returns empty array when directory does not exist (ENOENT)', async () => { + // #given the wiki repos directory does not exist + // #when loadWikiPages is called + // #then it returns [] gracefully (fresh checkout, no wiki yet) + const enoent = Object.assign(new Error('ENOENT'), {code: 'ENOENT'}) + mockReaddir.mockRejectedValue(enoent) + const result = await loadWikiPages('knowledge/wiki/repos') + expect(result).toEqual([]) + }) + + it('propagates non-ENOENT errors from readdir (fail-closed)', async () => { + // #given readdir throws a permission error (not ENOENT) + // #when loadWikiPages is called + // #then the error propagates β€” do not silently swallow FS errors + const eperm = Object.assign(new Error('EPERM: operation not permitted'), {code: 'EPERM'}) + mockReaddir.mockRejectedValue(eperm) + await expect(loadWikiPages('knowledge/wiki/repos')).rejects.toThrow(/EPERM/) }) - it('detects a structured top-level NOT_FOUND error', () => { - expect(isNotFoundSignal(JSON.stringify({data: {node: null}, errors: [{type: 'NOT_FOUND'}]}))).toBe(true) + it('propagates non-ENOENT errors from readFile (fail-closed)', async () => { + // #given readdir succeeds but readFile throws a permission error + // #when loadWikiPages is called + // #then the error propagates + mockReaddir.mockResolvedValue(['foo.md']) + const eperm = Object.assign(new Error('EPERM: cannot read file'), {code: 'EPERM'}) + mockReadFile.mockRejectedValue(eperm) + await expect(loadWikiPages('knowledge/wiki/repos')).rejects.toThrow(/EPERM/) }) - it('detects data.node: null even without an errors array', () => { - expect(isNotFoundSignal(JSON.stringify({data: {node: null}}))).toBe(true) + it('returns WikiPageSnapshot records with correct fields', async () => { + // #given two .md files in the directory + // #when loadWikiPages is called + // #then returns snapshots with filename, stem, hash, content + mockReaddir.mockResolvedValue(['acme--widget.md', 'other.txt', '.gitkeep']) + mockReadFile.mockResolvedValue('url: https://github.com/acme/widget\n') + const result = await loadWikiPages('knowledge/wiki/repos') + expect(result).toHaveLength(1) + expect(result[0]?.filename).toBe('acme--widget.md') + expect(result[0]?.stem).toBe('acme--widget') + expect(result[0]?.content).toBe('url: https://github.com/acme/widget\n') + // hash must be a 64-char hex string (SHA-256) + expect(result[0]?.hash).toMatch(/^[0-9a-f]{64}$/) }) - it('detects the plain-text gh NOT_FOUND line (non-JSON stderr)', () => { - expect(isNotFoundSignal('gh: Could not resolve to a node with the global id of "R_kgDOSVJgdw".')).toBe(true) + it('produces the same hash for identical content (deterministic)', async () => { + // #given two pages with identical content + // #when loadWikiPages is called + // #then both snapshots have the same hash + mockReaddir.mockResolvedValue(['page-a.md', 'page-b.md']) + mockReadFile.mockResolvedValue('same content') + const result = await loadWikiPages('knowledge/wiki/repos') + expect(result).toHaveLength(2) + expect(result[0]?.hash).toBe(result[1]?.hash) }) - it('returns false for an unrelated transport error', () => { - expect(isNotFoundSignal('gh: HTTP 401 Unauthorized')).toBe(false) + it('produces different hashes for different content', async () => { + // #given two pages with different content + // #when loadWikiPages is called + // #then hashes differ + mockReaddir.mockResolvedValue(['page-a.md', 'page-b.md']) + mockReadFile.mockResolvedValueOnce('content A').mockResolvedValueOnce('content B') + const result = await loadWikiPages('knowledge/wiki/repos') + expect(result).toHaveLength(2) + expect(result[0]?.hash).not.toBe(result[1]?.hash) }) - it('returns false for a successful repository response', () => { - expect(isNotFoundSignal(JSON.stringify({data: {node: {nameWithOwner: 'acme/secret'}}}))).toBe(false) + it('passes the joined dir+filename path to readFile', async () => { + // #given a custom directory path + // #when loadWikiPages is called with that path + // #then readFile is called with the joined path + mockReaddir.mockResolvedValue(['foo--bar.md']) + mockReadFile.mockResolvedValue('content') + await loadWikiPages('/some/custom/dir') + expect(mockReadFile).toHaveBeenCalledWith('/some/custom/dir/foo--bar.md', 'utf8') }) }) +// --------------------------------------------------------------------------- +// loadWikiFilenames (kept for backward compatibility) +// --------------------------------------------------------------------------- + describe('loadWikiFilenames', () => { - it('returns empty array when knowledge/wiki/repos/ does not exist (ENOENT)', async () => { + it('returns empty array when directory does not exist (ENOENT)', async () => { // #given the wiki repos directory does not exist // #when loadWikiFilenames is called // #then it returns [] gracefully (fresh checkout, no wiki yet) const enoent = Object.assign(new Error('ENOENT'), {code: 'ENOENT'}) mockReaddir.mockRejectedValue(enoent) - const result = await loadWikiFilenames() + const result = await loadWikiFilenames('knowledge/wiki/repos') expect(result).toEqual([]) }) - it('propagates non-ENOENT errors (fail-closed: P1 #3)', async () => { + it('propagates non-ENOENT errors (fail-closed)', async () => { // #given readdir throws a permission error (not ENOENT) // #when loadWikiFilenames is called // #then the error propagates β€” do not silently swallow FS errors const eperm = Object.assign(new Error('EPERM: operation not permitted'), {code: 'EPERM'}) mockReaddir.mockRejectedValue(eperm) - await expect(loadWikiFilenames()).rejects.toThrow(/EPERM/) + await expect(loadWikiFilenames('knowledge/wiki/repos')).rejects.toThrow(/EPERM/) }) it('returns only .md files from the directory listing', async () => { @@ -370,7 +504,167 @@ describe('loadWikiFilenames', () => { // #when loadWikiFilenames is called // #then only .md filenames are returned mockReaddir.mockResolvedValue(['acme--secret.md', 'README.md', '.gitkeep', 'other.txt']) - const result = await loadWikiFilenames() + const result = await loadWikiFilenames('knowledge/wiki/repos') expect(result).toEqual(['acme--secret.md', 'README.md']) }) + + it('accepts an explicit directory argument (used for grandfather dir)', async () => { + // #given a custom directory path + // #when loadWikiFilenames is called with that path + // #then readdir is called with the given path + mockReaddir.mockResolvedValue(['foo--bar.md']) + const result = await loadWikiFilenames('/some/custom/dir') + expect(mockReaddir).toHaveBeenCalledWith('/some/custom/dir') + expect(result).toEqual(['foo--bar.md']) + }) +}) + +// --------------------------------------------------------------------------- +// buildPublicSlugMap +// --------------------------------------------------------------------------- + +describe('buildPublicSlugMap', () => { + it('returns empty map when no entries have private: false', () => { + const result = buildPublicSlugMap([ + {owner: 'acme', name: 'secret', private: true} as unknown as RepoEntry, + {owner: 'acme', name: 'mystery'} as unknown as RepoEntry, // absent private + ]) + expect(result.size).toBe(0) + }) + + it('includes entries with private: false using computeRepoSlug', () => { + const result = buildPublicSlugMap([ + {owner: 'marcusrbrown', name: '.dotfiles', private: false} as unknown as RepoEntry, + ]) + expect(result.has('marcusrbrown--dotfiles')).toBe(true) + expect(result.get('marcusrbrown--dotfiles')).toHaveLength(1) + }) + + it('groups multiple public entries under the same slug (collision detection)', () => { + // Two repos that sanitize to the same slug + const result = buildPublicSlugMap([ + {owner: 'acme', name: 'foo-bar', private: false} as unknown as RepoEntry, + {owner: 'acme', name: 'foo_bar', private: false} as unknown as RepoEntry, + ]) + expect(result.get('acme--foo-bar')).toHaveLength(2) + }) + + it('keeps distinct slugs separate', () => { + const result = buildPublicSlugMap([ + {owner: 'acme', name: 'alpha', private: false} as unknown as RepoEntry, + {owner: 'acme', name: 'beta', private: false} as unknown as RepoEntry, + ]) + expect(result.size).toBe(2) + expect(result.get('acme--alpha')).toHaveLength(1) + expect(result.get('acme--beta')).toHaveLength(1) + }) +}) + +// --------------------------------------------------------------------------- +// buildPublicSlugs (Fix #4 β€” load-bearing predicate end-to-end) +// --------------------------------------------------------------------------- + +describe('buildPublicSlugs (Fix #4 β€” load-bearing predicate end-to-end)', () => { + it('excludes entry with ABSENT private field (private !== true would wrongly include it)', () => { + // #given entry {owner:'acme', name:'mystery'} with NO private field + // Under private === false: absent is NOT admitted (fail-safe) + // Under private !== true: absent WOULD be admitted (bug) + const result = buildPublicSlugs([ + { + owner: 'acme', + name: 'mystery', + // private is absent (undefined) + added: '2025-01-01', + onboarding_status: 'pending', + last_survey_at: null, + last_survey_status: null, + has_fro_bot_workflow: false, + has_renovate: false, + }, + ]) + + const slug = computeRepoSlug('acme', 'mystery') + // Fail-safe: absent private must NOT be admitted + expect(result.has(slug)).toBe(false) + }) + + it('includes entry with private: false and uses computeRepoSlug (not raw join)', () => { + // #given entry {owner:'marcusrbrown', name:'.dotfiles', private:false} + // computeRepoSlug sanitizes '.dotfiles' β†’ 'dotfiles' + // so the slug is 'marcusrbrown--dotfiles', NOT 'marcusrbrown--.dotfiles' + const result = buildPublicSlugs([ + { + owner: 'marcusrbrown', + name: '.dotfiles', + private: false, + added: '2025-01-01', + onboarding_status: 'pending', + last_survey_at: null, + last_survey_status: null, + has_fro_bot_workflow: false, + has_renovate: false, + }, + ]) + + // computeRepoSlug-based slug IS in the set + expect(result.has('marcusrbrown--dotfiles')).toBe(true) + + // Raw owner--name join is NOT in the set (proving computeRepoSlug is used) + expect(result.has('marcusrbrown--.dotfiles')).toBe(false) + }) + + it('excludes entry with private: true', () => { + // #given a private repo entry + // #when buildPublicSlugs runs + // #then the slug is NOT in the result + const result = buildPublicSlugs([ + { + owner: 'acme', + name: 'secret', + private: true, + added: '2025-01-01', + onboarding_status: 'pending', + last_survey_at: null, + last_survey_status: null, + has_fro_bot_workflow: false, + has_renovate: false, + }, + ]) + + expect(result.has(computeRepoSlug('acme', 'secret'))).toBe(false) + expect(result.size).toBe(0) + }) +}) + +// --------------------------------------------------------------------------- +// requireGrandfatherDir (Fix #6 β€” fail-closed missing-env branch) +// --------------------------------------------------------------------------- + +describe('requireGrandfatherDir (Fix #6 β€” fail-closed missing-env branch)', () => { + it('throws when env is undefined', () => { + // #given GRANDFATHER_WIKI_REPOS_DIR is not set + expect(() => requireGrandfatherDir(undefined)).toThrow(/GRANDFATHER_WIKI_REPOS_DIR/) + }) + + it('throws when env is blank (empty string)', () => { + // #given GRANDFATHER_WIKI_REPOS_DIR is set to an empty string + expect(() => requireGrandfatherDir('')).toThrow(/GRANDFATHER_WIKI_REPOS_DIR/) + }) + + it('throws when env is whitespace-only', () => { + // #given GRANDFATHER_WIKI_REPOS_DIR is set to only spaces + expect(() => requireGrandfatherDir(' ')).toThrow(/GRANDFATHER_WIKI_REPOS_DIR/) + }) + + it('returns the trimmed dir when env is valid', () => { + // #given a valid path (possibly with surrounding whitespace) + const result = requireGrandfatherDir(' /path/to/main-wiki ') + expect(result).toBe('/path/to/main-wiki') + }) + + it('returns the dir as-is when no trimming needed', () => { + // #given a clean path string + const result = requireGrandfatherDir('/workspace/main/knowledge/wiki/repos') + expect(result).toBe('/workspace/main/knowledge/wiki/repos') + }) }) diff --git a/scripts/check-wiki-private-presence.ts b/scripts/check-wiki-private-presence.ts index b4e65bc82..2d96e8ca8 100644 --- a/scripts/check-wiki-private-presence.ts +++ b/scripts/check-wiki-private-presence.ts @@ -1,248 +1,263 @@ -import {execFileSync} from 'node:child_process' +import {createHash} from 'node:crypto' import {readdir, readFile} from 'node:fs/promises' +import {join} from 'node:path' import process from 'node:process' import YAML from 'yaml' -import {assertReposFile} from './schemas.ts' +import {assertReposFile, type RepoEntry} from './schemas.ts' +import {computeRepoSlug} from './wiki-slug.ts' + +// --------------------------------------------------------------------------- +// Types +// --------------------------------------------------------------------------- + +export interface WikiPageSnapshot { + filename: string + stem: string + /** SHA-256 hex digest of the file content β€” used for content-identity grandfathering. */ + hash: string + /** Raw file content β€” used for attribution checks. */ + content: string +} export interface PrivateWikiLeak { filename: string - reason: 'canonical-slug-match' | 'node-id-match' - node_id: string + reason: 'unattributable-page' | 'ambiguous-public-slug' } +// --------------------------------------------------------------------------- +// detectPrivateWikiLeaks β€” pure function (no I/O, no subprocess) +// --------------------------------------------------------------------------- + /** - * Pure function: detect wiki files in knowledge/wiki/repos/ that correspond to private repo entries. + * Pure function: flag data wiki pages that cannot be attributed to a known-public + * or unchanged-grandfathered repo. + * + * For each data page its stem is checked in this order: + * + * 1. If `publicSlugMap` has 2+ entries for the stem β†’ `ambiguous-public-slug` + * (two public repos share the slug after sanitization; fail closed). * - * For each private entry: - * - If canonicalSlug is provided, check for a case-insensitive stem match against wiki filenames. - * - Always check if any wiki filename stem matches the entry's node_id (defensive). + * 2. If `publicSlugMap` has exactly 1 entry for the stem AND the page content + * contains `https://github.com/{owner}/{name}` for that entry β†’ **passes** + * (explicitly public AND attributable). + * If the URL is absent β†’ `unattributable-page` (slug collision via content spoofing). * - * Returns all leaks found without short-circuiting. + * 3. If the stem appears in `grandfatherPages` with the **same hash** as the + * corresponding data page β†’ **passes** (content-identical to what is already on + * main; e.g. `marcusrbrown/copiloting` with `lost-access`). + * A modified page (different hash) falls through to step 4. + * + * 4. Otherwise β†’ `unattributable-page`. + * + * This design requires NO GraphQL. Unchanged grandfathered pages (even absent- + * `private` entries like `marcusrbrown/copiloting`) are handled gracefully. + * Modified or new unattirbutable pages are blocked. */ export function detectPrivateWikiLeaks(params: { - privateEntries: readonly {node_id: string; canonicalSlug?: string}[] - wikiRepoFilenames: readonly string[] + dataWikiPages: readonly WikiPageSnapshot[] + publicSlugMap: ReadonlyMap + grandfatherPages: readonly WikiPageSnapshot[] }): PrivateWikiLeak[] { - const {privateEntries, wikiRepoFilenames} = params + const {dataWikiPages, publicSlugMap, grandfatherPages} = params + + // Build grandfather index: stem β†’ hash (for O(1) lookup) + const grandfatherByStem = new Map() + for (const page of grandfatherPages) { + grandfatherByStem.set(page.stem, page.hash) + } + const leaks: PrivateWikiLeak[] = [] - for (const entry of privateEntries) { - for (const filename of wikiRepoFilenames) { - const stem = filename.replace(/\.md$/i, '') + for (const page of dataWikiPages) { + const entries = publicSlugMap.get(page.stem) - // Canonical slug match (case-insensitive) - if (entry.canonicalSlug !== undefined && stem.toLowerCase() === entry.canonicalSlug.toLowerCase()) { - leaks.push({filename, reason: 'canonical-slug-match', node_id: entry.node_id}) + if (entries !== undefined) { + // Slug matches at least one public entry. + if (entries.length > 1) { + // Ambiguous: 2+ public repos sanitize to the same slug. + leaks.push({filename: page.filename, reason: 'ambiguous-public-slug'}) continue } - // Node ID match (defensive) - if (stem === entry.node_id) { - leaks.push({filename, reason: 'node-id-match', node_id: entry.node_id}) + // Single unique public entry β€” verify content attribution. + // entries.length === 1 is guaranteed by the guard above; the non-null assertion + // is safe but we narrow with a runtime guard to satisfy strict noUncheckedIndexedAccess. + const entry = entries[0] + if (entry === undefined) { + leaks.push({filename: page.filename, reason: 'unattributable-page'}) + continue } + const expectedUrl = `https://github.com/${entry.owner}/${entry.name}` + if (page.content.includes(expectedUrl)) { + continue // passes: explicitly public and attributable + } + + // Content doesn't reference the expected repo β€” possible slug collision. + leaks.push({filename: page.filename, reason: 'unattributable-page'}) + continue } + + // Stem not in publicSlugMap β€” check content-identity grandfathering. + const grandfatherHash = grandfatherByStem.get(page.stem) + if (grandfatherHash !== undefined && grandfatherHash === page.hash) { + continue // unchanged from main β†’ grandfathered (e.g. copiloting/lost-access) + } + + // Not public, not unchanged-grandfathered β†’ block. + leaks.push({filename: page.filename, reason: 'unattributable-page'}) } return leaks } // --------------------------------------------------------------------------- -// Type guards for GraphQL response (no unsafe casts at JSON boundary) +// loadWikiPages β€” exported for testing; ENOENT-only graceful // --------------------------------------------------------------------------- -function isGraphQLRepoNameResponse(value: unknown): value is {data: {node: {nameWithOwner: string}}} { - if (typeof value !== 'object' || value === null) return false - const v = value as Record - if (typeof v.data !== 'object' || v.data === null) return false - const data = v.data as Record - if (typeof data.node !== 'object' || data.node === null) return false - const node = data.node as Record - return typeof node.nameWithOwner === 'string' && node.nameWithOwner.length > 0 -} - -function isGraphQLNodeNullResponse(value: unknown): value is {data: {node: null}} { - if (typeof value !== 'object' || value === null) return false - const v = value as Record - if (typeof v.data !== 'object' || v.data === null) return false - const data = v.data as Record - return data.node === null -} - /** - * Detect a GraphQL NOT_FOUND signal in captured subprocess output. + * Load all .md files in `dir` as WikiPageSnapshot records (filename, stem, hash, content). * - * `gh api graphql` exits non-zero whenever the response carries a top-level - * `errors` array β€” including the benign "Could not resolve to a node with the - * global id" NOT_FOUND that accompanies `data.node: null`. When that happens, - * execFileSync throws before we can parse `data.node`, so the body is only - * reachable via the thrown error's captured stdout/stderr. This classifies - * such a throw as a node-lifecycle failure rather than a transport failure. + * ENOENT is graceful (fresh checkout, no wiki yet) β†’ returns []. + * Any other error propagates β€” do not silently swallow FS errors. + * + * @param dir - Directory path to read (absolute or relative to CWD). */ -export function isNotFoundSignal(text: string): boolean { - if (text.length === 0) return false - // Try structured parse first: top-level errors[].type === 'NOT_FOUND' or data.node: null. +export async function loadWikiPages(dir: string): Promise { + let filenames: string[] try { - const parsed: unknown = JSON.parse(text) - if (typeof parsed === 'object' && parsed !== null) { - const obj = parsed as Record - if (Array.isArray(obj.errors) && obj.errors.some(e => isNotFoundError(e))) return true - if (isGraphQLNodeNullResponse(parsed)) return true + const entries = await readdir(dir) + filenames = entries.filter(f => f.endsWith('.md')) + } catch (error) { + if (typeof error === 'object' && error !== null && (error as NodeJS.ErrnoException).code === 'ENOENT') { + return [] } - } catch { - // Not JSON (e.g. plain `gh:` stderr line) β€” fall through to substring match. + throw error } - return /Could not resolve to a node with the global id/i.test(text) || /"type":\s*"NOT_FOUND"/i.test(text) -} - -function isNotFoundError(value: unknown): boolean { - return typeof value === 'object' && value !== null && (value as Record).type === 'NOT_FOUND' -} -function capturedOutput(error: unknown): string { - if (typeof error !== 'object' || error === null) return '' - const e = error as {stdout?: unknown; stderr?: unknown; message?: unknown} - const parts = [e.stdout, e.stderr, e.message].map(p => (typeof p === 'string' ? p : '')) - return parts.join('\n') + return Promise.all( + filenames.map(async (filename): Promise => { + const content = await readFile(join(dir, filename), 'utf8') + const hash = createHash('sha256').update(content).digest('hex') + const stem = filename.replace(/\.md$/i, '').toLowerCase() + return {filename, stem, hash, content} + }), + ) } // --------------------------------------------------------------------------- -// resolveCanonicalSlugs β€” exported for testing; fail-closed +// loadWikiFilenames β€” kept for backward compatibility; ENOENT-only graceful // --------------------------------------------------------------------------- -export interface ResolvedEntry { - node_id: string - canonicalSlug: string -} - -export interface SlugResolutionResult { - resolved: ResolvedEntry[] +/** + * List .md filenames in the given directory. + * + * ENOENT is graceful (fresh checkout, no wiki yet) β†’ returns []. + * Any other error propagates β€” do not silently swallow FS errors. + * + * @param dir - Directory path to read (absolute or relative to CWD). + */ +export async function loadWikiFilenames(dir: string): Promise { + try { + const entries = await readdir(dir) + return entries.filter(f => f.endsWith('.md')) + } catch (error) { + if (typeof error === 'object' && error !== null && (error as NodeJS.ErrnoException).code === 'ENOENT') { + return [] + } + throw error + } } -type FailureMode = 'subprocess-threw' | 'node-null' - -interface ResolutionFailure { - node_id: string - mode: FailureMode -} +// --------------------------------------------------------------------------- +// buildPublicSlugMap / buildPublicSlugs +// --------------------------------------------------------------------------- /** - * Resolve canonical slugs for all private entries via GraphQL. + * Build a map from slug β†’ RepoEntry[] for all entries with explicit `private === false`. * - * Uses parameterized `--field nodeId=` to avoid injection via crafted node_id values. + * A stem with 2+ entries signals a slug collision (two repos sanitize to the same + * string). Callers must flag ambiguous stems rather than silently admitting them. * - * Fail-closed: if ANY entry's slug cannot be resolved, throws with each failure - * labeled by mode so operators can act without guessing: - * [subprocess-threw] β€” network/rate-limit/auth issue - * [node-null] β€” repo deleted or App lost access + * Uses `computeRepoSlug` (not raw `owner--name` join) so leading-dot sanitization + * matches actual wiki filenames. * - * Captures all subprocess output; never echoes to stdout/stderr. + * Exported for testing. */ -export function resolveCanonicalSlugs(entries: readonly {node_id: string}[]): SlugResolutionResult { - const resolved: ResolvedEntry[] = [] - const failures: ResolutionFailure[] = [] - - const query = 'query($nodeId: ID!) { node(id: $nodeId) { ... on Repository { nameWithOwner } } }' - - for (const entry of entries) { - try { - const stdout = execFileSync( - 'gh', - ['api', 'graphql', '--field', `nodeId=${entry.node_id}`, '-f', `query=${query}`], - { - encoding: 'utf8', - stdio: ['inherit', 'pipe', 'pipe'], - }, - ) - const parsed: unknown = JSON.parse(stdout) - if (isGraphQLRepoNameResponse(parsed)) { - // Normalize to lowercase at storage; convert "owner/repo" β†’ "owner--repo" - // GitHub's nameWithOwner is always `owner/repo` with exactly one slash β€” single-match replace is intentional. - const slug = parsed.data.node.nameWithOwner.replace('/', '--').toLowerCase() - resolved.push({node_id: entry.node_id, canonicalSlug: slug}) - } else if (isGraphQLNodeNullResponse(parsed)) { - failures.push({node_id: entry.node_id, mode: 'node-null'}) +export function buildPublicSlugMap(repos: readonly RepoEntry[]): Map { + const map = new Map() + for (const entry of repos) { + if (entry.private === false) { + const slug = computeRepoSlug(entry.owner, entry.name) + const existing = map.get(slug) + if (existing === undefined) { + map.set(slug, [entry]) } else { - // Unexpected response shape β€” treat as subprocess-level failure - failures.push({node_id: entry.node_id, mode: 'subprocess-threw'}) + existing.push(entry) } - } catch (error) { - // `gh api graphql` exits non-zero on NOT_FOUND even though the body is a - // valid `data.node: null`. Inspect captured output so a deleted repo / lost - // App access is classified as node-null, not a transport failure. - const mode: FailureMode = isNotFoundSignal(capturedOutput(error)) ? 'node-null' : 'subprocess-threw' - failures.push({node_id: entry.node_id, mode}) } } - - if (failures.length > 0) { - const lines = failures.map(f => { - const hint = - f.mode === 'node-null' ? 'investigate repo lifecycle/App access' : 'investigate network/rate-limit/auth' - return ` [${f.mode}] ${f.node_id} (${hint})` - }) - throw new Error( - `check-wiki-private-presence: cannot verify private wiki presence (${failures.length} ${failures.length === 1 ? 'entry' : 'entries'} unresolved):\n${lines.join('\n')}`, - ) - } - - return {resolved} + return map } -// --------------------------------------------------------------------------- -// loadWikiFilenames β€” exported for testing; ENOENT-only graceful (P1 #3) -// --------------------------------------------------------------------------- +/** + * Build the set of public repo slugs from a repos.yaml entry list. + * + * Only entries with explicit `private === false` are admitted β€” absent/undefined + * private is treated as private (fail-safe). Uses `computeRepoSlug` (not a raw + * `owner--name` join) so leading-dot sanitization matches actual wiki filenames. + * + * Exported as a pure helper so tests can exercise the predicate directly without + * standing up the full CLI entry-point. + */ +export function buildPublicSlugs(repos: readonly RepoEntry[]): Set { + return new Set(buildPublicSlugMap(repos).keys()) +} /** - * List .md filenames in knowledge/wiki/repos/. + * Validate and return the `GRANDFATHER_WIKI_REPOS_DIR` environment variable. * - * ENOENT is graceful (fresh checkout, no wiki yet) β†’ returns []. - * Any other error propagates β€” do not silently swallow FS errors. + * Throws on undefined or blank input β€” misconfiguration must never silently + * pass or over-block. Returns the trimmed path string on success. + * + * Exported as a pure helper so tests can exercise the validation branch without + * standing up the full CLI entry-point. */ -export async function loadWikiFilenames(): Promise { - try { - const entries = await readdir('knowledge/wiki/repos') - return entries.filter(f => f.endsWith('.md')) - } catch (error) { - if (typeof error === 'object' && error !== null && (error as NodeJS.ErrnoException).code === 'ENOENT') { - return [] - } - throw error +export function requireGrandfatherDir(env: string | undefined): string { + if (env === undefined || env.trim() === '') { + throw new Error( + 'check-wiki-private-presence: GRANDFATHER_WIKI_REPOS_DIR env var is required but not set. ' + + "The workflow must supply the path to main's knowledge/wiki/repos/ directory " + + 'so that pages already public are not re-flagged.', + ) } + return env.trim() } -// --------------------------------------------------------------------------- -// CLI entry-point -// --------------------------------------------------------------------------- - async function main(): Promise { - // 1. Read and validate metadata/repos.yaml + // 1. Read and validate data branch's own metadata/repos.yaml (CWD = data-branch-check). + // Fail closed: throws on read/parse error. const reposRaw = await readFile('metadata/repos.yaml', 'utf8') const reposParsed: unknown = YAML.parse(reposRaw) assertReposFile(reposParsed) - // 2. Filter to private entries with node_id - const privateEntries = reposParsed.repos.filter( - (r): r is typeof r & {node_id: string} => r.private === true && typeof r.node_id === 'string', - ) - - if (privateEntries.length === 0) { - process.stdout.write('no private wiki leaks detected\n') - return - } + // 2. Build publicSlugMap from entries with explicit private === false. + // IMPORTANT: absent/undefined private is NOT admitted β€” fail-safe default is "private". + // Uses computeRepoSlug (not raw owner--name) so sanitization matches actual wiki filenames. + const publicSlugMap = buildPublicSlugMap(reposParsed.repos) - // 3. List wiki repo filenames from the working directory (data branch checkout) - const wikiRepoFilenames = await loadWikiFilenames() + // 3. Load data wiki pages (CWD = data-branch-check). + const dataWikiPages = await loadWikiPages('knowledge/wiki/repos') - // 4. Resolve canonical slugs via GraphQL (fail-closed β€” throws on any failure) - const {resolved} = resolveCanonicalSlugs(privateEntries) + // 4. Load grandfather pages from main's wiki dir. + // Fail closed: GRANDFATHER_WIKI_REPOS_DIR MUST be supplied by the workflow. + // An unset var β†’ throw loudly; misconfiguration must not silently pass or over-block. + const grandfatherDir = requireGrandfatherDir(process.env.GRANDFATHER_WIKI_REPOS_DIR) + const grandfatherPages = await loadWikiPages(grandfatherDir) - // 5. Detect leaks - const leaks = detectPrivateWikiLeaks({ - privateEntries: resolved, - wikiRepoFilenames, - }) + // 5. Detect leaks β€” pure function, no GraphQL. + const leaks = detectPrivateWikiLeaks({dataWikiPages, publicSlugMap, grandfatherPages}) // 6. Report if (leaks.length > 0) { diff --git a/scripts/merge-data-pr.test.ts b/scripts/merge-data-pr.test.ts index d2ce0cea0..41aa07763 100644 --- a/scripts/merge-data-pr.test.ts +++ b/scripts/merge-data-pr.test.ts @@ -945,6 +945,102 @@ describe('mergeDataPr', () => { ) }) + it('reports conflicted: false for a clean mergeable PR', async () => { + const createIssue = vi.fn(async () => ({ + data: {number: 111, html_url: 'https://github.com/fro-bot/.github/issues/111'}, + })) + const octokit = mockOctokit({ + getPullRequest: async () => ({data: {mergeable_state: 'clean', head: {sha: 'data-commit-sha'}}}), + createIssue, + }) + + // #when the promotion PR is cleanly mergeable + const result = await mergeDataPr({octokit, now: new Date('2026-04-16T00:00:00.000Z')}) + + // #then conflicted is false and no alert issue is created + expect(result.conflicted).toBe(false) + expect(result.conflictAlertIssueNumber).toBeNull() + expect(createIssue).not.toHaveBeenCalled() + }) + + it('reports conflicted: true and creates a conflict alert when mergeable_state is dirty', async () => { + let createIssueParams: CreateIssueParams | undefined + const createIssue = vi.fn(async (params: CreateIssueParams) => { + createIssueParams = params + return {data: {number: 111, html_url: 'https://github.com/fro-bot/.github/issues/111'}} + }) + const octokit = mockOctokit({ + getPullRequest: async () => ({data: {mergeable_state: 'dirty', head: {sha: 'data-commit-sha'}}}), + createIssue, + }) + + // #when the promotion PR is born-conflicted + const result = await mergeDataPr({octokit, now: new Date('2026-04-16T00:00:00.000Z')}) + + // #then it sets conflicted and creates an alert issue + expect(result.conflicted).toBe(true) + expect(result.conflictAlertIssueNumber).toBe(111) + expect(createIssueParams).toBeDefined() + if (createIssueParams == null) throw new Error('expected conflict alert issue parameters') + expect(createIssueParams.owner).toBe('fro-bot') + expect(createIssueParams.repo).toBe('.github') + expect(createIssueParams.title).toContain('Conflicted data promotion PR:') + expect(createIssueParams.body).toContain('42') // PR number in body + expect(createIssueParams.body).toContain('data') + expect(createIssueParams.body).toContain('main') + }) + + it('reports conflicted: false when mergeable_state stays unknown after retries', async () => { + vi.useFakeTimers() + + try { + const createIssue = vi.fn(async () => ({ + data: {number: 111, html_url: 'https://github.com/fro-bot/.github/issues/111'}, + })) + const getPullRequest = vi.fn(async () => ({ + data: {mergeable_state: 'unknown', head: {sha: 'data-commit-sha'}}, + })) + const octokit = mockOctokit({getPullRequest, createIssue}) + + const resultPromise = mergeDataPr({octokit, logger: mockLogger(), now: new Date('2026-04-16T00:00:00.000Z')}) + await vi.runAllTimersAsync() + const result = await resultPromise + + // #then it does not false-alarm on unknown mergeability + expect(result.conflicted).toBe(false) + expect(result.conflictAlertIssueNumber).toBeNull() + expect(createIssue).not.toHaveBeenCalled() + } finally { + vi.useRealTimers() + } + }) + + it('deduplicates conflict alert when one is already open', async () => { + const createIssue = vi.fn(async () => ({ + data: {number: 111, html_url: 'https://github.com/fro-bot/.github/issues/111'}, + })) + const listForRepo = vi.fn(async () => ({ + data: [{number: 100, title: 'Conflicted data promotion PR: data -> main (PR #42)', state: 'open' as const}], + })) + const octokit = mockOctokit({ + getPullRequest: async () => ({data: {mergeable_state: 'dirty', head: {sha: 'data-commit-sha'}}}), + createIssue, + listForRepo, + }) + + // #given an existing conflict alert is already open + // #when the merge PR script runs again + const result = await mergeDataPr({octokit, now: new Date('2026-04-16T00:00:00.000Z')}) + + // #then conflicted is still true but no duplicate issue is created + expect(result.conflicted).toBe(true) + expect(result.conflictAlertIssueNumber).toBeNull() + expect(createIssue).not.toHaveBeenCalled() + expect(listForRepo).toHaveBeenCalledWith( + expect.objectContaining({owner: 'fro-bot', repo: '.github', state: 'open'}), + ) + }) + it('throws a structured error on non-conflict API failures', async () => { const octokit = mockOctokit({ compareCommitsWithBasehead: async () => { @@ -963,4 +1059,97 @@ describe('mergeDataPr', () => { expect((error as MergeDataPrError).code).toBe('API_ERROR') expect((error as MergeDataPrError).message).toContain('Compare failed') }) + + it('reports mergeabilityUnavailable when the mergeable-state fetch fails on a retryable error', async () => { + const createIssue = vi.fn(async () => ({ + data: {number: 111, html_url: 'https://github.com/fro-bot/.github/issues/111'}, + })) + const octokit = mockOctokit({ + // #given the mergeability fetch fails with a retryable 503 + getPullRequest: async () => { + throw Object.assign(new Error('Service Unavailable'), {status: 503}) + }, + createIssue, + }) + + // #when the promotion PR exists but its state could not be determined + const result = await mergeDataPr({octokit, logger: mockLogger(), now: new Date('2026-04-16T00:00:00.000Z')}) + + // #then the run signals unavailable (so main() exits non-zero) without false-alarming as conflicted + expect(result.mergeabilityUnavailable).toBe(true) + expect(result.conflicted).toBe(false) + expect(result.conflictAlertIssueNumber).toBeNull() + expect(createIssue).not.toHaveBeenCalled() + }) + + it('reports mergeabilityUnavailable: false for a clean mergeable PR', async () => { + const octokit = mockOctokit({ + getPullRequest: async () => ({data: {mergeable_state: 'clean', head: {sha: 'data-commit-sha'}}}), + }) + + // #when the promotion PR is cleanly mergeable + const result = await mergeDataPr({octokit, now: new Date('2026-04-16T00:00:00.000Z')}) + + // #then mergeability was determined, so it is not flagged unavailable + expect(result.mergeabilityUnavailable).toBe(false) + expect(result.conflicted).toBe(false) + }) + + it('reports mergeabilityUnavailable: false when mergeable_state stays unknown after retries', async () => { + vi.useFakeTimers() + + try { + const getPullRequest = vi.fn(async () => ({ + data: {mergeable_state: 'unknown', head: {sha: 'data-commit-sha'}}, + })) + const octokit = mockOctokit({getPullRequest}) + + // #when GitHub is still computing mergeability (fetched, but unknown) + const resultPromise = mergeDataPr({octokit, logger: mockLogger(), now: new Date('2026-04-16T00:00:00.000Z')}) + await vi.runAllTimersAsync() + const result = await resultPromise + + // #then fetched-unknown is a no-false-alarm case, distinct from fetch-failure + expect(result.mergeabilityUnavailable).toBe(false) + expect(result.conflicted).toBe(false) + } finally { + vi.useRealTimers() + } + }) + + it('keeps conflicted: true with a null alert when conflict-alert creation fails retryably', async () => { + const octokit = mockOctokit({ + getPullRequest: async () => ({data: {mergeable_state: 'dirty', head: {sha: 'data-commit-sha'}}}), + // #given alert creation fails with a retryable error + createIssue: async () => { + throw Object.assign(new Error('Service Unavailable'), {status: 503}) + }, + }) + + // #when the promotion PR is born-conflicted but the alert cannot be created this run + const result = await mergeDataPr({octokit, logger: mockLogger(), now: new Date('2026-04-16T00:00:00.000Z')}) + + // #then the dirty signal is preserved (exit non-zero) and the next run retries the alert + expect(result.conflicted).toBe(true) + expect(result.conflictAlertIssueNumber).toBeNull() + }) + + it('rethrows when conflict-alert creation fails with a non-retryable error', async () => { + const octokit = mockOctokit({ + getPullRequest: async () => ({data: {mergeable_state: 'dirty', head: {sha: 'data-commit-sha'}}}), + // #given alert creation fails with a non-retryable error + createIssue: async () => { + throw Object.assign(new Error('Validation failed'), {status: 422}) + }, + }) + + // #when the merge PR script runs + const error = await mergeDataPr({octokit, logger: mockLogger(), now: new Date('2026-04-16T00:00:00.000Z')}).catch( + (error: unknown) => error, + ) + + // #then the non-retryable failure surfaces instead of being swallowed + expect(error).toBeInstanceOf(Error) + expect((error as Error).message).toContain('Validation failed') + }) }) diff --git a/scripts/merge-data-pr.ts b/scripts/merge-data-pr.ts index 119f7c4f4..3f068fccb 100644 --- a/scripts/merge-data-pr.ts +++ b/scripts/merge-data-pr.ts @@ -11,6 +11,7 @@ const MERGEABLE_STATE_RETRY_COUNT = 2 const MERGEABLE_STATE_RETRY_DELAY_MS = 1000 const REDISCOVER_PULL_REQUEST_RETRY_COUNT = 1 const REDISCOVER_PULL_REQUEST_RETRY_DELAY_MS = 1000 +const CONFLICT_ALERT_TITLE_PREFIX = 'Conflicted data promotion PR:' type OctokitConstructor = new (params: {auth: string}) => OctokitClient type MergeLabel = 'auto-merge' | 'needs-review' @@ -32,6 +33,15 @@ export interface MergeDataPrResult { label: MergeLabel | null journalIssueNumber: number | null staleAlertIssueNumber: number | null + conflicted: boolean + conflictAlertIssueNumber: number | null + /** + * True when the mergeable-state FETCH itself failed with a retryable error (timeout/5xx/rate-limit) + * and we could not determine whether the PR is born-conflicted. + * Distinct from a successfully-fetched `mergeable_state === 'unknown'` (GitHub still computing), + * which is not an alarm condition. When true, callers should treat the run as non-clean (exit non-zero). + */ + mergeabilityUnavailable: boolean } /** @@ -97,6 +107,9 @@ export async function mergeDataPr(params: MergeDataPrParams = {}): Promise { +}): Promise { let pullRequest try { @@ -234,7 +331,7 @@ async function maybeUpdateBehindPullRequest(params: { `${formatApiWarning(error, `fetching PR #${params.pullRequestNumber}`)}; continuing because the PR already exists and a later run can retry the branch update.`, ) - return + return 'unavailable' } if (pullRequest.data.mergeable_state === 'unknown') { @@ -242,11 +339,11 @@ async function maybeUpdateBehindPullRequest(params: { `PR #${params.pullRequestNumber} mergeability stayed unknown after ${MERGEABLE_STATE_RETRY_COUNT + 1} checks; leaving the branch unchanged for this run.`, ) - return + return 'unknown' } if (pullRequest.data.mergeable_state !== 'behind') { - return + return pullRequest.data.mergeable_state } try { @@ -258,7 +355,7 @@ async function maybeUpdateBehindPullRequest(params: { }) } catch (error: unknown) { if (isExpectedHeadShaRace(error)) { - return + return 'behind' } if (isRetryableGitHubApiError(error)) { @@ -266,11 +363,45 @@ async function maybeUpdateBehindPullRequest(params: { `${formatApiWarning(error, `updating PR #${params.pullRequestNumber} branch from ${params.repo}`)}; continuing because the PR already exists and a later run can retry the branch update.`, ) - return + return null } throw toMergeDataPrError(error, `updating PR #${params.pullRequestNumber} branch from ${params.repo}`) } + + return 'behind' +} + +async function maybeCreateConflictAlert(params: { + octokit: OctokitClient + owner: string + repo: string + pullRequestNumber: number + pullRequestUrl: string + headBranch: string + baseBranch: string +}): Promise { + const existingIssues = await params.octokit.rest.issues.listForRepo({ + owner: params.owner, + repo: params.repo, + state: 'open', + per_page: 30, + }) + + const existingAlert = existingIssues.data.find(issue => issue.title.startsWith(CONFLICT_ALERT_TITLE_PREFIX)) + + if (existingAlert !== undefined) { + return null + } + + const alert = await params.octokit.rest.issues.create({ + owner: params.owner, + repo: params.repo, + title: `${CONFLICT_ALERT_TITLE_PREFIX} ${params.headBranch} -> ${params.baseBranch} (PR #${params.pullRequestNumber})`, + body: createConflictAlertBody(params), + }) + + return alert.data.number } async function waitForKnownMergeableState(params: { @@ -530,6 +661,22 @@ function createStaleAlertBody(params: { ].join('\n') } +function createConflictAlertBody(params: { + pullRequestNumber: number + pullRequestUrl: string + headBranch: string + baseBranch: string +}): string { + return [ + `PR #${params.pullRequestNumber} (${params.pullRequestUrl}) is born conflicted.`, + '', + `The \`${params.headBranch}\` β†’ \`${params.baseBranch}\` data promotion cannot be merged automatically.`, + 'Manual resolution is required: resolve the conflicts, push to the head branch, and re-run the promotion.', + '', + 'This alert will not be duplicated while this issue remains open.', + ].join('\n') +} + async function createOctokitFromEnv(): Promise { const token = process.env.GITHUB_TOKEN @@ -702,6 +849,10 @@ async function delay(milliseconds: number): Promise { async function main(): Promise { const result = await mergeDataPr() process.stdout.write(`${JSON.stringify(result)}\n`) + + if (result.conflicted || result.mergeabilityUnavailable) { + process.exitCode = 1 + } } if (import.meta.url === `file://${process.argv[1]}`) {