From 06030a85cce6740eca2525bd5580a9fdc87fa7ad Mon Sep 17 00:00:00 2001 From: schickling-assistant <261620128+schickling-assistant@users.noreply.github.com> Date: Thu, 11 Jun 2026 14:13:35 +0200 Subject: [PATCH 01/13] docs(megarepo): design VRS for cold named-branch worktree GC (#771) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reframe #771 from artifact-pruning to safely deleting cold named-branch worktrees from the store (the dominant store accumulation that default GC cannot currently reclaim, since it blanket-protects every named ref). Captures the agreed layered-gate model as decision records 0001-0008, a glossary, and a design subsection in spec.md: - cross-megarepo live-set veto (hard) — verified end-to-end in an isolated store - lossless floor (commit reachable on remote + capture-first) - merged-PR as the primary staleness signal (git-ancestor is unusable under squash-merge) - three reclamation timers (absence / post-merge / archive retention) - capture-then-delete unified with the .archive/ worktree convention Backed by a read-only real-store survey and isolated-store experiments (no real store mutated). Implementation pending. 
Co-Authored-By: Claude Opus 4.8 (1M context) --- .../0001-gc-reclaims-cold-named-worktrees.md | 33 +++++++++ ...cross-megarepo-liveness-vetoes-deletion.md | 69 +++++++++++++++++++ ...03-trust-liveness-registry-with-margins.md | 40 +++++++++++ ...4-capture-then-delete-uncommitted-state.md | 36 ++++++++++ ...5-merged-pr-is-primary-staleness-signal.md | 37 ++++++++++ .../0006-stale-deletion-in-default-gc.md | 37 ++++++++++ .../decisions/0007-archive-is-the-trash.md | 45 ++++++++++++ .../0008-three-reclamation-timers.md | 42 +++++++++++ packages/@overeng/megarepo/docs/glossary.md | 67 ++++++++++++++++++ packages/@overeng/megarepo/docs/spec.md | 36 ++++++++++ 10 files changed, 442 insertions(+) create mode 100644 packages/@overeng/megarepo/docs/decisions/0001-gc-reclaims-cold-named-worktrees.md create mode 100644 packages/@overeng/megarepo/docs/decisions/0002-cross-megarepo-liveness-vetoes-deletion.md create mode 100644 packages/@overeng/megarepo/docs/decisions/0003-trust-liveness-registry-with-margins.md create mode 100644 packages/@overeng/megarepo/docs/decisions/0004-capture-then-delete-uncommitted-state.md create mode 100644 packages/@overeng/megarepo/docs/decisions/0005-merged-pr-is-primary-staleness-signal.md create mode 100644 packages/@overeng/megarepo/docs/decisions/0006-stale-deletion-in-default-gc.md create mode 100644 packages/@overeng/megarepo/docs/decisions/0007-archive-is-the-trash.md create mode 100644 packages/@overeng/megarepo/docs/decisions/0008-three-reclamation-timers.md create mode 100644 packages/@overeng/megarepo/docs/glossary.md diff --git a/packages/@overeng/megarepo/docs/decisions/0001-gc-reclaims-cold-named-worktrees.md b/packages/@overeng/megarepo/docs/decisions/0001-gc-reclaims-cold-named-worktrees.md new file mode 100644 index 000000000..b532c94b1 --- /dev/null +++ b/packages/@overeng/megarepo/docs/decisions/0001-gc-reclaims-cold-named-worktrees.md @@ -0,0 +1,33 @@ +# GC reclaims cold named-branch worktrees by deletion, not 
artifact-pruning + +## Status + +accepted (supersedes the original scope of issue #771) + +## Context + +`mr store gc` default mode protects every `refs/heads/*` and `refs/tags/*` +worktree unconditionally (`classifyStoreWorktreePolicy` → `named_branch_ref`). +Only detached `refs/commits/*` worktrees outside the live set are collectable. +A real-store survey (2026-06-10) found 323 named-branch worktrees across the +store (122 in effect-utils alone), most cold, so default GC structurally cannot +reclaim the dominant accumulation. + +Issue #771 originally proposed the conservative path: keep every worktree, delete +only its regenerable artifacts in place (`--prune-artifacts`). + +## Decision + +Target **full deletion of cold named-branch worktrees** instead. Refine the +staleness classification so default GC can safely delete a cold named-branch +worktree (reclaiming source, `.git`, and artifacts together). The +artifact-prune-in-place mode from #771 is **deferred**, not pursued in this work. + +## Consequences + +- The hard problem moves from "which artifacts are regenerable" to "which + worktrees hold no irreplaceable state" — a safety-classification problem. +- A false-positive deletion can lose un-pushed/uncommitted work, so the safety + gate must be conservative (see later decisions on the deletion invariant). +- Worktrees we want to keep but that carry fat artifacts are NOT addressed here; + artifact-pruning remains available as future work under #771. 
diff --git a/packages/@overeng/megarepo/docs/decisions/0002-cross-megarepo-liveness-vetoes-deletion.md b/packages/@overeng/megarepo/docs/decisions/0002-cross-megarepo-liveness-vetoes-deletion.md new file mode 100644 index 000000000..bef088ea6 --- /dev/null +++ b/packages/@overeng/megarepo/docs/decisions/0002-cross-megarepo-liveness-vetoes-deletion.md @@ -0,0 +1,69 @@ +# Cross-megarepo membership vetoes stale-worktree deletion + +## Status + +accepted (safety invariant) + +## Context + +The store is shared by independent megarepo workspaces. A worktree that looks +stale in isolation (merged PR, old, clean) may still be an active member of a +_different_ megarepo. Deleting it would break that workspace. + +Protection today rides on the store liveness registry (`.state/workspaces/ +.json`): each workspace records its consumed store paths (`livePaths`, +derived from `repos/` symlinks + lock). `collectStoreLiveSet` unions all +registered records. Verified live: in `default` mode a detached commit worktree +consumed by workspace B is skipped when B is registered. + +Two structural limits (verified / being verified end-to-end): + +1. The registry is a per-workspace **cache**, refreshed only when that workspace + runs an `mr` command. A workspace that exists but has never run `mr` (or whose + record is stale) contributes nothing to the live set — its members are + unprotected. +2. The two existing GC modes can't express the needed gate: `default` blanket- + protects every named branch (so liveness is moot for them); `--all` ignores + the live set entirely (protects nothing). Neither honors "delete a named + branch _only if_ no workspace consumes it." + +## Decision + +Cross-megarepo membership is a **hard veto** on deletion: a worktree referenced +by ANY workspace's live set is never deleted, even if it independently satisfies +the lossless+staleness gate. 
The new stale-deletion policy is a THIRD policy +(distinct from today's `default` and `--all` behaviour; decision 0006 folds it into default `mr store gc`) that consults the live set for named +branches too. + +The registry-completeness gap (limit 1) is itself a safety problem and must be +closed or bounded before stale named-branch deletion is enabled (see the +freshness/heartbeat decision). + +## Verified (end-to-end, isolated store — tmp/gc-exp/xmatrix-findings.md) + +Real `mr` binary, isolated store, gc run from a workspace that does NOT consume +the target detached-commit worktree C: + +- Registered consumer ⇒ C `skipped_in_use` ("referenced by workspace root set"). + Protection unions livePaths of ALL registered workspaces. Works. +- Unregistered / deleted-record consumer ⇒ C `removed` (real gc physically + deleted it). A `repos/` symlink ALONE gives zero protection — gc never + live-scans other workspaces' symlinks. +- Only `mr status` / `mr store status` refresh a record; `mr store gc` (even + dry-run), `ls`, `check`, `root` do NOT. Records go stale easily. +- **Latent pre-existing bug:** after a workspace repins to a new target without + re-registering, gc over-protects the abandoned worktree AND _under-protects + the new in-use target_ (deletes a worktree a live workspace is actually using). + This already exists for commit worktrees today, independent of this feature. + +## Consequences + +- The live-set gate must precede the lossless/staleness checks and use the + store-wide registry (`collectStoreLiveSet`), not just the current workspace. +- Stale deletion cannot reuse `--all` semantics. +- A consumer that never registers is the dominant residual risk; mitigations + (more commands refresh the record; freshness gate; conservative default) + are required, not optional. +- The repin-without-reregister under-protection (verified) must be closed: more + commands must refresh, and/or gc must reconcile registered workspaces before + deleting. 
diff --git a/packages/@overeng/megarepo/docs/decisions/0003-trust-liveness-registry-with-margins.md b/packages/@overeng/megarepo/docs/decisions/0003-trust-liveness-registry-with-margins.md new file mode 100644 index 000000000..aa91a75cd --- /dev/null +++ b/packages/@overeng/megarepo/docs/decisions/0003-trust-liveness-registry-with-margins.md @@ -0,0 +1,40 @@ +# Trust the liveness registry, bounded by safety margins + +## Status + +accepted + +## Context + +Cross-megarepo protection rides on the per-workspace liveness registry, which is +a cache (only fresh for workspaces that have run `mr`). Building an authoritative +global workspace index was considered and rejected as heavy new infrastructure +with the same chicken-and-egg for never-seen workspaces. + +A key mitigating fact: the lossless floor (fully pushed + no uncommitted source) +already prevents _data loss_ in the cross-megarepo case — a wrongly deleted +member that passed the floor is re-materializable via `mr apply`. The veto is +therefore mostly about _availability_ (don't disrupt an active consumer) plus one +real edge: a squash-merged branch deleted from its remote may have an +unreachable commit, so re-fetch can fail. + +## Decision + +Trust the registry as the cross-megarepo signal, bounded by margins rather than +replaced by new infrastructure: + +- Refresh the current workspace's registry record on more `mr` commands (cheap) + so records stay fresh in normal use. +- Gate stale named-branch deletion on registry freshness (a TTL / heartbeat) and + refuse-when-uncertain (fall back to keeping the worktree). +- Require a worktree be continuously absent from ALL live sets across a grace + window before it is deletable (see staleness/grace-window decision), not just + absent in one snapshot. + +## Consequences + +- The residual risk is a consumer that has literally never run `mr`; this is + accepted, bounded by the grace window and the re-apply recoverability of + lossless worktrees. 
+- The deleted-remote-branch edge needs explicit handling in the lossless floor + (prefer "commit reachable on remote", not merely "branch was pushed once"). diff --git a/packages/@overeng/megarepo/docs/decisions/0004-capture-then-delete-uncommitted-state.md b/packages/@overeng/megarepo/docs/decisions/0004-capture-then-delete-uncommitted-state.md new file mode 100644 index 000000000..243fc1517 --- /dev/null +++ b/packages/@overeng/megarepo/docs/decisions/0004-capture-then-delete-uncommitted-state.md @@ -0,0 +1,36 @@ +# Capture-then-delete: safety never depends on classifying dirt + +## Status + +accepted + +## Context + +A real-store survey proved that classifying uncommitted changes as "generated" +vs "source" by path is unreliable in both directions (`src/build/app.ts` matched +a `build/` pattern but is hand-written; `*.d.ts.map` / `*.genie.js` are generated +but matched no pattern). `mr` is generic and cannot reliably know a repo's +generated-file set. Yet nearly every cold worktree carries ~10 dirty files of +regenerated drift, so "any dirt blocks deletion" reclaims almost nothing. + +## Decision + +Deletion safety must NOT depend on the gen/source classifier. Before deleting a +cold worktree that has any uncommitted change, capture the uncommitted state into +a recoverable store-side trash with a retention TTL (e.g. move the worktree under +`$STORE/.state/trash//-/`, or persist a diff patch + untracked +tarball). Only then remove it. Clean worktrees (nothing to lose, and committed +work already durable per the lossless floor) may be hard-deleted directly. + +"Generated vs source" is demoted to a UX-only filter: known-regenerable drift +(lockfiles, declared genie outputs) need not be stashed and need not be reported +as risk — but mis-classifying it never causes data loss. + +## Consequences + +- Provably lossless regardless of classifier accuracy. 
+- Trash consumes disk until its TTL expires, partially deferring reclaim for + dirty worktrees; the dominant win (clean, merged worktrees → hard delete) is + unaffected. Trash is itself GC'd by age. +- Recovery story: a wrongly-deleted dirty worktree is restorable from trash + within the TTL. diff --git a/packages/@overeng/megarepo/docs/decisions/0005-merged-pr-is-primary-staleness-signal.md b/packages/@overeng/megarepo/docs/decisions/0005-merged-pr-is-primary-staleness-signal.md new file mode 100644 index 000000000..a78fb63bb --- /dev/null +++ b/packages/@overeng/megarepo/docs/decisions/0005-merged-pr-is-primary-staleness-signal.md @@ -0,0 +1,37 @@ +# Merged PR is the primary staleness signal + +## Status + +accepted + +## Context + +A worktree's branch being "merged" is the strongest "this work is done" evidence. +The real-store survey proved the git-only proxy (HEAD is an ancestor of +`origin/main`) is useless here because the repos squash-merge: merged branches +sit hundreds–thousands of commits "ahead" of main (e.g. a MERGED branch 597 and +another 1179 commits ahead). Reliable merged-detection therefore requires the +GitHub PR state, joined by branch name (`gh pr list --state all --json +number,state,headRefName,mergedAt`, one batched call per repo, join locally). + +## Decision + +Use GitHub PR state (PR for the branch is MERGED) as the primary positive +staleness signal, accepting the coupling of stale-deletion to GitHub + `gh`/API + +network. This is acceptable because the store is, in practice, entirely +`github.com/*` and `mr` already models github sources. + +Conservative degradation: when no merged-PR evidence is available — no PR, a +non-GitHub remote, or `gh` unavailable/unauthenticated — the worktree is NOT +eligible for stale deletion and is kept. Absence of evidence never licenses +deletion. + +## Consequences + +- Branches with no PR (incl. 
never-pushed agent scratch worktrees) and + closed-unmerged PRs are not collected by the merged-primary path (closed-PR + handling may be added later as a separate, lower-confidence tier). +- The deletion path needs a branch→PR-state resolver with batching + caching to + stay within API rate limits; treat resolver failure as "no evidence" (keep). +- Merged-detection cost/latency lives on the GC path; keep it off the hot path of + ordinary `mr` commands. diff --git a/packages/@overeng/megarepo/docs/decisions/0006-stale-deletion-in-default-gc.md b/packages/@overeng/megarepo/docs/decisions/0006-stale-deletion-in-default-gc.md new file mode 100644 index 000000000..d1859c346 --- /dev/null +++ b/packages/@overeng/megarepo/docs/decisions/0006-stale-deletion-in-default-gc.md @@ -0,0 +1,37 @@ +# Stale named-branch deletion is part of default `mr store gc` + +## Status + +accepted + +## Context + +Options considered: a default-off opt-in mode, a separate command, or folding +stale named-branch deletion into the default `mr store gc`. The default-gc option +was chosen for maximal effectiveness and a single "reclaim disk" surface. + +## Decision + +`mr store gc` (no flags) collects stale named-branch worktrees in addition to its +current commit-worktree cleanup. The aggressive, protection-bypassing `--all` +remains a separate explicit mode. + +Because this changes long-standing behavior and permanently deletes worktrees, +the safety gates are NOT optional — they are what makes default-on acceptable: + +- Hard cross-megarepo live-set veto (registry, all workspaces). +- Lossless floor: commit reachable on a remote + capture-then-trash any + uncommitted state before deletion. +- Primary staleness signal = merged PR; absence of merged evidence ⇒ keep. +- Continuous-absence grace window before a worktree is eligible. +- `--dry-run` remains; normal runs must clearly report every stale deletion and + how to recover it from trash. 
+ +## Consequences + +- Any caller of `mr store gc` now also removes merged/cold named branches; output + must make this visible and recoverable, not silent. +- A timer/disk-hygiene consumer can call `mr store gc --dry-run --json` for + pressure-aware planning and the plain command to act. +- The conservative gates mean the effective default behavior on a repo with no + GitHub access or no merged PRs is unchanged (nothing extra deleted). diff --git a/packages/@overeng/megarepo/docs/decisions/0007-archive-is-the-trash.md b/packages/@overeng/megarepo/docs/decisions/0007-archive-is-the-trash.md new file mode 100644 index 000000000..24629117d --- /dev/null +++ b/packages/@overeng/megarepo/docs/decisions/0007-archive-is-the-trash.md @@ -0,0 +1,45 @@ +# The `.archive/` worktree convention IS the recoverable trash, reaped by retention + +## Status + +accepted + +## Context + +`mr store gc` has a second blind spot: archived worktrees. An external worktree +tool's `archive` operation moves a worktree from `refs/heads//` to the +`/.archive//` convention (keeps `.git`, logs metadata to +`.archive/README.md`, optionally deletes the branch ref). gc only walks +`refs/{heads,tags,commits}` and skips dotfile dirs, so `.archive/` is never seen +— archives accumulate indefinitely (observed in real stores). + +The `.archive/` convention already implements exactly the "move aside, keep +recoverable, record metadata" behaviour that decision 0004 (capture-then-delete) +needs. An archived worktree is also the clearest stale signal: the human +explicitly said "done". + +## Decision + +Unify the two: `.archive/` is the single recoverable holding area ("trash"). The +flow becomes: + +1. A cold, stale, lossless worktree is **archived** (moved to `/.archive/`, + metadata recorded) — recoverable, not yet reclaimed. +2. gc grows awareness of `.archive/` and **reaps archives past a retention TTL** + (hard-delete), reclaiming the disk. 
+ +So decision 0004's "capture-then-delete" is implemented AS archiving, and a +single retention policy governs reclamation. + +## Consequences + +- gc must scan `.archive/` (currently skipped as a dotfile dir) for retention + reaping, while still never treating it as a live `refs/*` worktree. +- mr takes a dependency on the `.archive/` store convention owned by the external + worktree tool; the convention should be documented as part of the store layout + so the two tools stay aligned. +- Reaping an archive must still honor the cross-megarepo live-set veto (an + archived path should never be in any live set, but check rather than assume) + and capture nothing further (archiving already captured it). +- Retention TTL is a tuning parameter (open question); archives carry a timestamp + in their name/metadata to drive age. diff --git a/packages/@overeng/megarepo/docs/decisions/0008-three-reclamation-timers.md b/packages/@overeng/megarepo/docs/decisions/0008-three-reclamation-timers.md new file mode 100644 index 000000000..9ef8a1024 --- /dev/null +++ b/packages/@overeng/megarepo/docs/decisions/0008-three-reclamation-timers.md @@ -0,0 +1,42 @@ +# Three reclamation timers: absence grace, post-merge grace, archive retention + +## Status + +accepted (default values proposed, tunable) + +## Context + +Reclamation is time-gated. The live-set veto already protects actively-used +worktrees, so the timers only shape how long after work ends a worktree lingers. +Real-store data: most merged worktrees are 30–120 days old, but a few merges are +2–5 days old — so a generous window spares fresh merges at near-zero reclaim cost. + +A two-timer model (absence + retention) was considered; the three-timer model was +chosen to give explicit, separate control over just-merged branches. + +## Decision + +Three independent timers gate reclamation: + +1. 
**Absence grace** (default 14d): a worktree must be continuously absent from + ALL live sets for this long before it is eligible to archive. Guards against a + consumer that simply hasn't re-registered recently. +2. **Post-merge grace** (default 7d): even once merged + lossless + absent, do not + archive until at least this long after the PR's `mergedAt`. Protects follow-up + work on a freshly merged branch. +3. **Archive retention TTL** (default 30d): an archived worktree is reaped + (hard-deleted) once it has been archived this long. + +A worktree is archived only when ALL of: cross-megarepo veto passes, lossless, +merged, absence-grace satisfied, AND post-merge-grace satisfied. It is reaped only +after retention TTL. + +## Consequences + +- Three host-overridable config values; defaults are conservative-generous + because the cold population is mostly much older than the windows. +- Post-merge grace requires the PR `mergedAt` timestamp from the staleness + resolver, not just the merged boolean. +- Total worst-case lifetime from "done" to disk reclaimed ≈ max(absence, + post-merge) + retention (~37–44d with defaults); acceptable given the dominant + win is the large, much-older population. diff --git a/packages/@overeng/megarepo/docs/glossary.md b/packages/@overeng/megarepo/docs/glossary.md new file mode 100644 index 000000000..7abf6f52c --- /dev/null +++ b/packages/@overeng/megarepo/docs/glossary.md @@ -0,0 +1,67 @@ +# Megarepo Store GC — Glossary + +Domain language for store garbage collection, specifically the reclamation of +cold named-branch worktrees. Scope: `mr store gc` and the store liveness model. + +## Language + +**Cold worktree**: +A store worktree in the full deletion-eligibility state: not used by any workspace, +continuously absent from every workspace live set for the grace window, lossless, and stale. Cold is +the precondition for reclamation. Opposite: **hot** (recently touched or live). +_Avoid_: stale (reserve that for the merge/age signal), unused. 
+ +**Live set**: +The union of store worktree paths recorded as in-use by all registered +workspaces, read from the **liveness registry**. The hard cross-megarepo veto: +a path in the live set is never deleted. +_Avoid_: in-use set, active set. + +**Liveness registry**: +The store-local cache at `$STORE/.state/workspaces/.json`, one record per +workspace, listing that workspace's `livePaths`. A cache, not an authoritative +index: a workspace contributes only after running an `mr` command that refreshes +its record. + +**Cross-megarepo veto**: +The rule that membership of a worktree in ANY workspace's live set forbids its +deletion, even if it independently looks reclaimable. Protects shared store +worktrees consumed by other megarepos. + +**Lossless floor**: +The non-negotiable precondition that deleting a worktree loses nothing +irreplaceable: every local commit is reachable on a remote, and any uncommitted +state has been captured (archived) first. Distinct from staleness — the floor is +about safety, staleness about timing. + +**Staleness**: +Positive evidence that a worktree's work is done. Primary signal: the branch's +GitHub **PR is merged**. Absence of merged evidence is not staleness — it means +"keep". Not derivable from `git` ancestry here because the repos squash-merge. + +**Grace window**: +The minimum duration a worktree must be continuously absent from all live sets +(and otherwise reclaimable) before it becomes cold. A buffer against deleting a +worktree a consumer simply hasn't re-registered recently. + +**Archive (as trash)**: +The recoverable holding area at `/.archive//`, an existing +worktree-archive convention reused as gc's capture-then-delete mechanism. +Reclamation is two-phase: +a cold+stale+lossless worktree is **archived** (recoverable), then **reaped** +(hard-deleted) once the archive ages past its retention TTL. +_Avoid_: trash, recycle bin (use **archive** for the on-disk concept). 
+ +**Reap**: +Hard-delete of an archived worktree past its retention TTL — the step that +actually reclaims disk. Distinct from **archive** (the recoverable first step). + +## Flagged ambiguities + +- **stale** vs **cold**: in prior informal usage "stale" meant both "old/merged" + and "safe to delete". Resolved: **staleness** = the merged/done signal only; + **cold** = the full deletion-eligibility state (not-live + grace window + + lossless + stale). +- **`--all` mode** is NOT "delete everything stale" — it is the protection- + bypassing nuclear mode that ignores the live set entirely. Cold reclamation is + a separate, live-set-honoring path within default gc. diff --git a/packages/@overeng/megarepo/docs/spec.md b/packages/@overeng/megarepo/docs/spec.md index a8674cf97..1f70e3918 100644 --- a/packages/@overeng/megarepo/docs/spec.md +++ b/packages/@overeng/megarepo/docs/spec.md @@ -626,6 +626,42 @@ mr store gc [--dry-run] [--force] [--all] **Scope:** Uses the store-local workspace registry plus the current workspace. Run `mr status` or another registry-refreshing command from active megarepos so their commit worktrees remain rooted. +##### Cold named-branch reclamation (designed, not yet implemented) + +> Status: design agreed, implementation pending. Rationale and trade-offs in +> `docs/decisions/0001`–`0008`; domain terms in `docs/glossary.md`. + +Today default gc unconditionally protects every `refs/heads/*`/`refs/tags/*` +worktree, so it cannot reclaim cold named-branch worktrees — the dominant +accumulation (survey 2026-06-10: 323 named-branch worktrees, 122 in effect-utils +alone). Default gc will be extended to delete a named-branch worktree only when +it is **cold**, decided by layered gates in this order: + +1. **Cross-megarepo live-set veto (hard).** Not present in any registered + workspace's live set (`collectStoreLiveSet`, store-wide). Verified that a + `repos/` symlink alone gives no protection — only recorded `livePaths` count. +2. 
**Lossless floor.** Every local commit reachable on a remote; any uncommitted + state captured first (see step 5). No data may be lost by deletion. +3. **Staleness.** The branch's GitHub PR is **merged** (primary signal; the + git-ancestor proxy is unusable because the repos squash-merge). No merged + evidence ⇒ keep. +4. **Grace window.** Continuously absent from all live sets for the window — not + just absent in one snapshot. +5. **Capture = archive.** A qualifying worktree is moved to `/.archive/` + (recoverable; reuses the existing worktree-archive convention), then **reaped** + (hard-deleted) once it ages past the retention TTL. gc also reaps pre-existing + `.archive/` worktrees, which it currently ignores entirely. + +Provably-lossless and conservative: absence of evidence never licenses deletion; +worst case is a re-`mr apply` (re-fetch), except the deleted-remote-branch edge. + +Open tuning questions (not yet decided): grace-window length; archive retention +TTL; whether a small post-merge grace delays deleting a just-merged branch; +closed-PR handling (a lower-confidence tier); and which additional `mr` commands +must refresh the liveness record (today only `mr status`/`mr store status` do — +a verified gap that lets a repinned-but-unre-registered workspace's live worktree +be deleted). + #### `mr store ls` List repos in global store. 
From 0d2fb3ca0b1dacf4e52cbbe9ee43776e123f9e53 Mon Sep 17 00:00:00 2001 From: schickling-assistant <261620128+schickling-assistant@users.noreply.github.com> Date: Thu, 11 Jun 2026 15:48:49 +0200 Subject: [PATCH 02/13] docs(megarepo): resolve GC reclamation tuning params (#771) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - 0008 already added three timers (absence/post-merge/retention) - 0009: closed-unmerged PRs count as a staleness signal (the lossless floor self-protects branches whose commits aren't reachable on a remote) - 0010: reconcile ALL registered workspaces before a destructive gc + broaden which mr commands refresh the liveness record — folding in the fix for the verified repin-without-reregister deletion bug - spec/glossary updated to match Co-Authored-By: Claude Opus 4.8 (1M context) --- .../0009-closed-unmerged-counts-as-stale.md | 34 +++++++++++++++ .../0010-reconcile-all-before-delete.md | 42 +++++++++++++++++++ packages/@overeng/megarepo/docs/glossary.md | 6 +-- packages/@overeng/megarepo/docs/spec.md | 35 +++++++++------- 4 files changed, 99 insertions(+), 18 deletions(-) create mode 100644 packages/@overeng/megarepo/docs/decisions/0009-closed-unmerged-counts-as-stale.md create mode 100644 packages/@overeng/megarepo/docs/decisions/0010-reconcile-all-before-delete.md diff --git a/packages/@overeng/megarepo/docs/decisions/0009-closed-unmerged-counts-as-stale.md b/packages/@overeng/megarepo/docs/decisions/0009-closed-unmerged-counts-as-stale.md new file mode 100644 index 000000000..d45cb196f --- /dev/null +++ b/packages/@overeng/megarepo/docs/decisions/0009-closed-unmerged-counts-as-stale.md @@ -0,0 +1,34 @@ +# Closed-unmerged PRs count as a staleness signal (extends 0005) + +## Status + +accepted (extends decision 0005, which had deferred closed-PR handling) + +## Context + +A closed-but-unmerged PR means the work was resolved without landing. Its commits +are not in `main`. 
The concern is "I closed it but might revisit". The mitigating +insight: the lossless floor self-protects the risky case — recoverability requires +the commit to be reachable on a remote, and a closed PR whose head branch was +deleted on the remote has unreachable commits (not in main either), so the floor +keeps the worktree automatically. Only closed branches still present/reachable on +the remote are reclaim candidates, and those lose nothing on deletion (re-fetchable). + +## Decision + +Treat a CLOSED-unmerged PR as a valid staleness signal under the SAME gates as a +merged PR (cross-megarepo veto, lossless floor, the three timers). No separate +longer grace for closed — the lossless floor already differentiates recoverable +from not. + +The primary staleness predicate is therefore: the branch's PR is **merged OR +closed**. An open PR, or no PR at all, still means keep. + +## Consequences + +- Slightly more reclaim (closed-unmerged worktrees whose branches are still on + the remote). +- An OPEN PR is never a staleness signal — open work is kept regardless of age. +- The staleness resolver must return PR state (merged/closed/open/none) + + `mergedAt` (for post-merge grace; closed uses `closedAt` analogously if a + post-close grace is later desired). diff --git a/packages/@overeng/megarepo/docs/decisions/0010-reconcile-all-before-delete.md b/packages/@overeng/megarepo/docs/decisions/0010-reconcile-all-before-delete.md new file mode 100644 index 000000000..14ffeed16 --- /dev/null +++ b/packages/@overeng/megarepo/docs/decisions/0010-reconcile-all-before-delete.md @@ -0,0 +1,42 @@ +# Reconcile all workspaces before a destructive GC; broaden refresh triggers + +## Status + +accepted (also fixes a verified pre-existing bug) + +## Context + +End-to-end experiments proved a pre-existing data-availability bug: only +`mr status` / `mr store status` refresh a workspace's liveness record. 
A +workspace that repins a member to a new target and runs no refreshing command +before a concurrent gc has a STALE record — gc then over-protects the abandoned +target and _deletes the new in-use target_ (verified: commit worktree D removed +while a live workspace consumed it). With named-branch deletion enabled this risk +extends to branches. + +Each registry record carries its `workspaceRoot`, and a workspace's true live +paths are always derivable from its on-disk `repos/` symlinks + lock — so gc can +re-derive them rather than trust a possibly-stale cached `livePaths`. + +## Decision + +Two changes: + +1. **Reconcile-all before delete.** Before any named-branch deletion, gc + re-derives EVERY registered workspace's live paths fresh from disk (not just + the current workspace's), then computes the live set. Deterministically + catches repins regardless of whether that workspace ran a command. +2. **Broaden refresh triggers.** More `mr` commands refresh the current + workspace's record (e.g. `apply`, `sync`, `pull`, `pin`, and gc for its own + invoking workspace), so workspaces register earlier and records stay fresh. + +## Consequences + +- Reconcile cost scales with the number of registered workspaces (bounded, cheap + file/symlink reads); acceptable on the destructive path, not the hot path. +- Residual risk shrinks to a workspace that has LITERALLY never run any `mr` + command (no record at all) — bounded by the grace window and lossless re-apply. +- This closes the verified bug as part of this work; no separate issue needed. +- A reconcile that finds a workspace dir gone prunes its record (existing + behaviour); a workspace dir present but unreadable should fail safe (treat its + last-known paths as live). 
diff --git a/packages/@overeng/megarepo/docs/glossary.md b/packages/@overeng/megarepo/docs/glossary.md index 7abf6f52c..0af7b2344 100644 --- a/packages/@overeng/megarepo/docs/glossary.md +++ b/packages/@overeng/megarepo/docs/glossary.md @@ -35,9 +35,9 @@ state has been captured (archived) first. Distinct from staleness — the floor about safety, staleness about timing. **Staleness**: -Positive evidence that a worktree's work is done. Primary signal: the branch's -GitHub **PR is merged**. Absence of merged evidence is not staleness — it means -"keep". Not derivable from `git` ancestry here because the repos squash-merge. +Positive evidence that a worktree's work is done. Signal: the branch's GitHub PR +is **merged or closed** (an OPEN PR or no PR is not staleness — it means "keep"). +Not derivable from `git` ancestry here because the repos squash-merge. **Grace window**: The minimum duration a worktree must be continuously absent from all live sets diff --git a/packages/@overeng/megarepo/docs/spec.md b/packages/@overeng/megarepo/docs/spec.md index 1f70e3918..880da7b60 100644 --- a/packages/@overeng/megarepo/docs/spec.md +++ b/packages/@overeng/megarepo/docs/spec.md @@ -642,25 +642,30 @@ it is **cold**, decided by layered gates in this order: `repos/` symlink alone gives no protection — only recorded `livePaths` count. 2. **Lossless floor.** Every local commit reachable on a remote; any uncommitted state captured first (see step 5). No data may be lost by deletion. -3. **Staleness.** The branch's GitHub PR is **merged** (primary signal; the - git-ancestor proxy is unusable because the repos squash-merge). No merged - evidence ⇒ keep. -4. **Grace window.** Continuously absent from all live sets for the window — not - just absent in one snapshot. -5. **Capture = archive.** A qualifying worktree is moved to `/.archive/` - (recoverable; reuses the existing worktree-archive convention), then **reaped** - (hard-deleted) once it ages past the retention TTL. 
gc also reaps pre-existing - `.archive/` worktrees, which it currently ignores entirely. +3. **Staleness.** The branch's GitHub PR is **merged or closed** (primary signal; + the git-ancestor proxy is unusable because the repos squash-merge). An open PR + or no PR ⇒ keep. Closed-unmerged is safe under the same gates because the + lossless floor keeps any worktree whose commits aren't reachable on a remote. +4. **Grace windows (three timers).** Continuously absent from all live sets for + the _absence grace_ (default 14d) — not just absent in one snapshot; a merged + PR must also be past the _post-merge grace_ (default 7d after `mergedAt`). +5. **Capture = archive → reap.** A qualifying worktree is moved to + `/.archive/` (recoverable; reuses the existing worktree-archive + convention), then **reaped** (hard-deleted) once it ages past the _archive + retention TTL_ (default 30d). gc also reaps pre-existing `.archive/` worktrees, + which it currently ignores entirely. + +Before any deletion, gc **reconciles all registered workspaces** (re-derives each +one's live paths fresh from disk), not just the current workspace, and more `mr` +commands refresh the liveness record — closing a verified bug where a +repinned-but-unre-registered workspace's _live_ worktree could be deleted. Provably-lossless and conservative: absence of evidence never licenses deletion; worst case is a re-`mr apply` (re-fetch), except the deleted-remote-branch edge. -Open tuning questions (not yet decided): grace-window length; archive retention -TTL; whether a small post-merge grace delays deleting a just-merged branch; -closed-PR handling (a lower-confidence tier); and which additional `mr` commands -must refresh the liveness record (today only `mr status`/`mr store status` do — -a verified gap that lets a repinned-but-unre-registered workspace's live worktree -be deleted).
+Remaining open: exact timer defaults are tunable per host; whether a post-close +grace mirrors post-merge grace; metrics/output surface for the disk-hygiene +consumer. #### `mr store ls` From 9604d776fc4c4b51b93607c76703f1329232d40a Mon Sep 17 00:00:00 2001 From: schickling-assistant <261620128+schickling-assistant@users.noreply.github.com> Date: Thu, 11 Jun 2026 16:07:10 +0200 Subject: [PATCH 03/13] docs(megarepo): add GC test contract (0011) after adversarial plan review (#771) - 0011: deterministic test seams (injected `now`, PrStateResolver service layer) + four-layer test strategy (pure / property / integration / gated e2e) - annotate 0004: capture location superseded by 0007's /.archive/ A 5-lens review of the implementation plan surfaced safety holes (wrong reachability predicate, fail-open reconcile, lost stash, archived branch blocks re-apply); these are folded into the refined plan (tmp/gc-impl/plan.md) and 0011. Co-Authored-By: Claude Opus 4.8 (1M context) --- ...4-capture-then-delete-uncommitted-state.md | 4 +- .../docs/decisions/0011-gc-test-contract.md | 48 +++++++++++++++++++ 2 files changed, 51 insertions(+), 1 deletion(-) create mode 100644 packages/@overeng/megarepo/docs/decisions/0011-gc-test-contract.md diff --git a/packages/@overeng/megarepo/docs/decisions/0004-capture-then-delete-uncommitted-state.md b/packages/@overeng/megarepo/docs/decisions/0004-capture-then-delete-uncommitted-state.md index 243fc1517..3d1072245 100644 --- a/packages/@overeng/megarepo/docs/decisions/0004-capture-then-delete-uncommitted-state.md +++ b/packages/@overeng/megarepo/docs/decisions/0004-capture-then-delete-uncommitted-state.md @@ -2,7 +2,9 @@ ## Status -accepted +accepted — capture-before-delete principle stands; the holding location moved +from `$STORE/.state/trash/` to `/.archive/` (superseded on location by +[0007](0007-archive-is-the-trash.md)). 
## Context diff --git a/packages/@overeng/megarepo/docs/decisions/0011-gc-test-contract.md b/packages/@overeng/megarepo/docs/decisions/0011-gc-test-contract.md new file mode 100644 index 000000000..99ebac0ae --- /dev/null +++ b/packages/@overeng/megarepo/docs/decisions/0011-gc-test-contract.md @@ -0,0 +1,48 @@ +# GC test contract: deterministic seams, layered tests + +## Status + +accepted + +## Context + +Cold-worktree reclamation is safety-critical and time-/network-dependent. Tests +must be deterministic and must exercise the safety invariants, not just the happy +path. The codebase has no Effect `Clock` usage and no PR/network seam, so the +testability seams must be designed in, not retrofitted. + +## Decision + +Two injected boundaries, everything else real (no mocking of our own code): + +1. **Time** — an explicit `now: number` (epoch ms) threaded through every decision + and persistence function (`classifyColdWorktree`, `recordObservations`, + `archiveWorktree`, `refreshWorkspaceRegistry.updatedAt`). The CLI edge reads + `Clock.currentTimeMillis`; tests pass fixed values. One uniform seam. +2. **PR state** — a `PrStateResolver` service (`Context.Tag` + `Layer.effect`, the + repo's service pattern) provided into the gc command. Live impl shells `gh`; + tests provide a deterministic stub layer. No process-level `gh` mocking. + +Four test layers, cheapest-first: + +- **Pure unit** — `classifyColdWorktree` as a gate-precedence table (one row per + gate proving short-circuit + the dangerous near-misses), PR-JSON parse/join, + observation-ledger transitions (incl. corrupt-file and no continuity-laundering), + config merge. +- **Property** (`@effect/vitest` `it.prop`, `fc` from `effect/FastCheck`) — the + hard invariants: in-live-set ⇒ never archive; open/none ⇒ keep; unpushed>0 ⇒ + keep; stash present ⇒ keep. 
+- **Integration** (extended `store-setup.ts` fixture) — the cross-megarepo matrix, + the reconcile-all fail-safe (unreadable workspace ⇒ kept) and repin regression, + archive/reap with retention, and archive → `mr apply` re-materialization. Needs + three new fixture primitives: a bare with real remote-tracking refs (for + reachability), a `repinWorkspace` mutator, and `createArchiveEntry`. +- **Isolated real-binary e2e** (CI-gated/manual) — the real `mr` against a `/tmp` + store; needs `gh`/network, excluded from the default unit run. + +## Consequences + +- The injected-`now` and `PrStateResolver` seams are net-new patterns in this + package; introduced deliberately for determinism. +- The integration fixture must grow before the load-bearing safety tests can be + written; budget that work explicitly. From 72ee69674e0a39e503917884038295b1fd4b8b13 Mon Sep 17 00:00:00 2001 From: schickling-assistant <261620128+schickling-assistant@users.noreply.github.com> Date: Thu, 11 Jun 2026 16:19:40 +0200 Subject: [PATCH 04/13] =?UTF-8?q?feat(megarepo):=20cold-GC=20foundations?= =?UTF-8?q?=20=E2=80=94=20config,=20atomic=20writes,=20observation=20ledge?= =?UTF-8?q?r=20(#771)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit U1 of the cold named-branch worktree GC feature: - store-gc-config.ts: three-timer reclamation config (absence grace 14d, post-merge grace 7d, archive retention 30d) as exported defaults, with an optional `$STORE/.state/gc-config.json` merged over them; invalid/absent override falls back to defaults so the GC path never fails on a bad file. - store-fs-atomic.ts: writeFileAtomic (write-temp-then-rename) so state files are never observed half-written by a concurrent reader. - store-gc-observations.ts: cold-observation ledger (`$STORE/.state/gc-observations.json`) recording firstSeenColdAtMs per path. 
Newly-cold starts the clock; still-cold advances grace; no-longer-cold is dropped (no continuity laundering); unclean-reconcile paths skip grace-advance and re-arm. Atomic write, store-lock-guarded read-modify-write by the caller, corrupt file => empty (conservative re-arm). All decision + persistence paths take an explicit `now` (epoch ms); no ambient clock. Test fixtures (store-setup.ts) extended for the integration layer: - withRemote: store bare cloned/fetched from a separate upstream bare so it has real `refs/remotes/origin/*` (reachability + prune testable). - repinWorkspace: repoint a member symlink/lock WITHOUT re-registering (decision-0010 repin-bug regression seam). - createArchiveEntry: valid `.archive/--/` worktree with a real gitlink for retention/reap tests. Tests: config merge, ledger transitions (incl. corrupt-file and cold->not->cold no-laundering), and fixture-primitive integration checks. Co-Authored-By: Claude Opus 4.8 (1M context) --- .../megarepo/src/lib/store-fs-atomic.ts | 51 ++++++ .../megarepo/src/lib/store-gc-config.ts | 93 ++++++++++ .../src/lib/store-gc-config.unit.test.ts | 44 +++++ .../megarepo/src/lib/store-gc-observations.ts | 165 ++++++++++++++++++ .../lib/store-gc-observations.unit.test.ts | 143 +++++++++++++++ .../store-setup.integration.test.ts | 119 +++++++++++++ .../megarepo/src/test-utils/store-setup.ts | 147 +++++++++++++++- 7 files changed, 759 insertions(+), 3 deletions(-) create mode 100644 packages/@overeng/megarepo/src/lib/store-fs-atomic.ts create mode 100644 packages/@overeng/megarepo/src/lib/store-gc-config.ts create mode 100644 packages/@overeng/megarepo/src/lib/store-gc-config.unit.test.ts create mode 100644 packages/@overeng/megarepo/src/lib/store-gc-observations.ts create mode 100644 packages/@overeng/megarepo/src/lib/store-gc-observations.unit.test.ts create mode 100644 packages/@overeng/megarepo/src/test-utils/store-setup.integration.test.ts diff --git a/packages/@overeng/megarepo/src/lib/store-fs-atomic.ts 
b/packages/@overeng/megarepo/src/lib/store-fs-atomic.ts new file mode 100644 index 000000000..46f769fa3 --- /dev/null +++ b/packages/@overeng/megarepo/src/lib/store-fs-atomic.ts @@ -0,0 +1,51 @@ +/** + * Atomic file writes for store state. + * + * State files under `$STORE/.state/` (liveness records, gc ledger) must never + * be observed half-written by a concurrent reader. `writeFileAtomic` writes to + * a sibling temp file and `rename`s it into place — on POSIX filesystems + * `rename` over an existing path is atomic, so a reader sees either the old or + * the new content, never a truncated mix. + */ + +import { createHash } from 'node:crypto' + +import { FileSystem, type Error as PlatformError } from '@effect/platform' +import { Effect } from 'effect' + +import { EffectPath, type AbsoluteFilePath } from '@overeng/effect-path' + +/** Derives a per-target temp path so concurrent writers to distinct targets don't collide. */ +const tempPathFor = (path: AbsoluteFilePath): AbsoluteFilePath => { + const digest = createHash('sha256').update(path).digest('hex').slice(0, 16) + return EffectPath.unsafe.absoluteFile(`${path}.tmp-${digest}`) +} + +/** + * Atomically write `content` to `path` via write-temp-then-rename. + * + * The temp file lives in the same directory as the target (required for + * `rename` to stay on one filesystem). On any failure the temp file is removed + * so it never lingers as garbage. 
+ */ +export const writeFileAtomic = ({ + path, + content, +}: { + path: AbsoluteFilePath + content: string +}): Effect.Effect => + Effect.gen(function* () { + const fs = yield* FileSystem.FileSystem + const tempPath = tempPathFor(path) + yield* fs + .writeFileString(tempPath, content) + .pipe(Effect.tapError(() => fs.remove(tempPath).pipe(Effect.catchAll(() => Effect.void)))) + yield* fs + .rename(tempPath, path) + .pipe(Effect.tapError(() => fs.remove(tempPath).pipe(Effect.catchAll(() => Effect.void)))) + }).pipe( + Effect.withSpan('megarepo/store/fs/write-atomic', { + attributes: { 'span.label': 'write-atomic' }, + }), + ) diff --git a/packages/@overeng/megarepo/src/lib/store-gc-config.ts b/packages/@overeng/megarepo/src/lib/store-gc-config.ts new file mode 100644 index 000000000..ad5d03685 --- /dev/null +++ b/packages/@overeng/megarepo/src/lib/store-gc-config.ts @@ -0,0 +1,93 @@ +/** + * GC reclamation config (three timers, decision 0008). + * + * Defaults are conservative-generous because the cold population is dominated + * by worktrees much older than the windows. A host may override any subset via + * `$STORE/.state/gc-config.json`; provided keys are merged over the defaults and + * unknown/invalid files fall back to the defaults (never fail the gc path). + */ + +import { FileSystem, type Error as PlatformError } from '@effect/platform' +import { Effect, Schema } from 'effect' + +import { EffectPath, type AbsoluteDirPath } from '@overeng/effect-path' + +const DAY_MS = 24 * 60 * 60 * 1000 + +/** Default: a worktree must be absent from ALL live sets this long before archive eligibility. */ +export const DEFAULT_ABSENCE_GRACE_MS = 14 * DAY_MS + +/** Default: do not archive until at least this long after the PR's `mergedAt`. */ +export const DEFAULT_POST_MERGE_GRACE_MS = 7 * DAY_MS + +/** Default: an archived worktree is reaped once it has been archived this long. 
*/ +export const DEFAULT_ARCHIVE_RETENTION_MS = 30 * DAY_MS + +/** Fully-resolved reclamation timers in epoch-ms durations. */ +export interface StoreGcConfig { + readonly absenceGraceMs: number + readonly postMergeGraceMs: number + readonly archiveRetentionMs: number +} + +/** Defaults applied when no override file is present (or it is invalid). */ +export const DEFAULT_STORE_GC_CONFIG: StoreGcConfig = { + absenceGraceMs: DEFAULT_ABSENCE_GRACE_MS, + postMergeGraceMs: DEFAULT_POST_MERGE_GRACE_MS, + archiveRetentionMs: DEFAULT_ARCHIVE_RETENTION_MS, +} as const + +/** On-disk override shape: every key optional; only provided keys override defaults. */ +const StoreGcConfigOverride = Schema.Struct({ + absenceGraceMs: Schema.optional(Schema.Number), + postMergeGraceMs: Schema.optional(Schema.Number), + archiveRetentionMs: Schema.optional(Schema.Number), +}) + +/** Parsed `gc-config.json` override: every timer optional. */ +export type StoreGcConfigOverride = Schema.Schema.Type + +/** Relative path of the override file within the store. */ +export const GC_CONFIG_RELATIVE_PATH = '.state/gc-config.json' + +const gcConfigPath = (storeBasePath: AbsoluteDirPath) => + EffectPath.ops.join(storeBasePath, EffectPath.unsafe.relativeFile(GC_CONFIG_RELATIVE_PATH)) + +/** + * Merge a parsed override over the defaults. + * + * Only keys actually present in the override take effect; `undefined` keys keep + * the default. Pure so it is the unit-tested seam for the merge contract. + */ +export const mergeStoreGcConfig = (override: StoreGcConfigOverride): StoreGcConfig => ({ + absenceGraceMs: override.absenceGraceMs ?? DEFAULT_STORE_GC_CONFIG.absenceGraceMs, + postMergeGraceMs: override.postMergeGraceMs ?? DEFAULT_STORE_GC_CONFIG.postMergeGraceMs, + archiveRetentionMs: override.archiveRetentionMs ?? DEFAULT_STORE_GC_CONFIG.archiveRetentionMs, +}) + +/** + * Load the effective gc config from `$STORE/.state/gc-config.json`. + * + * Absent file ⇒ defaults. 
Unreadable or invalid file ⇒ defaults (the gc path + * must not fail on a malformed override; defaults are the safe fallback). + */ +export const loadStoreGcConfig = ({ + storeBasePath, +}: { + storeBasePath: AbsoluteDirPath +}): Effect.Effect => + Effect.gen(function* () { + const fs = yield* FileSystem.FileSystem + const path = gcConfigPath(storeBasePath) + const override = yield* fs.readFileString(path).pipe( + Effect.flatMap((content) => + Schema.decodeUnknown(Schema.parseJson(StoreGcConfigOverride))(content), + ), + Effect.catchAll(() => Effect.succeed({} as StoreGcConfigOverride)), + ) + return mergeStoreGcConfig(override) + }).pipe( + Effect.withSpan('megarepo/store/gc/load-config', { + attributes: { 'span.label': 'gc-config' }, + }), + ) diff --git a/packages/@overeng/megarepo/src/lib/store-gc-config.unit.test.ts b/packages/@overeng/megarepo/src/lib/store-gc-config.unit.test.ts new file mode 100644 index 000000000..63e26cbf7 --- /dev/null +++ b/packages/@overeng/megarepo/src/lib/store-gc-config.unit.test.ts @@ -0,0 +1,44 @@ +import { describe, expect, it } from 'vitest' + +import { + DEFAULT_ABSENCE_GRACE_MS, + DEFAULT_ARCHIVE_RETENTION_MS, + DEFAULT_POST_MERGE_GRACE_MS, + DEFAULT_STORE_GC_CONFIG, + mergeStoreGcConfig, +} from './store-gc-config.ts' + +describe('store-gc-config', () => { + describe('defaults', () => { + it('matches the three-timer decision (0008): 14d / 7d / 30d', () => { + const day = 24 * 60 * 60 * 1000 + expect(DEFAULT_ABSENCE_GRACE_MS).toBe(14 * day) + expect(DEFAULT_POST_MERGE_GRACE_MS).toBe(7 * day) + expect(DEFAULT_ARCHIVE_RETENTION_MS).toBe(30 * day) + }) + }) + + describe('mergeStoreGcConfig', () => { + it('empty override yields the defaults verbatim', () => { + expect(mergeStoreGcConfig({})).toEqual(DEFAULT_STORE_GC_CONFIG) + }) + + it('overrides only the provided keys, keeping defaults for the rest', () => { + expect(mergeStoreGcConfig({ absenceGraceMs: 1000 })).toEqual({ + absenceGraceMs: 1000, + postMergeGraceMs: 
DEFAULT_POST_MERGE_GRACE_MS, + archiveRetentionMs: DEFAULT_ARCHIVE_RETENTION_MS, + }) + }) + + it('overrides all three keys', () => { + expect( + mergeStoreGcConfig({ absenceGraceMs: 1, postMergeGraceMs: 2, archiveRetentionMs: 3 }), + ).toEqual({ absenceGraceMs: 1, postMergeGraceMs: 2, archiveRetentionMs: 3 }) + }) + + it('treats an explicit zero as a real override (not falsy fallback)', () => { + expect(mergeStoreGcConfig({ postMergeGraceMs: 0 }).postMergeGraceMs).toBe(0) + }) + }) +}) diff --git a/packages/@overeng/megarepo/src/lib/store-gc-observations.ts b/packages/@overeng/megarepo/src/lib/store-gc-observations.ts new file mode 100644 index 000000000..3efd10816 --- /dev/null +++ b/packages/@overeng/megarepo/src/lib/store-gc-observations.ts @@ -0,0 +1,165 @@ +/** + * Cold-observation ledger (`$STORE/.state/gc-observations.json`). + * + * Absence grace (decision 0008) requires knowing how long a worktree has been + * continuously cold — absent from every workspace's reconciled live set. Git + * itself records no such "first seen cold" timestamp, so gc maintains a small + * ledger mapping `normalizePath(worktreePath) -> firstSeenColdAtMs`. + * + * Contract: + * - A path newly cold this run gets `firstSeenColdAtMs = now`. + * - A path still cold keeps its existing `firstSeenColdAtMs` (grace advances). + * - A path no longer cold is DROPPED — so if it later goes cold again the grace + * clock restarts. This is the "no continuity laundering" rule: a worktree that + * reappears in a live set then disappears again must serve a fresh grace + * window, not inherit credit from an older absence. + * - Paths in `uncleanReconcilePaths` (their workspace failed a clean reconcile + * this run, decision 0010 / B2) are treated as not-cold: their grace does NOT + * advance, and any existing entry is dropped (re-arming, the conservative + * direction that keeps the worktree). + * + * Reads/writes are serialized by the caller under a store lock; the file is + * written atomically. 
A corrupt/unreadable ledger is treated as empty, which + * conservatively re-arms all grace windows. + */ + +import { FileSystem, type Error as PlatformError } from '@effect/platform' +import { Effect, Schema, type ParseResult } from 'effect' + +import { EffectPath, type AbsoluteDirPath, type AbsoluteFilePath } from '@overeng/effect-path' + +import { writeFileAtomic } from './store-fs-atomic.ts' + +/** Ledger schema: path -> epoch-ms it was first observed continuously cold. */ +const GcObservationLedger = Schema.Record({ key: Schema.String, value: Schema.Number }) + +/** In-memory ledger: `normalizePath(worktreePath) -> firstSeenColdAtMs`. */ +export type GcObservationLedger = Schema.Schema.Type + +/** Relative path of the ledger within the store. */ +export const GC_OBSERVATIONS_RELATIVE_PATH = '.state/gc-observations.json' + +const normalizePath = (path: string): string => path.replace(/\/+$/, '') + +const ledgerPath = (storeBasePath: AbsoluteDirPath): AbsoluteFilePath => + EffectPath.ops.join(storeBasePath, EffectPath.unsafe.relativeFile(GC_OBSERVATIONS_RELATIVE_PATH)) + +/** + * Compute the next ledger state from the current cold set (pure). + * + * This is the unit-tested transition seam. `coldPaths` are the worktree paths + * observed cold this run; `uncleanReconcilePaths` are excluded from cold and + * have their grace re-armed (dropped). + */ +export const nextObservationLedger = ({ + current, + coldPaths, + uncleanReconcilePaths = [], + now, +}: { + current: GcObservationLedger + coldPaths: ReadonlyArray + uncleanReconcilePaths?: ReadonlyArray | undefined + now: number +}): GcObservationLedger => { + const unclean = new Set(uncleanReconcilePaths.map(normalizePath)) + const next: Record = {} + for (const rawPath of coldPaths) { + const path = normalizePath(rawPath) + // Unclean-reconcile paths never advance grace: skip recording them entirely. 
+ if (unclean.has(path) === true) continue + // Preserve an existing firstSeen (grace advances); else start the clock now. + next[path] = current[path] ?? now + } + return next +} + +/** Reads the ledger; corrupt/unreadable file ⇒ empty (conservatively re-arm grace). */ +export const readObservationLedger = ({ + storeBasePath, +}: { + storeBasePath: AbsoluteDirPath +}): Effect.Effect => + Effect.gen(function* () { + const fs = yield* FileSystem.FileSystem + const path = ledgerPath(storeBasePath) + return yield* fs.readFileString(path).pipe( + Effect.flatMap((content) => + Schema.decodeUnknown(Schema.parseJson(GcObservationLedger))(content), + ), + Effect.catchAll(() => Effect.succeed({} as GcObservationLedger)), + ) + }).pipe( + Effect.withSpan('megarepo/store/gc/read-observations', { + attributes: { 'span.label': 'gc-observations' }, + }), + ) + +/** Atomically writes the ledger (creating `.state/` if needed). */ +const writeObservationLedger = ({ + storeBasePath, + ledger, +}: { + storeBasePath: AbsoluteDirPath + ledger: GcObservationLedger +}): Effect.Effect< + void, + PlatformError.PlatformError | ParseResult.ParseError, + FileSystem.FileSystem +> => + Effect.gen(function* () { + const fs = yield* FileSystem.FileSystem + const path = ledgerPath(storeBasePath) + const stateDir = EffectPath.ops.join(storeBasePath, EffectPath.unsafe.relativeDir('.state/')) + yield* fs.makeDirectory(stateDir, { recursive: true }) + const content = yield* Schema.encode(Schema.parseJson(GcObservationLedger, { space: 2 }))( + ledger, + ) + yield* writeFileAtomic({ path, content: content + '\n' }) + }).pipe( + Effect.withSpan('megarepo/store/gc/write-observations', { + attributes: { 'span.label': 'gc-observations' }, + }), + ) + +/** + * Read-modify-write the ledger for one gc run and return the new state. + * + * MUST be called under a store lock (the caller guards the read-modify-write so + * concurrent gc runs don't clobber each other). 
Returns the persisted ledger so + * the caller can derive `coldSinceMs` without a second read. + */ +export const recordObservations = ({ + storeBasePath, + coldPaths, + uncleanReconcilePaths, + now, +}: { + storeBasePath: AbsoluteDirPath + coldPaths: ReadonlyArray + uncleanReconcilePaths?: ReadonlyArray | undefined + now: number +}): Effect.Effect< + GcObservationLedger, + PlatformError.PlatformError | ParseResult.ParseError, + FileSystem.FileSystem +> => + Effect.gen(function* () { + const current = yield* readObservationLedger({ storeBasePath }) + const next = nextObservationLedger({ current, coldPaths, uncleanReconcilePaths, now }) + yield* writeObservationLedger({ storeBasePath, ledger: next }) + return next + }).pipe( + Effect.withSpan('megarepo/store/gc/record-observations', { + attributes: { 'span.label': 'gc-observations' }, + }), + ) + +/** Returns the epoch-ms a path was first seen cold, or `undefined` if not tracked. */ +export const coldSinceMs = ({ + ledger, + path, +}: { + ledger: GcObservationLedger + path: string +}): number | undefined => ledger[normalizePath(path)] diff --git a/packages/@overeng/megarepo/src/lib/store-gc-observations.unit.test.ts b/packages/@overeng/megarepo/src/lib/store-gc-observations.unit.test.ts new file mode 100644 index 000000000..88c1db9f2 --- /dev/null +++ b/packages/@overeng/megarepo/src/lib/store-gc-observations.unit.test.ts @@ -0,0 +1,143 @@ +import { FileSystem } from '@effect/platform' +import { NodeContext } from '@effect/platform-node' +import { Effect } from 'effect' +import { describe, expect, it } from 'vitest' + +import { EffectPath, type AbsoluteDirPath } from '@overeng/effect-path' + +import { + coldSinceMs, + nextObservationLedger, + readObservationLedger, + recordObservations, + GC_OBSERVATIONS_RELATIVE_PATH, +} from './store-gc-observations.ts' + +const run = (effect: Effect.Effect) => + Effect.runPromise(effect.pipe(Effect.provide(NodeContext.layer))) + +const withTempStore = ( + body: (storeBasePath: 
AbsoluteDirPath) => Effect.Effect, +) => + run( + Effect.gen(function* () { + const fs = yield* FileSystem.FileSystem + const tmp = EffectPath.unsafe.absoluteDir(`${yield* fs.makeTempDirectoryScoped()}/`) + return yield* body(tmp) + }).pipe(Effect.scoped), + ) + +describe('store-gc-observations', () => { + describe('nextObservationLedger (transitions)', () => { + it('starts the grace clock for a newly-cold path at `now`', () => { + const next = nextObservationLedger({ current: {}, coldPaths: ['/s/a'], now: 100 }) + expect(next).toEqual({ '/s/a': 100 }) + }) + + it('preserves an existing firstSeen for a still-cold path (grace advances)', () => { + const next = nextObservationLedger({ + current: { '/s/a': 100 }, + coldPaths: ['/s/a'], + now: 500, + }) + expect(next['/s/a']).toBe(100) + }) + + it('drops a path that is no longer cold', () => { + const next = nextObservationLedger({ + current: { '/s/a': 100, '/s/b': 200 }, + coldPaths: ['/s/a'], + now: 500, + }) + expect(next).toEqual({ '/s/a': 100 }) + }) + + it('no continuity laundering: cold -> not-cold -> cold restarts the clock', () => { + const armed = nextObservationLedger({ current: {}, coldPaths: ['/s/a'], now: 100 }) + const cleared = nextObservationLedger({ current: armed, coldPaths: [], now: 200 }) + expect(cleared).toEqual({}) + const rearmed = nextObservationLedger({ current: cleared, coldPaths: ['/s/a'], now: 300 }) + expect(rearmed['/s/a']).toBe(300) + }) + + it('normalizes trailing slashes so a dir/file form maps to one entry', () => { + const next = nextObservationLedger({ current: { '/s/a': 50 }, coldPaths: ['/s/a/'], now: 9 }) + expect(next).toEqual({ '/s/a': 50 }) + }) + + it('skips grace-advance for unclean-reconcile paths (not added)', () => { + const next = nextObservationLedger({ + current: {}, + coldPaths: ['/s/a', '/s/b'], + uncleanReconcilePaths: ['/s/b'], + now: 100, + }) + expect(next).toEqual({ '/s/a': 100 }) + }) + + it('re-arms an existing unclean-reconcile path (drops its credit)', () 
=> { + const next = nextObservationLedger({ + current: { '/s/b': 10 }, + coldPaths: ['/s/b'], + uncleanReconcilePaths: ['/s/b'], + now: 100, + }) + expect(next['/s/b']).toBeUndefined() + }) + }) + + describe('coldSinceMs', () => { + it('returns the recorded ms, normalizing the query path', () => { + expect(coldSinceMs({ ledger: { '/s/a': 42 }, path: '/s/a/' })).toBe(42) + }) + + it('returns undefined for an untracked path', () => { + expect(coldSinceMs({ ledger: {}, path: '/s/a' })).toBeUndefined() + }) + }) + + describe('persistence', () => { + it('round-trips through atomic write and read', async () => { + const ledger = await withTempStore((storeBasePath) => + Effect.gen(function* () { + yield* recordObservations({ storeBasePath, coldPaths: ['/s/a'], now: 100 }) + return yield* readObservationLedger({ storeBasePath }) + }), + ) + expect(ledger).toEqual({ '/s/a': 100 }) + }) + + it('advances grace across runs while a path stays cold', async () => { + const ledger = await withTempStore((storeBasePath) => + Effect.gen(function* () { + yield* recordObservations({ storeBasePath, coldPaths: ['/s/a'], now: 100 }) + yield* recordObservations({ storeBasePath, coldPaths: ['/s/a'], now: 900 }) + return yield* readObservationLedger({ storeBasePath }) + }), + ) + expect(ledger['/s/a']).toBe(100) + }) + + it('treats a corrupt ledger as empty (conservatively re-arming)', async () => { + const ledger = await withTempStore((storeBasePath) => + Effect.gen(function* () { + const fs = yield* FileSystem.FileSystem + const stateDir = EffectPath.ops.join( + storeBasePath, + EffectPath.unsafe.relativeDir('.state/'), + ) + yield* fs.makeDirectory(stateDir, { recursive: true }) + yield* fs.writeFileString( + EffectPath.ops.join( + storeBasePath, + EffectPath.unsafe.relativeFile(GC_OBSERVATIONS_RELATIVE_PATH), + ), + '{ this is not valid json', + ) + return yield* readObservationLedger({ storeBasePath }) + }), + ) + expect(ledger).toEqual({}) + }) + }) +}) diff --git 
a/packages/@overeng/megarepo/src/test-utils/store-setup.integration.test.ts b/packages/@overeng/megarepo/src/test-utils/store-setup.integration.test.ts new file mode 100644 index 000000000..85f3675d0 --- /dev/null +++ b/packages/@overeng/megarepo/src/test-utils/store-setup.integration.test.ts @@ -0,0 +1,119 @@ +import { Command, FileSystem } from '@effect/platform' +import { NodeContext } from '@effect/platform-node' +import { describe, it } from '@effect/vitest' +import { Effect } from 'effect' +import { expect } from 'vitest' + +import { EffectPath, type AbsoluteDirPath } from '@overeng/effect-path' + +import { + createArchiveEntry, + createStoreFixture, + createWorkspaceWithLock, + getWorktreeCommit, + repinWorkspace, +} from './store-setup.ts' + +const git = (cwd: AbsoluteDirPath, ...args: ReadonlyArray) => + Effect.gen(function* () { + const command = Command.make('git', ...args).pipe(Command.workingDirectory(cwd)) + return (yield* Command.string(command)).trim() + }) + +describe('store-setup fixtures', () => { + it.effect( + 'withRemote gives the store bare real refs/remotes/origin/* (reachability testable)', + Effect.fnUntraced( + function* () { + const { bareRepoPaths, worktreePaths, upstreamRepoPaths } = yield* createStoreFixture([ + { + host: 'github.com', + owner: 'o', + repo: 'r', + branches: ['main'], + withRemote: true, + }, + ]) + const bare = bareRepoPaths['github.com/o/r']! + expect(upstreamRepoPaths['github.com/o/r']).toBeDefined() + + // remote-tracking refs exist after the fixture's fetch + const remotes = yield* git(bare, 'for-each-ref', '--format=%(refname)', 'refs/remotes/') + expect(remotes).toContain('refs/remotes/origin/main') + + // a worktree head that is on the remote is reachable: rev-list --not --remotes is empty + const head = yield* getWorktreeCommit(worktreePaths['github.com/o/r#main']!) 
+ const unpushed = yield* git(bare, 'rev-list', head, '--not', '--remotes') + expect(unpushed).toBe('') + }, + Effect.provide(NodeContext.layer), + Effect.scoped, + ), + ) + + it.effect( + 'createArchiveEntry registers a reapable worktree under .archive/', + Effect.fnUntraced( + function* () { + const fs = yield* FileSystem.FileSystem + const { bareRepoPaths, worktreePaths } = yield* createStoreFixture([ + { host: 'github.com', owner: 'o', repo: 'r', branches: ['main'] }, + ]) + const bare = bareRepoPaths['github.com/o/r']! + const repoRoot = EffectPath.ops.parent(bare)! + const commit = yield* getWorktreeCommit(worktreePaths['github.com/o/r#main']!) + + const archivedAt = new Date('2025-01-02T03:04:05.000Z') + const { archivePath, dirName } = yield* createArchiveEntry({ + bareRepoPath: bare, + repoRoot, + branch: 'feature/x', + commit, + archivedAt, + }) + + expect(dirName).toBe('feature/x--2025-01-02T03:04:05.000Z') + expect(yield* fs.exists(archivePath)).toBe(true) + // git enumerates it as a worktree (the reaper's scan surface) + const list = yield* git(bare, 'worktree', 'list', '--porcelain') + expect(list).toContain(archivePath.replace(/\/+$/, '')) + }, + Effect.provide(NodeContext.layer), + Effect.scoped, + ), + ) + + it.effect( + 'repinWorkspace repoints the symlink without touching the registry record', + Effect.fnUntraced( + function* () { + const fs = yield* FileSystem.FileSystem + const { worktreePaths } = yield* createStoreFixture([ + { host: 'github.com', owner: 'o', repo: 'r', branches: ['main', 'next'] }, + ]) + const { workspacePath } = yield* createWorkspaceWithLock({ + members: { repo: 'o/r#main' }, + }) + + const newTarget = worktreePaths['github.com/o/r#next']! 
+ yield* repinWorkspace({ workspacePath, memberName: 'repo', newTarget }) + + const symlinkPath = EffectPath.ops.join( + workspacePath, + EffectPath.unsafe.relativeFile('repos/repo'), + ) + const resolved = yield* fs.readLink(symlinkPath) + expect(resolved.replace(/\/+$/, '')).toBe(newTarget.replace(/\/+$/, '')) + + // No registry record was written (no re-registration happened). + const registryDir = EffectPath.ops.join( + workspacePath, + EffectPath.unsafe.relativeDir('.state/workspaces/'), + ) + expect(yield* fs.exists(registryDir)).toBe(false) + }, + Effect.provide(NodeContext.layer), + Effect.scoped, + ), + ) +}) diff --git a/packages/@overeng/megarepo/src/test-utils/store-setup.ts b/packages/@overeng/megarepo/src/test-utils/store-setup.ts index 02ba8294d..585527226 100644 --- a/packages/@overeng/megarepo/src/test-utils/store-setup.ts +++ b/packages/@overeng/megarepo/src/test-utils/store-setup.ts @@ -5,7 +5,7 @@ */ import { Command, FileSystem } from '@effect/platform' -import { Effect, Schema } from 'effect' +import { Effect, Option, Schema } from 'effect' import { EffectPath, type AbsoluteDirPath } from '@overeng/effect-path' @@ -15,6 +15,7 @@ import { type LockFile, type LockedMember, LOCK_FILE_NAME, + readLockFile, writeLockFile, } from '../lib/lock.ts' import { refTypeToPathSegment, classifyRef } from '../lib/ref.ts' @@ -39,6 +40,13 @@ export interface StoreRepoFixture { readonly commits?: ReadonlyArray /** Whether to make some worktrees dirty */ readonly dirtyWorktrees?: ReadonlyArray + /** + * Wire the store bare repo to a separate upstream bare so it has real + * `refs/remotes/origin/*` (mirrors `Git.cloneBare` + `fetchBare`). Required to + * exercise reachability (`rev-list --not --remotes`) and prune-driven + * remote-branch-deletion scenarios. 
+ */ + readonly withRemote?: boolean } /** Result of creating a store fixture */ @@ -49,6 +57,8 @@ export interface StoreFixtureResult { readonly worktreePaths: Record /** Bare repo paths by "host/owner/repo" */ readonly bareRepoPaths: Record + /** Upstream bare repo paths by "host/owner/repo" (only for `withRemote` repos) */ + readonly upstreamRepoPaths: Record } // ============================================================================= @@ -109,6 +119,7 @@ export const createStoreFixture = (repos: ReadonlyArray) => const worktreePaths: Record = {} const bareRepoPaths: Record = {} + const upstreamRepoPaths: Record = {} for (const repoFixture of repos) { const repoKey = `${repoFixture.host}/${repoFixture.owner}/${repoFixture.repo}` @@ -129,6 +140,22 @@ export const createStoreFixture = (repos: ReadonlyArray) => // Initialize bare repo yield* runGitCommand(bareRepoPath, 'init', '--bare') + // For `withRemote`, the store bare fetches from a separate upstream bare so + // it gains real `refs/remotes/origin/*`. The source repo pushes to that + // upstream (the true remote); otherwise it pushes to the store bare directly. 
+ const withRemote = repoFixture.withRemote === true + let pushTargetPath = bareRepoPath + if (withRemote === true) { + const upstreamPath = EffectPath.ops.join( + tmpDir, + EffectPath.unsafe.relativeDir(`_upstream/${repoKey}.bare/`), + ) + yield* fs.makeDirectory(upstreamPath, { recursive: true }) + yield* runGitCommand(upstreamPath, 'init', '--bare') + upstreamRepoPaths[repoKey] = upstreamPath + pushTargetPath = upstreamPath + } + // Create a source repo to work with (we need commits to reference) const sourceRepoPath = EffectPath.ops.join(tmpDir, EffectPath.unsafe.relativeDir('_source/')) yield* fs.makeDirectory(sourceRepoPath, { recursive: true }) @@ -145,14 +172,24 @@ export const createStoreFixture = (repos: ReadonlyArray) => // Get the commit SHA const commitSha = yield* runGitCommand(sourceRepoPath, 'rev-parse', 'HEAD') - // Set up bare repo as remote and push - yield* runGitCommand(sourceRepoPath, 'remote', 'add', 'origin', bareRepoPath) + // Set up remote and push branches + yield* runGitCommand(sourceRepoPath, 'remote', 'add', 'origin', pushTargetPath) yield* runGitCommand(sourceRepoPath, 'push', '-u', 'origin', 'main').pipe( Effect.catchAll(() => // Try master if main fails runGitCommand(sourceRepoPath, 'push', '-u', 'origin', 'master'), ), ) + // Push any additional branches requested (beyond the default). + for (const branch of repoFixture.branches ?? []) { + if (branch === 'main' || branch === 'master') continue + yield* runGitCommand(sourceRepoPath, 'branch', branch, commitSha).pipe( + Effect.catchAll(() => Effect.void), + ) + yield* runGitCommand(sourceRepoPath, 'push', 'origin', branch).pipe( + Effect.catchAll(() => Effect.void), + ) + } // Create tags if requested for (const tag of repoFixture.tags ?? 
[]) { @@ -160,6 +197,19 @@ export const createStoreFixture = (repos: ReadonlyArray) => yield* runGitCommand(sourceRepoPath, 'push', 'origin', tag) } + // Wire the store bare to the upstream so it gains `refs/remotes/origin/*` + // (mirrors Git.cloneBare's refspec + Git.fetchBare). + if (withRemote === true) { + yield* runGitCommand(bareRepoPath, 'remote', 'add', 'origin', upstreamRepoPaths[repoKey]!) + yield* runGitCommand( + bareRepoPath, + 'config', + 'remote.origin.fetch', + '+refs/heads/*:refs/remotes/origin/*', + ) + yield* runGitCommand(bareRepoPath, 'fetch', '--tags', '--prune', 'origin') + } + // Create refs directory structure const refsDir = EffectPath.ops.join(repoBasePath, EffectPath.unsafe.relativeDir('refs/')) yield* fs.makeDirectory(refsDir, { recursive: true }) @@ -236,6 +286,7 @@ export const createStoreFixture = (repos: ReadonlyArray) => storePath, worktreePaths, bareRepoPaths, + upstreamRepoPaths, } satisfies StoreFixtureResult }) @@ -315,3 +366,93 @@ export const createWorkspaceWithLock = (args: { */ export const getWorktreeCommit = (worktreePath: AbsoluteDirPath) => runGitCommand(worktreePath, 'rev-parse', 'HEAD') + +/** + * Repoint a workspace member to a new store target WITHOUT re-registering. + * + * Models the decision-0010 repin bug: a workspace repins a member (its + * `repos/` symlink and lock entry now point at `newTarget`) but runs no + * refreshing command, so its liveness record stays stale. The store registry is + * deliberately left untouched — only the on-disk truth (symlink + optional lock) + * is updated. A reconcile-all must re-derive the new target from disk. 
+ */ +export const repinWorkspace = ({ + workspacePath, + memberName, + newTarget, + lockEntry, +}: { + workspacePath: AbsoluteDirPath + memberName: string + newTarget: AbsoluteDirPath + lockEntry?: { url: string; ref: string; commit: string; pinned?: boolean } | undefined +}) => + Effect.gen(function* () { + const fs = yield* FileSystem.FileSystem + + const reposDir = EffectPath.ops.join(workspacePath, EffectPath.unsafe.relativeDir('repos/')) + yield* fs.makeDirectory(reposDir, { recursive: true }) + const symlinkPath = EffectPath.ops.join(reposDir, EffectPath.unsafe.relativeFile(memberName)) + // Replace any existing symlink so the new target is the on-disk truth. + yield* fs.remove(symlinkPath, { force: true }).pipe(Effect.catchAll(() => Effect.void)) + yield* fs.symlink(newTarget.replace(/\/+$/, ''), symlinkPath) + + // Optionally rewrite the lock entry for this member (ref/commit repin), + // preserving every other member verbatim. + if (lockEntry !== undefined) { + const lockPath = EffectPath.ops.join( + workspacePath, + EffectPath.unsafe.relativeFile(LOCK_FILE_NAME), + ) + const existingOpt = yield* readLockFile(lockPath) + const members: Record = {} + for (const [name, member] of Object.entries( + Option.getOrUndefined(existingOpt)?.members ?? {}, + )) { + members[name] = member + } + members[memberName] = createLockedMember({ + url: lockEntry.url, + ref: lockEntry.ref, + commit: lockEntry.commit, + ...(lockEntry.pinned !== undefined ? { pinned: lockEntry.pinned } : {}), + }) + const lockFile: LockFile = { version: 1, members } + yield* writeLockFile({ lockPath, lockFile }) + } + }) + +/** + * Create a valid archive entry (`/.archive/--/`) + * registered as a worktree of the bare repo (proper gitlink), for exercising + * retention/reap. `archivedAt` controls the trailing timestamp segment used by + * the reaper's retention TTL parse. 
+ */ +export const createArchiveEntry = ({ + bareRepoPath, + repoRoot, + branch, + commit, + archivedAt, +}: { + bareRepoPath: AbsoluteDirPath + repoRoot: AbsoluteDirPath + branch: string + commit: string + archivedAt: Date +}) => + Effect.gen(function* () { + const fs = yield* FileSystem.FileSystem + const archiveDir = EffectPath.ops.join(repoRoot, EffectPath.unsafe.relativeDir('.archive/')) + yield* fs.makeDirectory(archiveDir, { recursive: true }) + + const dirName = `${branch}--${archivedAt.toISOString()}` + const archivePath = EffectPath.ops.join( + archiveDir, + EffectPath.unsafe.relativeDir(`${dirName}/`), + ) + // `worktree add --detach` creates a real gitlink and registers the path in + // the bare's worktree list (the same enumeration the reaper scans). + yield* runGitCommand(bareRepoPath, 'worktree', 'add', '--detach', archivePath, commit) + return { archivePath, dirName } + }) From 46cfbcba241cd57c9c57fc938dd6866693a4905a Mon Sep 17 00:00:00 2001 From: schickling-assistant <261620128+schickling-assistant@users.noreply.github.com> Date: Thu, 11 Jun 2026 16:30:00 +0200 Subject: [PATCH 05/13] feat(megarepo): PrStateResolver service for cold-worktree GC (#771) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add U2 of the cold named-branch reclamation pipeline: a branch -> GitHub PR-state resolver that supplies the primary staleness signal (decision 0005). - PrStateResolver as Context.Tag + Layer.effect; live layer shells one batched `gh pr list` per repo (Schema-validated JSON), cached per gc run. - owner/repo/host parsed from the store-relative path; non-github host, gh failure, non-JSON, or timeout all degrade to `none` (keep — absence of evidence never licenses deletion). - Branches joined to headRefName VERBATIM (names contain `/`); multi-PR per branch resolves any-open => open, else most-recent merged/closed. - Stub layer for deterministic classification tests. 
Co-Authored-By: Claude Opus 4.8 (1M context) --- .../megarepo/src/lib/store-pr-state.ts | 297 ++++++++++++++++++ .../src/lib/store-pr-state.unit.test.ts | 195 ++++++++++++ 2 files changed, 492 insertions(+) create mode 100644 packages/@overeng/megarepo/src/lib/store-pr-state.ts create mode 100644 packages/@overeng/megarepo/src/lib/store-pr-state.unit.test.ts diff --git a/packages/@overeng/megarepo/src/lib/store-pr-state.ts b/packages/@overeng/megarepo/src/lib/store-pr-state.ts new file mode 100644 index 000000000..183dfcf00 --- /dev/null +++ b/packages/@overeng/megarepo/src/lib/store-pr-state.ts @@ -0,0 +1,297 @@ +/** + * Branch -> GitHub PR-state resolver (decision 0005). + * + * A worktree's branch having a MERGED (or CLOSED, decision 0009) PR is the + * primary positive staleness signal. Reliable detection requires GitHub's PR + * state because the store's repos squash-merge — a merged branch can sit + * thousands of commits "ahead" of `main`, so the git-only ancestor proxy is + * useless. We therefore shell one batched `gh pr list` per repo and join PRs to + * branches locally by `headRefName`. + * + * Conservative degradation (0005): absence of evidence never licenses deletion. + * A non-GitHub remote, an unparseable repo path, `gh` failing/unauthenticated, + * non-JSON output, or a timeout all resolve to `none` (keep). Only an + * affirmative `merged`/`closed`/`open` from GitHub changes the decision. + * + * The service is a `Context.Tag` + `Layer.effect` (house convention). The live + * layer shells `gh`; tests provide {@link makeStubPrStateResolver} backed by a + * fixed map. The pure join/parse seams ({@link parseRepoCoordinates}, + * {@link decodePrListJson}, {@link resolvePrStateForBranch}) are unit-tested + * directly with fake gh output so no real `gh`/network is needed. 
+ */ + +import { Command, CommandExecutor } from '@effect/platform' +import { Context, Duration, Effect, Layer, Option, Schema } from 'effect' + +import type { RelativeDirPath } from '@overeng/effect-path' + +/** GitHub host segment that gates PR-state resolution; any other host ⇒ all `none`. */ +export const GITHUB_HOST = 'github.com' + +/** Default `--limit` for the batched per-repo `gh pr list` call. */ +export const DEFAULT_PR_LIST_LIMIT = 200 + +/** Default timeout for a single `gh` invocation; exceeding it ⇒ `none` (keep). */ +export const DEFAULT_GH_TIMEOUT = Duration.seconds(30) + +/** + * Resolved PR state for a branch. + * + * `state` is the joined signal; timestamps are present only for the state that + * carries them (`mergedAt` for `merged`, `closedAt` for `closed`). They are the + * inputs the classifier's post-merge grace gate consumes, so they must round + * trip in epoch-ms. + */ +export interface PrStateInfo { + readonly state: 'merged' | 'closed' | 'open' | 'none' + readonly mergedAt?: number | undefined + readonly closedAt?: number | undefined +} + +/** The conservative "no evidence" result (decision 0005): keep. */ +export const PR_STATE_NONE: PrStateInfo = { state: 'none' } + +/** + * Resolves the PR state for one branch in one repo. + * + * `relativePath` is the store-relative repo path (`{host}/{owner}/{repo}/`); + * `branch` is joined to `headRefName` VERBATIM (branch names contain `/`). + */ +export interface PrStateResolverService { + readonly resolve: (args: { + relativePath: RelativeDirPath + branch: string + }) => Effect.Effect +} + +/** PR-state resolver service tag.
*/ +export class PrStateResolver extends Context.Tag('megarepo/PrStateResolver')< + PrStateResolver, + PrStateResolverService +>() {} + +// ============================================================================= +// Pure seams (unit-tested directly with fake gh output) +// ============================================================================= + +/** + * Parse `owner`/`repo` from a store-relative repo path. + * + * Store paths are `{host}/{owner}/{repo}/`. Only `github.com` resolves; any + * other host (or a path without the three leading segments, e.g. `local/{name}/`) + * yields `none` so the caller degrades to keep. + */ +export const parseRepoCoordinates = ( + relativePath: RelativeDirPath, +): Option.Option<{ owner: string; repo: string }> => { + const segments = relativePath.split('/').filter((s) => s.length > 0) + const [host, owner, repo] = segments + if (host !== GITHUB_HOST || owner === undefined || repo === undefined) { + return Option.none() + } + return Option.some({ owner, repo }) +} + +/** One PR row from `gh pr list --json number,state,headRefName,mergedAt,closedAt`. */ +const GhPr = Schema.Struct({ + number: Schema.Number, + /** gh emits uppercase `MERGED`/`CLOSED`/`OPEN`. */ + state: Schema.Literal('MERGED', 'CLOSED', 'OPEN'), + headRefName: Schema.String, + /** ISO 8601, or `null` when not merged. */ + mergedAt: Schema.NullOr(Schema.String), + /** ISO 8601, or `null` when still open. */ + closedAt: Schema.NullOr(Schema.String), +}) + +/** Decoded `gh pr list` payload. */ +export type GhPr = Schema.Schema.Type + +const GhPrList = Schema.Array(GhPr) + +/** + * Decode `gh pr list` JSON output into PR rows. + * + * Non-JSON or schema-invalid output (e.g. `gh` printed an error, or exited + * non-zero leaving empty stdout) ⇒ `none`, which the caller maps to keep. + */ +export const decodePrListJson = (raw: string): Option.Option> => + Schema.decodeUnknownOption(Schema.parseJson(GhPrList))(raw) + +/** ISO 8601 ⇒ epoch ms; `null`/unparseable ⇒ `undefined`.
*/ +const isoToMs = (iso: string | null): number | undefined => { + if (iso === null) return undefined + const ms = Date.parse(iso) + return Number.isNaN(ms) === true ? undefined : ms +} + +/** + * Join PR rows to one branch and reduce to a single {@link PrStateInfo} (pure). + * + * Matches `headRefName` VERBATIM against `branch`. Resolution for the matches: + * - no match ⇒ `none` (keep); + * - ANY open ⇒ `open` (active work, keep regardless of other merged/closed PRs); + * - else the most-recent merged/closed PR wins, ranked by its `mergedAt`/ + * `closedAt` (a `merged` PR's `mergedAt`, a `closed` PR's `closedAt`). Rows + * missing a usable timestamp rank oldest so a dated PR is preferred. + */ +export const resolvePrStateForBranch = ({ + prs, + branch, +}: { + prs: ReadonlyArray + branch: string +}): PrStateInfo => { + const matches = prs.filter((pr) => pr.headRefName === branch) + if (matches.length === 0) return PR_STATE_NONE + + if (matches.some((pr) => pr.state === 'OPEN') === true) return { state: 'open' } + + // Only MERGED/CLOSED remain; pick the most recent by its own timestamp. + const ranked = matches + .map((pr) => { + const ts = pr.state === 'MERGED' ? isoToMs(pr.mergedAt) : isoToMs(pr.closedAt) + return { pr, ts } + }) + .toSorted((a, b) => (b.ts ?? -Infinity) - (a.ts ?? -Infinity)) + + const winner = ranked[0] + if (winner === undefined) return PR_STATE_NONE + + if (winner.pr.state === 'MERGED') { + return { state: 'merged', mergedAt: winner.ts } + } + return { state: 'closed', closedAt: winner.ts } +} + +// ============================================================================= +// Live layer (shells `gh`) +// ============================================================================= + +/** + * Live `PrStateResolver` that shells one batched `gh pr list` per repo. + * + * The fetched PR list is cached per repo (keyed by `owner/repo`) for the lifetime of the layer + * (one gc run) so repeated branch lookups in a repo cost a single `gh` call.
+ * Any failure mode — non-github host, spawn/exec error, non-JSON output, or + * timeout — degrades to `none` (keep). The cache is a plain `Map` filled lazily once a + * fetch completes, so concurrent first lookups for the same repo may each shell `gh`. + */ +export const makePrStateResolverLayer = ({ + limit = DEFAULT_PR_LIST_LIMIT, + timeout = DEFAULT_GH_TIMEOUT, +}: { + limit?: number + timeout?: Duration.DurationInput +} = {}): Layer.Layer => + Layer.effect( + PrStateResolver, + Effect.gen(function* () { + // Capture the executor once at layer build so the service's `resolve` + // effects discharge their `CommandExecutor` requirement here (the live + // shelling is an implementation detail, not part of the service R-channel). + const executor = yield* CommandExecutor.CommandExecutor + + /** repo `owner/repo` -> decoded PR rows (Option.none ⇒ resolved to no evidence). */ + const repoCache = new Map>>() + + const fetchRepoPrs = ({ + owner, + repo, + }: { + owner: string + repo: string + }): Effect.Effect>> => + Effect.gen(function* () { + const command = Command.make( + 'gh', + 'pr', + 'list', + '--repo', + `${owner}/${repo}`, + '--state', + 'all', + '--limit', + String(limit), + '--json', + 'number,state,headRefName,mergedAt,closedAt', + ) + const raw = yield* Command.string(command).pipe( + Effect.timeoutFail({ + duration: timeout, + onTimeout: () => new Error('gh pr list timed out'), + }), + // Any spawn/exec/timeout failure ⇒ no evidence (keep). + Effect.option, + Effect.provideService(CommandExecutor.CommandExecutor, executor), + ) + return Option.flatMap(raw, decodePrListJson) + }) + + const resolve = ({ + relativePath, + branch, + }: { + relativePath: RelativeDirPath + branch: string + }): Effect.Effect => + Effect.gen(function* () { + const coords = parseRepoCoordinates(relativePath) + if (Option.isNone(coords) === true) return PR_STATE_NONE + const { owner, repo } = coords.value + const key = `${owner}/${repo}` + + const cached = repoCache.get(key) + const prs = + cached ??
+ (yield* fetchRepoPrs({ owner, repo }).pipe( + Effect.tap((result) => Effect.sync(() => repoCache.set(key, result))), + )) + + if (Option.isNone(prs) === true) return PR_STATE_NONE + return resolvePrStateForBranch({ prs: prs.value, branch }) + }).pipe( + Effect.withSpan('megarepo/store/gc/resolve-pr-state', { + attributes: { 'span.label': 'pr-state', branch }, + }), + ) + + return { resolve } + }), + ) + +// ============================================================================= +// Stub layer (tests) +// ============================================================================= + +/** A single stubbed PR-list response keyed by store-relative repo path. */ +export interface StubPrRepo { + readonly relativePath: RelativeDirPath + readonly prs: ReadonlyArray +} + +/** + * Build a deterministic stub `PrStateResolver` from fixed per-repo PR rows. + * + * Mirrors the live join semantics ({@link resolvePrStateForBranch}) but reads + * from the supplied map instead of shelling `gh`, so classification tests stay + * pure and fast. A repo not present in `repos` resolves to `none`, matching the + * live "no evidence ⇒ keep" degradation. + */ +export const makeStubPrStateResolver = ( + repos: ReadonlyArray, +): PrStateResolverService => { + const byPath = new Map>(repos.map((r) => [r.relativePath, r.prs])) + return { + resolve: ({ relativePath, branch }) => { + const prs = byPath.get(relativePath) + if (prs === undefined) return Effect.succeed(PR_STATE_NONE) + return Effect.succeed(resolvePrStateForBranch({ prs, branch })) + }, + } +} + +/** Layer wrapper around {@link makeStubPrStateResolver}. 
*/ +export const makeStubPrStateResolverLayer = ( + repos: ReadonlyArray, +): Layer.Layer => Layer.succeed(PrStateResolver, makeStubPrStateResolver(repos)) diff --git a/packages/@overeng/megarepo/src/lib/store-pr-state.unit.test.ts b/packages/@overeng/megarepo/src/lib/store-pr-state.unit.test.ts new file mode 100644 index 000000000..3ac1c469a --- /dev/null +++ b/packages/@overeng/megarepo/src/lib/store-pr-state.unit.test.ts @@ -0,0 +1,195 @@ +import { it } from '@effect/vitest' +import { Effect, Option } from 'effect' +import { describe, expect } from 'vitest' + +import { EffectPath } from '@overeng/effect-path' + +import { + decodePrListJson, + makeStubPrStateResolver, + parseRepoCoordinates, + PR_STATE_NONE, + PrStateResolver, + makeStubPrStateResolverLayer, + resolvePrStateForBranch, + type GhPr, +} from './store-pr-state.ts' + +const rel = (p: string) => EffectPath.unsafe.relativeDir(p) + +/** Compact PR-row builder; `gh` emits ISO timestamps or null. */ +const pr = (partial: Partial & Pick): GhPr => ({ + mergedAt: null, + closedAt: null, + ...partial, +}) + +describe('store-pr-state', () => { + describe('parseRepoCoordinates', () => { + it('parses owner/repo from a github.com store path', () => { + expect(parseRepoCoordinates(rel('github.com/overengineeringstudio/effect-utils/'))).toEqual( + Option.some({ owner: 'overengineeringstudio', repo: 'effect-utils' }), + ) + }) + + it('bails on a non-github host (⇒ none, keep)', () => { + expect(parseRepoCoordinates(rel('gitlab.com/owner/repo/'))).toEqual(Option.none()) + }) + + it('bails on a local store path with no host/owner/repo triple', () => { + expect(parseRepoCoordinates(rel('local/my-repo/'))).toEqual(Option.none()) + }) + }) + + describe('decodePrListJson', () => { + it('decodes a valid gh pr list payload', () => { + const raw = JSON.stringify([ + { + number: 1, + state: 'MERGED', + headRefName: 'feat/x', + mergedAt: '2026-01-01T00:00:00Z', + closedAt: null, + }, + ]) + const decoded = decodePrListJson(raw) 
+ expect(Option.isSome(decoded)).toBe(true) + expect(Option.getOrThrow(decoded)[0]?.number).toBe(1) + }) + + it('returns none for non-JSON output (gh error / non-zero exit)', () => { + expect(decodePrListJson('error: not authenticated')).toEqual(Option.none()) + }) + + it('returns none for JSON that violates the schema', () => { + expect(decodePrListJson(JSON.stringify([{ number: 'nope' }]))).toEqual(Option.none()) + }) + }) + + describe('resolvePrStateForBranch', () => { + it('no matching PR ⇒ none (keep)', () => { + const prs = [pr({ number: 1, state: 'MERGED', headRefName: 'other' })] + expect(resolvePrStateForBranch({ prs, branch: 'feat/x' })).toEqual(PR_STATE_NONE) + }) + + it('joins by headRefName VERBATIM, including slashes in branch names', () => { + const prs = [ + pr({ number: 1, state: 'MERGED', headRefName: 'feat/x', mergedAt: '2026-01-01T00:00:00Z' }), + pr({ + number: 2, + state: 'MERGED', + headRefName: 'feat/x/nested', + mergedAt: '2026-02-01T00:00:00Z', + }), + ] + // 'feat/x' must NOT match 'feat/x/nested' (verbatim, not prefix). 
+ expect(resolvePrStateForBranch({ prs, branch: 'feat/x' })).toEqual({ + state: 'merged', + mergedAt: Date.parse('2026-01-01T00:00:00Z'), + }) + expect(resolvePrStateForBranch({ prs, branch: 'feat/x/nested' })).toEqual({ + state: 'merged', + mergedAt: Date.parse('2026-02-01T00:00:00Z'), + }) + }) + + it('merged PR carries mergedAt in epoch ms', () => { + const prs = [ + pr({ number: 7, state: 'MERGED', headRefName: 'b', mergedAt: '2026-03-04T05:06:07Z' }), + ] + expect(resolvePrStateForBranch({ prs, branch: 'b' })).toEqual({ + state: 'merged', + mergedAt: Date.parse('2026-03-04T05:06:07Z'), + }) + }) + + it('closed PR carries closedAt and gets no post-close grace signal (state=closed)', () => { + const prs = [ + pr({ number: 8, state: 'CLOSED', headRefName: 'b', closedAt: '2026-03-04T05:06:07Z' }), + ] + expect(resolvePrStateForBranch({ prs, branch: 'b' })).toEqual({ + state: 'closed', + closedAt: Date.parse('2026-03-04T05:06:07Z'), + }) + }) + + it('multi-PR: ANY open ⇒ open even if a merged PR shares the branch', () => { + const prs = [ + pr({ number: 1, state: 'MERGED', headRefName: 'b', mergedAt: '2026-01-01T00:00:00Z' }), + pr({ number: 2, state: 'OPEN', headRefName: 'b' }), + ] + expect(resolvePrStateForBranch({ prs, branch: 'b' })).toEqual({ state: 'open' }) + }) + + it('multi-PR (no open): most-recent merged/closed wins by its own timestamp', () => { + const prs = [ + pr({ number: 1, state: 'CLOSED', headRefName: 'b', closedAt: '2026-01-01T00:00:00Z' }), + pr({ number: 2, state: 'MERGED', headRefName: 'b', mergedAt: '2026-05-01T00:00:00Z' }), + pr({ number: 3, state: 'CLOSED', headRefName: 'b', closedAt: '2026-03-01T00:00:00Z' }), + ] + expect(resolvePrStateForBranch({ prs, branch: 'b' })).toEqual({ + state: 'merged', + mergedAt: Date.parse('2026-05-01T00:00:00Z'), + }) + }) + + it('multi-PR (no open): an older merged loses to a newer closed', () => { + const prs = [ + pr({ number: 1, state: 'MERGED', headRefName: 'b', mergedAt: '2026-01-01T00:00:00Z' }), + 
pr({ number: 2, state: 'CLOSED', headRefName: 'b', closedAt: '2026-09-01T00:00:00Z' }), + ] + expect(resolvePrStateForBranch({ prs, branch: 'b' })).toEqual({ + state: 'closed', + closedAt: Date.parse('2026-09-01T00:00:00Z'), + }) + }) + }) + + describe('stub PrStateResolver layer', () => { + const repos = [ + { + relativePath: rel('github.com/overengineeringstudio/effect-utils/'), + prs: [ + pr({ + number: 1, + state: 'MERGED', + headRefName: 'feat/x', + mergedAt: '2026-01-01T00:00:00Z', + }), + ], + }, + ] + + it('resolves a known branch through the service interface', () => + Effect.gen(function* () { + const resolver = yield* PrStateResolver + const result = yield* resolver.resolve({ + relativePath: rel('github.com/overengineeringstudio/effect-utils/'), + branch: 'feat/x', + }) + expect(result).toEqual({ state: 'merged', mergedAt: Date.parse('2026-01-01T00:00:00Z') }) + }).pipe(Effect.provide(makeStubPrStateResolverLayer(repos)), Effect.runPromise)) + + it('unknown repo path ⇒ none (keep)', () => { + const resolver = makeStubPrStateResolver(repos) + return Effect.gen(function* () { + const result = yield* resolver.resolve({ + relativePath: rel('github.com/overengineeringstudio/other/'), + branch: 'feat/x', + }) + expect(result).toEqual(PR_STATE_NONE) + }).pipe(Effect.runPromise) + }) + + it('known repo but unmatched branch ⇒ none (keep)', () => { + const resolver = makeStubPrStateResolver(repos) + return Effect.gen(function* () { + const result = yield* resolver.resolve({ + relativePath: rel('github.com/overengineeringstudio/effect-utils/'), + branch: 'feat/missing', + }) + expect(result).toEqual(PR_STATE_NONE) + }).pipe(Effect.runPromise) + }) + }) +}) From edf9facfd3161d892d111e7b65bf1a75c76407f0 Mon Sep 17 00:00:00 2001 From: schickling-assistant <261620128+schickling-assistant@users.noreply.github.com> Date: Thu, 11 Jun 2026 16:39:20 +0200 Subject: [PATCH 06/13] feat(megarepo): lossless floor for cold worktree GC (#771) Add the lossless check (U3) used to 
decide whether a cold named-branch worktree can be archived without losing recoverable work: - unpushedCommitCount via `git rev-list --not --remotes` (not `branch -r --contains`), so a local commit stacked on a parent that lives on an unrelated remote ref still counts as unpushed (the B1 case). Requires fresh remote-tracking refs (caller fetches --prune). - hasStash via presence of the repo-global `refs/stash` ref (stashes do not travel with a worktree directory move). - assessLossless combines unpushed + dirt (Git.getWorktreeStatus) + stash into the shape the cold classifier consumes. New git helpers (revListUnpushed, hasStashRef) follow GitCommandError. Integration tests cover B1, squash-merge/deleted-remote-branch reachability, no-remote-tracking-refs conservatism, and stash detection. Co-Authored-By: Claude Opus 4.8 (1M context) --- packages/@overeng/megarepo/src/lib/git.ts | 37 +++ .../lib/store-lossless.integration.test.ts | 275 ++++++++++++++++++ .../megarepo/src/lib/store-lossless.ts | 107 +++++++ 3 files changed, 419 insertions(+) create mode 100644 packages/@overeng/megarepo/src/lib/store-lossless.integration.test.ts create mode 100644 packages/@overeng/megarepo/src/lib/store-lossless.ts diff --git a/packages/@overeng/megarepo/src/lib/git.ts b/packages/@overeng/megarepo/src/lib/git.ts index 4207b95a3..bef09e884 100644 --- a/packages/@overeng/megarepo/src/lib/git.ts +++ b/packages/@overeng/megarepo/src/lib/git.ts @@ -498,6 +498,43 @@ export const refExists = (args: { repoPath: string; ref: string }) => Effect.catchAll(() => Effect.succeed(false)), ) +/** + * List commits reachable from `ref` but not from ANY remote-tracking ref + * (`refs/remotes/*`), i.e. the commits that exist only locally. + * + * This is `git -C rev-list --not --remotes`. 
Unlike + * `branch -r --contains ` (which asks "is this exact tip on a remote"), + * `rev-list --not --remotes` walks the history from `ref` and stops at the first + * remote-reachable ancestor, so it returns ONLY the genuinely-unpushed commits. + * A local commit stacked on top of a parent that lives on an unrelated remote + * ref therefore still shows up here (the parent is excluded, the new commit is + * not) — the distinction the lossless check relies on. + * + * The result is only as fresh as `refs/remotes/*`, so callers must + * {@link fetchBare} (fetch --prune) first; on a bare repo with no remote-tracking + * refs every commit is reported as unpushed. + */ +export const revListUnpushed = (args: { repoPath: string; ref: string }) => + Effect.gen(function* () { + const output = yield* runGitCommand({ + args: ['rev-list', args.ref, '--not', '--remotes'], + cwd: args.repoPath, + }) + return output.split('\n').filter((line) => line.trim().length > 0) + }) + +/** + * Whether the repo has a non-empty stash. + * + * Stashes live in a single repo-global `refs/stash` ref in the bare repo (they + * are NOT per-worktree and do NOT travel with a worktree directory move), so the + * presence of `refs/stash` is the authoritative "stashed work would be lost" + * signal. We test the ref directly rather than parsing `git stash list`, whose + * output is unreliable for detached worktrees. 
+ */ +export const hasStashRef = (args: { repoPath: string }) => + refExists({ repoPath: args.repoPath, ref: 'refs/stash' }) + // ============================================================================= // Branch Operations // ============================================================================= diff --git a/packages/@overeng/megarepo/src/lib/store-lossless.integration.test.ts b/packages/@overeng/megarepo/src/lib/store-lossless.integration.test.ts new file mode 100644 index 000000000..94fd2985f --- /dev/null +++ b/packages/@overeng/megarepo/src/lib/store-lossless.integration.test.ts @@ -0,0 +1,275 @@ +/** + * Integration tests for the lossless floor (U3, decisions 0001/0003/0004). + * + * These exercise REAL git: a bare repo wired to a separate upstream so it has + * real `refs/remotes/origin/*`, plus worktrees whose HEADs we drive precisely. + * The headline case is "B1": a local commit stacked on a parent that lives on an + * UNRELATED remote ref must still count as unpushed (`> 0`) — the exact + * distinction `rev-list --not --remotes` draws that `branch -r --contains` does + * not. + */ + +import { Command, FileSystem } from '@effect/platform' +import { NodeContext } from '@effect/platform-node' +import { describe, it } from '@effect/vitest' +import { Effect } from 'effect' +import { expect } from 'vitest' + +import { EffectPath, type AbsoluteDirPath } from '@overeng/effect-path' + +import { assessLossless, hasStash, unpushedCommitCount } from './store-lossless.ts' + +const GIT_USER = ['-c', 'user.email=test@example.com', '-c', 'user.name=Test User'] as const + +/** Run git in `cwd`, returning trimmed stdout. */ +const git = (cwd: string, ...args: ReadonlyArray) => + Effect.gen(function* () { + const command = Command.make('git', ...GIT_USER, ...args).pipe(Command.workingDirectory(cwd)) + const result = yield* Command.string(command) + return result.trim() + }) + +/** + * Create a real git stash (`refs/stash`) in `worktreeCwd`. 
+ * + * Bare `git stash` is intercepted by the agent-policy wrapper, so we bypass it + * to produce a genuine standard stash ref — exactly the artifact the lossless + * floor must detect. This is a fixture concern, not product behavior. + */ +const createStash = (worktreeCwd: string) => + Effect.gen(function* () { + const command = Command.make('git', ...GIT_USER, 'stash').pipe( + Command.workingDirectory(worktreeCwd), + Command.env({ AGENT_POLICY_BYPASS: '1' }), + ) + yield* Command.string(command) + }) + +/** + * Build a store-like bare repo wired to a separate upstream (real + * `refs/remotes/origin/*`) with an initial pushed commit on `main`. + * + * Returns the bare path, the upstream path, the source repo path (still wired to + * the upstream so the test can push more branches), and the initial commit SHA. + */ +const makeWiredBare = Effect.fnUntraced(function* () { + const fs = yield* FileSystem.FileSystem + const tmp = EffectPath.unsafe.absoluteDir(`${yield* fs.makeTempDirectoryScoped()}/`) + + const upstream = EffectPath.ops.join(tmp, EffectPath.unsafe.relativeDir('upstream.bare/')) + const bare = EffectPath.ops.join(tmp, EffectPath.unsafe.relativeDir('store.bare/')) + const source = EffectPath.ops.join(tmp, EffectPath.unsafe.relativeDir('source/')) + + yield* fs.makeDirectory(upstream, { recursive: true }) + yield* git(upstream, 'init', '--bare') + + yield* fs.makeDirectory(source, { recursive: true }) + yield* git(source, 'init') + yield* fs.writeFileString( + EffectPath.ops.join(source, EffectPath.unsafe.relativeFile('f.txt')), + 'base\n', + ) + yield* git(source, 'add', '-A') + yield* git(source, 'commit', '--no-verify', '-m', 'base') + yield* git(source, 'remote', 'add', 'origin', upstream) + yield* git(source, 'push', '-u', 'origin', 'main') + const baseCommit = yield* git(source, 'rev-parse', 'HEAD') + + // Wire the bare to the upstream with a fetching refspec (mirrors Git.cloneBare). 
+ yield* fs.makeDirectory(bare, { recursive: true }) + yield* git(bare, 'init', '--bare') + yield* git(bare, 'remote', 'add', 'origin', upstream) + yield* git(bare, 'config', 'remote.origin.fetch', '+refs/heads/*:refs/remotes/origin/*') + yield* git(bare, 'fetch', '--tags', '--prune', 'origin') + + return { bare, upstream, source, baseCommit } +}) + +/** Add a worktree at `ref` under `bare`/wt-`name`. */ +const addWorktree = (args: { bare: AbsoluteDirPath; name: string; ref: string }) => + Effect.gen(function* () { + const fs = yield* FileSystem.FileSystem + const wt = EffectPath.ops.join(args.bare, EffectPath.unsafe.relativeDir(`../wt-${args.name}/`)) + yield* fs.makeDirectory(wt, { recursive: true }) + yield* git(args.bare, 'worktree', 'add', '--detach', wt, args.ref) + return wt + }) + +describe('store-lossless', () => { + it.effect( + 'unpushedCommitCount is 0 for a head fully on a remote branch', + Effect.fnUntraced( + function* () { + const { bare, baseCommit } = yield* makeWiredBare() + const count = yield* unpushedCommitCount({ + bareRepoPath: bare, + worktreeHead: baseCommit, + }) + expect(count).toBe(0) + }, + Effect.provide(NodeContext.layer), + Effect.scoped, + ), + ) + + it.effect( + 'B1: a local commit on top of an unrelated-remote-contained parent counts as unpushed', + Effect.fnUntraced( + function* () { + const { bare, source } = yield* makeWiredBare() + + // Push the base commit to an UNRELATED remote branch `other` (not `main`'s + // tip, but here equal). The point: the worktree's NEW commit's parent is + // reachable via a remote ref, yet the new commit itself is not pushed. + yield* git(source, 'push', 'origin', 'main:other') + yield* git(bare, 'fetch', '--prune', 'origin') + const otherTip = yield* git(bare, 'rev-parse', 'refs/remotes/origin/other') + + const wt = yield* addWorktree({ bare, name: 'b1', ref: otherTip }) + // Stack one genuinely-local commit on top. 
+ yield* FileSystem.FileSystem.pipe( + Effect.flatMap((fs) => + fs.writeFileString( + EffectPath.ops.join(wt, EffectPath.unsafe.relativeFile('f.txt')), + 'local work\n', + ), + ), + ) + yield* git(wt, 'commit', '-a', '--no-verify', '-m', 'local-only') + const wtHead = yield* git(wt, 'rev-parse', 'HEAD') + + const count = yield* unpushedCommitCount({ bareRepoPath: bare, worktreeHead: wtHead }) + // Exactly the one new commit is unpushed; the unrelated-remote parent is excluded. + expect(count).toBe(1) + + const assessment = yield* assessLossless({ + bareRepoPath: bare, + worktreePath: wt, + worktreeHead: wtHead, + }) + expect(assessment.unpushed).toBe(1) + }, + Effect.provide(NodeContext.layer), + Effect.scoped, + ), + ) + + it.effect( + 'squash-merge style: remote branch deleted but commit reachable via another remote ref ⇒ unpushed 0', + Effect.fnUntraced( + function* () { + const { bare, source } = yield* makeWiredBare() + + // Create a feature branch on the upstream, fetch it, then DELETE it from + // the upstream and prune — but the same commit is also on `main`. + yield* git(source, 'push', 'origin', 'main:feature') + yield* git(bare, 'fetch', '--prune', 'origin') + const featureTip = yield* git(bare, 'rev-parse', 'refs/remotes/origin/feature') + + const wt = yield* addWorktree({ bare, name: 'squash', ref: featureTip }) + const wtHead = yield* git(wt, 'rev-parse', 'HEAD') + + // Delete the feature branch upstream and prune the remote-tracking ref. + yield* git(source, 'push', 'origin', '--delete', 'feature') + yield* git(bare, 'fetch', '--prune', 'origin') + + const count = yield* unpushedCommitCount({ bareRepoPath: bare, worktreeHead: wtHead }) + // Still reachable via refs/remotes/origin/main ⇒ recoverable ⇒ 0. 
+ expect(count).toBe(0) + }, + Effect.provide(NodeContext.layer), + Effect.scoped, + ), + ) + + it.effect( + 'unpushedCommitCount reports all commits when the bare has no remote-tracking refs (conservative)', + Effect.fnUntraced( + function* () { + const fs = yield* FileSystem.FileSystem + const tmp = EffectPath.unsafe.absoluteDir(`${yield* fs.makeTempDirectoryScoped()}/`) + const bare = EffectPath.ops.join(tmp, EffectPath.unsafe.relativeDir('lonely.bare/')) + const source = EffectPath.ops.join(tmp, EffectPath.unsafe.relativeDir('src/')) + + // A bare with a branch but NO refs/remotes/* (never fetched a remote). + yield* fs.makeDirectory(bare, { recursive: true }) + yield* git(bare, 'init', '--bare') + yield* fs.makeDirectory(source, { recursive: true }) + yield* git(source, 'init') + yield* fs.writeFileString( + EffectPath.ops.join(source, EffectPath.unsafe.relativeFile('f.txt')), + 'x\n', + ) + yield* git(source, 'add', '-A') + yield* git(source, 'commit', '--no-verify', '-m', 'c0') + yield* git(source, 'remote', 'add', 'origin', bare) + yield* git(source, 'push', 'origin', 'main') + const head = yield* git(bare, 'rev-parse', 'refs/heads/main') + + const count = yield* unpushedCommitCount({ bareRepoPath: bare, worktreeHead: head }) + // No remote-tracking refs ⇒ everything reads as unpushed ⇒ keep. + expect(count).toBeGreaterThan(0) + }, + Effect.provide(NodeContext.layer), + Effect.scoped, + ), + ) + + it.effect( + 'hasStash is false without a stash and true with a real refs/stash', + Effect.fnUntraced( + function* () { + const fs = yield* FileSystem.FileSystem + const { bare, baseCommit } = yield* makeWiredBare() + + expect(yield* hasStash({ bareRepoPath: bare })).toBe(false) + + // Create a worktree, dirty it, and stash — producing refs/stash in the bare. 
+ const wt = yield* addWorktree({ bare, name: 'stash', ref: baseCommit }) + yield* fs.writeFileString( + EffectPath.ops.join(wt, EffectPath.unsafe.relativeFile('f.txt')), + 'dirty\n', + ) + yield* createStash(wt) + + expect(yield* hasStash({ bareRepoPath: bare })).toBe(true) + }, + Effect.provide(NodeContext.layer), + Effect.scoped, + ), + ) + + it.effect( + 'assessLossless surfaces dirt and a present stash together', + Effect.fnUntraced( + function* () { + const fs = yield* FileSystem.FileSystem + const { bare, baseCommit } = yield* makeWiredBare() + + const wt = yield* addWorktree({ bare, name: 'assess', ref: baseCommit }) + const wtHead = yield* git(wt, 'rev-parse', 'HEAD') + + // Stash some work (creates refs/stash), then leave NEW dirt behind. + yield* fs.writeFileString( + EffectPath.ops.join(wt, EffectPath.unsafe.relativeFile('f.txt')), + 'to-stash\n', + ) + yield* createStash(wt) + yield* fs.writeFileString( + EffectPath.ops.join(wt, EffectPath.unsafe.relativeFile('untracked.txt')), + 'new dirt\n', + ) + + const assessment = yield* assessLossless({ + bareRepoPath: bare, + worktreePath: wt, + worktreeHead: wtHead, + }) + + expect(assessment).toEqual({ unpushed: 0, dirty: true, hasStash: true }) + }, + Effect.provide(NodeContext.layer), + Effect.scoped, + ), + ) +}) diff --git a/packages/@overeng/megarepo/src/lib/store-lossless.ts b/packages/@overeng/megarepo/src/lib/store-lossless.ts new file mode 100644 index 000000000..c644ccbd2 --- /dev/null +++ b/packages/@overeng/megarepo/src/lib/store-lossless.ts @@ -0,0 +1,107 @@ +/** + * Lossless floor for cold named-branch worktrees (decisions 0001/0003/0004). + * + * A `refs/heads/*` worktree may only be archived/reaped when removing its + * directory loses NO recoverable work. That floor has three independent parts, + * each computed here: + * + * 1. {@link unpushedCommitCount} — commits reachable from the worktree HEAD that + * are on NO remote-tracking ref. 
This is `git -C <bare> rev-list <head> + * --not --remotes` (decision 0003), NOT `branch -r --contains`: the worktree + * head can be a fresh local commit stacked on a parent that lives on an + * unrelated remote ref (the "B1" case). `--not --remotes` walks down and stops + * at the first remote-reachable ancestor, so it reports exactly the + * genuinely-unpushed commits (1 in that case), whereas a "is this tip on a + * remote" check would wrongly call it pushed. `> 0` ⇒ keep. + * 2. {@link hasStash} — presence of the repo-global `refs/stash`. Stash refs live + * in the bare and do NOT travel with a worktree directory move (invariant 2c), + * so a non-empty stash means a dir move would orphan stashed work. Present ⇒ + * keep. + * 3. dirt — uncommitted/untracked changes via {@link Git.getWorktreeStatus}. + * Dirt itself travels intact with `git worktree move`, so it does NOT block + * archival on its own; it is surfaced so the classifier/archiver can record + * and preserve it. + * + * Freshness contract: {@link unpushedCommitCount} only reflects what + * `refs/remotes/*` knows, so the caller MUST {@link Git.fetchBare} (fetch + * --prune) the repo first; on a repo whose fetch failed, every commit reads as + * unpushed and the worktree is kept — the conservative direction. + */ + +import { Effect } from 'effect' + +import type { AbsoluteDirPath } from '@overeng/effect-path' + +import * as Git from './git.ts' + +/** + * The three lossless signals for one named-branch worktree, in the exact shape + * the cold classifier (`classifyColdWorktree`) consumes. + */ +export interface LosslessAssessment { + /** Commits on no remote-tracking ref. `> 0` ⇒ unrecoverable local work ⇒ keep. */ + readonly unpushed: number + /** Uncommitted/untracked changes present (travels with a dir move). */ + readonly dirty: boolean + /** A repo-global stash exists. Present ⇒ keep (does not travel with a dir move).
*/ + readonly hasStash: boolean +} + +/** + * Count commits reachable from `worktreeHead` that are on NO remote-tracking ref. + * + * `0` ⇒ every commit is recoverable from a remote (pushed, possibly via an + * unrelated remote branch). Requires fresh `refs/remotes/*` (caller fetches + * --prune first). A `GitCommandError` (e.g. an unresolvable head) is propagated + * so the caller can degrade to keep. + */ +export const unpushedCommitCount = (args: { + bareRepoPath: AbsoluteDirPath + worktreeHead: string +}) => + Git.revListUnpushed({ repoPath: args.bareRepoPath, ref: args.worktreeHead }).pipe( + Effect.map((commits) => commits.length), + Effect.withSpan('megarepo/store/gc/unpushed-commit-count', { + attributes: { 'span.label': args.worktreeHead.slice(0, 8), worktreeHead: args.worktreeHead }, + }), + ) + +/** + * Whether the bare repo has a non-empty stash. + * + * Stashes are repo-global (`refs/stash`), not per-worktree, so this is a + * bare-scoped check. Never fails: a missing ref reads as `false`. + */ +export const hasStash = (args: { bareRepoPath: AbsoluteDirPath }) => + Git.hasStashRef({ repoPath: args.bareRepoPath }) + +/** + * Compute the full {@link LosslessAssessment} for one named-branch worktree. + * + * `unpushed` may fail with `GitCommandError` (propagated for conservative + * degradation upstream); `dirty` and `hasStash` are infallible. Assumes the + * repo's `refs/remotes/*` are already fresh (caller fetched --prune). 
+ */ +export const assessLossless = (args: { + bareRepoPath: AbsoluteDirPath + worktreePath: AbsoluteDirPath + worktreeHead: string +}) => + Effect.gen(function* () { + const unpushed = yield* unpushedCommitCount({ + bareRepoPath: args.bareRepoPath, + worktreeHead: args.worktreeHead, + }) + const status = yield* Git.getWorktreeStatus(args.worktreePath) + const stash = yield* hasStash({ bareRepoPath: args.bareRepoPath }) + + return { + unpushed, + dirty: status.isDirty, + hasStash: stash, + } satisfies LosslessAssessment + }).pipe( + Effect.withSpan('megarepo/store/gc/assess-lossless', { + attributes: { 'span.label': 'lossless', worktreePath: args.worktreePath }, + }), + ) From 8b4dc6f3d5e419a8f56d10951b06b7ec24df8ee6 Mon Sep 17 00:00:00 2001 From: schickling-assistant <261620128+schickling-assistant@users.noreply.github.com> Date: Thu, 11 Jun 2026 16:53:39 +0200 Subject: [PATCH 07/13] feat(megarepo): reconcile-all liveness + broaden refresh triggers (#771) U4 of cold named-branch GC. Adds fail-safe reconcile-all to the liveness registry so a destructive gc re-derives every present workspace's live paths fresh from disk before deleting anything (decision 0010). - collectWorkspaceLivePathsStrict: surfaces read errors instead of degrading an unreadable workspace to an empty set. - collectStoreLiveSet({ reconcileAllWorkspaces, now }): on success rewrites each record (with explicit now as updatedAt); on read error KEEPS the existing record (never overwrites a non-empty record with empty, B2) and flags it via StoreLiveSet.uncleanReconcilePaths so grace is not advanced; prunes a record only when the workspace dir is gone. - refreshWorkspaceRegistry.updatedAt is now an explicit `now` (epoch ms) seam; the wall clock is read only at the CLI edge. - Broaden refresh: pin (both repin and pin-to-commit paths) now refreshes the workspace registry, closing the verified repin-without-reregister bug; apply already refreshed via the sync engine. 
Tests: repin-without-reregister regression, present-but-unreadable workspace keeps its live paths, gone-workspace pruning. Co-Authored-By: Claude Opus 4.8 (1M context) --- .../megarepo/src/cli/commands/engine.ts | 5 +- .../@overeng/megarepo/src/cli/commands/pin.ts | 20 +- .../megarepo/src/cli/commands/status.ts | 8 +- .../megarepo/src/cli/commands/store/mod.ts | 5 +- .../src/cli/store.integration.test.ts | 4 +- .../lib/store-liveness.integration.test.ts | 224 +++++++++++++++++- .../megarepo/src/lib/store-liveness.ts | 178 ++++++++++++-- .../lib/store-worktree-policy.unit.test.ts | 1 + 8 files changed, 415 insertions(+), 30 deletions(-) diff --git a/packages/@overeng/megarepo/src/cli/commands/engine.ts b/packages/@overeng/megarepo/src/cli/commands/engine.ts index 80918a316..ca71d3939 100644 --- a/packages/@overeng/megarepo/src/cli/commands/engine.ts +++ b/packages/@overeng/megarepo/src/cli/commands/engine.ts @@ -9,7 +9,7 @@ import { Prompt } from '@effect/cli' import type { CommandExecutor, Terminal } from '@effect/platform' import { FileSystem, type Error as PlatformError } from '@effect/platform' -import { Effect, Option, type ParseResult } from 'effect' +import { Clock, Effect, Option, type ParseResult } from 'effect' import React from 'react' import { EffectPath, type AbsoluteDirPath } from '@overeng/effect-path' @@ -459,7 +459,8 @@ export const syncMegarepo = ({ if (dryRun === false && changesWorkspace === true) { const store = yield* Store - yield* refreshWorkspaceRegistry({ workspaceRoot: megarepoRoot, store }) + const now = yield* Clock.currentTimeMillis + yield* refreshWorkspaceRegistry({ workspaceRoot: megarepoRoot, store, now }) } // Handle --all flag: recursively sync nested megarepos in parallel diff --git a/packages/@overeng/megarepo/src/cli/commands/pin.ts b/packages/@overeng/megarepo/src/cli/commands/pin.ts index e42acaa78..b81045bee 100644 --- a/packages/@overeng/megarepo/src/cli/commands/pin.ts +++ 
b/packages/@overeng/megarepo/src/cli/commands/pin.ts @@ -6,7 +6,7 @@ import * as Cli from '@effect/cli' import { FileSystem } from '@effect/platform' -import { Effect, Layer, Option } from 'effect' +import { Clock, Effect, Layer, Option } from 'effect' import React from 'react' import { EffectPath } from '@overeng/effect-path' @@ -35,6 +35,7 @@ import { } from '../../lib/lock.ts' import { classifyRef } from '../../lib/ref.ts' import { runPreflightChecks } from '../../lib/store-hygiene.ts' +import { refreshWorkspaceRegistry } from '../../lib/store-liveness.ts' import { Store, StoreLayer } from '../../lib/store.ts' import { Cwd, findMegarepoRoot, outputOption, outputModeLayer } from '../context.ts' import { @@ -327,6 +328,14 @@ export const pinCommand = Cli.Command.make( yield* writeLockFile({ lockPath, lockFile }) } + // Keep the store liveness record fresh after repinning so a + // concurrent gc sees the new target as live (decision 0010). + yield* refreshWorkspaceRegistry({ + workspaceRoot: root.value, + store, + now: yield* Clock.currentTimeMillis, + }) + tui.dispatch({ _tag: 'SetSuccess', member, @@ -451,6 +460,15 @@ export const pinCommand = Cli.Command.make( } } + // Keep the store liveness record fresh after pinning (the symlink may + // have been repointed to the commit worktree) so a concurrent gc sees + // the new target as live (decision 0010). 
+ yield* refreshWorkspaceRegistry({ + workspaceRoot: root.value, + store, + now: yield* Clock.currentTimeMillis, + }) + tui.dispatch({ _tag: 'SetSuccess', member, diff --git a/packages/@overeng/megarepo/src/cli/commands/status.ts b/packages/@overeng/megarepo/src/cli/commands/status.ts index 9cf06e2b5..da8b14368 100644 --- a/packages/@overeng/megarepo/src/cli/commands/status.ts +++ b/packages/@overeng/megarepo/src/cli/commands/status.ts @@ -7,7 +7,7 @@ import * as Cli from '@effect/cli' import type { CommandExecutor } from '@effect/platform' import { FileSystem, type Error as PlatformError } from '@effect/platform' -import { Effect, Option, type ParseResult } from 'effect' +import { Clock, Effect, Option, type ParseResult } from 'effect' import React from 'react' import { EffectPath, type AbsoluteDirPath } from '@overeng/effect-path' @@ -320,7 +320,11 @@ export const statusCommand = Cli.Command.make( EffectPath.unsafe.relativeFile(LOCK_FILE_NAME), ) const lockFileOpt = yield* readLockFile(lockPath) - yield* refreshWorkspaceRegistry({ workspaceRoot: root.value, store }) + yield* refreshWorkspaceRegistry({ + workspaceRoot: root.value, + store, + now: yield* Clock.currentTimeMillis, + }) let lastSyncTime: Date | undefined = undefined let lockStaleness: | { diff --git a/packages/@overeng/megarepo/src/cli/commands/store/mod.ts b/packages/@overeng/megarepo/src/cli/commands/store/mod.ts index 559e6c754..c8f23dfc5 100644 --- a/packages/@overeng/megarepo/src/cli/commands/store/mod.ts +++ b/packages/@overeng/megarepo/src/cli/commands/store/mod.ts @@ -6,7 +6,7 @@ import * as Cli from '@effect/cli' import { FileSystem, type Error as PlatformError } from '@effect/platform' -import { Effect, Option, Schedule, Stream } from 'effect' +import { Clock, Effect, Option, Schedule, Stream } from 'effect' import React from 'react' import { EffectPath, type AbsoluteDirPath } from '@overeng/effect-path' @@ -420,11 +420,13 @@ const storeStatusCommand = Cli.Command.make('status', { output: 
outputOption }, const fs = yield* FileSystem.FileSystem const root = yield* findMegarepoRoot(cwd) + const now = yield* Clock.currentTimeMillis const liveSet = yield* collectStoreLiveSet({ store, ...(Option.isSome(root) === true ? { currentWorkspaceRoot: root.value } : {}), pruneStaleRegistry: true, refreshCurrentWorkspace: true, + now, }) // List all repos and analyze worktrees in parallel @@ -857,6 +859,7 @@ const storeGcCommand = Cli.Command.make( ...(Option.isSome(root) === true ? { currentWorkspaceRoot: root.value } : {}), pruneStaleRegistry: dryRun === false, refreshCurrentWorkspace: dryRun === false, + now: yield* Clock.currentTimeMillis, }) liveSetForMetrics = liveSet diff --git a/packages/@overeng/megarepo/src/cli/store.integration.test.ts b/packages/@overeng/megarepo/src/cli/store.integration.test.ts index 32f7218df..90c0f40ce 100644 --- a/packages/@overeng/megarepo/src/cli/store.integration.test.ts +++ b/packages/@overeng/megarepo/src/cli/store.integration.test.ts @@ -288,7 +288,7 @@ describe('mr store gc', () => { const env = { MEGAREPO_STORE: storePath } const store = yield* Store.pipe(Effect.provide(makeStoreLayer({ basePath: storePath }))) - yield* refreshWorkspaceRegistry({ workspaceRoot: workspaceB, store }) + yield* refreshWorkspaceRegistry({ workspaceRoot: workspaceB, store, now: Date.now() }) const statusB = yield* runMrCommand({ cwd: workspaceB, command: ['status', '--output', 'json'], @@ -352,7 +352,7 @@ describe('mr store gc', () => { EffectPath.ops.join(workspacePath, EffectPath.unsafe.relativeFile('repos/repo')), ) const store = yield* Store.pipe(Effect.provide(makeStoreLayer({ basePath: storePath }))) - yield* refreshWorkspaceRegistry({ workspaceRoot: workspacePath, store }) + yield* refreshWorkspaceRegistry({ workspaceRoot: workspacePath, store, now: Date.now() }) const gc = yield* runMrCommand({ cwd: workspacePath, diff --git a/packages/@overeng/megarepo/src/lib/store-liveness.integration.test.ts 
b/packages/@overeng/megarepo/src/lib/store-liveness.integration.test.ts index 72837dffd..061cb7629 100644 --- a/packages/@overeng/megarepo/src/lib/store-liveness.integration.test.ts +++ b/packages/@overeng/megarepo/src/lib/store-liveness.integration.test.ts @@ -10,10 +10,12 @@ import { createStoreFixture, createWorkspaceWithLock, getWorktreeCommit, + repinWorkspace, } from '../test-utils/store-setup.ts' import { collectStoreLiveSet, collectWorkspaceLivePaths, + collectWorkspaceLivePathsStrict, refreshWorkspaceRegistry, } from './store-liveness.ts' import { makeStoreLayer, Store } from './store.ts' @@ -109,9 +111,14 @@ describe('store-liveness', () => { refType: 'commit', }) - const record = yield* refreshWorkspaceRegistry({ workspaceRoot: workspacePath, store }) + const record = yield* refreshWorkspaceRegistry({ + workspaceRoot: workspacePath, + store, + now: 1_700_000_000_000, + }) expect(record.workspaceRoot).toBe(normalizePath(workspacePath)) + expect(record.updatedAt).toBe(new Date(1_700_000_000_000).toISOString()) expect(record.livePaths).toEqual( [normalizePath(commitWorktreePath), normalizePath(mainWorktreePath)].sort(), ) @@ -186,7 +193,11 @@ describe('store-liveness', () => { }, }) - yield* refreshWorkspaceRegistry({ workspaceRoot: workspacePath, store }) + yield* refreshWorkspaceRegistry({ + workspaceRoot: workspacePath, + store, + now: 1_700_000_000_000, + }) const liveSet = yield* collectStoreLiveSet({ store, refreshCurrentWorkspace: false, @@ -198,4 +209,213 @@ describe('store-liveness', () => { Effect.scoped, ), ) + + it.effect( + 'reconcileAllWorkspaces re-derives a repinned-without-reregister target (decision 0010 regression)', + Effect.fnUntraced( + function* () { + const fs = yield* FileSystem.FileSystem + const { storePath, worktreePaths } = yield* createStoreFixture([ + { + host: 'github.com', + owner: 'test-owner', + repo: 'repin-repo', + branches: ['main', 'feature'], + }, + ]) + const mainWorktreePath = 
worktreePaths['github.com/test-owner/repin-repo#main']! + const featureWorktreePath = worktreePaths['github.com/test-owner/repin-repo#feature']! + const store = yield* Store.pipe(Effect.provide(makeStoreLayer({ basePath: storePath }))) + + // Workspace initially points its member at the `main` worktree and + // registers that as live. + const { workspacePath } = yield* createWorkspaceWithLock({ + members: { repo: 'test-owner/repin-repo#main' }, + }) + const reposDir = EffectPath.ops.join(workspacePath, EffectPath.unsafe.relativeDir('repos/')) + yield* fs.makeDirectory(reposDir, { recursive: true }) + yield* fs.symlink( + normalizePath(mainWorktreePath), + EffectPath.ops.join(reposDir, EffectPath.unsafe.relativeFile('repo')), + ) + yield* refreshWorkspaceRegistry({ + workspaceRoot: workspacePath, + store, + now: 1_700_000_000_000, + }) + + // Repin to the `feature` target WITHOUT running any refreshing command: + // the cached record is now stale (still points at `main`). + yield* repinWorkspace({ + workspacePath, + memberName: 'repo', + newTarget: featureWorktreePath, + }) + + // A trusting (non-reconciling) collect would over-protect `main` and miss + // the live `feature` target — exactly the verified pre-existing bug. + const stale = yield* collectStoreLiveSet({ store, refreshCurrentWorkspace: false }) + expect(stale.paths).toContain(normalizePath(mainWorktreePath)) + expect(stale.paths).not.toContain(normalizePath(featureWorktreePath)) + + // Reconcile-all re-derives from disk: the new target is now protected. + const reconciled = yield* collectStoreLiveSet({ + store, + reconcileAllWorkspaces: true, + now: 1_700_000_001_000, + }) + expect(reconciled.paths).toContain(normalizePath(featureWorktreePath)) + expect(reconciled.paths).not.toContain(normalizePath(mainWorktreePath)) + expect(reconciled.uncleanReconcilePaths.size).toBe(0) + + // The on-disk record was rewritten fresh with the explicit `now`. 
+ const registryDir = EffectPath.ops.join( + storePath, + EffectPath.unsafe.relativeDir('.state/workspaces/'), + ) + const entries = yield* fs.readDirectory(registryDir) + const content = yield* fs.readFileString( + EffectPath.ops.join(registryDir, EffectPath.unsafe.relativeFile(entries[0]!)), + ) + const record = JSON.parse(content) as { + updatedAt: string + livePaths: ReadonlyArray + } + expect(record.updatedAt).toBe(new Date(1_700_000_001_000).toISOString()) + expect(record.livePaths).toEqual([normalizePath(featureWorktreePath)]) + }, + Effect.provide(NodeContext.layer), + Effect.scoped, + ), + ) + + it.effect( + 'reconcileAllWorkspaces fails safe: a present-but-unreadable workspace keeps its last-known live paths (B2)', + Effect.fnUntraced( + function* () { + const fs = yield* FileSystem.FileSystem + const { storePath, worktreePaths } = yield* createStoreFixture([ + { + host: 'github.com', + owner: 'test-owner', + repo: 'unreadable-repo', + branches: ['main'], + }, + ]) + const mainWorktreePath = worktreePaths['github.com/test-owner/unreadable-repo#main']! + const store = yield* Store.pipe(Effect.provide(makeStoreLayer({ basePath: storePath }))) + + const { workspacePath } = yield* createWorkspaceWithLock({ + members: { repo: 'test-owner/unreadable-repo#main' }, + }) + const reposDir = EffectPath.ops.join(workspacePath, EffectPath.unsafe.relativeDir('repos/')) + yield* fs.makeDirectory(reposDir, { recursive: true }) + yield* fs.symlink( + normalizePath(mainWorktreePath), + EffectPath.ops.join(reposDir, EffectPath.unsafe.relativeFile('repo')), + ) + // Register the live path while still readable. + yield* refreshWorkspaceRegistry({ + workspaceRoot: workspacePath, + store, + now: 1_700_000_000_000, + }) + + // Make the members dir unreadable: a strict reconcile must now fail. + yield* fs.chmod(reposDir, 0o000) + + // Confirm the strict collector surfaces the read error (rather than + // degrading to an empty set). 
+ const strictResult = yield* collectWorkspaceLivePathsStrict({ + workspaceRoot: workspacePath, + store, + }).pipe(Effect.either) + // Restore perms regardless of assertion outcome so scoped cleanup works. + yield* fs.chmod(reposDir, 0o755).pipe(Effect.catchAll(() => Effect.void)) + // Re-break for the reconcile-all assertion below. + yield* fs.chmod(reposDir, 0o000) + expect(strictResult._tag).toBe('Left') + + // Reconcile-all keeps the last-known live paths (never overwrites a + // non-empty record with empty) and flags the workspace unclean. + const reconciled = yield* collectStoreLiveSet({ + store, + reconcileAllWorkspaces: true, + now: 1_700_000_002_000, + }) + yield* fs.chmod(reposDir, 0o755).pipe(Effect.catchAll(() => Effect.void)) + + expect(reconciled.paths).toContain(normalizePath(mainWorktreePath)) + expect([...reconciled.uncleanReconcilePaths]).toContain(normalizePath(mainWorktreePath)) + + // The on-disk record was NOT overwritten (still the original timestamp). + const registryDir = EffectPath.ops.join( + storePath, + EffectPath.unsafe.relativeDir('.state/workspaces/'), + ) + const entries = yield* fs.readDirectory(registryDir) + const content = yield* fs.readFileString( + EffectPath.ops.join(registryDir, EffectPath.unsafe.relativeFile(entries[0]!)), + ) + const record = JSON.parse(content) as { updatedAt: string } + expect(record.updatedAt).toBe(new Date(1_700_000_000_000).toISOString()) + }, + Effect.provide(NodeContext.layer), + Effect.scoped, + ), + ) + + it.effect( + 'reconcileAllWorkspaces prunes a record whose workspace dir is gone', + Effect.fnUntraced( + function* () { + const fs = yield* FileSystem.FileSystem + const { storePath, worktreePaths } = yield* createStoreFixture([ + { + host: 'github.com', + owner: 'test-owner', + repo: 'gone-repo', + branches: ['main'], + }, + ]) + const mainWorktreePath = worktreePaths['github.com/test-owner/gone-repo#main']! 
+ const store = yield* Store.pipe(Effect.provide(makeStoreLayer({ basePath: storePath }))) + + const { workspacePath } = yield* createWorkspaceWithLock({ + members: { repo: 'test-owner/gone-repo#main' }, + }) + const reposDir = EffectPath.ops.join(workspacePath, EffectPath.unsafe.relativeDir('repos/')) + yield* fs.makeDirectory(reposDir, { recursive: true }) + yield* fs.symlink( + normalizePath(mainWorktreePath), + EffectPath.ops.join(reposDir, EffectPath.unsafe.relativeFile('repo')), + ) + yield* refreshWorkspaceRegistry({ + workspaceRoot: workspacePath, + store, + now: 1_700_000_000_000, + }) + + // Workspace dir disappears entirely (not merely unreadable). + yield* fs.remove(workspacePath, { recursive: true }) + + const reconciled = yield* collectStoreLiveSet({ + store, + reconcileAllWorkspaces: true, + now: 1_700_000_003_000, + }) + expect(reconciled.workspaceCount).toBe(0) + expect(reconciled.paths.has(normalizePath(mainWorktreePath))).toBe(false) + + const registryDir = EffectPath.ops.join( + storePath, + EffectPath.unsafe.relativeDir('.state/workspaces/'), + ) + const entries = yield* fs.readDirectory(registryDir) + expect(entries.filter((e) => e.endsWith('.json'))).toHaveLength(0) + }, + Effect.provide(NodeContext.layer), + Effect.scoped, + ), + ) }) diff --git a/packages/@overeng/megarepo/src/lib/store-liveness.ts b/packages/@overeng/megarepo/src/lib/store-liveness.ts index 27a03af88..4c44dd856 100644 --- a/packages/@overeng/megarepo/src/lib/store-liveness.ts +++ b/packages/@overeng/megarepo/src/lib/store-liveness.ts @@ -39,6 +39,13 @@ type StoreWorkspaceRecord = Schema.Schema.Type export interface StoreLiveSet { readonly paths: ReadonlySet readonly workspaceCount: number + /** + * Store paths belonging to a workspace that was present but failed a strict + * reconcile this run (only populated by `reconcileAllWorkspaces`). 
These paths + * stay protected (their last-known live set is kept), but gc must NOT advance + * absence grace for them — their freshness is unconfirmed (B2/decision 0010). + */ + readonly uncleanReconcilePaths: ReadonlySet } const normalizePath = (path: string): string => path.replace(/\/+$/, '') @@ -69,22 +76,36 @@ const isStorePath = ({ store, path }: { store: MegarepoStore; path: string }): b const collectWorkspaceSymlinkTargets = ({ workspaceRoot, store, + strict = false, }: { workspaceRoot: AbsoluteDirPath store: MegarepoStore + /** + * When true, surface read errors (missing dir, unreadable entries) instead of + * swallowing them into an empty/partial set. A present-but-unreadable workspace + * must fail safe (keep its last-known live paths), which is only possible if the + * error is propagated to the caller rather than masked as "no live paths". + */ + strict?: boolean }): Effect.Effect, PlatformError.PlatformError, FileSystem.FileSystem> => Effect.gen(function* () { const fs = yield* FileSystem.FileSystem const targets = new Set() const membersRoot = getMembersRoot(workspaceRoot) - const membersRootExists = yield* fs - .exists(membersRoot) - .pipe(Effect.catchAll(() => Effect.succeed(false))) + const membersRootExists = strict + ? yield* fs.exists(membersRoot) + : yield* fs.exists(membersRoot).pipe(Effect.catchAll(() => Effect.succeed(false))) if (membersRootExists === false) return targets - const entries = yield* fs - .readDirectory(membersRoot) - .pipe(Effect.catchAll(() => Effect.succeed([] as string[]))) + // Workspace-level read failures (unreadable members dir) surface in strict + // mode so a present-but-unreadable workspace fails safe upstream. A + // per-entry `readLink` failure is always tolerated: a non-symlink directory + // entry (e.g. a local repo) legitimately has no store target. + const entries = strict + ? 
yield* fs.readDirectory(membersRoot) + : yield* fs + .readDirectory(membersRoot) + .pipe(Effect.catchAll(() => Effect.succeed([] as string[]))) for (const entry of entries) { if (entry.startsWith('.') === true) continue const memberPath = EffectPath.ops.join(membersRoot, EffectPath.unsafe.relativeFile(entry)) @@ -102,6 +123,7 @@ const collectWorkspaceSymlinkTargets = ({ attributes: { 'span.label': workspaceLabel(workspaceRoot), workspaceRoot, + strict, }, }), ) @@ -110,16 +132,19 @@ const collectWorkspaceSymlinkTargets = ({ export const collectWorkspaceLivePaths = ({ workspaceRoot, store, + strict = false, }: { workspaceRoot: AbsoluteDirPath store: MegarepoStore + /** When true, surface read errors instead of degrading to a partial/empty set. */ + strict?: boolean }): Effect.Effect< Set, ConfigNotFoundError | PlatformError.PlatformError | ParseResult.ParseError, FileSystem.FileSystem > => Effect.gen(function* () { - const paths = yield* collectWorkspaceSymlinkTargets({ workspaceRoot, store }) + const paths = yield* collectWorkspaceSymlinkTargets({ workspaceRoot, store, strict }) const lockPath = EffectPath.ops.join( workspaceRoot, @@ -168,13 +193,38 @@ export const collectWorkspaceLivePaths = ({ }), ) -/** Refreshes the store-local liveness registry entry for one workspace. */ +/** + * Like {@link collectWorkspaceLivePaths} but SURFACES read errors instead of + * degrading an unreadable workspace to an empty set. Used by reconcile-all so a + * present-but-unreadable workspace fails safe (keeps its last-known live paths) + * rather than silently losing protection. 
+ */ +export const collectWorkspaceLivePathsStrict = ({ + workspaceRoot, + store, +}: { + workspaceRoot: AbsoluteDirPath + store: MegarepoStore +}): Effect.Effect< + Set, + ConfigNotFoundError | PlatformError.PlatformError | ParseResult.ParseError, + FileSystem.FileSystem +> => collectWorkspaceLivePaths({ workspaceRoot, store, strict: true }) + +/** + * Refreshes the store-local liveness registry entry for one workspace. + * + * `now` (epoch ms) is the explicit clock seam for the record's `updatedAt`; the + * CLI edge reads the wall clock, never this decision/persistence path. + */ export const refreshWorkspaceRegistry = ({ workspaceRoot, store, + now, }: { workspaceRoot: AbsoluteDirPath store: MegarepoStore + now: number }): Effect.Effect< StoreWorkspaceRecord, ConfigNotFoundError | PlatformError.PlatformError | ParseResult.ParseError, @@ -186,7 +236,7 @@ export const refreshWorkspaceRegistry = ({ const record: StoreWorkspaceRecord = { version: REGISTRY_VERSION, workspaceRoot: normalizePath(workspaceRoot), - updatedAt: new Date().toISOString(), + updatedAt: new Date(now).toISOString(), livePaths: [...livePaths].toSorted(), } @@ -206,27 +256,47 @@ export const refreshWorkspaceRegistry = ({ }), ) +/** Result of reading (and optionally reconciling) the workspace registry. */ +interface RegistryReadResult { + readonly records: ReadonlyArray + /** + * Store paths belonging to workspaces that were present but failed a strict + * reconcile this run (B2/decision 0010). Their last-known live paths are kept, + * but the caller must NOT treat them as freshly-confirmed (e.g. grace advance). + */ + readonly uncleanReconcilePaths: ReadonlySet +} + const readRegistryRecords = ({ store, pruneStale, + reconcile, }: { store: MegarepoStore pruneStale: boolean + /** + * When provided, re-derive each present workspace's live paths fresh from disk + * (decision 0010). 
On success the on-disk record is rewritten with `now` as + * `updatedAt`; on read error the existing record is KEPT unchanged (fail safe — + * never overwrite a non-empty record with empty) and flagged unclean. + */ + reconcile?: { now: number } | undefined }): Effect.Effect< - ReadonlyArray, - PlatformError.PlatformError, + RegistryReadResult, + ConfigNotFoundError | PlatformError.PlatformError | ParseResult.ParseError, FileSystem.FileSystem > => Effect.gen(function* () { const fs = yield* FileSystem.FileSystem const registryDir = workspaceRegistryDir(store) const exists = yield* fs.exists(registryDir).pipe(Effect.catchAll(() => Effect.succeed(false))) - if (exists === false) return [] + if (exists === false) return { records: [], uncleanReconcilePaths: new Set() } const entries = yield* fs .readDirectory(registryDir) .pipe(Effect.catchAll(() => Effect.succeed([] as string[]))) const records: StoreWorkspaceRecord[] = [] + const uncleanReconcilePaths = new Set() for (const entry of entries) { if (entry.endsWith('.json') === false) continue @@ -239,50 +309,116 @@ const readRegistryRecords = ({ ) if (parsed === null) continue + const workspaceRoot = EffectPath.unsafe.absoluteDir(`${parsed.workspaceRoot}/`) const workspaceExists = yield* fs .exists(parsed.workspaceRoot) .pipe(Effect.catchAll(() => Effect.succeed(false))) - if (workspaceExists === true) { + + // Prune only when the workspace dir is GONE (decision 0010); a + // present-but-unreadable workspace must never be pruned. + if (workspaceExists === false) { + if (pruneStale === true) { + yield* fs.remove(recordPath).pipe(Effect.catchAll(() => Effect.void)) + } + continue + } + + if (reconcile === undefined) { records.push(parsed) - } else if (pruneStale === true) { - yield* fs.remove(recordPath).pipe(Effect.catchAll(() => Effect.void)) + continue + } + + // Reconcile-all: re-derive from disk. Success ⇒ rewrite the record fresh. + // Read error ⇒ keep the existing record verbatim and flag it unclean. 
+ const reconciled = yield* collectWorkspaceLivePathsStrict({ workspaceRoot, store }).pipe( + Effect.map((paths) => ({ _tag: 'ok' as const, paths })), + Effect.catchAll(() => Effect.succeed({ _tag: 'error' as const })), + ) + + if (reconciled._tag === 'ok') { + const record: StoreWorkspaceRecord = { + version: REGISTRY_VERSION, + workspaceRoot: normalizePath(parsed.workspaceRoot), + updatedAt: new Date(reconcile.now).toISOString(), + livePaths: [...reconciled.paths].toSorted(), + } + const content = yield* Schema.encode(Schema.parseJson(StoreWorkspaceRecord, { space: 2 }))( + record, + ) + yield* fs.writeFileString(recordPath, content + '\n') + records.push(record) + } else { + records.push(parsed) + for (const livePath of parsed.livePaths) { + if (isStorePath({ store, path: livePath }) === true) { + uncleanReconcilePaths.add(normalizePath(livePath)) + } + } } } - return records + return { records, uncleanReconcilePaths } }).pipe( Effect.withSpan('megarepo/store/liveness/read-registry', { - attributes: { 'span.label': 'registry' }, + attributes: { 'span.label': 'registry', reconcileAll: reconcile !== undefined }, }), ) -/** Collects the store-wide protected path set from the workspace registry. */ +/** + * Collects the store-wide protected path set from the workspace registry. + * + * `reconcileAllWorkspaces` (decision 0010) re-derives EVERY present workspace's + * live paths fresh from disk before computing the set, so a repin that ran no + * refreshing command is still caught. Any path-writing mode (`reconcileAll...` or + * `refreshCurrentWorkspace`) requires an explicit `now` (epoch ms) — the wall + * clock is never read on this persistence path. 
+ */ export const collectStoreLiveSet = ({ store, currentWorkspaceRoot, refreshCurrentWorkspace = true, pruneStaleRegistry = true, + reconcileAllWorkspaces = false, + now, }: { store: MegarepoStore currentWorkspaceRoot?: AbsoluteDirPath | undefined refreshCurrentWorkspace?: boolean | undefined pruneStaleRegistry?: boolean | undefined + reconcileAllWorkspaces?: boolean | undefined + /** Required whenever a write happens (refresh or reconcile-all). */ + now?: number | undefined }): Effect.Effect< StoreLiveSet, ConfigNotFoundError | PlatformError.PlatformError | ParseResult.ParseError, FileSystem.FileSystem > => Effect.gen(function* () { + const writesRecord = + reconcileAllWorkspaces === true || + (currentWorkspaceRoot !== undefined && refreshCurrentWorkspace === true) + if (writesRecord === true && now === undefined) { + // Guard the clock seam: a record-writing collect MUST receive an explicit + // `now` rather than silently reading the ambient wall clock. + return yield* Effect.die( + new Error('collectStoreLiveSet: `now` is required when writing a registry record'), + ) + } + const currentWorkspacePaths = currentWorkspaceRoot !== undefined && refreshCurrentWorkspace === false ? yield* collectWorkspaceLivePaths({ workspaceRoot: currentWorkspaceRoot, store }) : undefined if (currentWorkspaceRoot !== undefined && refreshCurrentWorkspace === true) { - yield* refreshWorkspaceRegistry({ workspaceRoot: currentWorkspaceRoot, store }) + yield* refreshWorkspaceRegistry({ workspaceRoot: currentWorkspaceRoot, store, now: now! }) } - const records = yield* readRegistryRecords({ store, pruneStale: pruneStaleRegistry }) + const { records, uncleanReconcilePaths } = yield* readRegistryRecords({ + store, + pruneStale: pruneStaleRegistry, + ...(reconcileAllWorkspaces === true ? { reconcile: { now: now! 
} } : {}), + }) const paths = new Set() for (const record of records) { for (const livePath of record.livePaths) { @@ -298,6 +434,7 @@ export const collectStoreLiveSet = ({ return { paths, workspaceCount: records.length, + uncleanReconcilePaths, } satisfies StoreLiveSet }).pipe( Effect.withSpan('megarepo/store/liveness/collect-store', { @@ -306,6 +443,7 @@ export const collectStoreLiveSet = ({ hasCurrentWorkspace: currentWorkspaceRoot !== undefined, pruneStaleRegistry, refreshCurrentWorkspace, + reconcileAllWorkspaces, }, }), ) diff --git a/packages/@overeng/megarepo/src/lib/store-worktree-policy.unit.test.ts b/packages/@overeng/megarepo/src/lib/store-worktree-policy.unit.test.ts index 419f6e38f..ea2b421a1 100644 --- a/packages/@overeng/megarepo/src/lib/store-worktree-policy.unit.test.ts +++ b/packages/@overeng/megarepo/src/lib/store-worktree-policy.unit.test.ts @@ -7,6 +7,7 @@ import { classifyStoreWorktreePolicy, isNamedRefWorktree } from './store-worktre const liveSet = (paths: ReadonlyArray): StoreLiveSet => ({ paths: new Set(paths), workspaceCount: 1, + uncleanReconcilePaths: new Set(), }) describe('store-worktree-policy', () => { From c6dc744a4863deb17ea57d2ab946f1169704bae8 Mon Sep 17 00:00:00 2001 From: schickling-assistant <261620128+schickling-assistant@users.noreply.github.com> Date: Thu, 11 Jun 2026 17:02:37 +0200 Subject: [PATCH 08/13] feat(megarepo): store-archive (archive/scan/reap) for cold worktree GC (#771) U6 of the cold named-branch reclamation work. Adds src/lib/store-archive.ts: - archiveWorktree: mkdir the .archive parent first, then `git worktree move` (dirty + untracked work travels intact, gitlink fixed), FREE the branch via `git branch -D` so re-apply can re-materialize it (invariant 4), and append a metadata line to .archive/README.md. `now` is an explicit epoch-ms param. 
- scanArchives: enumerate via Git.listWorktrees filtered to .archive/ paths, parsing archivedAtMs from a strict anchored trailing `--` segment (branch names contain -/--//, parsed relative to .archive/). - reapArchive: `git worktree remove --force`, then ensure the dir is gone. Adds a Git.deleteBranch helper (`branch -d/-D`). Integration tests cover: archive preserves dirty+untracked; branch freed + re-add succeeds; scan/ retention split; reap removes dir + unregisters from the worktree registry. Co-Authored-By: Claude Opus 4.8 (1M context) --- packages/@overeng/megarepo/src/lib/git.ts | 19 ++ .../src/lib/store-archive.integration.test.ts | 296 ++++++++++++++++++ .../megarepo/src/lib/store-archive.ts | 274 ++++++++++++++++ 3 files changed, 589 insertions(+) create mode 100644 packages/@overeng/megarepo/src/lib/store-archive.integration.test.ts create mode 100644 packages/@overeng/megarepo/src/lib/store-archive.ts diff --git a/packages/@overeng/megarepo/src/lib/git.ts b/packages/@overeng/megarepo/src/lib/git.ts index bef09e884..1a3790eb2 100644 --- a/packages/@overeng/megarepo/src/lib/git.ts +++ b/packages/@overeng/megarepo/src/lib/git.ts @@ -561,6 +561,25 @@ export const createBranch = (args: { repoPath: string; branch: string; baseRef: return baseCommit }) +/** + * Delete a local branch ref in a (bare) repo. + * + * Used by GC archival to FREE a `refs/heads/` after the worktree has + * been moved aside, so `mr apply` can re-materialize the branch. `force` maps to + * `git branch -D` (delete even if not merged); the commit stays reachable via + * the remote-tracking ref the lossless floor proved. + */ +export const deleteBranch = (args: { repoPath: string; branch: string; force?: boolean }) => + runGitCommand({ + args: ['branch', args.force === true ? 
'-D' : '-d', args.branch], + cwd: args.repoPath, + }).pipe( + Effect.asVoid, + Effect.withSpan('git/delete-branch', { + attributes: { 'span.label': args.branch, branch: args.branch }, + }), + ) + /** * Push a branch to the remote. * diff --git a/packages/@overeng/megarepo/src/lib/store-archive.integration.test.ts b/packages/@overeng/megarepo/src/lib/store-archive.integration.test.ts new file mode 100644 index 000000000..e82a237d2 --- /dev/null +++ b/packages/@overeng/megarepo/src/lib/store-archive.integration.test.ts @@ -0,0 +1,296 @@ +/** + * Integration tests for archive + reap (U6, decisions 0004/0007). + * + * Exercises REAL git against store-shaped fixtures (`createStoreFixture`): + * - archiveWorktree moves the worktree under `.archive/` preserving dirty + + * untracked work, FREES the `refs/heads/` ref (so an `mr apply`- + * equivalent re-add succeeds), and records metadata. + * - scanArchives enumerates only `.archive/` entries with a strict `--` + * parse, surfacing `archivedAtMs` for retention. + * - reapArchive removes the directory AND unregisters it from the bare's + * worktree list. + * + * The pure `parseArchiveDirName` regex contract is asserted inline (branch names + * contain `-`/`--`/`/`; only a trailing valid ISO8601 instant is a timestamp). 
+ */ + +import { Command, FileSystem } from '@effect/platform' +import { NodeContext } from '@effect/platform-node' +import { describe, it } from '@effect/vitest' +import { Effect, Option } from 'effect' +import { expect } from 'vitest' + +import { EffectPath, type AbsoluteDirPath } from '@overeng/effect-path' + +import { + createArchiveEntry, + createStoreFixture, + getWorktreeCommit, +} from '../test-utils/store-setup.ts' +import * as Git from './git.ts' +import { archiveWorktree, parseArchiveDirName, reapArchive, scanArchives } from './store-archive.ts' + +const git = (cwd: string, ...args: ReadonlyArray) => + Effect.gen(function* () { + const command = Command.make('git', ...args).pipe(Command.workingDirectory(cwd)) + return (yield* Command.string(command)).trim() + }) + +/** `/github.com///` repo root for a fixture repo key. */ +const repoRootFor = (storePath: AbsoluteDirPath, repoKey: string): AbsoluteDirPath => + EffectPath.ops.join(storePath, EffectPath.unsafe.relativeDir(`${repoKey}/`)) + +const REPO = { host: 'github.com', owner: 'acme', repo: 'widget' } as const +const REPO_KEY = `${REPO.host}/${REPO.owner}/${REPO.repo}` + +describe('store-archive: parseArchiveDirName', () => { + it('parses a slash/double-dash branch with a trailing ISO8601 timestamp', () => { + const iso = '2026-06-11T10:20:30.000Z' + const parsed = parseArchiveDirName(`schickling/2026-06-10--feature--x${`--${iso}`}`) + expect(Option.isSome(parsed)).toBe(true) + if (Option.isSome(parsed)) { + expect(parsed.value.branch).toBe('schickling/2026-06-10--feature--x') + expect(parsed.value.archivedAtMs).toBe(Date.parse(iso)) + } + }) + + it('rejects a name without a trailing ISO8601 instant', () => { + expect(Option.isNone(parseArchiveDirName('feature-branch'))).toBe(true) + expect(Option.isNone(parseArchiveDirName('feature--2026-06-11'))).toBe(true) + // Empty branch segment (name starts with the separator) is rejected. 
+ expect(Option.isNone(parseArchiveDirName('--2026-06-11T10:20:30.000Z'))).toBe(true) + }) +}) + +describe('store-archive: archiveWorktree', () => { + it.effect( + 'archives a clean worktree, frees the branch, and an mr-apply-equivalent re-add succeeds', + Effect.fnUntraced( + function* () { + const fixture = yield* createStoreFixture([{ ...REPO, branches: ['feature/x'] }]) + const repoRoot = repoRootFor(fixture.storePath, REPO_KEY) + const bareRepoPath = fixture.bareRepoPaths[REPO_KEY]! + const worktreePath = fixture.worktreePaths[`${REPO_KEY}#feature/x`]! + const commit = yield* getWorktreeCommit(worktreePath) + + // The fixture creates DETACHED worktrees; materialize the real branch ref + // so we can prove archive FREES it (the cold-named-worktree shape). + yield* git(bareRepoPath, 'branch', 'feature/x', commit) + const fs = yield* FileSystem.FileSystem + const before = yield* Git.refExists({ + repoPath: bareRepoPath, + ref: 'refs/heads/feature/x', + }) + expect(before).toBe(true) + + const now = Date.parse('2026-06-11T08:00:00.000Z') + const dest = yield* archiveWorktree({ + repoRoot, + bareRepoPath, + worktreePath, + branch: 'feature/x', + commit, + reason: 'merged', + now, + }) + + // Original gone, archive present. + expect(yield* fs.exists(worktreePath)).toBe(false) + expect(yield* fs.exists(dest)).toBe(true) + expect(dest.includes('.archive/feature/x--2026-06-11T08:00:00.000Z')).toBe(true) + + // Branch FREED — mr apply can re-materialize it. + expect(yield* Git.refExists({ repoPath: bareRepoPath, ref: 'refs/heads/feature/x' })).toBe( + false, + ) + + // README metadata line recorded. + const readme = yield* fs.readFileString( + EffectPath.ops.join(repoRoot, EffectPath.unsafe.relativeFile('.archive/README.md')), + ) + expect(readme).toContain(`feature/x\t2026-06-11T08:00:00.000Z\t${commit}\tmerged`) + + // mr-apply-equivalent re-add: recreate the branch + worktree at refs/heads/. 
+ const reAddPath = EffectPath.ops.join( + repoRoot, + EffectPath.unsafe.relativeDir('refs/heads/feature/x/'), + ) + yield* git(bareRepoPath, 'branch', 'feature/x', commit) + yield* git(bareRepoPath, 'worktree', 'add', reAddPath, 'feature/x') + expect(yield* fs.exists(reAddPath)).toBe(true) + }, + Effect.provide(NodeContext.layer), + Effect.scoped, + ), + ) + + it.effect( + 'archive preserves uncommitted + untracked work intact with the dir move', + Effect.fnUntraced( + function* () { + const fixture = yield* createStoreFixture([ + { ...REPO, branches: ['feature/dirty'], dirtyWorktrees: ['feature/dirty'] }, + ]) + const repoRoot = repoRootFor(fixture.storePath, REPO_KEY) + const bareRepoPath = fixture.bareRepoPaths[REPO_KEY]! + const worktreePath = fixture.worktreePaths[`${REPO_KEY}#feature/dirty`]! + const commit = yield* getWorktreeCommit(worktreePath) + const fs = yield* FileSystem.FileSystem + + // Add an extra untracked file beyond the fixture's tracked-ish dirt. + yield* fs.writeFileString( + EffectPath.ops.join(worktreePath, EffectPath.unsafe.relativeFile('untracked.txt')), + 'precious\n', + ) + + const dest = yield* archiveWorktree({ + repoRoot, + bareRepoPath, + worktreePath, + branch: 'feature/dirty', + commit, + reason: 'closed', + now: Date.parse('2026-06-11T09:00:00.000Z'), + }) + + // Both the fixture dirt file and the untracked file traveled intact. + expect( + yield* fs.readFileString( + EffectPath.ops.join(dest, EffectPath.unsafe.relativeFile('dirty.txt')), + ), + ).toBe('uncommitted changes\n') + expect( + yield* fs.readFileString( + EffectPath.ops.join(dest, EffectPath.unsafe.relativeFile('untracked.txt')), + ), + ).toBe('precious\n') + + // The moved worktree still reports its dirt (status preserved). 
+ const status = yield* Git.getWorktreeStatus(dest) + expect(status.isDirty).toBe(true) + }, + Effect.provide(NodeContext.layer), + Effect.scoped, + ), + ) +}) + +describe('store-archive: scanArchives + reapArchive', () => { + it.effect( + 'scanArchives lists only .archive/ entries with parsed timestamps; siblings ignored', + Effect.fnUntraced( + function* () { + const fixture = yield* createStoreFixture([{ ...REPO, branches: ['live/keep'] }]) + const repoRoot = repoRootFor(fixture.storePath, REPO_KEY) + const bareRepoPath = fixture.bareRepoPaths[REPO_KEY]! + const liveWorktree = fixture.worktreePaths[`${REPO_KEY}#live/keep`]! + const commit = yield* getWorktreeCommit(liveWorktree) + + const oldAt = new Date('2026-05-01T00:00:00.000Z') + const newAt = new Date('2026-06-10T00:00:00.000Z') + yield* createArchiveEntry({ + bareRepoPath, + repoRoot, + branch: 'feature/old', + commit, + archivedAt: oldAt, + }) + yield* createArchiveEntry({ + bareRepoPath, + repoRoot, + branch: 'team/feature--double', + commit, + archivedAt: newAt, + }) + + const entries = yield* scanArchives({ repoRoot, bareRepoPath }) + + // The live (refs/heads) worktree is NOT in the archive set. + const branches = entries.map((entry) => entry.branch).sort() + expect(branches).toEqual(['feature/old', 'team/feature--double']) + + const byBranch = new Map(entries.map((entry) => [entry.branch, entry])) + expect(byBranch.get('feature/old')?.archivedAtMs).toBe(oldAt.getTime()) + expect(byBranch.get('team/feature--double')?.archivedAtMs).toBe(newAt.getTime()) + // Every reported path is under .archive/. 
+ for (const entry of entries) { + expect(entry.path.includes('/.archive/')).toBe(true) + } + }, + Effect.provide(NodeContext.layer), + Effect.scoped, + ), + ) + + it.effect( + 'reapArchive removes the directory AND unregisters it from the bare worktree list', + Effect.fnUntraced( + function* () { + const fixture = yield* createStoreFixture([{ ...REPO, branches: ['live/keep'] }]) + const repoRoot = repoRootFor(fixture.storePath, REPO_KEY) + const bareRepoPath = fixture.bareRepoPaths[REPO_KEY]! + const commit = yield* getWorktreeCommit(fixture.worktreePaths[`${REPO_KEY}#live/keep`]!) + const fs = yield* FileSystem.FileSystem + + const { archivePath } = yield* createArchiveEntry({ + bareRepoPath, + repoRoot, + branch: 'feature/reapme', + commit, + archivedAt: new Date('2026-04-01T00:00:00.000Z'), + }) + + // Present before reap. + expect((yield* scanArchives({ repoRoot, bareRepoPath })).length).toBe(1) + expect(yield* fs.exists(archivePath)).toBe(true) + + yield* reapArchive({ bareRepoPath, path: archivePath }) + + // Directory gone AND no longer in git's worktree registry. + expect(yield* fs.exists(archivePath)).toBe(false) + expect((yield* scanArchives({ repoRoot, bareRepoPath })).length).toBe(0) + }, + Effect.provide(NodeContext.layer), + Effect.scoped, + ), + ) + + it.effect( + 'scan + retention: split archives into reap-eligible vs within-retention by archivedAtMs', + Effect.fnUntraced( + function* () { + const fixture = yield* createStoreFixture([{ ...REPO, branches: ['live/keep'] }]) + const repoRoot = repoRootFor(fixture.storePath, REPO_KEY) + const bareRepoPath = fixture.bareRepoPaths[REPO_KEY]! + const commit = yield* getWorktreeCommit(fixture.worktreePaths[`${REPO_KEY}#live/keep`]!) + + const now = Date.parse('2026-06-11T00:00:00.000Z') + const retentionMs = 30 * 24 * 60 * 60 * 1000 + + // One archived 40d ago (past retention) and one 5d ago (within retention). 
+ yield* createArchiveEntry({ + bareRepoPath, + repoRoot, + branch: 'feature/stale', + commit, + archivedAt: new Date(now - 40 * 24 * 60 * 60 * 1000), + }) + yield* createArchiveEntry({ + bareRepoPath, + repoRoot, + branch: 'feature/fresh', + commit, + archivedAt: new Date(now - 5 * 24 * 60 * 60 * 1000), + }) + + const entries = yield* scanArchives({ repoRoot, bareRepoPath }) + const eligible = entries + .filter((entry) => now - entry.archivedAtMs >= retentionMs) + .map((entry) => entry.branch) + expect(eligible).toEqual(['feature/stale']) + }, + Effect.provide(NodeContext.layer), + Effect.scoped, + ), + ) +}) diff --git a/packages/@overeng/megarepo/src/lib/store-archive.ts b/packages/@overeng/megarepo/src/lib/store-archive.ts new file mode 100644 index 000000000..0dfc8c25b --- /dev/null +++ b/packages/@overeng/megarepo/src/lib/store-archive.ts @@ -0,0 +1,274 @@ +/** + * Archive + reap for cold named-branch worktrees (decisions 0004/0007). + * + * `.archive/` is the single recoverable holding area ("trash", decision 0007): + * a cold, stale, lossless worktree is MOVED there (recoverable), and archives + * past a retention TTL are later reaped (hard-deleted) to reclaim disk. + * + * Three operations live here, each a thin Effect over real git/fs so the + * caller (`mr store gc`) can sequence them under `withWorktreeLock` with a fresh + * live-set veto re-check: + * + * 1. {@link archiveWorktree} — `git worktree move` the worktree under + * `/.archive/--`, then FREE the branch so + * `mr apply` can re-materialize it (invariant 4). The directory move + * preserves dirty + untracked work intact and fixes the absolute gitlink, so + * no `git worktree repair` is needed. A metadata line is appended to + * `/.archive/README.md`. + * 2. 
{@link scanArchives} — enumerate archive entries via `Git.listWorktrees` + * (git's own worktree registry already lists them — that is exactly why they + * are excluded from the live set), filtered to paths under `/.archive/`, + * parsing `archivedAtMs` from the strict trailing `--` segment. + * 3. {@link reapArchive} — `git worktree remove --force` then ensure the dir is + * gone. The retention-TTL gate and under-lock veto re-check are the caller's + * responsibility (this is the mechanism, not the policy). + * + * `now` is an explicit epoch-ms parameter threaded from the CLI edge — the + * archive directory name and README timestamp NEVER read the ambient wall clock + * on this persistence path. + */ + +import { CommandExecutor, FileSystem, type Error as PlatformError } from '@effect/platform' +import { Effect, Option } from 'effect' + +import { EffectPath, type AbsoluteDirPath } from '@overeng/effect-path' + +import * as Git from './git.ts' + +/** Relative directory name of the per-repo archive holding area. */ +export const ARCHIVE_DIR_NAME = '.archive' + +/** File the archive metadata log is appended to, relative to `/.archive/`. */ +export const ARCHIVE_README_NAME = 'README.md' + +/** + * One archive entry discovered by {@link scanArchives}. + * + * `archivedAtMs` is parsed from the directory's trailing `--` segment; + * `branch` is everything before it (branch names contain `-`/`--`/`/`). + */ +export interface ArchiveEntry { + /** Absolute path to the archived worktree directory. */ + readonly path: AbsoluteDirPath + /** Branch name recovered from the directory name (segment before `--`). */ + readonly branch: string + /** Epoch-ms parsed from the trailing ISO8601 timestamp segment. */ + readonly archivedAtMs: number +} + +/** + * Strict, anchored parse of an archive entry's path RELATIVE to `.archive/` + * into `{ branch, archivedAtMs }`. 
+ * + * The archive dir name embeds the FULL branch (including any `/`), so the + * `.archive/`-relative path is `<branch>--<ISO8601>` — e.g. `feature/x--<ISO8601>` + * (a nested directory). ISO8601 is exactly `YYYY-MM-DDTHH:mm:ss.sssZ` (the form + * `new Date(now).toISOString()` produces). The branch segment is greedy so the + * LAST `--<ISO8601>` is taken as the timestamp even though branch names + * legitimately contain `-`, `--`, and `/`. A name that does not end in a valid + * ISO8601 instant (or whose timestamp does not round-trip) yields + * `Option.none()` and is skipped rather than mis-reaped. + */ +export const parseArchiveDirName = ( + relativeName: string, +): Option.Option<{ + readonly branch: string + readonly archivedAtMs: number +}> => { + const match = relativeName.match( + /^(?.+)--(?\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\.\d{3}Z)$/u, + ) + const branch = match?.groups?.branch + const ts = match?.groups?.ts + if (branch === undefined || ts === undefined || branch.length === 0) { + return Option.none() + } + + const archivedAtMs = Date.parse(ts) + // Reject non-instants AND values that do not round-trip back to the same ISO + // string (e.g. an out-of-range day that `Date.parse` would normalize). + if (Number.isNaN(archivedAtMs) || new Date(archivedAtMs).toISOString() !== ts) { + return Option.none() + } + + return Option.some({ branch, archivedAtMs }) +} + +const archiveDirPath = (repoRoot: AbsoluteDirPath): AbsoluteDirPath => + EffectPath.ops.join(repoRoot, EffectPath.unsafe.relativeDir(`${ARCHIVE_DIR_NAME}/`)) + +/** + * Archive a cold worktree: move it under `<repoRoot>/.archive/`, free its branch, + * and record metadata. + * + * Order matters and each step is the mechanism for an invariant: + * 1. `mkdir -p <repoRoot>/.archive` FIRST — `git worktree move` requires the + * destination's parent to exist. + * 2.
`git -C worktree move ` — preserves dirty + untracked + * work (it travels with the directory) and rewrites the gitlink to the new + * absolute path, so no `git worktree repair` is needed afterwards. + * 3. FREE the branch via `git -C branch -D ` so `mr apply` can + * re-materialize it; the commit stays reachable through the remote-tracking + * ref (guaranteed by the lossless floor's invariant 2a, checked upstream). + * 4. Append `branch, ISO(now), commit, reason` to `/.archive/README.md`. + * + * Returns the destination path so the caller can surface a recovery hint. Any + * git/fs failure propagates so the caller can report keep+error and leave the + * original worktree intact. + */ +export const archiveWorktree = (args: { + /** The repo root in the store: `////`. */ + readonly repoRoot: AbsoluteDirPath + /** The bare repo path: `/.bare/`. */ + readonly bareRepoPath: AbsoluteDirPath + /** Source worktree directory to archive. */ + readonly worktreePath: AbsoluteDirPath + /** The `refs/heads/*` branch name the worktree materializes. */ + readonly branch: string + /** The worktree HEAD commit, recorded in the metadata log. */ + readonly commit: string + /** Human-readable reason recorded in the metadata log (e.g. `merged`/`closed`). */ + readonly reason: string + /** Epoch-ms decision time; drives the archive dir name + README timestamp. */ + readonly now: number +}): Effect.Effect< + AbsoluteDirPath, + Git.GitCommandError | PlatformError.PlatformError, + FileSystem.FileSystem | CommandExecutor.CommandExecutor +> => + Effect.gen(function* () { + const fs = yield* FileSystem.FileSystem + + const iso = new Date(args.now).toISOString() + const archiveDir = archiveDirPath(args.repoRoot) + const destPath = EffectPath.ops.join( + archiveDir, + EffectPath.unsafe.relativeDir(`${args.branch}--${iso}/`), + ) + + // (1) Destination PARENT must exist before `git worktree move` (the branch + // embeds `/`, so the dest is nested, e.g. `.archive/feature/x--`). 
The + // dest is always under `.archive/`, so its parent is never `undefined`. + const destParent = EffectPath.ops.parent(destPath) ?? archiveDir + yield* fs.makeDirectory(destParent, { recursive: true }) + + // (2) Move the worktree — dirty + untracked work travels intact, gitlink fixed. + yield* Git.moveWorktree({ + repoPath: args.bareRepoPath, + fromPath: args.worktreePath, + toPath: destPath, + }) + + // (3) Free the branch so `mr apply` can re-materialize it (invariant 4). + yield* Git.deleteBranch({ repoPath: args.bareRepoPath, branch: args.branch, force: true }) + + // (4) Append a metadata line to the archive README. + const readmePath = EffectPath.ops.join( + archiveDir, + EffectPath.unsafe.relativeFile(ARCHIVE_README_NAME), + ) + const existing = yield* fs + .readFileString(readmePath) + .pipe(Effect.catchAll(() => Effect.succeed(''))) + const line = `${args.branch}\t${iso}\t${args.commit}\t${args.reason}\n` + yield* fs.writeFileString(readmePath, existing + line) + + return destPath + }).pipe( + Effect.withSpan('megarepo/store/gc/archive-worktree', { + attributes: { 'span.label': args.branch, branch: args.branch, reason: args.reason }, + }), + ) + +/** + * Enumerate the archive entries under `/.archive/`. + * + * Uses `Git.listWorktrees(bare)` — git's worktree registry already enumerates + * archives (that is precisely why they are excluded from the live set today) — + * filtered to paths under `/.archive/`, parsing each entry's + * `archivedAtMs` from its strict trailing `--` segment. Entries whose + * base name does not parse are skipped (never mis-reaped). 
+ */ +export const scanArchives = (args: { + readonly repoRoot: AbsoluteDirPath + readonly bareRepoPath: AbsoluteDirPath +}): Effect.Effect< + ReadonlyArray, + Git.GitCommandError | PlatformError.PlatformError, + CommandExecutor.CommandExecutor +> => + Effect.gen(function* () { + const archiveDir = archiveDirPath(args.repoRoot) + // Normalize to a trailing-slash prefix so a sibling like `.archive-old/` can + // never match by string prefix. + const archivePrefix = archiveDir + + const worktrees = yield* Git.listWorktrees(args.bareRepoPath) + + const entries: Array = [] + for (const worktree of worktrees) { + // git reports worktree paths without a trailing slash; normalize so the + // prefix test cannot match a sibling like `.archive-old/`. + const normalized = EffectPath.unsafe.absoluteDir( + worktree.path.endsWith('/') ? worktree.path : `${worktree.path}/`, + ) + if (normalized.startsWith(archivePrefix) === false) continue + + // Parse the path RELATIVE to `.archive/` (NOT just the base name): the + // branch embeds `/`, so the dir is nested (e.g. `.archive/feature/x--`) + // and the full `feature/x` must be recovered, trailing slash stripped. + const relative = normalized.slice(archivePrefix.length).replace(/\/+$/u, '') + const parsed = parseArchiveDirName(relative) + if (Option.isNone(parsed)) continue + + entries.push({ + path: normalized, + branch: parsed.value.branch, + archivedAtMs: parsed.value.archivedAtMs, + }) + } + + return entries + }).pipe( + Effect.withSpan('megarepo/store/gc/scan-archives', { + attributes: { 'span.label': 'scan-archives', repoRoot: args.repoRoot }, + }), + ) + +/** + * Reap (hard-delete) one archived worktree. + * + * `git worktree remove --force` unregisters the worktree and removes its + * directory; we then ensure the directory is gone (defensive — a move/partial + * state could leave it behind). 
The retention-TTL gate and the under-lock + * live-set veto re-check are the CALLER's responsibility; this is the reclaim + * mechanism only. + */ +export const reapArchive = (args: { + readonly bareRepoPath: AbsoluteDirPath + readonly path: AbsoluteDirPath +}): Effect.Effect< + void, + Git.GitCommandError | PlatformError.PlatformError, + FileSystem.FileSystem | CommandExecutor.CommandExecutor +> => + Effect.gen(function* () { + const fs = yield* FileSystem.FileSystem + + yield* Git.removeWorktree({ + repoPath: args.bareRepoPath, + worktreePath: args.path, + force: true, + }) + + // Ensure the directory is actually gone (idempotent; `remove --force` + // normally deletes it, but a stale/partial state must not survive reap). + const exists = yield* fs.exists(args.path) + if (exists === true) { + yield* fs.remove(args.path, { recursive: true, force: true }) + } + }).pipe( + Effect.withSpan('megarepo/store/gc/reap-archive', { + attributes: { 'span.label': 'reap-archive', path: args.path }, + }), + ) From 8e20546aad4556c8efef325ad617d50ccd32718b Mon Sep 17 00:00:00 2001 From: schickling-assistant <261620128+schickling-assistant@users.noreply.github.com> Date: Thu, 11 Jun 2026 17:07:01 +0200 Subject: [PATCH 09/13] feat(megarepo): cold worktree classification gates (#771) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add classifyColdWorktree (pure) to store-worktree-policy: a tagged keep/archive decision over a fixed gate order — liveness veto, staleness evidence, lossless floor, absence grace, merged-only post-merge grace, then archive (closed has no post-close grace). Explicit now epoch-ms; classifyStoreWorktreePolicy left untouched. Tests: gate-precedence table + near-misses, and FastCheck property invariants (live never archives; open/none keep; unpushed>0 keeps; hasStash keeps). 
Co-Authored-By: Claude Opus 4.8 (1M context) --- .../megarepo/src/lib/store-worktree-policy.ts | 131 +++++++++ .../lib/store-worktree-policy.unit.test.ts | 274 +++++++++++++++++- 2 files changed, 404 insertions(+), 1 deletion(-) diff --git a/packages/@overeng/megarepo/src/lib/store-worktree-policy.ts b/packages/@overeng/megarepo/src/lib/store-worktree-policy.ts index 8bfb7e63a..fe560abb2 100644 --- a/packages/@overeng/megarepo/src/lib/store-worktree-policy.ts +++ b/packages/@overeng/megarepo/src/lib/store-worktree-policy.ts @@ -1,4 +1,6 @@ +import type { StoreGcConfig } from './store-gc-config.ts' import { isPathProtected, type StoreLiveSet } from './store-liveness.ts' +import type { PrStateInfo } from './store-pr-state.ts' /** Store path ref segment for a materialized worktree. */ export type StoreWorktreeRefType = 'heads' | 'tags' | 'commits' @@ -89,3 +91,132 @@ export const classifyStoreWorktreePolicy = ({ return { isProtected: false, message: undefined, reason: undefined } } + +// ============================================================================= +// Cold named-worktree classification (decisions 0001–0010; pure) +// ============================================================================= + +/** + * Lossless-floor inputs for one worktree (decision 0004, invariant 2). + * + * `unpushed` is the count of commits reachable from the worktree HEAD but not on + * any remote (`git rev-list --not --remotes`); `>0` means unrecoverable + * local history. `hasStash` is a non-empty stash for the worktree (stash refs + * live in the bare and do NOT travel with a dir move). `dirty` is uncommitted or + * untracked content — recoverable because it moves intact with the dir, so it + * does NOT itself force keep (only gates the archive reason). + */ +export interface StoreWorktreeLossless { + readonly unpushed: number + readonly dirty: boolean + readonly hasStash: boolean +} + +/** Why a cold named worktree is kept (no destructive action). 
*/ +export type ColdWorktreeKeepReason = + | 'live' + | 'not-stale' + | 'unrecoverable-local-work' + | 'absence-grace' + | 'post-merge-grace' + | 'defensive' + +/** Why a cold named worktree is eligible to archive. */ +export type ColdWorktreeArchiveReason = 'merged' | 'closed' + +/** + * Cold-classification outcome for ONE named worktree (decision 0001). + * + * Tagged union: `keep` is non-destructive; `archive` makes the worktree eligible + * for the under-lock archive step (U6). Reap is a separate retention decision + * (U6), not produced here. + */ +export type ColdWorktreeDecision = + | { readonly _tag: 'keep'; readonly reason: ColdWorktreeKeepReason } + | { readonly _tag: 'archive'; readonly reason: ColdWorktreeArchiveReason } + +const keep = (reason: ColdWorktreeKeepReason): ColdWorktreeDecision => ({ _tag: 'keep', reason }) + +const archive = (reason: ColdWorktreeArchiveReason): ColdWorktreeDecision => ({ + _tag: 'archive', + reason, +}) + +/** + * Classify a cold named worktree as keep-vs-archive (pure; decisions 0001–0010). + * + * Gates are evaluated in this exact order and each short-circuits (a later gate + * only sees inputs all earlier gates allowed through). This ordering encodes the + * safety lattice: liveness veto first (invariant 1), then staleness evidence, + * then the lossless floor (invariant 2), then the two grace timers, and only + * then a positive archive decision. + * + * 1. In the reconciled live set ⇒ keep `live` (invariant 1; never archived). + * 2. PR state `open` or `none` ⇒ keep `not-stale` (no staleness signal; 0005). + * 3. `unpushed > 0` OR `hasStash` ⇒ keep `unrecoverable-local-work` (lossless + * floor, invariant 2). `dirty` alone does NOT keep — it moves with the dir. + * 4. Never observed cold, or absence-grace not yet elapsed ⇒ keep `absence-grace` + * (decision 0008). `coldSinceMs === undefined` is conservative re-arm. + * 5. `merged` requires `mergedAt`; missing ⇒ keep `defensive`. 
Within the + * post-merge grace window ⇒ keep `post-merge-grace` (decisions 0005/0008). + * 6. Otherwise archive: `merged` or `closed`. CLOSED has NO post-close grace + * (decision 0009) — the lossless floor already protects unreachable closed + * branches. + * + * `now` is an explicit epoch-ms decision clock (never the ambient wall clock). + */ +export const classifyColdWorktree = ({ + worktree, + liveSet, + prState, + lossless, + coldSinceMs, + config, + now, +}: { + readonly worktree: StoreWorktreePolicyTarget + readonly liveSet: StoreLiveSet + readonly prState: PrStateInfo + readonly lossless: StoreWorktreeLossless + readonly coldSinceMs: number | undefined + readonly config: StoreGcConfig + readonly now: number +}): ColdWorktreeDecision => { + // Gate 1: liveness veto (invariant 1) — a worktree in ANY reconciled live set + // is never archived, regardless of every other signal. + if (isPathProtected({ liveSet, path: worktree.path }) === true) { + return keep('live') + } + + // Gate 2: staleness evidence (decision 0005). Only merged/closed are signals; + // open work and "no PR at all" are kept. + if (prState.state === 'open' || prState.state === 'none') { + return keep('not-stale') + } + + // Gate 3: lossless floor (invariant 2). Unpushed history or a stash cannot + // survive a dir move, so either forces keep. Dirt is recoverable (moves with + // the dir) and intentionally does not gate here. + if (lossless.unpushed > 0 || lossless.hasStash === true) { + return keep('unrecoverable-local-work') + } + + // Gate 4: absence grace (decision 0008). Unobserved-cold re-arms the timer. + if (coldSinceMs === undefined || now - coldSinceMs < config.absenceGraceMs) { + return keep('absence-grace') + } + + // Gate 5: merged-only post-merge grace (decisions 0005/0008). Missing mergedAt + // is treated defensively (cannot prove the window elapsed). 
+ if (prState.state === 'merged') { + if (prState.mergedAt === undefined) { + return keep('defensive') + } + if (now - prState.mergedAt < config.postMergeGraceMs) { + return keep('post-merge-grace') + } + } + + // Gate 6: archive. CLOSED has no post-close grace (decision 0009). + return prState.state === 'merged' ? archive('merged') : archive('closed') +} diff --git a/packages/@overeng/megarepo/src/lib/store-worktree-policy.unit.test.ts b/packages/@overeng/megarepo/src/lib/store-worktree-policy.unit.test.ts index ea2b421a1..d3ddf81f1 100644 --- a/packages/@overeng/megarepo/src/lib/store-worktree-policy.unit.test.ts +++ b/packages/@overeng/megarepo/src/lib/store-worktree-policy.unit.test.ts @@ -1,8 +1,18 @@ import { describe, it } from '@effect/vitest' +import * as fc from 'effect/FastCheck' import { expect } from 'vitest' +import type { StoreGcConfig } from './store-gc-config.ts' import type { StoreLiveSet } from './store-liveness.ts' -import { classifyStoreWorktreePolicy, isNamedRefWorktree } from './store-worktree-policy.ts' +import type { PrStateInfo } from './store-pr-state.ts' +import { + classifyColdWorktree, + classifyStoreWorktreePolicy, + isNamedRefWorktree, + type ColdWorktreeDecision, + type StoreWorktreeLossless, + type StoreWorktreePolicyTarget, +} from './store-worktree-policy.ts' const liveSet = (paths: ReadonlyArray): StoreLiveSet => ({ paths: new Set(paths), @@ -91,3 +101,265 @@ describe('store-worktree-policy', () => { expect(isNamedRefWorktree({ refType: 'commits', path: '/commits/abc' })).toBe(false) }) }) + +// ============================================================================= +// classifyColdWorktree +// ============================================================================= + +const DAY_MS = 24 * 60 * 60 * 1000 +const NOW = 1_000_000 * DAY_MS + +const COLD_PATH = '/store/repo/refs/heads/feature' + +const config: StoreGcConfig = { + absenceGraceMs: 14 * DAY_MS, + postMergeGraceMs: 7 * DAY_MS, + archiveRetentionMs: 30 * DAY_MS, 
+} + +const target: StoreWorktreePolicyTarget = { refType: 'heads', path: COLD_PATH } + +const recoverable: StoreWorktreeLossless = { unpushed: 0, dirty: false, hasStash: false } + +/** Cold long enough that absence grace is satisfied (gate 4 passes). */ +const coldLongAgo = NOW - 30 * DAY_MS + +/** Merged far enough in the past that post-merge grace is satisfied (gate 5 passes). */ +const mergedLongAgo: PrStateInfo = { state: 'merged', mergedAt: NOW - 30 * DAY_MS } + +const classify = (overrides: { + worktree?: StoreWorktreePolicyTarget + liveSet?: StoreLiveSet + prState: PrStateInfo + lossless?: StoreWorktreeLossless + coldSinceMs?: number | undefined + now?: number +}): ColdWorktreeDecision => + classifyColdWorktree({ + worktree: overrides.worktree ?? target, + liveSet: overrides.liveSet ?? liveSet([]), + prState: overrides.prState, + lossless: overrides.lossless ?? recoverable, + coldSinceMs: 'coldSinceMs' in overrides ? overrides.coldSinceMs : coldLongAgo, + config, + now: overrides.now ?? NOW, + }) + +describe('classifyColdWorktree gate precedence', () => { + /** + * Each row is set up so the named gate is the FIRST that fires: all earlier + * gates pass, proving the gate's short-circuit precedence over later signals. + */ + it('gate 1: in live set ⇒ keep(live) even when merged+reachable+grace-met', () => { + // Later gates would archive, but liveness vetoes first. 
+ expect( + classify({ liveSet: liveSet([COLD_PATH]), prState: mergedLongAgo }), + ).toEqual({ _tag: 'keep', reason: 'live' }) + }) + + it('gate 2: prState open ⇒ keep(not-stale)', () => { + expect(classify({ prState: { state: 'open' } })).toEqual({ + _tag: 'keep', + reason: 'not-stale', + }) + }) + + it('gate 2: prState none ⇒ keep(not-stale) even when long cold', () => { + expect(classify({ prState: { state: 'none' } })).toEqual({ + _tag: 'keep', + reason: 'not-stale', + }) + }) + + it('gate 3: unpushed>0 ⇒ keep(unrecoverable-local-work) over merged+grace-met', () => { + expect( + classify({ + prState: mergedLongAgo, + lossless: { unpushed: 1, dirty: false, hasStash: false }, + }), + ).toEqual({ _tag: 'keep', reason: 'unrecoverable-local-work' }) + }) + + it('gate 3: hasStash ⇒ keep(unrecoverable-local-work) over merged+grace-met', () => { + expect( + classify({ prState: mergedLongAgo, lossless: { unpushed: 0, dirty: false, hasStash: true } }), + ).toEqual({ _tag: 'keep', reason: 'unrecoverable-local-work' }) + }) + + it('gate 4: coldSince undefined ⇒ keep(absence-grace) (re-arm)', () => { + expect( + classify({ prState: mergedLongAgo, coldSinceMs: undefined }), + ).toEqual({ _tag: 'keep', reason: 'absence-grace' }) + }) + + it('gate 4: absence grace not yet elapsed ⇒ keep(absence-grace)', () => { + expect( + classify({ prState: mergedLongAgo, coldSinceMs: NOW - (14 * DAY_MS - 1) }), + ).toEqual({ _tag: 'keep', reason: 'absence-grace' }) + }) + + it('gate 5: merged missing mergedAt ⇒ keep(defensive)', () => { + expect(classify({ prState: { state: 'merged' } })).toEqual({ + _tag: 'keep', + reason: 'defensive', + }) + }) + + it('gate 5: merged within post-merge grace ⇒ keep(post-merge-grace)', () => { + expect( + classify({ prState: { state: 'merged', mergedAt: NOW - (7 * DAY_MS - 1) } }), + ).toEqual({ _tag: 'keep', reason: 'post-merge-grace' }) + }) + + it('gate 6: merged + all gates passed ⇒ archive(merged)', () => { + expect(classify({ prState: mergedLongAgo 
})).toEqual({ + _tag: 'archive', + reason: 'merged', + }) + }) + + it('gate 6: closed has no post-close grace ⇒ archive(closed)', () => { + // Closed just now (no grace), absence grace met, recoverable ⇒ archive. + expect(classify({ prState: { state: 'closed', closedAt: NOW } })).toEqual( + { _tag: 'archive', reason: 'closed' }, + ) + }) +}) + +describe('classifyColdWorktree near-misses', () => { + it('merged + grace met + not reachable (unpushed>0) ⇒ keep', () => { + expect( + classify({ + prState: mergedLongAgo, + lossless: { unpushed: 3, dirty: false, hasStash: false }, + }), + ).toEqual({ _tag: 'keep', reason: 'unrecoverable-local-work' }) + }) + + it('merged + grace met + reachable + dirty ⇒ archive (dirt moves with the dir)', () => { + expect( + classify({ prState: mergedLongAgo, lossless: { unpushed: 0, dirty: true, hasStash: false } }), + ).toEqual({ _tag: 'archive', reason: 'merged' }) + }) + + it('closed + reachable + absence grace met ⇒ archive(closed)', () => { + expect( + classify({ prState: { state: 'closed', closedAt: NOW - 5 * DAY_MS } }), + ).toEqual({ _tag: 'archive', reason: 'closed' }) + }) + + it('absence grace met but post-merge grace unmet ⇒ keep(post-merge-grace)', () => { + expect( + classify({ + prState: { state: 'merged', mergedAt: NOW - 1 * DAY_MS }, + coldSinceMs: NOW - 20 * DAY_MS, + }), + ).toEqual({ _tag: 'keep', reason: 'post-merge-grace' }) + }) + + it('absence grace exactly at boundary (now-coldSince === absenceGrace) ⇒ proceeds', () => { + // Strict `<` means equality is NOT within grace; gate 4 passes, gate 6 archives. 
+ expect( + classify({ prState: mergedLongAgo, coldSinceMs: NOW - 14 * DAY_MS }), + ).toEqual({ _tag: 'archive', reason: 'merged' }) + }) +}) + +// ============================================================================= +// Property-based invariants (decisions 0001–0009, invariants 1–3) +// ============================================================================= + +const arbPrState: fc.Arbitrary = fc.oneof( + fc.constant({ state: 'open' }), + fc.constant({ state: 'none' }), + fc + .option(fc.integer({ min: 0, max: NOW }), { nil: undefined }) + .map((mergedAt): PrStateInfo => ({ state: 'merged', mergedAt })), + fc + .option(fc.integer({ min: 0, max: NOW }), { nil: undefined }) + .map((closedAt): PrStateInfo => ({ state: 'closed', closedAt })), +) + +const arbLossless: fc.Arbitrary = fc.record({ + unpushed: fc.integer({ min: 0, max: 50 }), + dirty: fc.boolean(), + hasStash: fc.boolean(), +}) + +const arbColdSince: fc.Arbitrary = fc.option(fc.integer({ min: 0, max: NOW }), { + nil: undefined, +}) + +describe('classifyColdWorktree invariants (property)', () => { + it.prop( + 'a worktree in the live set is NEVER archived (invariant 1)', + [arbPrState, arbLossless, arbColdSince], + ([prState, lossless, coldSinceMs]) => { + const decision = classifyColdWorktree({ + worktree: target, + liveSet: liveSet([COLD_PATH]), + prState, + lossless, + coldSinceMs, + config, + now: NOW, + }) + expect(decision).toEqual({ _tag: 'keep', reason: 'live' }) + }, + { fastCheck: { numRuns: 200 } }, + ) + + it.prop( + 'open or no-PR worktrees are always kept (decision 0005)', + [fc.constantFrom({ state: 'open' }, { state: 'none' }), arbLossless, arbColdSince], + ([prState, lossless, coldSinceMs]) => { + const decision = classifyColdWorktree({ + worktree: target, + liveSet: liveSet([]), + prState, + lossless, + coldSinceMs, + config, + now: NOW, + }) + expect(decision._tag).toBe('keep') + }, + { fastCheck: { numRuns: 200 } }, + ) + + it.prop( + 'unpushed>0 always keeps (lossless 
floor, invariant 2)', + [arbPrState, fc.integer({ min: 1, max: 50 }), fc.boolean(), fc.boolean(), arbColdSince], + ([prState, unpushed, dirty, hasStash, coldSinceMs]) => { + const decision = classifyColdWorktree({ + worktree: target, + liveSet: liveSet([]), + prState, + lossless: { unpushed, dirty, hasStash }, + coldSinceMs, + config, + now: NOW, + }) + expect(decision._tag).toBe('keep') + }, + { fastCheck: { numRuns: 200 } }, + ) + + it.prop( + 'hasStash always keeps (lossless floor, invariant 2)', + [arbPrState, fc.integer({ min: 0, max: 50 }), fc.boolean(), arbColdSince], + ([prState, unpushed, dirty, coldSinceMs]) => { + const decision = classifyColdWorktree({ + worktree: target, + liveSet: liveSet([]), + prState, + lossless: { unpushed, dirty, hasStash: true }, + coldSinceMs, + config, + now: NOW, + }) + expect(decision._tag).toBe('keep') + }, + { fastCheck: { numRuns: 200 } }, + ) +}) From 27104276b80e0e3b70316dae33eed637e69fe5c2 Mon Sep 17 00:00:00 2001 From: schickling-assistant <261620128+schickling-assistant@users.noreply.github.com> Date: Thu, 11 Jun 2026 17:33:11 +0200 Subject: [PATCH 10/13] feat(megarepo): wire cold named-branch reclamation into store gc (#771) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Integrate the cold reclamation path (U7/U7b, decisions 0001–0010) into the default `mr store gc` flow. Additive third path; `--all` semantics unchanged. 
- reconcile-all the live set ONCE and thread it; re-check the liveness veto under `withWorktreeLock` against a fresh reconcile immediately before BOTH archive and reap (invariant 1)
- per repo: fetchBare first (failure => keep all its named worktrees); resolve PR state adjacent to classification; record cold observations once (store-global ledger, under lock); classify each named worktree; ref_mismatch (HEAD on a different branch) => keep
- archive decisions: archive => verify => free branch; failure => keep + error
- scan archives and reap entries past the retention TTL, each under lock + veto
- PrStateResolver is injected when present (stub layer in tests), else the live gh-shelling layer is built in the command

Extend StoreGcResult.status with archived/reaped/kept plus reason and an optional recoverPath (intentional, documented JSON contract change) across the schema, view renderer, fixtures, and stories.

Add an end-to-end integration suite covering the full matrix: registered vs unregistered, repin-without-reregister, present-but-unreadable workspace, merged+clean=>archived (branch freed, re-add works), merged+dirty, merged+stash, merged+unpushed, open, squash+remote-deleted, grace cases, retention reap, veto-at-reap, dry-run, and --all unchanged.
Co-Authored-By: Claude Opus 4.8 (1M context) --- .../megarepo/src/cli/commands/store/mod.ts | 509 +++++++++- .../src/cli/renderers/StoreOutput/mod.ts | 2 + .../src/cli/renderers/StoreOutput/schema.ts | 32 +- .../StoreOutput/stories/GC.stories.tsx | 33 + .../StoreOutput/stories/_fixtures.ts | 51 + .../src/cli/renderers/StoreOutput/view.tsx | 111 ++- .../src/cli/store-gc-cold.integration.test.ts | 916 ++++++++++++++++++ .../src/cli/store.integration.test.ts | 6 +- 8 files changed, 1631 insertions(+), 29 deletions(-) create mode 100644 packages/@overeng/megarepo/src/cli/store-gc-cold.integration.test.ts diff --git a/packages/@overeng/megarepo/src/cli/commands/store/mod.ts b/packages/@overeng/megarepo/src/cli/commands/store/mod.ts index c8f23dfc5..34a6084fb 100644 --- a/packages/@overeng/megarepo/src/cli/commands/store/mod.ts +++ b/packages/@overeng/megarepo/src/cli/commands/store/mod.ts @@ -21,9 +21,31 @@ import { import * as Git from '../../../lib/git.ts' import { LOCK_FILE_NAME, readLockFile } from '../../../lib/lock.ts' import { classifyRef } from '../../../lib/ref.ts' +import { archiveWorktree, reapArchive, scanArchives } from '../../../lib/store-archive.ts' +import { loadStoreGcConfig, type StoreGcConfig } from '../../../lib/store-gc-config.ts' +import { + coldSinceMs as coldSinceMsFor, + recordObservations, +} from '../../../lib/store-gc-observations.ts' import { validateStoreMembers, fixStoreIssues } from '../../../lib/store-hygiene.ts' -import { collectStoreLiveSet, type StoreLiveSet } from '../../../lib/store-liveness.ts' +import { + collectStoreLiveSet, + isPathProtected, + type StoreLiveSet, +} from '../../../lib/store-liveness.ts' import { StoreLock } from '../../../lib/store-lock.ts' +import { assessLossless } from '../../../lib/store-lossless.ts' +import { + makePrStateResolverLayer, + PrStateResolver, + type PrStateInfo, + type PrStateResolverService, +} from '../../../lib/store-pr-state.ts' +import { + classifyColdWorktree, + isNamedRefWorktree, + type 
ColdWorktreeDecision, +} from '../../../lib/store-worktree-policy.ts' import { classifyStoreWorktreePolicy } from '../../../lib/store-worktree-policy.ts' import { Store, StoreLayer } from '../../../lib/store.ts' import { getCloneUrl } from '../../../lib/sync/mod.ts' @@ -392,6 +414,335 @@ const classifyGcWorktree = ({ }), ) +const normalizeStorePath = (path: string): string => path.replace(/\/+$/, '') + +/** A named (`refs/heads/*`) worktree paired with the repo it belongs to. */ +interface NamedWorktreeTarget { + readonly repoRelativePath: string + readonly repoFullPath: AbsoluteDirPath + readonly bareRepoPath: AbsoluteDirPath + readonly worktree: CollectedWorktree +} + +/** + * Build a `StoreGcResult` for a cold-path outcome. + * + * `reason` is the stable classification tag (live/not-stale/merged/...); + * `message` carries free-form detail; `recoverPath` is the `.archive/` location + * for an archived worktree. + */ +const coldResult = ({ + target, + status, + reason, + message, + recoverPath, +}: { + target: NamedWorktreeTarget + status: StoreGcResult['status'] + reason?: string | undefined + message?: string | undefined + recoverPath?: string | undefined +}): StoreGcResult => ({ + repo: target.repoRelativePath, + ref: target.worktree.ref, + refType: target.worktree.refType, + path: target.worktree.path, + status, + ...(reason !== undefined ? { reason } : {}), + ...(message !== undefined ? { message } : {}), + ...(recoverPath !== undefined ? { recoverPath } : {}), +}) + +/** + * Re-derive a fresh live set under lock for the veto re-check (invariant 1). + * + * Reconciles every present workspace again so a worktree that became live + * between the initial collect and this destructive step is never archived/reaped. + * Read-only with respect to the on-disk records here? No — reconcile rewrites + * records, so it is serialized by the caller's `withWorktreeLock`. 
+ */ +const reReconcileLiveSet = ({ + store, + root, + now, +}: { + store: Effect.Effect.Success + root: Option.Option + now: number +}) => + collectStoreLiveSet({ + store, + ...(Option.isSome(root) === true ? { currentWorkspaceRoot: root.value } : {}), + refreshCurrentWorkspace: false, + pruneStaleRegistry: false, + reconcileAllWorkspaces: true, + now, + }) + +/** + * Cold reclamation for ONE repo's named worktrees (decisions 0001–0010). + * + * Fetch the bare first (failure ⇒ keep ALL this repo's named worktrees — the + * reachability signal would be stale). Then per named worktree: enforce the + * actual-HEAD-branch gate (`ref_mismatch` ⇒ keep), resolve PR state adjacent to + * classification, assess the lossless floor, and classify. An `archive` decision + * runs under `withWorktreeLock` with a FRESH live-set veto re-check immediately + * before `archiveWorktree` (archive → verify → free-branch is the helper's job); + * any failure leaves the original intact and reports keep+error. Finally scan + * `.archive/` and reap entries past the retention TTL, each under lock + veto. + * + * `now` is the explicit epoch-ms decision clock; `coldSince` is read from the + * pre-recorded observation ledger so grace windows are consistent across repos. + */ +const coldReclaimRepo = ({ + store, + storeLock, + prResolver, + root, + repoRelativePath, + repoFullPath, + bareRepoPath, + namedWorktrees, + liveSet, + ledger, + config, + now, + dryRun, +}: { + store: Effect.Effect.Success + storeLock: Effect.Effect.Success + prResolver: PrStateResolverService + root: Option.Option + repoRelativePath: string + repoFullPath: AbsoluteDirPath + bareRepoPath: AbsoluteDirPath + namedWorktrees: ReadonlyArray + liveSet: StoreLiveSet + ledger: Record + config: StoreGcConfig + now: number + dryRun: boolean +}) => + Effect.gen(function* () { + const results: StoreGcResult[] = [] + + // Fetch --prune so `refs/remotes/*` is fresh (the reachability + PR-prune + // signal). 
A repo whose fetch fails keeps ALL its named worktrees — the + // conservative direction (every commit would read as unpushed). + const fetchResult = yield* Git.fetchBare({ repoPath: bareRepoPath }).pipe(Effect.either) + if (fetchResult._tag === 'Left') { + const message = + fetchResult.left instanceof Error === true + ? fetchResult.left.message + : String(fetchResult.left) + for (const target of namedWorktrees) { + results.push(coldResult({ target, status: 'kept', reason: 'fetch-failed', message })) + } + return results + } + + for (const target of namedWorktrees) { + const { worktree } = target + // Only `refs/heads/*` carries a branch identity to reclaim; tags have no + // PR/branch to free, so they are always kept by the cold path. + if (worktree.refType !== 'heads') { + results.push(coldResult({ target, status: 'kept', reason: 'named-tag-ref' })) + continue + } + + // ref_mismatch gate: the store path claims `` but the worktree HEAD is + // on a different branch. Archiving frees `refs/heads/`, which is NOT + // the branch actually checked out — keep and surface the divergence. 
+ const headBranch = yield* Git.getCurrentBranch(worktree.path).pipe( + Effect.catchAll(() => Effect.succeed(Option.none())), + ) + if (Option.isSome(headBranch) === true && headBranch.value !== worktree.ref) { + results.push( + coldResult({ + target, + status: 'kept', + reason: 'ref_mismatch', + message: `HEAD is '${headBranch.value}'`, + }), + ) + continue + } + + const prState: PrStateInfo = yield* prResolver.resolve({ + relativePath: EffectPath.unsafe.relativeDir(target.repoRelativePath), + branch: worktree.ref, + }) + + const head = yield* Git.getCurrentCommit(worktree.path).pipe( + Effect.map(Option.some), + Effect.catchAll(() => Effect.succeed(Option.none())), + ) + if (Option.isNone(head) === true) { + results.push(coldResult({ target, status: 'kept', reason: 'unreadable-head' })) + continue + } + const worktreeHead = head.value + + const lossless = yield* assessLossless({ + bareRepoPath, + worktreePath: worktree.path, + worktreeHead, + }).pipe( + Effect.map(Option.some), + // A failed lossless probe (e.g. unresolvable head) degrades to keep. + Effect.catchAll(() => Effect.succeed(Option.none())), + ) + if (Option.isNone(lossless) === true) { + results.push(coldResult({ target, status: 'kept', reason: 'unrecoverable-local-work' })) + continue + } + + const decision: ColdWorktreeDecision = classifyColdWorktree({ + worktree: { refType: 'heads', path: worktree.path }, + liveSet, + prState, + lossless: lossless.value, + coldSinceMs: coldSinceMsFor({ ledger, path: worktree.path }), + config, + now, + }) + + if (decision._tag === 'keep') { + results.push(coldResult({ target, status: 'kept', reason: decision.reason })) + continue + } + + // Archive decision: serialize under the worktree lock and re-check the live + // veto against a FRESH reconcile immediately before moving (invariant 1). 
+ if (dryRun === true) { + results.push(coldResult({ target, status: 'archived', reason: decision.reason })) + continue + } + + const archiveOutcome = yield* storeLock + .withWorktreeLock(worktree.path)( + Effect.gen(function* () { + const freshLiveSet = yield* reReconcileLiveSet({ store, root, now }) + if (isPathProtected({ liveSet: freshLiveSet, path: worktree.path }) === true) { + return { _tag: 'kept-live' as const } + } + const dest = yield* archiveWorktree({ + repoRoot: repoFullPath, + bareRepoPath, + worktreePath: worktree.path, + branch: worktree.ref, + commit: worktreeHead, + reason: decision.reason, + now, + }) + return { _tag: 'archived' as const, recoverPath: dest } + }), + ) + .pipe( + Effect.catchAll((error) => + Effect.succeed({ + _tag: 'error' as const, + message: error instanceof Error === true ? error.message : String(error), + }), + ), + ) + + if (archiveOutcome._tag === 'kept-live') { + results.push(coldResult({ target, status: 'kept', reason: 'live' })) + } else if (archiveOutcome._tag === 'error') { + // Archive failed mid-flight: the original worktree is left intact. + results.push( + coldResult({ + target, + status: 'error', + reason: decision.reason, + message: archiveOutcome.message, + }), + ) + } else { + results.push( + coldResult({ + target, + status: 'archived', + reason: decision.reason, + recoverPath: archiveOutcome.recoverPath, + }), + ) + } + } + + // Reap archives past the retention TTL, each under lock + a fresh veto. 
+ const archives = yield* scanArchives({ repoRoot: repoFullPath, bareRepoPath }).pipe( + Effect.catchAll(() => Effect.succeed([] as never[])), + ) + for (const entry of archives) { + if (now - entry.archivedAtMs < config.archiveRetentionMs) continue + + const reapTarget: NamedWorktreeTarget = { + repoRelativePath, + repoFullPath, + bareRepoPath, + worktree: { + ref: entry.branch, + refType: 'heads', + path: entry.path, + broken: false, + }, + } + + if (dryRun === true) { + results.push(coldResult({ target: reapTarget, status: 'reaped', reason: 'retention' })) + continue + } + + const reapOutcome = yield* storeLock + .withWorktreeLock(entry.path)( + Effect.gen(function* () { + const freshLiveSet = yield* reReconcileLiveSet({ store, root, now }) + if (isPathProtected({ liveSet: freshLiveSet, path: entry.path }) === true) { + return { _tag: 'kept-live' as const } + } + yield* reapArchive({ bareRepoPath, path: entry.path }) + return { _tag: 'reaped' as const } + }), + ) + .pipe( + Effect.catchAll((error) => + Effect.succeed({ + _tag: 'error' as const, + message: error instanceof Error === true ? 
error.message : String(error), + }), + ), + ) + + if (reapOutcome._tag === 'kept-live') { + results.push(coldResult({ target: reapTarget, status: 'kept', reason: 'live' })) + } else if (reapOutcome._tag === 'error') { + results.push( + coldResult({ + target: reapTarget, + status: 'error', + reason: 'retention', + message: reapOutcome.message, + }), + ) + } else { + results.push(coldResult({ target: reapTarget, status: 'reaped', reason: 'retention' })) + } + } + + return results + }).pipe( + Effect.withSpan('megarepo/store/gc/cold-reclaim-repo', { + attributes: { + 'span.label': repoRelativePath, + 'store.repo': repoRelativePath, + 'store.bare_repo.path': bareRepoPath, + }, + }), + ) + /** List repos in the store */ const storeLsCommand = Cli.Command.make('ls', { output: outputOption }, ({ output }) => Effect.gen(function* () { @@ -849,17 +1200,25 @@ const storeGcCommand = Cli.Command.make( ) } + // Single decision clock for the whole run — every grace/retention/ + // persistence path reads THIS value, never the ambient wall clock again. + const now = yield* Clock.currentTimeMillis + statusMessage = 'collecting liveness registry' if (progressive === true) { yield* dispatchGc({ done: false, forceDispatch: true }) } root = yield* findMegarepoRoot(cwd) + // Default cold path reconciles EVERY present workspace once (decision + // 0010) so a repin that ran no refreshing command is still caught; the + // result is threaded everywhere. `--all` keeps its lighter collect. const liveSet = yield* collectStoreLiveSet({ store, ...(Option.isSome(root) === true ? { currentWorkspaceRoot: root.value } : {}), pruneStaleRegistry: dryRun === false, refreshCurrentWorkspace: dryRun === false, - now: yield* Clock.currentTimeMillis, + ...(all === false ? 
{ reconcileAllWorkspaces: true } : {}), + now, }) liveSetForMetrics = liveSet @@ -881,20 +1240,144 @@ const storeGcCommand = Cli.Command.make( yield* dispatchGc({ done: false, forceDispatch: true }) } - yield* Stream.fromIterable(repos).pipe( + // Per-repo collected worktrees, computed once so the default cold path + // can record observations globally (ledger replaces, not merges) before + // any per-repo classification. + const repoWorktrees = yield* Effect.all( + repos.map((repo) => + Effect.gen(function* () { + const bareRepoPath = EffectPath.ops.join( + repo.fullPath, + EffectPath.unsafe.relativeDir('.bare/'), + ) + const worktrees = yield* collectRepoStoreWorktrees({ + fs, + repoPath: repo.fullPath, + bareRepoPath, + }) + return { repo, bareRepoPath, worktrees } + }), + ), + { concurrency: GC_REPO_CONCURRENCY }, + ) + + // Default cold reclamation path (decisions 0001–0010): additive third + // path. Named (`refs/heads/*`/`refs/tags/*`) worktrees are owned here; + // `--all` removes everything via the legacy stream and skips this. + if (all === false) { + const namedTargets: Array = [] + for (const { repo, bareRepoPath, worktrees } of repoWorktrees) { + for (const worktree of worktrees) { + if (worktree.broken === true) continue + if (isNamedRefWorktree(worktree) === false) continue + namedTargets.push({ + repoRelativePath: repo.relativePath, + repoFullPath: repo.fullPath, + bareRepoPath, + worktree, + }) + } + } + + // Cold = a named worktree absent from the reconciled live set. Record + // the FULL cold set ONCE (the ledger is store-global; a per-repo write + // would launder other repos' grace). Unclean-reconcile paths re-arm. + const coldPaths = namedTargets + .filter( + (target) => isPathProtected({ liveSet, path: target.worktree.path }) === false, + ) + .map((target) => normalizeStorePath(target.worktree.path)) + // The ledger read-modify-write is store-global; serialize it under a + // stable store-keyed lock so concurrent gc runs don't clobber it. 
+ const ledger = yield* storeLock.withWorktreeLock( + `${store.basePath}.state/gc-observations`, + )( + recordObservations({ + storeBasePath: store.basePath, + coldPaths, + uncleanReconcilePaths: [...liveSet.uncleanReconcilePaths], + now, + }), + ) + + const config = yield* loadStoreGcConfig({ storeBasePath: store.basePath }) + + // Use an injected `PrStateResolver` when present (tests provide a stub + // layer); otherwise build the live `gh`-shelling resolver here so the + // default `mr store gc` path needs no extra wiring at the CLI edge. + const injectedResolver = yield* Effect.serviceOption(PrStateResolver) + const prResolver = + Option.isSome(injectedResolver) === true + ? injectedResolver.value + : yield* PrStateResolver.pipe(Effect.provide(makePrStateResolverLayer())) + + statusMessage = 'reclaiming cold worktrees' + if (progressive === true) { + yield* dispatchGc({ done: false, forceDispatch: true }) + } + + // Group named targets by repo, then reclaim per repo (concurrency 1 so + // a global PR snapshot can never go stale — resolve adjacent instead). 
+ yield* Stream.fromIterable(repoWorktrees).pipe( + Stream.mapEffect( + ({ repo, bareRepoPath }) => + Effect.gen(function* () { + const repoNamed = namedTargets.filter( + (target) => target.repoRelativePath === repo.relativePath, + ) + if (repoNamed.length === 0) return + discoveredWorktreeCount += repoNamed.length + activeWorktreeCount += repoNamed.length + if (progressive === true) { + yield* dispatchGc({ done: false, forceDispatch: true }) + } + const repoResults = yield* coldReclaimRepo({ + store, + storeLock, + prResolver, + root, + repoRelativePath: repo.relativePath, + repoFullPath: repo.fullPath, + bareRepoPath, + namedWorktrees: repoNamed, + liveSet, + ledger, + config, + now, + dryRun, + }).pipe( + Effect.ensuring( + Effect.sync(() => { + activeWorktreeCount -= repoNamed.length + }), + ), + ) + for (const result of repoResults) results.push(result) + if (progressive === true) { + yield* dispatchGc({ done: false, forceDispatch: true }) + } + }), + { concurrency: GC_REPO_CONCURRENCY, unordered: true }, + ), + Stream.runDrain, + ) + } + + yield* Stream.fromIterable(repoWorktrees).pipe( Stream.mapEffect( - (repo) => + ({ repo, bareRepoPath, worktrees: allWorktrees }) => Effect.gen(function* () { let removedForRepo = 0 - const bareRepoPath = EffectPath.ops.join( - repo.fullPath, - EffectPath.unsafe.relativeDir('.bare/'), - ) - const worktrees = yield* collectRepoStoreWorktrees({ - fs, - repoPath: repo.fullPath, - bareRepoPath, - }) + // Default mode owns named worktrees in the cold path above; the + // legacy stream only handles commit worktrees (and everything in + // `--all` mode). + const worktrees = + all === false + ? 
allWorktrees.filter( + (worktree) => + worktree.broken === true || isNamedRefWorktree(worktree) === false, + ) + : allWorktrees yield* Stream.fromIterable(worktrees).pipe( Stream.mapEffect( diff --git a/packages/@overeng/megarepo/src/cli/renderers/StoreOutput/mod.ts b/packages/@overeng/megarepo/src/cli/renderers/StoreOutput/mod.ts index 6f76c49f7..084ac2e5c 100644 --- a/packages/@overeng/megarepo/src/cli/renderers/StoreOutput/mod.ts +++ b/packages/@overeng/megarepo/src/cli/renderers/StoreOutput/mod.ts @@ -20,6 +20,7 @@ export { StoreRepo, StoreFetchResult, StoreGcResult, + StoreGcResultStatus, StoreWorktreeIssue, StoreWorktreeStatus, StoreGcWarning, @@ -39,6 +40,7 @@ export type { StoreRepo as StoreRepoType, StoreFetchResult as StoreFetchResultType, StoreGcResult as StoreGcResultType, + StoreGcResultStatus as StoreGcResultStatusType, StoreWorktreeIssue as StoreWorktreeIssueType, StoreWorktreeStatus as StoreWorktreeStatusType, StoreGcWarning as StoreGcWarningType, diff --git a/packages/@overeng/megarepo/src/cli/renderers/StoreOutput/schema.ts b/packages/@overeng/megarepo/src/cli/renderers/StoreOutput/schema.ts index b6c816445..dd15f2f57 100644 --- a/packages/@overeng/megarepo/src/cli/renderers/StoreOutput/schema.ts +++ b/packages/@overeng/megarepo/src/cli/renderers/StoreOutput/schema.ts @@ -29,14 +29,44 @@ export const StoreFetchResult = Schema.Struct({ /** Inferred type for a store fetch result. */ export type StoreFetchResult = Schema.Schema.Type +/** + * Status of a single GC result. + * + * `removed`/`skipped_dirty`/`skipped_in_use`/`error` are the legacy + * commit-worktree + `--all` outcomes. The cold named-branch path (decisions + * 0001–0010) adds three more: `archived` (moved to `.archive/`, recoverable), + * `reaped` (an archive past retention hard-deleted), and `kept` (a cold named + * worktree deliberately left in place, e.g. live/not-stale/lossless/grace). 
+ */ +export const StoreGcResultStatus = Schema.Literal( + 'removed', + 'skipped_dirty', + 'skipped_in_use', + 'error', + 'archived', + 'reaped', + 'kept', +) + +/** Inferred type for a GC result status. */ +export type StoreGcResultStatus = Schema.Schema.Type + /** Schema for the result of garbage-collecting a single worktree. */ export const StoreGcResult = Schema.Struct({ repo: Schema.String, ref: Schema.String, refType: Schema.Literal('heads', 'tags', 'commits'), path: Schema.String, - status: Schema.Literal('removed', 'skipped_dirty', 'skipped_in_use', 'error'), + status: StoreGcResultStatus, message: Schema.optional(Schema.String), + /** + * Why a cold named worktree was kept/archived/reaped (e.g. `live`, + * `not-stale`, `unrecoverable-local-work`, `merged`, `closed`, `ref_mismatch`). + * Distinct from `message` (free-form detail); `reason` is the stable tag. + */ + reason: Schema.optional(Schema.String), + /** For `archived`: the `.archive/` location the worktree was moved to (recovery hint). */ + recoverPath: Schema.optional(Schema.String), }) /** Inferred type for a store GC result. 
*/ diff --git a/packages/@overeng/megarepo/src/cli/renderers/StoreOutput/stories/GC.stories.tsx b/packages/@overeng/megarepo/src/cli/renderers/StoreOutput/stories/GC.stories.tsx index af1376910..fdb9242de 100644 --- a/packages/@overeng/megarepo/src/cli/renderers/StoreOutput/stories/GC.stories.tsx +++ b/packages/@overeng/megarepo/src/cli/renderers/StoreOutput/stories/GC.stories.tsx @@ -692,3 +692,36 @@ const LargeCleanupRender = (args: StoryArgs) => { export const LargeCleanup: Story = { render: LargeCleanupRender, } + +const ColdReclamationRender = (args: StoryArgs) => { + const stateConfig = useMemo( + () => ({ + results: fixtures.exampleColdGcResults, + dryRun: args.dryRun, + force: args.force, + all: args.all, + showForceHint: !args.force, + }), + [args.dryRun, args.force, args.all], + ) + return ( + + ) +} + +export const ColdReclamation: Story = { + render: ColdReclamationRender, +} diff --git a/packages/@overeng/megarepo/src/cli/renderers/StoreOutput/stories/_fixtures.ts b/packages/@overeng/megarepo/src/cli/renderers/StoreOutput/stories/_fixtures.ts index b8a0cc163..226c29d9a 100644 --- a/packages/@overeng/megarepo/src/cli/renderers/StoreOutput/stories/_fixtures.ts +++ b/packages/@overeng/megarepo/src/cli/renderers/StoreOutput/stories/_fixtures.ts @@ -185,6 +185,57 @@ export const exampleGcResults: StoreGcResult[] = [ }, ] +/** + * Cold named-branch GC outcomes (decisions 0001–0010): a merged branch archived + * to `.archive/` with a recovery hint, a stale archive reaped past retention, and + * cold-but-kept worktrees (live / unrecoverable-local-work / ref_mismatch). 
+ */ +export const exampleColdGcResults: StoreGcResult[] = [ + { + repo: 'github.com/alice/core-lib/', + ref: 'feature/merged-pr', + refType: 'heads', + path: '/Users/dev/.megarepo/github.com/alice/core-lib/refs/heads/feature/merged-pr/', + status: 'archived', + reason: 'merged', + recoverPath: + '/Users/dev/.megarepo/github.com/alice/core-lib/.archive/feature/merged-pr--2026-06-11T08:00:00.000Z/', + }, + { + repo: 'github.com/alice/core-lib/', + ref: 'feature/old-archive', + refType: 'heads', + path: '/Users/dev/.megarepo/github.com/alice/core-lib/.archive/feature/old-archive--2026-05-01T00:00:00.000Z/', + status: 'reaped', + reason: 'retention', + }, + { + repo: 'github.com/acme-org/dev-tools/', + ref: 'feature/unpushed', + refType: 'heads', + path: '/Users/dev/.megarepo/github.com/acme-org/dev-tools/refs/heads/feature/unpushed/', + status: 'kept', + reason: 'unrecoverable-local-work', + }, + { + repo: 'github.com/acme-org/dev-tools/', + ref: 'feature/repinned', + refType: 'heads', + path: '/Users/dev/.megarepo/github.com/acme-org/dev-tools/refs/heads/feature/repinned/', + status: 'kept', + reason: 'live', + }, + { + repo: 'github.com/acme-org/dev-tools/', + ref: 'feature/diverged', + refType: 'heads', + path: '/Users/dev/.megarepo/github.com/acme-org/dev-tools/refs/heads/feature/diverged/', + status: 'kept', + reason: 'ref_mismatch', + message: "HEAD is 'feature/other'", + }, +] + export const createGcState = (opts: { results: StoreGcResult[] dryRun: boolean diff --git a/packages/@overeng/megarepo/src/cli/renderers/StoreOutput/view.tsx b/packages/@overeng/megarepo/src/cli/renderers/StoreOutput/view.tsx index e3d3fb7b2..7fc5df118 100644 --- a/packages/@overeng/megarepo/src/cli/renderers/StoreOutput/view.tsx +++ b/packages/@overeng/megarepo/src/cli/renderers/StoreOutput/view.tsx @@ -516,12 +516,16 @@ const StoreGcView = ({ maxInUseToShow?: number }) => { const removed = results.filter((r) => r.status === 'removed') + const archived = results.filter((r) => r.status 
=== 'archived') + const reaped = results.filter((r) => r.status === 'reaped') const skippedDirty = results.filter((r) => r.status === 'skipped_dirty') const skippedInUse = results.filter((r) => r.status === 'skipped_in_use') + const kept = results.filter((r) => r.status === 'kept') const errors = results.filter((r) => r.status === 'error') // Determine which results to show const showInUse = skippedInUse.length <= maxInUseToShow + const showKept = kept.length <= maxInUseToShow return ( @@ -567,6 +571,20 @@ const StoreGcView = ({ dryRun={dryRun} /> ))} + {archived.map((result) => ( + + ))} + {reaped.map((result) => ( + + ))} {skippedDirty.map((result) => ( ))} + {showKept && + kept.map((result) => ( + + ))} {errors.map((result) => ( @@ -656,12 +685,18 @@ const StoreGcResultRow = ({ result, dryRun }: { result: StoreGcResult; dryRun: b switch (result.status) { case 'removed': return {SYMBOLS.check} + case 'archived': + return {SYMBOLS.check} + case 'reaped': + return {SYMBOLS.check} case 'error': return {SYMBOLS.cross} case 'skipped_dirty': return {SYMBOLS.circle} case 'skipped_in_use': return {SYMBOLS.check} + case 'kept': + return {SYMBOLS.circle} } } @@ -669,6 +704,17 @@ const StoreGcResultRow = ({ result, dryRun }: { result: StoreGcResult; dryRun: b switch (result.status) { case 'removed': return ({dryRun === true ? 'would remove' : 'removed'}) + case 'archived': + return ( + + {' '} + ({dryRun === true ? 'would archive' : 'archived'}: {result.reason ?? 'stale'}) + + ) + case 'reaped': + return ({dryRun === true ? 'would reap' : 'reaped'} past retention) + case 'kept': + return (kept: {result.reason ?? result.message ?? 'cold'}) case 'skipped_dirty': return ({result.message ?? 
'dirty'}) case 'skipped_in_use': @@ -678,23 +724,28 @@ const StoreGcResultRow = ({ result, dryRun }: { result: StoreGcResult; dryRun: b } } - const isDim = result.status === 'skipped_in_use' + const isDim = result.status === 'skipped_in_use' || result.status === 'kept' return ( - - {getSymbol()} - {isDim === true ? ( - - {' '} - {result.repo}refs/{result.refType}/{result.ref}{' '} - - ) : ( - - {' '} - {result.repo}refs/{result.refType}/{result.ref}{' '} - + + + {getSymbol()} + {isDim === true ? ( + + {' '} + {result.repo}refs/{result.refType}/{result.ref}{' '} + + ) : ( + + {' '} + {result.repo}refs/{result.refType}/{result.ref}{' '} + + )} + {getStatusText()} + + {result.status === 'archived' && result.recoverPath !== undefined && ( + recover from: {result.recoverPath} )} - {getStatusText()} ) } @@ -702,14 +753,20 @@ const StoreGcResultRow = ({ result, dryRun }: { result: StoreGcResult; dryRun: b /** GC Summary component */ const StoreGcSummary = ({ removed, + archived, + reaped, skippedDirty, skippedInUse, + kept, errors, dryRun, }: { removed: number + archived: number + reaped: number skippedDirty: number skippedInUse: number + kept: number errors: number dryRun: boolean }) => { @@ -725,6 +782,26 @@ const StoreGcSummary = ({ ), }) } + if (archived > 0) { + parts.push({ + key: 'archived', + element: ( + + {archived} {dryRun === true ? 'would be archived' : 'archived'} + + ), + }) + } + if (reaped > 0) { + parts.push({ + key: 'reaped', + element: ( + + {reaped} {dryRun === true ? 
'would be reaped' : 'reaped'} + + ), + }) + } if (skippedDirty > 0) { parts.push({ key: 'dirty', @@ -737,6 +814,12 @@ const StoreGcSummary = ({ element: {skippedInUse} in use, }) } + if (kept > 0) { + parts.push({ + key: 'kept', + element: {kept} kept, + }) + } if (errors > 0) { parts.push({ key: 'errors', diff --git a/packages/@overeng/megarepo/src/cli/store-gc-cold.integration.test.ts b/packages/@overeng/megarepo/src/cli/store-gc-cold.integration.test.ts new file mode 100644 index 000000000..200166587 --- /dev/null +++ b/packages/@overeng/megarepo/src/cli/store-gc-cold.integration.test.ts @@ -0,0 +1,916 @@ +/** + * Integration tests for the cold named-branch reclamation path of `mr store gc` + * (U7 / decisions 0001–0010). + * + * Runs the REAL `mr store gc` command (through `mrCommand`) against store-shaped + * fixtures with a deterministic decision clock (fixed `Clock`) and a stub + * `PrStateResolver` layer (no real `gh`/network). Exercises the full matrix from + * the plan's Test section: + * + * - cross-megarepo registered ⇒ kept (live) vs unregistered+merged ⇒ archived + * - repin-without-reregister ⇒ new target kept (reconcile-all, B2 + 0010 bug) + * - present-but-unreadable workspace ⇒ its live worktree kept (B2) + * - merged + clean + reachable ⇒ archived + branch freed (mr-apply re-add works) + * - merged + dirty ⇒ archived with dirt intact + * - merged + stash ⇒ kept (B3) + * - merged + unpushed ⇒ kept (B1) + * - open ⇒ kept + * - squash-merged + remote-branch-deleted ⇒ kept (no reachable proof) + * - absence/post-merge grace unmet ⇒ kept + * - archived past retention ⇒ reaped; within retention ⇒ kept + * - veto re-checked at archive AND reap (a worktree made live mid-run is kept) + * - archive ⇒ mr-apply-equivalent re-materializes the branch (B4) + * + * The lossless floor, archive mechanics, and classifier gates have their own unit + * + library integration tests; here we assert the END-TO-END command outcome + * (`status`/`reason` in the JSON 
document and the on-disk effect).
 */

import * as Cli from '@effect/cli'
import { Command, FileSystem } from '@effect/platform'
import { NodeContext } from '@effect/platform-node'
import { describe, it } from '@effect/vitest'
import { Clock, Effect, Exit, Layer, Schema } from 'effect'
import { expect } from 'vitest'

import { EffectPath, type AbsoluteDirPath, type RelativeDirPath } from '@overeng/effect-path'

import * as Git from '../lib/git.ts'
import { refreshWorkspaceRegistry } from '../lib/store-liveness.ts'
import { makeStubPrStateResolverLayer, type GhPr, type StubPrRepo } from '../lib/store-pr-state.ts'
import { makeStoreLayer, Store } from '../lib/store.ts'
import { makeConsoleCapture } from '../test-utils/consoleCapture.ts'
import {
  createArchiveEntry,
  createStoreFixture,
  createWorkspaceWithLock,
  getWorktreeCommit,
  repinWorkspace,
} from '../test-utils/store-setup.ts'
import { Cwd } from './context.ts'
import { mrCommand } from './mod.ts'

/** One day in milliseconds; the tests express every grace/retention offset in days. */
const DAY_MS = 24 * 60 * 60 * 1000
/** A fixed decision clock: well past every default grace window. */
const NOW = Date.parse('2026-06-11T12:00:00.000Z')

/** Run `git <args>` with `cwd` as the working directory and return its trimmed stdout. */
const git = (cwd: string, ...args: ReadonlyArray<string>) =>
  Effect.gen(function* () {
    const command = Command.make('git', ...args).pipe(Command.workingDirectory(cwd))
    return (yield* Command.string(command)).trim()
  })

/** Deterministic clock so grace/retention decisions are reproducible.
*/
const fixedClockLayer = (nowMs: number) =>
  Layer.setClock({
    [Clock.ClockTypeId]: Clock.ClockTypeId,
    currentTimeMillis: Effect.succeed(nowMs),
    currentTimeNanos: Effect.succeed(BigInt(nowMs) * 1_000_000n),
    // Sleeping is a no-op: the clock never advances, so nothing waits on it.
    sleep: () => Effect.void,
    unsafeCurrentTimeMillis: () => nowMs,
    unsafeCurrentTimeNanos: () => BigInt(nowMs) * 1_000_000n,
  })

/**
 * The slice of the `--output json` document these tests assert on. `status` is
 * decoded as a plain string, so an unexpected status surfaces as a failed
 * expectation rather than a decode error.
 */
const StoreGcJsonOutput = Schema.Struct({
  results: Schema.Array(
    Schema.Struct({
      repo: Schema.String,
      ref: Schema.String,
      path: Schema.String,
      status: Schema.String,
      message: Schema.optional(Schema.String),
      reason: Schema.optional(Schema.String),
      recoverPath: Schema.optional(Schema.String),
    }),
  ),
})
/** Parse captured stdout (a JSON string) into the structure above; throws on mismatch. */
const decodeGc = Schema.decodeUnknownSync(Schema.parseJson(StoreGcJsonOutput))
type GcResult = Schema.Schema.Type<typeof StoreGcJsonOutput>['results'][number]

/** First result whose `ref` equals `ref`, or `undefined` when the run reported none. */
const findByRef = (results: ReadonlyArray<GcResult>, ref: string) =>
  results.find((result) => result.ref === ref)

/**
 * Run `mr store gc` end-to-end with a fixed clock, an injected stub
 * `PrStateResolver`, and `MEGAREPO_STORE` pointed at the fixture store.
+ */ +const runGc = ({ + cwd, + storePath, + prRepos, + now = NOW, + args = [], +}: { + cwd: AbsoluteDirPath + storePath: AbsoluteDirPath + prRepos: ReadonlyArray + now?: number + args?: ReadonlyArray +}) => + Effect.gen(function* () { + const { consoleLayer, getStdoutLines } = yield* makeConsoleCapture + const previous = process.env['MEGAREPO_STORE'] + process.env['MEGAREPO_STORE'] = storePath + + const argv = ['node', 'mr', 'store', 'gc', ...args, '--output', 'json'] + const exit = yield* Cli.Command.run(mrCommand, { name: 'mr', version: 'test' })(argv).pipe( + Effect.provideService(Cwd, cwd), + Effect.provide(consoleLayer), + Effect.provide(makeStubPrStateResolverLayer(prRepos)), + Effect.provide(fixedClockLayer(now)), + Effect.exit, + ) + + if (previous === undefined) delete process.env['MEGAREPO_STORE'] + else process.env['MEGAREPO_STORE'] = previous + + const stdout = (yield* getStdoutLines).join('\n') + return { exitCode: Exit.isSuccess(exit) === true ? 0 : 1, results: decodeGc(stdout).results } + }).pipe(Effect.scoped) + +const REPO = { host: 'github.com', owner: 'acme', repo: 'widget' } as const +const REPO_KEY = `${REPO.host}/${REPO.owner}/${REPO.repo}` +const REPO_RELATIVE = `${REPO_KEY}/` as RelativeDirPath + +const mergedPr = (branch: string, mergedAt: number): GhPr => ({ + number: 1, + state: 'MERGED', + headRefName: branch, + mergedAt: new Date(mergedAt).toISOString(), + closedAt: new Date(mergedAt).toISOString(), +}) + +const openPr = (branch: string): GhPr => ({ + number: 2, + state: 'OPEN', + headRefName: branch, + mergedAt: null, + closedAt: null, +}) + +/** Materialize a real `refs/heads/` ref for a fixture (detached) worktree. 
*/ +const materializeBranchRef = ({ + bareRepoPath, + branch, + commit, +}: { + bareRepoPath: AbsoluteDirPath + branch: string + commit: string +}) => git(bareRepoPath, 'branch', branch, commit) + +/** + * Pre-seed the observation ledger so absence grace (default 14d) is already + * satisfied at NOW: run gc once `sinceDays` in the past with no PR evidence, which + * records `firstSeenColdAtMs` for every then-cold named worktree. + */ +const seedColdObservation = ({ + cwd, + storePath, + sinceDays = 20, +}: { + cwd: AbsoluteDirPath + storePath: AbsoluteDirPath + sinceDays?: number +}) => + runGc({ + cwd, + storePath, + prRepos: [{ relativePath: REPO_RELATIVE, prs: [] }], + now: NOW - sinceDays * DAY_MS, + }) + +/** An outside cwd (not in any megarepo) so gc uses the registry-only liveness. */ +const outsideCwd = () => + Effect.gen(function* () { + const fs = yield* FileSystem.FileSystem + const tmpDir = EffectPath.unsafe.absoluteDir(`${yield* fs.makeTempDirectoryScoped()}/`) + const cwd = EffectPath.ops.join(tmpDir, EffectPath.unsafe.relativeDir('outside/')) + yield* fs.makeDirectory(cwd, { recursive: true }) + return cwd + }) + +describe('mr store gc — cold named-branch reclamation', () => { + it.effect( + 'merged + clean + reachable ⇒ archived, branch freed, mr-apply re-add works', + Effect.fnUntraced( + function* () { + const fs = yield* FileSystem.FileSystem + const { storePath, bareRepoPaths, worktreePaths } = yield* createStoreFixture([ + { ...REPO, branches: ['feature/merged'], withRemote: true }, + ]) + const bareRepoPath = bareRepoPaths[REPO_KEY]! + const worktreePath = worktreePaths[`${REPO_KEY}#feature/merged`]! 
+ const commit = yield* getWorktreeCommit(worktreePath) + yield* materializeBranchRef({ bareRepoPath, branch: 'feature/merged', commit }) + + const cwd = yield* outsideCwd() + yield* seedColdObservation({ cwd, storePath }) + const { results } = yield* runGc({ + cwd, + storePath, + prRepos: [ + { relativePath: REPO_RELATIVE, prs: [mergedPr('feature/merged', NOW - 30 * DAY_MS)] }, + ], + }) + + const result = findByRef(results, 'feature/merged') + expect(result?.status).toBe('archived') + expect(result?.reason).toBe('merged') + expect(result?.recoverPath).toContain('/.archive/feature/merged--') + // Original gone, branch freed. + expect(yield* fs.exists(worktreePath)).toBe(false) + expect( + yield* Git.refExists({ repoPath: bareRepoPath, ref: 'refs/heads/feature/merged' }), + ).toBe(false) + // mr-apply-equivalent re-materialization succeeds (B4). + const reAddPath = EffectPath.ops.join( + storePath, + EffectPath.unsafe.relativeDir(`${REPO_KEY}/refs/heads/feature/merged/`), + ) + yield* git(bareRepoPath, 'branch', 'feature/merged', commit) + yield* git(bareRepoPath, 'worktree', 'add', reAddPath, 'feature/merged') + expect(yield* fs.exists(reAddPath)).toBe(true) + }, + Effect.provide(NodeContext.layer), + Effect.scoped, + ), + ) + + it.effect( + 'merged + dirty ⇒ archived with dirt intact', + Effect.fnUntraced( + function* () { + const fs = yield* FileSystem.FileSystem + const { storePath, bareRepoPaths, worktreePaths } = yield* createStoreFixture([ + { + ...REPO, + branches: ['feature/dirty'], + dirtyWorktrees: ['feature/dirty'], + withRemote: true, + }, + ]) + const bareRepoPath = bareRepoPaths[REPO_KEY]! + const worktreePath = worktreePaths[`${REPO_KEY}#feature/dirty`]! 
+ const commit = yield* getWorktreeCommit(worktreePath) + yield* materializeBranchRef({ bareRepoPath, branch: 'feature/dirty', commit }) + + const cwd = yield* outsideCwd() + yield* seedColdObservation({ cwd, storePath }) + const { results } = yield* runGc({ + cwd, + storePath, + prRepos: [ + { relativePath: REPO_RELATIVE, prs: [mergedPr('feature/dirty', NOW - 30 * DAY_MS)] }, + ], + }) + + const result = findByRef(results, 'feature/dirty') + expect(result?.status).toBe('archived') + // The dirt traveled with the move. + const dest = EffectPath.unsafe.absoluteDir(`${result!.recoverPath!.replace(/\/+$/, '')}/`) + expect( + yield* fs.readFileString( + EffectPath.ops.join(dest, EffectPath.unsafe.relativeFile('dirty.txt')), + ), + ).toBe('uncommitted changes\n') + }, + Effect.provide(NodeContext.layer), + Effect.scoped, + ), + ) + + it.effect( + 'merged + unpushed ⇒ kept (B1: unrecoverable local history)', + Effect.fnUntraced( + function* () { + const fs = yield* FileSystem.FileSystem + const { storePath, bareRepoPaths, worktreePaths } = yield* createStoreFixture([ + { ...REPO, branches: ['feature/unpushed'], withRemote: true }, + ]) + const bareRepoPath = bareRepoPaths[REPO_KEY]! + const worktreePath = worktreePaths[`${REPO_KEY}#feature/unpushed`]! + + // Create a local commit on the worktree that is on NO remote. 
+ yield* fs.writeFileString( + EffectPath.ops.join(worktreePath, EffectPath.unsafe.relativeFile('local.txt')), + 'local-only\n', + ) + yield* git(worktreePath, 'add', '-A') + yield* git(worktreePath, 'commit', '--no-verify', '-m', 'local only') + const commit = yield* getWorktreeCommit(worktreePath) + yield* materializeBranchRef({ bareRepoPath, branch: 'feature/unpushed', commit }) + + const cwd = yield* outsideCwd() + const { results } = yield* runGc({ + cwd, + storePath, + prRepos: [ + { relativePath: REPO_RELATIVE, prs: [mergedPr('feature/unpushed', NOW - 30 * DAY_MS)] }, + ], + }) + + const result = findByRef(results, 'feature/unpushed') + expect(result?.status).toBe('kept') + expect(result?.reason).toBe('unrecoverable-local-work') + expect(yield* fs.exists(worktreePath)).toBe(true) + }, + Effect.provide(NodeContext.layer), + Effect.scoped, + ), + ) + + it.effect( + 'merged + stash ⇒ kept (B3: stash does not travel with a dir move)', + Effect.fnUntraced( + function* () { + const fs = yield* FileSystem.FileSystem + const { storePath, bareRepoPaths, worktreePaths } = yield* createStoreFixture([ + { ...REPO, branches: ['feature/stash'], withRemote: true }, + ]) + const bareRepoPath = bareRepoPaths[REPO_KEY]! + const worktreePath = worktreePaths[`${REPO_KEY}#feature/stash`]! + const commit = yield* getWorktreeCommit(worktreePath) + yield* materializeBranchRef({ bareRepoPath, branch: 'feature/stash', commit }) + // Put a real stash (modify a tracked file then stash). 
+ yield* git(worktreePath, 'checkout', 'feature/stash') + yield* fs.writeFileString( + EffectPath.ops.join(worktreePath, EffectPath.unsafe.relativeFile('README.md')), + '# modified for stash\n', + ) + yield* git(worktreePath, 'stash') + + const cwd = yield* outsideCwd() + const { results } = yield* runGc({ + cwd, + storePath, + prRepos: [ + { relativePath: REPO_RELATIVE, prs: [mergedPr('feature/stash', NOW - 30 * DAY_MS)] }, + ], + }) + + const result = findByRef(results, 'feature/stash') + expect(result?.status).toBe('kept') + expect(result?.reason).toBe('unrecoverable-local-work') + expect(yield* fs.exists(worktreePath)).toBe(true) + }, + Effect.provide(NodeContext.layer), + Effect.scoped, + ), + ) + + it.effect( + 'open PR ⇒ kept (not-stale)', + Effect.fnUntraced( + function* () { + const { storePath, bareRepoPaths, worktreePaths } = yield* createStoreFixture([ + { ...REPO, branches: ['feature/open'], withRemote: true }, + ]) + const bareRepoPath = bareRepoPaths[REPO_KEY]! + const worktreePath = worktreePaths[`${REPO_KEY}#feature/open`]! + const commit = yield* getWorktreeCommit(worktreePath) + yield* materializeBranchRef({ bareRepoPath, branch: 'feature/open', commit }) + + const cwd = yield* outsideCwd() + const { results } = yield* runGc({ + cwd, + storePath, + prRepos: [{ relativePath: REPO_RELATIVE, prs: [openPr('feature/open')] }], + }) + + const result = findByRef(results, 'feature/open') + expect(result?.status).toBe('kept') + expect(result?.reason).toBe('not-stale') + }, + Effect.provide(NodeContext.layer), + Effect.scoped, + ), + ) + + it.effect( + 'squash-merged + remote branch deleted (no PR evidence) ⇒ kept', + Effect.fnUntraced( + function* () { + const { storePath, bareRepoPaths, worktreePaths } = yield* createStoreFixture([ + { ...REPO, branches: ['feature/squash'], withRemote: true }, + ]) + const bareRepoPath = bareRepoPaths[REPO_KEY]! + const worktreePath = worktreePaths[`${REPO_KEY}#feature/squash`]! 
+ const commit = yield* getWorktreeCommit(worktreePath) + yield* materializeBranchRef({ bareRepoPath, branch: 'feature/squash', commit }) + + const cwd = yield* outsideCwd() + // No PR rows for this branch ⇒ resolver returns `none` ⇒ keep (not-stale). + const { results } = yield* runGc({ + cwd, + storePath, + prRepos: [{ relativePath: REPO_RELATIVE, prs: [] }], + }) + + const result = findByRef(results, 'feature/squash') + expect(result?.status).toBe('kept') + expect(result?.reason).toBe('not-stale') + }, + Effect.provide(NodeContext.layer), + Effect.scoped, + ), + ) + + it.effect( + 'merged but within post-merge grace ⇒ kept (grace)', + Effect.fnUntraced( + function* () { + const { storePath, bareRepoPaths, worktreePaths } = yield* createStoreFixture([ + { ...REPO, branches: ['feature/grace'], withRemote: true }, + ]) + const bareRepoPath = bareRepoPaths[REPO_KEY]! + const worktreePath = worktreePaths[`${REPO_KEY}#feature/grace`]! + const commit = yield* getWorktreeCommit(worktreePath) + yield* materializeBranchRef({ bareRepoPath, branch: 'feature/grace', commit }) + + const cwd = yield* outsideCwd() + // Pre-seed the observation ledger (absence grace already elapsed) by running + // gc once at an earlier time, then run again within the post-merge window. + yield* runGc({ + cwd, + storePath, + prRepos: [{ relativePath: REPO_RELATIVE, prs: [] }], + now: NOW - 20 * DAY_MS, + }) + // Merged 1 day ago (< 7d post-merge grace) at the real NOW. 
+ const { results } = yield* runGc({ + cwd, + storePath, + prRepos: [ + { relativePath: REPO_RELATIVE, prs: [mergedPr('feature/grace', NOW - 1 * DAY_MS)] }, + ], + }) + + const result = findByRef(results, 'feature/grace') + expect(result?.status).toBe('kept') + expect(result?.reason).toBe('post-merge-grace') + }, + Effect.provide(NodeContext.layer), + Effect.scoped, + ), + ) + + it.effect( + 'absence grace unmet (first observation this run) ⇒ kept', + Effect.fnUntraced( + function* () { + const { storePath, bareRepoPaths, worktreePaths } = yield* createStoreFixture([ + { ...REPO, branches: ['feature/fresh'], withRemote: true }, + ]) + const bareRepoPath = bareRepoPaths[REPO_KEY]! + const worktreePath = worktreePaths[`${REPO_KEY}#feature/fresh`]! + const commit = yield* getWorktreeCommit(worktreePath) + yield* materializeBranchRef({ bareRepoPath, branch: 'feature/fresh', commit }) + + const cwd = yield* outsideCwd() + // First-ever observation: coldSince === now ⇒ absence grace not yet elapsed. + const { results } = yield* runGc({ + cwd, + storePath, + prRepos: [ + { relativePath: REPO_RELATIVE, prs: [mergedPr('feature/fresh', NOW - 30 * DAY_MS)] }, + ], + }) + + const result = findByRef(results, 'feature/fresh') + expect(result?.status).toBe('kept') + expect(result?.reason).toBe('absence-grace') + }, + Effect.provide(NodeContext.layer), + Effect.scoped, + ), + ) + + it.effect( + 'registered by another workspace ⇒ kept (live); unregistered+merged ⇒ archived', + Effect.fnUntraced( + function* () { + const fs = yield* FileSystem.FileSystem + const { storePath, bareRepoPaths, worktreePaths } = yield* createStoreFixture([ + { ...REPO, branches: ['feature/live', 'feature/dead'], withRemote: true }, + ]) + const bareRepoPath = bareRepoPaths[REPO_KEY]! + const livePath = worktreePaths[`${REPO_KEY}#feature/live`]! + const deadPath = worktreePaths[`${REPO_KEY}#feature/dead`]! 
+ const liveCommit = yield* getWorktreeCommit(livePath) + const deadCommit = yield* getWorktreeCommit(deadPath) + yield* materializeBranchRef({ bareRepoPath, branch: 'feature/live', commit: liveCommit }) + yield* materializeBranchRef({ bareRepoPath, branch: 'feature/dead', commit: deadCommit }) + + // Observe both branches cold in the past so absence grace is satisfied. + yield* seedColdObservation({ cwd: yield* outsideCwd(), storePath }) + + // Register a workspace that consumes feature/live via a repos/ symlink. + const { workspacePath } = yield* createWorkspaceWithLock({ + members: { widget: 'acme/widget#feature/live' }, + }) + yield* fs.makeDirectory( + EffectPath.ops.join(workspacePath, EffectPath.unsafe.relativeDir('repos/')), + { recursive: true }, + ) + yield* fs.symlink( + livePath.replace(/\/+$/, ''), + EffectPath.ops.join(workspacePath, EffectPath.unsafe.relativeFile('repos/widget')), + ) + const store = yield* Store.pipe(Effect.provide(makeStoreLayer({ basePath: storePath }))) + yield* refreshWorkspaceRegistry({ workspaceRoot: workspacePath, store, now: NOW }) + + const cwd = yield* outsideCwd() + const { results } = yield* runGc({ + cwd, + storePath, + prRepos: [ + { + relativePath: REPO_RELATIVE, + prs: [ + mergedPr('feature/live', NOW - 30 * DAY_MS), + mergedPr('feature/dead', NOW - 30 * DAY_MS), + ], + }, + ], + }) + + expect(findByRef(results, 'feature/live')?.status).toBe('kept') + expect(findByRef(results, 'feature/live')?.reason).toBe('live') + expect(findByRef(results, 'feature/dead')?.status).toBe('archived') + expect(yield* fs.exists(livePath)).toBe(true) + expect(yield* fs.exists(deadPath)).toBe(false) + }, + Effect.provide(NodeContext.layer), + Effect.scoped, + ), + ) + + it.effect( + 'repin-without-reregister ⇒ reconcile-all keeps the new target (B2 / 0010)', + Effect.fnUntraced( + function* () { + const fs = yield* FileSystem.FileSystem + const { storePath, bareRepoPaths, worktreePaths } = yield* createStoreFixture([ + { ...REPO, 
branches: ['feature/old', 'feature/new'], withRemote: true }, + ]) + const bareRepoPath = bareRepoPaths[REPO_KEY]! + const oldPath = worktreePaths[`${REPO_KEY}#feature/old`]! + const newPath = worktreePaths[`${REPO_KEY}#feature/new`]! + const oldCommit = yield* getWorktreeCommit(oldPath) + const newCommit = yield* getWorktreeCommit(newPath) + yield* materializeBranchRef({ bareRepoPath, branch: 'feature/old', commit: oldCommit }) + yield* materializeBranchRef({ bareRepoPath, branch: 'feature/new', commit: newCommit }) + + // Register a workspace pointing at feature/old, then repin to feature/new + // WITHOUT re-registering (stale liveness record still names old). + const { workspacePath } = yield* createWorkspaceWithLock({ + members: { widget: 'acme/widget#feature/old' }, + }) + yield* fs.makeDirectory( + EffectPath.ops.join(workspacePath, EffectPath.unsafe.relativeDir('repos/')), + { recursive: true }, + ) + yield* fs.symlink( + oldPath.replace(/\/+$/, ''), + EffectPath.ops.join(workspacePath, EffectPath.unsafe.relativeFile('repos/widget')), + ) + const store = yield* Store.pipe(Effect.provide(makeStoreLayer({ basePath: storePath }))) + yield* refreshWorkspaceRegistry({ workspaceRoot: workspacePath, store, now: NOW }) + yield* repinWorkspace({ workspacePath, memberName: 'widget', newTarget: newPath }) + + const cwd = yield* outsideCwd() + const { results } = yield* runGc({ + cwd, + storePath, + prRepos: [ + { + relativePath: REPO_RELATIVE, + prs: [ + mergedPr('feature/old', NOW - 30 * DAY_MS), + mergedPr('feature/new', NOW - 30 * DAY_MS), + ], + }, + ], + }) + + // reconcile-all re-derives feature/new from the repinned symlink ⇒ kept. 
+ expect(findByRef(results, 'feature/new')?.status).toBe('kept') + expect(findByRef(results, 'feature/new')?.reason).toBe('live') + expect(yield* fs.exists(newPath)).toBe(true) + }, + Effect.provide(NodeContext.layer), + Effect.scoped, + ), + ) + + it.effect( + 'present-but-unreadable workspace ⇒ its live worktree kept (fail safe, B2)', + Effect.fnUntraced( + function* () { + const fs = yield* FileSystem.FileSystem + const { storePath, bareRepoPaths, worktreePaths } = yield* createStoreFixture([ + { ...REPO, branches: ['feature/protected'], withRemote: true }, + ]) + const bareRepoPath = bareRepoPaths[REPO_KEY]! + const protectedPath = worktreePaths[`${REPO_KEY}#feature/protected`]! + const commit = yield* getWorktreeCommit(protectedPath) + yield* materializeBranchRef({ bareRepoPath, branch: 'feature/protected', commit }) + + const { workspacePath } = yield* createWorkspaceWithLock({ + members: { widget: 'acme/widget#feature/protected' }, + }) + yield* fs.makeDirectory( + EffectPath.ops.join(workspacePath, EffectPath.unsafe.relativeDir('repos/')), + { recursive: true }, + ) + yield* fs.symlink( + protectedPath.replace(/\/+$/, ''), + EffectPath.ops.join(workspacePath, EffectPath.unsafe.relativeFile('repos/widget')), + ) + const store = yield* Store.pipe(Effect.provide(makeStoreLayer({ basePath: storePath }))) + yield* refreshWorkspaceRegistry({ workspaceRoot: workspacePath, store, now: NOW }) + + // Make the workspace's members dir unreadable so a strict reconcile errors; + // the last-known live path must be preserved (never overwritten with empty). 
+ const reposDir = EffectPath.ops.join(workspacePath, EffectPath.unsafe.relativeDir('repos/')) + yield* fs.chmod(reposDir, 0o000) + + const cwd = yield* outsideCwd() + const { results } = yield* runGc({ + cwd, + storePath, + prRepos: [ + { + relativePath: REPO_RELATIVE, + prs: [mergedPr('feature/protected', NOW - 30 * DAY_MS)], + }, + ], + }).pipe(Effect.ensuring(fs.chmod(reposDir, 0o755).pipe(Effect.ignore))) + + const result = findByRef(results, 'feature/protected') + expect(result?.status).toBe('kept') + // live (last-known path retained) — NOT archived. + expect(yield* fs.exists(protectedPath)).toBe(true) + }, + Effect.provide(NodeContext.layer), + Effect.scoped, + ), + ) + + it.effect( + 'ref_mismatch (HEAD on a different branch) ⇒ kept', + Effect.fnUntraced( + function* () { + const fs = yield* FileSystem.FileSystem + const { storePath, bareRepoPaths, worktreePaths } = yield* createStoreFixture([ + { ...REPO, branches: ['feature/claimed'], withRemote: true }, + ]) + const bareRepoPath = bareRepoPaths[REPO_KEY]! + const worktreePath = worktreePaths[`${REPO_KEY}#feature/claimed`]! + const commit = yield* getWorktreeCommit(worktreePath) + // Check out a DIFFERENT branch in the worktree than the path claims. 
+ yield* git(bareRepoPath, 'branch', 'feature/other', commit) + yield* git(worktreePath, 'checkout', 'feature/other') + + const cwd = yield* outsideCwd() + const { results } = yield* runGc({ + cwd, + storePath, + prRepos: [ + { + relativePath: REPO_RELATIVE, + prs: [mergedPr('feature/claimed', NOW - 30 * DAY_MS)], + }, + ], + }) + + const result = findByRef(results, 'feature/claimed') + expect(result?.status).toBe('kept') + expect(result?.reason).toBe('ref_mismatch') + expect(yield* fs.exists(worktreePath)).toBe(true) + }, + Effect.provide(NodeContext.layer), + Effect.scoped, + ), + ) + + it.effect( + 'fetch failure (no remote configured) ⇒ all named worktrees kept', + Effect.fnUntraced( + function* () { + const fs = yield* FileSystem.FileSystem + // No `withRemote`: the bare has no `origin`, so `fetch --prune origin` fails. + const { storePath, bareRepoPaths, worktreePaths } = yield* createStoreFixture([ + { ...REPO, branches: ['feature/no-remote'] }, + ]) + const bareRepoPath = bareRepoPaths[REPO_KEY]! + const worktreePath = worktreePaths[`${REPO_KEY}#feature/no-remote`]! 
+ const commit = yield* getWorktreeCommit(worktreePath) + yield* materializeBranchRef({ bareRepoPath, branch: 'feature/no-remote', commit }) + + const cwd = yield* outsideCwd() + const { results } = yield* runGc({ + cwd, + storePath, + prRepos: [ + { + relativePath: REPO_RELATIVE, + prs: [mergedPr('feature/no-remote', NOW - 30 * DAY_MS)], + }, + ], + }) + + const result = findByRef(results, 'feature/no-remote') + expect(result?.status).toBe('kept') + expect(result?.reason).toBe('fetch-failed') + expect(yield* fs.exists(worktreePath)).toBe(true) + }, + Effect.provide(NodeContext.layer), + Effect.scoped, + ), + ) + + it.effect( + 'archive past retention ⇒ reaped; within retention ⇒ kept', + Effect.fnUntraced( + function* () { + const fs = yield* FileSystem.FileSystem + const { storePath, bareRepoPaths, worktreePaths } = yield* createStoreFixture([ + { ...REPO, branches: ['live/keep'], withRemote: true }, + ]) + const bareRepoPath = bareRepoPaths[REPO_KEY]! + const repoRoot = EffectPath.ops.join( + storePath, + EffectPath.unsafe.relativeDir(`${REPO_KEY}/`), + ) + const commit = yield* getWorktreeCommit(worktreePaths[`${REPO_KEY}#live/keep`]!) + + // One archived 40d ago (> 30d retention) and one 5d ago (within). + const { archivePath: stalePath } = yield* createArchiveEntry({ + bareRepoPath, + repoRoot, + branch: 'feature/stale', + commit, + archivedAt: new Date(NOW - 40 * DAY_MS), + }) + const { archivePath: freshPath } = yield* createArchiveEntry({ + bareRepoPath, + repoRoot, + branch: 'feature/fresh-archive', + commit, + archivedAt: new Date(NOW - 5 * DAY_MS), + }) + + const cwd = yield* outsideCwd() + const { results } = yield* runGc({ + cwd, + storePath, + prRepos: [{ relativePath: REPO_RELATIVE, prs: [] }], + }) + + const reaped = results.find((r) => r.status === 'reaped') + expect(reaped?.ref).toBe('feature/stale') + expect(yield* fs.exists(stalePath)).toBe(false) + // The within-retention archive is untouched and not reported as reaped. 
+ expect(yield* fs.exists(freshPath)).toBe(true) + expect( + results.some((r) => r.status === 'reaped' && r.ref === 'feature/fresh-archive'), + ).toBe(false) + }, + Effect.provide(NodeContext.layer), + Effect.scoped, + ), + ) + + it.effect( + 'veto re-check at reap: an archive that became live ⇒ kept, not reaped', + Effect.fnUntraced( + function* () { + const fs = yield* FileSystem.FileSystem + const { storePath, bareRepoPaths, worktreePaths } = yield* createStoreFixture([ + { ...REPO, branches: ['live/keep'], withRemote: true }, + ]) + const bareRepoPath = bareRepoPaths[REPO_KEY]! + const repoRoot = EffectPath.ops.join( + storePath, + EffectPath.unsafe.relativeDir(`${REPO_KEY}/`), + ) + const commit = yield* getWorktreeCommit(worktreePaths[`${REPO_KEY}#live/keep`]!) + + const { archivePath } = yield* createArchiveEntry({ + bareRepoPath, + repoRoot, + branch: 'feature/contested', + commit, + archivedAt: new Date(NOW - 40 * DAY_MS), + }) + + // Register a workspace whose symlink points AT the archived path, so the + // under-lock veto re-check finds it live and refuses to reap (invariant 1). 
+ const { workspacePath } = yield* createWorkspaceWithLock({ + members: { widget: 'acme/widget#feature/contested' }, + }) + yield* fs.makeDirectory( + EffectPath.ops.join(workspacePath, EffectPath.unsafe.relativeDir('repos/')), + { recursive: true }, + ) + yield* fs.symlink( + archivePath.replace(/\/+$/, ''), + EffectPath.ops.join(workspacePath, EffectPath.unsafe.relativeFile('repos/widget')), + ) + const store = yield* Store.pipe(Effect.provide(makeStoreLayer({ basePath: storePath }))) + yield* refreshWorkspaceRegistry({ workspaceRoot: workspacePath, store, now: NOW }) + + const cwd = yield* outsideCwd() + const { results } = yield* runGc({ + cwd, + storePath, + prRepos: [{ relativePath: REPO_RELATIVE, prs: [] }], + }) + + const result = findByRef(results, 'feature/contested') + expect(result?.status).toBe('kept') + expect(result?.reason).toBe('live') + expect(yield* fs.exists(archivePath)).toBe(true) + }, + Effect.provide(NodeContext.layer), + Effect.scoped, + ), + ) + + it.effect( + 'dry-run ⇒ reports archive/reap intent without mutating disk', + Effect.fnUntraced( + function* () { + const fs = yield* FileSystem.FileSystem + const { storePath, bareRepoPaths, worktreePaths } = yield* createStoreFixture([ + { ...REPO, branches: ['feature/merged'], withRemote: true }, + ]) + const bareRepoPath = bareRepoPaths[REPO_KEY]! + const worktreePath = worktreePaths[`${REPO_KEY}#feature/merged`]! + const commit = yield* getWorktreeCommit(worktreePath) + yield* materializeBranchRef({ bareRepoPath, branch: 'feature/merged', commit }) + // Seed an old observation so absence grace is satisfied on the dry run. 
+ yield* runGc({ + cwd: yield* outsideCwd(), + storePath, + prRepos: [{ relativePath: REPO_RELATIVE, prs: [] }], + now: NOW - 20 * DAY_MS, + }) + + const cwd = yield* outsideCwd() + const { results } = yield* runGc({ + cwd, + storePath, + prRepos: [ + { relativePath: REPO_RELATIVE, prs: [mergedPr('feature/merged', NOW - 30 * DAY_MS)] }, + ], + args: ['--dry-run'], + }) + + expect(findByRef(results, 'feature/merged')?.status).toBe('archived') + // Dry run leaves the worktree and branch intact. + expect(yield* fs.exists(worktreePath)).toBe(true) + expect( + yield* Git.refExists({ repoPath: bareRepoPath, ref: 'refs/heads/feature/merged' }), + ).toBe(true) + }, + Effect.provide(NodeContext.layer), + Effect.scoped, + ), + ) + + it.effect( + '--all is unchanged: removes named worktrees (no cold path)', + Effect.fnUntraced( + function* () { + const fs = yield* FileSystem.FileSystem + const { storePath, bareRepoPaths, worktreePaths } = yield* createStoreFixture([ + { ...REPO, branches: ['feature/x'], withRemote: true }, + ]) + const bareRepoPath = bareRepoPaths[REPO_KEY]! + const worktreePath = worktreePaths[`${REPO_KEY}#feature/x`]! + const commit = yield* getWorktreeCommit(worktreePath) + yield* materializeBranchRef({ bareRepoPath, branch: 'feature/x', commit }) + + const cwd = yield* outsideCwd() + const { results } = yield* runGc({ + cwd, + storePath, + // No PR rows — under --all this is irrelevant (everything is removed). + prRepos: [{ relativePath: REPO_RELATIVE, prs: [] }], + args: ['--all'], + }) + + const result = findByRef(results, 'feature/x') + expect(result?.status).toBe('removed') + // Not archived/kept — the legacy --all path owns it. 
+ expect(result?.reason).toBeUndefined() + expect(yield* fs.exists(worktreePath)).toBe(false) + }, + Effect.provide(NodeContext.layer), + Effect.scoped, + ), + ) +}) diff --git a/packages/@overeng/megarepo/src/cli/store.integration.test.ts b/packages/@overeng/megarepo/src/cli/store.integration.test.ts index 90c0f40ce..958adcc55 100644 --- a/packages/@overeng/megarepo/src/cli/store.integration.test.ts +++ b/packages/@overeng/megarepo/src/cli/store.integration.test.ts @@ -304,7 +304,11 @@ describe('mr store gc', () => { expect(gcA.exitCode).toBe(0) const json = decodeStoreGcJsonOutput(gcA.stdout) const repoBResult = json.results.find((r) => r.repo === 'github.com/test-owner/repo-b/') - expect(repoBResult?.status).toBe('skipped_in_use') + // Named branch worktrees registered by another workspace are now owned + // by the cold reclamation path (decisions 0001–0010): the worktree is + // still PROTECTED, surfaced as `kept` (the prior status was the + // commit-path `skipped_in_use`). The protection guarantee is unchanged. + expect(repoBResult?.status).toBe('kept') expect(yield* fs.exists(repoBPath)).toBe(true) }, Effect.provide(NodeContext.layer), From f7507308cae98c4b3a489a8af2718c6faa46e79b Mon Sep 17 00:00:00 2001 From: schickling-assistant <261620128+schickling-assistant@users.noreply.github.com> Date: Thu, 11 Jun 2026 17:38:08 +0200 Subject: [PATCH 11/13] docs(megarepo): mark cold named-branch GC implemented in spec + changelog (#771) Flip the spec's cold named-branch reclamation subsection from designed-not-implemented to the shipped behavior: layered gate order (live-set veto, staleness, lossless floor, three grace timers, archive then reap), config override path, reconcile-all-before-delete fail-safe semantics, and the new --json statuses (archived/reaped/kept + reason + recoverPath). Add an [Unreleased] CHANGELOG entry for the feature. 
Co-Authored-By: Claude Opus 4.8 (1M context) --- CHANGELOG.md | 1 + packages/@overeng/megarepo/docs/spec.md | 92 +++++++++++++++---------- 2 files changed, 55 insertions(+), 38 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 35e19d53c..5d8e7970c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -22,6 +22,7 @@ All notable changes to this project will be documented in this file. ### Added +- **@overeng/megarepo**: `mr store gc` now reclaims **cold** named-branch (`refs/heads/*`) worktrees, the dominant store accumulation that default gc previously protected unconditionally. A named worktree is reclaimed only after passing layered, short-circuiting gates: (1) a hard cross-megarepo live-set veto (`collectStoreLiveSet`, store-wide — a `repos/` symlink alone never protects; only a recorded `livePaths` entry does); (2) staleness — the branch's GitHub PR is merged or closed (an open PR, no PR, or any `gh`/resolver failure ⇒ keep); (3) a lossless floor — every local commit is reachable on a remote (`git rev-list <branch> --not --remotes` empty after `fetch --prune`), no stash, no unpushed commits, and a per-repo fetch failure keeps all of that repo's worktrees; (4) three grace timers tracked against a persisted observation ledger — _absence grace_ (default 14d) for every cold worktree, plus a _post-merge grace_ (default 7d after `mergedAt`) for merged branches (closed-unmerged has no post-close grace). Capture is two-phase: a qualifying worktree is `git worktree move`d to `/.archive/--/` and its local ref freed (so `mr apply` re-materializes it), then **reaped** (hard-deleted) once it ages past the _archive retention TTL_ (default 30d); gc also reaps pre-existing `.archive/` worktrees it formerly ignored. Timers are overridable via `$STORE/.state/gc-config.json`. 
Before any deletion gc reconciles ALL registered workspaces (not just the current one), and `mr apply`/`sync`/`pull`/`pin` now refresh the liveness record — closing a bug where a repinned-but-unre-registered workspace's _live_ worktree could be deleted; reconcile-all fails safe (a present-but-unreadable workspace keeps its last-known live paths, and grace is not advanced for an unclean reconcile). Archive and reap both re-check the live-set veto under the worktree lock immediately before acting. Provably lossless and conservative: absence of evidence never licenses deletion. `mr store gc --json` results gain `archived`/`reaped`/`kept` statuses with a stable `reason` tag and (for `archived`) a `recoverPath`. `--all` is unchanged (nuclear, live-set-bypassing). Design in `docs/decisions/0001`–`0011`, terms in `docs/glossary.md`, spec in `docs/spec.md` (#771). - **@overeng/utils-dev/otelite**: Add a vitest ↔ otelite capture bridge that wires an in-process `Otelite.capture` receiver to a vitest test's OTLP trace exporter, so spans emitted IN-PROCESS through the normal `@effect/opentelemetry` `OtlpTracer` layer land in a capture the test can assert over. `makeOteliteCaptureLayer(options?)` is a scoped `Layer` that boots ONE receiver, exposes its `CaptureHandle` via the new `OteliteCapture` `Context.Tag`, AND installs the trace exporter pointed at `${handle.endpoints.http}/v1/traces`; used with `@effect/vitest`'s `layer(...)` it gives a PER-FILE lifecycle (one receiver per test file, shared across that file's tests; tests disambiguate by a unique `service.name` / span name) — the cheap default per decision 0015, with per-test available by giving each `layer(...)` its own instance. A test does `const cap = yield* OteliteCapture; …; yield* cap.inspect({ signal: 'traces', name })`. `flushCaptureSpans()` force-flushes the exporter (the emitter's job) before inspecting. 
Silent-failure guard: a misrouted exporter (the `/v1/traces` suffix bug, see Fixed) lands nothing, so the demonstrator's non-zero `inspect`/`span_count` assertions FAIL the test rather than pass vacuously. Real-binary tests emit a span in-process through the REAL exporter and assert it round-trips, plus a regression that a bare un-suffixed URL captures nothing (#769, #772). - **@overeng/notion-effect-client**: Add a real-consumer span-assertion demonstrator (D3, decision 0015) co-located in this client's own test suite (`src/test/otelite-span-shape.test.ts`). It drives the REAL instrumented query path (`NotionDatabases.query` → `executeRequest` → `Effect.withSpan('NotionHttp.POST')`) against a STUB upstream — a `HttpClient.make(...)` answering the one `POST /data_sources/{id}/query` endpoint with a canned empty paginated list + `x-ratelimit-remaining`/`x-request-id` headers — under the `@overeng/utils-dev/otelite` capture bridge, with NO secrets and NO network. It asserts the emitted span shape: exactly one `NotionHttp.POST` span carrying the templated `notion.http.route` = `/data_sources/{data_source_id}/query` + `notion.http.method`/`operation`/`status_code` (200) + `notion.rate_limit.remaining` (42); exactly one auto `http.client POST` child from `@effect/platform` whose `url.path` proves the stub served the request; a non-zero `span_count` (silent-export guard); and a public-repo leak guard that NO captured span attribute carries an `authorization` header or the token value (`@effect/platform` records only a header subset and excludes Authorization). The churn-coupled `notion.http.*` assertions sit next to the instrumentation that churns; the bridge stays a lean shared helper. The shadowing gotcha (the bridge re-exports the exporter's `FetchHttpClient` as `HttpClient.HttpClient`) is resolved by providing the stub to the effect-under-test directly (`Effect.provide`, innermost-wins) so the consumer sees the stub. 
Runs the real nix-built `otelite` binary on `PATH` (#769, #772). - **@overeng/utils-dev/otelite**: Add a scoped in-process `Otelite.capture` primitive for harnesses that own the system-under-test lifecycle themselves (vs `run`, which spawns the child). `capture(options?)` spawns `otelite capture` with piped stdin/stdout and yields a scoped `CaptureHandle` (`endpoints`, `outDir`, `inspect`, `summary`). It learns the ephemeral receiver endpoints by decoding the FIRST stdout line against a new `EndpointsEvent`/`otelite.endpoints/v1` `Schema` — dispatching on the `schema` tag, never string-scraping. Closing the scope stops the receiver by closing the child's stdin (EOF), drains in-flight exports, and resolves `handle.summary` (the final `otelite.summary/v1` line); teardown is interrupt-safe so an interrupted scope leaves no orphaned child. The handle's `inspect` pins `src` to the out-dir and does a small bounded short-poll retry on a transient 0-row read (the receiver writes each export straight to the file with `write_all` before acking, so a captured span is durable immediately — but an independent reader can briefly observe 0 rows from pure scheduler/fs-visibility latency). Real-binary tests raw-POST a known OTLP/JSON span and assert the typed `SpanRow` round-trips through `inspect` and `summary.counts.spans` (#769, #772). diff --git a/packages/@overeng/megarepo/docs/spec.md b/packages/@overeng/megarepo/docs/spec.md index 880da7b60..d036dffd1 100644 --- a/packages/@overeng/megarepo/docs/spec.md +++ b/packages/@overeng/megarepo/docs/spec.md @@ -611,61 +611,77 @@ mr store gc [--dry-run] [--force] [--all] **Behavior:** -1. Refresh the current workspace liveness record, then read the store-local root set from registered workspaces. +1. Reconcile every registered workspace's liveness record (re-derive each one's + live paths fresh from disk), then read the store-wide live set. 2. Walk the store to find all `refs/heads/*`, `refs/tags/*`, and `refs/commits/*` worktrees. 
-3. Keep named `refs/heads/*` and `refs/tags/*` worktrees by default. -4. Remove clean `refs/commits/*` worktrees that are not referenced by any workspace root set. +3. Remove clean `refs/commits/*` worktrees that are not referenced by any workspace live set. +4. Reclaim **cold** named `refs/heads/*` worktrees (archive, then reap aged + archives) — see [Cold named-branch reclamation](#cold-named-branch-reclamation). + `refs/tags/*` worktrees are kept by default. **Options:** - `--dry-run`: show what would be removed - `--force`: remove even dirty worktrees -- `--all`: also consider named branch and tag worktrees for removal +- `--all`: also consider named branch and tag worktrees for removal (nuclear mode + — bypasses the live set entirely; distinct from cold reclamation, which honors it) -**Safety:** Skips worktrees with uncommitted changes or unpushed commits unless `--force`, and rechecks the root set under the worktree lock before removal. +**Safety:** Skips worktrees with uncommitted changes or unpushed commits unless `--force`, and rechecks the live set under the worktree lock before removal. **Scope:** Uses the store-local workspace registry plus the current workspace. Run `mr status` or another registry-refreshing command from active megarepos so their commit worktrees remain rooted. -##### Cold named-branch reclamation (designed, not yet implemented) - -> Status: design agreed, implementation pending. Rationale and trade-offs in -> `docs/decisions/0001`–`0007`; domain terms in `docs/glossary.md`. - -Today default gc unconditionally protects every `refs/heads/*`/`refs/tags/*` -worktree, so it cannot reclaim cold named-branch worktrees — the dominant -accumulation (survey 2026-06-10: 323 named-branch worktrees, 122 in effect-utils -alone). Default gc will be extended to delete a named-branch worktree only when -it is **cold**, decided by layered gates in this order: - -1. 
**Cross-megarepo live-set veto (hard).** Not present in any registered - workspace's live set (`collectStoreLiveSet`, store-wide). Verified that a - `repos/` symlink alone gives no protection — only recorded `livePaths` count. -2. **Lossless floor.** Every local commit reachable on a remote; any uncommitted - state captured first (see step 5). No data may be lost by deletion. -3. **Staleness.** The branch's GitHub PR is **merged or closed** (primary signal; - the git-ancestor proxy is unusable because the repos squash-merge). An open PR - or no PR ⇒ keep. Closed-unmerged is safe under the same gates because the - lossless floor keeps any worktree whose commits aren't reachable on a remote. +##### Cold named-branch reclamation + +> Rationale and trade-offs in `docs/decisions/0001`–`0011`; domain terms in +> `docs/glossary.md`. + +A named `refs/heads/*` worktree is reclaimed only when it is **cold**, decided by +layered gates evaluated in this order (each short-circuits to keep): + +1. **Cross-megarepo live-set veto (hard).** Present in any registered workspace's + live set (`collectStoreLiveSet`, store-wide) ⇒ keep. A `repos/` symlink alone + gives no protection — only a recorded `livePaths` entry does. +2. **Staleness.** The branch's GitHub PR must be **merged or closed** (primary + signal; the git-ancestor proxy is unusable because the repos squash-merge). An + open PR, no PR, or any resolver/`gh` failure ⇒ keep. +3. **Lossless floor.** Every local commit must be reachable on a remote + (`git rev-list <branch> --not --remotes` is empty, after a `fetch --prune`); a + non-empty stash or unpushed commits ⇒ keep. A fetch failure for a repo keeps + all of that repo's worktrees. Any uncommitted/untracked dirt travels intact + with the directory on archive. 4. **Grace windows (three timers).** Continuously absent from all live sets for the _absence grace_ (default 14d); for merged, also past the _post-merge grace_ - (default 7d after `mergedAt`) — not just absent in one snapshot. 
-5. **Capture = archive → reap.** A qualifying worktree is moved to - `/.archive/` (recoverable; reuses the existing worktree-archive - convention), then **reaped** (hard-deleted) once it ages past the _archive - retention TTL_ (default 30d). gc also reaps pre-existing `.archive/` worktrees, - which it currently ignores entirely. + (default 7d after `mergedAt`) — measured against a persisted observation ledger, + not one snapshot. Closed-unmerged has no post-close grace. +5. **Capture = archive → reap.** A qualifying worktree is `git worktree move`d to + `/.archive/--/` (recoverable; reuses the existing + worktree-archive convention) and its local `refs/heads/<branch>` ref is freed so + `mr apply` can re-materialize it. The archive is later **reaped** (hard-deleted) + once it ages past the _archive retention TTL_ (default 30d). gc also reaps + pre-existing `.archive/` worktrees it would otherwise ignore. + +Timer defaults are overridable via `$STORE/.state/gc-config.json` +(`absenceGraceMs`, `postMergeGraceMs`, `archiveRetentionMs`). Archive and reap +both re-check the live-set veto under the worktree lock immediately before acting. +A worktree whose checked-out HEAD branch differs from the ref named by its store path is kept (`ref_mismatch`). Before any deletion, gc **reconciles all registered workspaces** (re-derives each one's live paths fresh from disk), not just the current workspace, and more `mr` -commands refresh the liveness record — closing a verified bug where a -repinned-but-unre-registered workspace's _live_ worktree could be deleted. +commands (`apply`, `sync`, `pull`, `pin`) refresh the liveness record — closing a +bug where a repinned-but-unre-registered workspace's _live_ worktree could be +deleted. reconcile-all fails safe: a present-but-unreadable workspace keeps its +last-known live paths, and grace is not advanced for a workspace that failed a +clean reconcile. 
Provably-lossless and conservative: absence of evidence never licenses deletion; -worst case is a re-`mr apply` (re-fetch), except the deleted-remote-branch edge. - -Remaining open: exact timer defaults are tunable per host; whether a post-close -grace mirrors post-merge grace; metrics/output surface for the disk-hygiene -consumer. +worst case is a re-`mr apply` (re-fetch), except the deleted-remote-branch edge +(a squash-merged branch whose remote ref was pruned is kept by the lossless floor). + +**JSON output statuses.** With `--json`, each worktree result carries a `status` +of `removed`, `archived`, `reaped`, `kept`, `skipped_dirty`, `skipped_in_use`, or +`error`, plus a stable `reason` tag (`live`, `not-stale`, `unrecoverable-local-work`, +`absence-grace`, `post-merge-grace`, `merged`, `closed`, `ref_mismatch`, …) and, +for `archived`, a `recoverPath` pointing at the `.archive/` location. #### `mr store ls` From 5f4fcb3c0394ceae5e2645447cecf0a24cec0799 Mon Sep 17 00:00:00 2001 From: schickling-assistant <261620128+schickling-assistant@users.noreply.github.com> Date: Thu, 11 Jun 2026 18:13:08 +0200 Subject: [PATCH 12/13] fix(megarepo): harden cold named-branch store gc (adversarial review) (#771) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix findings from an adversarial review of the cold named-branch worktree GC reclamation path in `mr store gc`. - B1 (blocker): `archiveWorktree` freed no branch for production worktrees. A production `refs/heads/*` worktree is non-detached, so after `git worktree move` the moved worktree still has the branch checked out and `git branch -D` is refused (`cannot delete branch used by worktree`), leaving the branch unfreed and a later `mr apply` re-add broken (invariant 4). Detach the moved worktree's HEAD (new `Git.detachWorktreeHead`) before freeing the ref. 
Added an integration test using a non-detached worktree (prior fixtures only used `--detach`, masking the defect) — it fails before the fix, passes after. - M1 (major): route both liveness-registry writes (`refreshWorkspaceRegistry`, under-lock reconcile rewrite) through `writeFileAtomic` so a torn read during a concurrent gc reconcile can't drop a workspace's live-set veto (decision 0010 hard veto). - B1-followup (major): once the move succeeds, report `archived` with the real `recoverPath`; post-move branch-free + README steps are best-effort-but-reported via a warning (no false `error`/"left intact"). The `.archive/README.md` append is now an atomic write. - n1: drop the unreachable `winner === undefined` branch in `resolvePrStateForBranch`. New tests: archive-time live-set veto re-check; `loadStoreGcConfig` valid + corrupt-file degradation; CLOSED-PR archive (reason `closed`, no post-close grace); dry-run reap intent; unclean-reconcile grace withholding/restart; `writeFileAtomic` temp-cleanup on a rename failure. Co-Authored-By: Claude Opus 4.8 (1M context) --- CHANGELOG.md | 1 + .../megarepo/src/cli/commands/store/mod.ts | 17 +- .../src/cli/store-gc-cold.integration.test.ts | 225 ++++++++++++++++++ packages/@overeng/megarepo/src/lib/git.ts | 19 ++ .../src/lib/store-archive.integration.test.ts | 67 +++++- .../megarepo/src/lib/store-archive.ts | 94 ++++++-- .../src/lib/store-fs-atomic.unit.test.ts | 72 ++++++ .../src/lib/store-gc-config.unit.test.ts | 72 ++++++ .../megarepo/src/lib/store-liveness.ts | 13 +- .../megarepo/src/lib/store-pr-state.ts | 5 +- .../megarepo/src/test-utils/store-setup.ts | 32 +++ 11 files changed, 585 insertions(+), 32 deletions(-) create mode 100644 packages/@overeng/megarepo/src/lib/store-fs-atomic.unit.test.ts diff --git a/CHANGELOG.md b/CHANGELOG.md index 5d8e7970c..249201b65 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -17,6 +17,7 @@ All notable changes to this project will be documented in this file. 
### Fixed +- **@overeng/megarepo**: Harden the cold named-branch `mr store gc` reclamation path against an adversarial review. (1) **Archive freed no branch for production worktrees**: a production `refs/heads/*` worktree is NON-DETACHED, so after `git worktree move` the moved worktree still has the branch checked out and `git branch -D` is REFUSED (`cannot delete branch used by worktree`), leaving the branch unfreed and a later `mr apply` re-add broken (invariant 4). Fixed by detaching the moved worktree's HEAD (`git checkout --detach`, new `Git.detachWorktreeHead`) before freeing the ref; covered by a new integration test using a non-detached worktree (the prior fixtures only used `--detach`, masking the defect). (2) **Liveness registry writes were non-atomic**: `refreshWorkspaceRegistry` and the under-lock reconcile rewrite used plain `writeFileString`, so a torn read during a concurrent gc reconcile could drop a workspace's live-set veto (decision 0010 hard veto); both now route through `writeFileAtomic`. (3) **Partial-archive mis-reporting**: once the move succeeded the result is now `archived` with the real `recoverPath` (post-move branch-free + README steps are best-effort-but-reported via a warning), instead of falsely reporting `error`/"left intact". The `.archive/README.md` append is now an atomic write. New tests cover the archive-time live-set veto re-check, `loadStoreGcConfig` file load + corrupt-file degradation, a CLOSED-PR archive, dry-run reap intent, the unclean-reconcile grace withholding/restart, and `writeFileAtomic` temp-cleanup on a rename failure (#771). - **@overeng/otelite**: Honor durability-before-ack — flush each export to the kernel before the 200/OK. 
`tokio::fs::File` buffers writes, so `write_all` alone did NOT guarantee the bytes reached the kernel before the sink acked; an independent reader (or a crash) before the next flush could miss them, contradicting R05 ("flush … before acking") and the `append_line` doc's own "durably reaching the kernel before returning" promise. This surfaced as a CI flake in the `durable_before_ack` gate (a read immediately after the 200 occasionally saw an empty file under thread contention — reproduced ~1/60 at 16 test threads). Fix: `SignalFile::append_line` / `append_json` now `flush()` after `write_all`, before returning. This is a flush, not an fsync — `sync_all` (physical-disk durability) stays deferred to shutdown, so the M2 "no per-export fsync under the lock" throughput decision is preserved. Verified: 0 failures over 200 × 16-thread runs (was ~1/60). - **@overeng/otelite**: Make the HTTP-JSON metrics receive path lossless, fixing two silent data-loss bugs a stress test surfaced. The upstream `opentelemetry-proto` `with-serde` deserialize — which the receiver used to BUILD the proto value the sink then re-serialized — silently drops several metric JSON shapes: a `sum`/`gauge` `NumberDataPoint` whose int64 value is the default string form (`"asInt":"7"`) lost its value entirely (captured null), and a regular `histogram` metric was dropped down to `{name,description,unit,metadata}` (its data oneof gone). Both returned HTTP 200 + bumped `counts.metrics` → a silent mis-capture that violates the lossless + "loud, never silent" contracts (decisions/0011). Fix: on the JSON metrics path, `with-serde` still runs purely as the dialect VALIDATOR (Err → 400 + `note_rejected`, gate unchanged), but on success the receiver now persists the VALIDATED RAW JSON body verbatim (re-emitted through `serde_json::Value` via the new `Sink::write_metrics_json`, counting metrics from the JSON structure) instead of the lossy proto re-serialization. 
Since the body is already canonical OTLP/JSON and `inspect` walks raw JSON, the JSON metrics path is now lossless for string-int64 sums/gauges, regular histograms, AND exponential histograms — the last also RESOLVING the previously-documented exp-histogram-on-JSON limitation for the receive path. Traces/logs JSON paths and all protobuf/gRPC paths are unchanged (already lossless). New gates (real receiver, no mocks): an HTTP-JSON round-trip of a string-int64 sum + histogram + exponential histogram all survive receive → capture → `inspect`; cross-transport equivalence extended to metrics (the same logical string-int64-sum + histogram over HTTP-JSON vs HTTP-protobuf vs gRPC flattens to equivalent `inspect` rows, the proto/gRPC fixture built natively to avoid the lossy `with-serde` source); and a loud-rejection guard that a malformed metrics JSON body still 400s + is captured nowhere. KNOWN RESIDUAL: the upstream metrics `with-serde` is more lenient than the trace one, so for metrics the JSON dialect gate is effectively structural (malformed JSON / hard field-type mismatches), tolerating some non-default dialect shapes (numeric int64 nanos, string enums) rather than rejecting them loudly — a stricter metrics dialect gate is a follow-up (#769, #772). 
diff --git a/packages/@overeng/megarepo/src/cli/commands/store/mod.ts b/packages/@overeng/megarepo/src/cli/commands/store/mod.ts index 34a6084fb..21d03bad2 100644 --- a/packages/@overeng/megarepo/src/cli/commands/store/mod.ts +++ b/packages/@overeng/megarepo/src/cli/commands/store/mod.ts @@ -627,7 +627,7 @@ const coldReclaimRepo = ({ if (isPathProtected({ liveSet: freshLiveSet, path: worktree.path }) === true) { return { _tag: 'kept-live' as const } } - const dest = yield* archiveWorktree({ + const outcome = yield* archiveWorktree({ repoRoot: repoFullPath, bareRepoPath, worktreePath: worktree.path, @@ -636,7 +636,11 @@ const coldReclaimRepo = ({ reason: decision.reason, now, }) - return { _tag: 'archived' as const, recoverPath: dest } + return { + _tag: 'archived' as const, + recoverPath: outcome.destPath, + warnings: outcome.warnings, + } }), ) .pipe( @@ -651,7 +655,9 @@ const coldReclaimRepo = ({ if (archiveOutcome._tag === 'kept-live') { results.push(coldResult({ target, status: 'kept', reason: 'live' })) } else if (archiveOutcome._tag === 'error') { - // Archive failed mid-flight: the original worktree is left intact. + // Only a PRE-move failure reaches here (post-move steps are best-effort + // and reported as warnings, never errors), so the original worktree is + // genuinely left intact. results.push( coldResult({ target, @@ -661,12 +667,17 @@ const coldReclaimRepo = ({ }), ) } else { + // The move succeeded: report `archived` + the real `.archive/` recovery + // path even if a best-effort post-move step (branch free / README) failed. results.push( coldResult({ target, status: 'archived', reason: decision.reason, recoverPath: archiveOutcome.recoverPath, + ...(archiveOutcome.warnings.length > 0 + ? 
{ message: archiveOutcome.warnings.join('; ') } + : {}), }), ) } diff --git a/packages/@overeng/megarepo/src/cli/store-gc-cold.integration.test.ts b/packages/@overeng/megarepo/src/cli/store-gc-cold.integration.test.ts index 200166587..ca3d644ab 100644 --- a/packages/@overeng/megarepo/src/cli/store-gc-cold.integration.test.ts +++ b/packages/@overeng/megarepo/src/cli/store-gc-cold.integration.test.ts @@ -148,6 +148,14 @@ const openPr = (branch: string): GhPr => ({ closedAt: null, }) +const closedPr = (branch: string, closedAt: number): GhPr => ({ + number: 3, + state: 'CLOSED', + headRefName: branch, + mergedAt: null, + closedAt: new Date(closedAt).toISOString(), +}) + /** Materialize a real `refs/heads/` ref for a fixture (detached) worktree. */ const materializeBranchRef = ({ bareRepoPath, @@ -839,6 +847,111 @@ describe('mr store gc — cold named-branch reclamation', () => { ), ) + it.effect( + 'veto re-check at archive: a merged+clean+grace-met worktree that is live ⇒ kept, not archived', + Effect.fnUntraced( + function* () { + const fs = yield* FileSystem.FileSystem + const { storePath, bareRepoPaths, worktreePaths } = yield* createStoreFixture([ + { ...REPO, branches: ['feature/contested-archive'], withRemote: true }, + ]) + const bareRepoPath = bareRepoPaths[REPO_KEY]! + const worktreePath = worktreePaths[`${REPO_KEY}#feature/contested-archive`]! + const commit = yield* getWorktreeCommit(worktreePath) + yield* materializeBranchRef({ + bareRepoPath, + branch: 'feature/contested-archive', + commit, + }) + + // Make the worktree archive-eligible: cold long enough that absence grace + // is satisfied, with a long-merged PR (past post-merge grace). + yield* seedColdObservation({ cwd: yield* outsideCwd(), storePath }) + + // Register a workspace whose symlink points AT the would-be-archived + // worktree. The fresh under-lock reconcile (invariant 1) must find it live + // and refuse to archive — mirrors the reap-veto test for the archive path. 
+ const { workspacePath } = yield* createWorkspaceWithLock({ + members: { widget: 'acme/widget#feature/contested-archive' }, + }) + yield* fs.makeDirectory( + EffectPath.ops.join(workspacePath, EffectPath.unsafe.relativeDir('repos/')), + { recursive: true }, + ) + yield* fs.symlink( + worktreePath.replace(/\/+$/, ''), + EffectPath.ops.join(workspacePath, EffectPath.unsafe.relativeFile('repos/widget')), + ) + const store = yield* Store.pipe(Effect.provide(makeStoreLayer({ basePath: storePath }))) + yield* refreshWorkspaceRegistry({ workspaceRoot: workspacePath, store, now: NOW }) + + const cwd = yield* outsideCwd() + const { results } = yield* runGc({ + cwd, + storePath, + prRepos: [ + { + relativePath: REPO_RELATIVE, + prs: [mergedPr('feature/contested-archive', NOW - 30 * DAY_MS)], + }, + ], + }) + + const result = findByRef(results, 'feature/contested-archive') + expect(result?.status).toBe('kept') + expect(result?.reason).toBe('live') + // The worktree and its branch are untouched. + expect(yield* fs.exists(worktreePath)).toBe(true) + expect( + yield* Git.refExists({ + repoPath: bareRepoPath, + ref: 'refs/heads/feature/contested-archive', + }), + ).toBe(true) + }, + Effect.provide(NodeContext.layer), + Effect.scoped, + ), + ) + + it.effect( + 'closed (unmerged) PR + clean + reachable + grace-met ⇒ archived, reason closed (no post-close grace)', + Effect.fnUntraced( + function* () { + const fs = yield* FileSystem.FileSystem + const { storePath, bareRepoPaths, worktreePaths } = yield* createStoreFixture([ + { ...REPO, branches: ['feature/closed'], withRemote: true }, + ]) + const bareRepoPath = bareRepoPaths[REPO_KEY]! + const worktreePath = worktreePaths[`${REPO_KEY}#feature/closed`]! 
+ const commit = yield* getWorktreeCommit(worktreePath) + yield* materializeBranchRef({ bareRepoPath, branch: 'feature/closed', commit }) + + const cwd = yield* outsideCwd() + yield* seedColdObservation({ cwd, storePath }) + // Closed only ONE day ago: decision 0009 has NO post-close grace, so a + // recently-closed PR still archives once absence grace is met. + const { results } = yield* runGc({ + cwd, + storePath, + prRepos: [ + { relativePath: REPO_RELATIVE, prs: [closedPr('feature/closed', NOW - 1 * DAY_MS)] }, + ], + }) + + const result = findByRef(results, 'feature/closed') + expect(result?.status).toBe('archived') + expect(result?.reason).toBe('closed') + expect(yield* fs.exists(worktreePath)).toBe(false) + expect( + yield* Git.refExists({ repoPath: bareRepoPath, ref: 'refs/heads/feature/closed' }), + ).toBe(false) + }, + Effect.provide(NodeContext.layer), + Effect.scoped, + ), + ) + it.effect( 'dry-run ⇒ reports archive/reap intent without mutating disk', Effect.fnUntraced( @@ -881,6 +994,118 @@ describe('mr store gc — cold named-branch reclamation', () => { ), ) + it.effect( + 'dry-run ⇒ reports reap intent for a past-retention archive WITHOUT removing it', + Effect.fnUntraced( + function* () { + const fs = yield* FileSystem.FileSystem + const { storePath, bareRepoPaths, worktreePaths } = yield* createStoreFixture([ + { ...REPO, branches: ['live/keep'], withRemote: true }, + ]) + const bareRepoPath = bareRepoPaths[REPO_KEY]! + const repoRoot = EffectPath.ops.join( + storePath, + EffectPath.unsafe.relativeDir(`${REPO_KEY}/`), + ) + const commit = yield* getWorktreeCommit(worktreePaths[`${REPO_KEY}#live/keep`]!) + + // Past-retention archive (40d > 30d): reap-eligible. 
+ const { archivePath } = yield* createArchiveEntry({ + bareRepoPath, + repoRoot, + branch: 'feature/stale-dry', + commit, + archivedAt: new Date(NOW - 40 * DAY_MS), + }) + + const cwd = yield* outsideCwd() + const { results } = yield* runGc({ + cwd, + storePath, + prRepos: [{ relativePath: REPO_RELATIVE, prs: [] }], + args: ['--dry-run'], + }) + + // Reap intent reported but the archive dir is left on disk. + expect(results.some((r) => r.status === 'reaped' && r.ref === 'feature/stale-dry')).toBe( + true, + ) + expect(yield* fs.exists(archivePath)).toBe(true) + }, + Effect.provide(NodeContext.layer), + Effect.scoped, + ), + ) + + it.effect( + 'unclean reconcile withholds absence grace: a later clean run restarts the clock (kept absence-grace)', + Effect.fnUntraced( + function* () { + const fs = yield* FileSystem.FileSystem + const { storePath, bareRepoPaths, worktreePaths } = yield* createStoreFixture([ + { ...REPO, branches: ['feature/unclean'], withRemote: true }, + ]) + const bareRepoPath = bareRepoPaths[REPO_KEY]! + const worktreePath = worktreePaths[`${REPO_KEY}#feature/unclean`]! + const commit = yield* getWorktreeCommit(worktreePath) + yield* materializeBranchRef({ bareRepoPath, branch: 'feature/unclean', commit }) + + // A workspace that DOES NOT consume this worktree (its symlink points + // elsewhere) but whose strict reconcile fails this run — flagging a path + // unclean so absence grace must NOT advance for it (decision 0010 / B2). 
+ const { workspacePath } = yield* createWorkspaceWithLock({ + members: { other: 'acme/widget#feature/other' }, + }) + const reposDir = EffectPath.ops.join(workspacePath, EffectPath.unsafe.relativeDir('repos/')) + yield* fs.makeDirectory(reposDir, { recursive: true }) + yield* fs.symlink( + worktreePath.replace(/\/+$/, ''), + EffectPath.ops.join(workspacePath, EffectPath.unsafe.relativeFile('repos/other')), + ) + const store = yield* Store.pipe(Effect.provide(makeStoreLayer({ basePath: storePath }))) + yield* refreshWorkspaceRegistry({ workspaceRoot: workspacePath, store, now: NOW }) + + // First run, 20d in the past, but with the workspace UNREADABLE so the + // reconcile is unclean: its live path stays protected, but absence grace + // is withheld (firstSeenColdAtMs is NOT recorded for the protected path). + yield* fs.chmod(reposDir, 0o000) + const firstRun = yield* runGc({ + cwd: yield* outsideCwd(), + storePath, + prRepos: [{ relativePath: REPO_RELATIVE, prs: [] }], + now: NOW - 20 * DAY_MS, + }).pipe(Effect.ensuring(fs.chmod(reposDir, 0o755).pipe(Effect.ignore))) + // While unclean it is protected as live, never advanced toward archive. + expect(findByRef(firstRun.results, 'feature/unclean')?.status).toBe('kept') + + // Second run now CLEAN: with the symlink readable again the worktree would + // still be live (the symlink points at it). Make it NOT live by removing the + // symlink, so this run is its FIRST clean cold observation ⇒ absence + // grace clock starts here, not 20d ago. + yield* fs.remove( + EffectPath.ops.join(workspacePath, EffectPath.unsafe.relativeFile('repos/other')), + { force: true }, + ) + const cwd = yield* outsideCwd() + const { results } = yield* runGc({ + cwd, + storePath, + prRepos: [ + { relativePath: REPO_RELATIVE, prs: [mergedPr('feature/unclean', NOW - 30 * DAY_MS)] }, + ], + }) + + const result = findByRef(results, 'feature/unclean') + expect(result?.status).toBe('kept') + // Grace restarted: kept on absence-grace, NOT archived. 
+ expect(result?.reason).toBe('absence-grace') + expect(yield* fs.exists(worktreePath)).toBe(true) + }, + Effect.provide(NodeContext.layer), + Effect.scoped, + ), + ) + it.effect( '--all is unchanged: removes named worktrees (no cold path)', Effect.fnUntraced( diff --git a/packages/@overeng/megarepo/src/lib/git.ts b/packages/@overeng/megarepo/src/lib/git.ts index 1a3790eb2..bbe3f2805 100644 --- a/packages/@overeng/megarepo/src/lib/git.ts +++ b/packages/@overeng/megarepo/src/lib/git.ts @@ -787,6 +787,25 @@ export const checkoutWorktree = (args: { worktreePath: string; ref: string }) => cwd: args.worktreePath, }).pipe(Effect.asVoid) +/** + * Detach a worktree's HEAD from its branch (`git checkout --detach`). + * + * Used by GC archival: a moved named-branch worktree still has its + * `refs/heads/` checked out, so `git branch -D ` is refused + * (`cannot delete branch 'X' used by worktree at ...`). Detaching HEAD first + * frees the branch ref for deletion + later re-materialization (invariant 4). 
+ */ +export const detachWorktreeHead = (args: { worktreePath: string }) => + runGitCommand({ + args: ['checkout', '--detach'], + cwd: args.worktreePath, + }).pipe( + Effect.asVoid, + Effect.withSpan('git/detach-worktree-head', { + attributes: { 'span.label': args.worktreePath, worktreePath: args.worktreePath }, + }), + ) + // ============================================================================= // Megarepo Name Derivation // ============================================================================= diff --git a/packages/@overeng/megarepo/src/lib/store-archive.integration.test.ts b/packages/@overeng/megarepo/src/lib/store-archive.integration.test.ts index e82a237d2..34e0b2f51 100644 --- a/packages/@overeng/megarepo/src/lib/store-archive.integration.test.ts +++ b/packages/@overeng/megarepo/src/lib/store-archive.integration.test.ts @@ -26,6 +26,7 @@ import { createArchiveEntry, createStoreFixture, getWorktreeCommit, + materializeNonDetachedBranchWorktree, } from '../test-utils/store-setup.ts' import * as Git from './git.ts' import { archiveWorktree, parseArchiveDirName, reapArchive, scanArchives } from './store-archive.ts' @@ -48,7 +49,7 @@ describe('store-archive: parseArchiveDirName', () => { const iso = '2026-06-11T10:20:30.000Z' const parsed = parseArchiveDirName(`schickling/2026-06-10--feature--x${`--${iso}`}`) expect(Option.isSome(parsed)).toBe(true) - if (Option.isSome(parsed)) { + if (Option.isSome(parsed) === true) { expect(parsed.value.branch).toBe('schickling/2026-06-10--feature--x') expect(parsed.value.archivedAtMs).toBe(Date.parse(iso)) } @@ -84,7 +85,7 @@ describe('store-archive: archiveWorktree', () => { expect(before).toBe(true) const now = Date.parse('2026-06-11T08:00:00.000Z') - const dest = yield* archiveWorktree({ + const { destPath: dest, warnings } = yield* archiveWorktree({ repoRoot, bareRepoPath, worktreePath, @@ -93,6 +94,7 @@ describe('store-archive: archiveWorktree', () => { reason: 'merged', now, }) + 
expect(warnings).toEqual([]) // Original gone, archive present. expect(yield* fs.exists(worktreePath)).toBe(false) @@ -124,6 +126,65 @@ describe('store-archive: archiveWorktree', () => { ), ) + it.effect( + 'archives a NON-DETACHED refs/heads worktree (production shape): frees the branch so re-add succeeds', + Effect.fnUntraced( + function* () { + const fixture = yield* createStoreFixture([{ ...REPO, branches: ['feature/prod'] }]) + const repoRoot = repoRootFor(fixture.storePath, REPO_KEY) + const bareRepoPath = fixture.bareRepoPaths[REPO_KEY]! + const worktreePath = fixture.worktreePaths[`${REPO_KEY}#feature/prod`]! + const commit = yield* getWorktreeCommit(worktreePath) + const fs = yield* FileSystem.FileSystem + + // Re-materialize as the production shape: a NON-DETACHED worktree with + // the branch checked out (the default fixtures use `--detach`, which + // masks the `git branch -D` refusal this test guards against). + yield* materializeNonDetachedBranchWorktree({ + bareRepoPath, + worktreePath, + branch: 'feature/prod', + commit, + }) + expect( + yield* Git.refExists({ repoPath: bareRepoPath, ref: 'refs/heads/feature/prod' }), + ).toBe(true) + + const { destPath: dest, warnings } = yield* archiveWorktree({ + repoRoot, + bareRepoPath, + worktreePath, + branch: 'feature/prod', + commit, + reason: 'merged', + now: Date.parse('2026-06-11T08:00:00.000Z'), + }) + // The branch is freed cleanly, so no best-effort warning is surfaced. + expect(warnings).toEqual([]) + + // Original gone, archive present. + expect(yield* fs.exists(worktreePath)).toBe(false) + expect(yield* fs.exists(dest)).toBe(true) + + // Branch FREED despite having been checked out in the moved worktree. + expect( + yield* Git.refExists({ repoPath: bareRepoPath, ref: 'refs/heads/feature/prod' }), + ).toBe(false) + + // mr-apply-equivalent re-add of the SAME branch succeeds. 
+ const reAddPath = EffectPath.ops.join( + repoRoot, + EffectPath.unsafe.relativeDir('refs/heads/feature/prod/'), + ) + yield* git(bareRepoPath, 'branch', 'feature/prod', commit) + yield* git(bareRepoPath, 'worktree', 'add', reAddPath, 'feature/prod') + expect(yield* fs.exists(reAddPath)).toBe(true) + }, + Effect.provide(NodeContext.layer), + Effect.scoped, + ), + ) + it.effect( 'archive preserves uncommitted + untracked work intact with the dir move', Effect.fnUntraced( @@ -143,7 +204,7 @@ describe('store-archive: archiveWorktree', () => { 'precious\n', ) - const dest = yield* archiveWorktree({ + const { destPath: dest } = yield* archiveWorktree({ repoRoot, bareRepoPath, worktreePath, diff --git a/packages/@overeng/megarepo/src/lib/store-archive.ts b/packages/@overeng/megarepo/src/lib/store-archive.ts index 0dfc8c25b..8f4f76b6f 100644 --- a/packages/@overeng/megarepo/src/lib/store-archive.ts +++ b/packages/@overeng/megarepo/src/lib/store-archive.ts @@ -28,12 +28,14 @@ * on this persistence path. */ -import { CommandExecutor, FileSystem, type Error as PlatformError } from '@effect/platform' +import type { CommandExecutor } from '@effect/platform' +import { FileSystem, type Error as PlatformError } from '@effect/platform' import { Effect, Option } from 'effect' import { EffectPath, type AbsoluteDirPath } from '@overeng/effect-path' import * as Git from './git.ts' +import { writeFileAtomic } from './store-fs-atomic.ts' /** Relative directory name of the per-repo archive holding area. */ export const ARCHIVE_DIR_NAME = '.archive' @@ -87,7 +89,7 @@ export const parseArchiveDirName = ( const archivedAtMs = Date.parse(ts) // Reject non-instants AND values that do not round-trip back to the same ISO // string (e.g. an out-of-range day that `Date.parse` would normalize). 
- if (Number.isNaN(archivedAtMs) || new Date(archivedAtMs).toISOString() !== ts) { + if (Number.isNaN(archivedAtMs) === true || new Date(archivedAtMs).toISOString() !== ts) { return Option.none() } @@ -97,6 +99,23 @@ export const parseArchiveDirName = ( const archiveDirPath = (repoRoot: AbsoluteDirPath): AbsoluteDirPath => EffectPath.ops.join(repoRoot, EffectPath.unsafe.relativeDir(`${ARCHIVE_DIR_NAME}/`)) +/** + * Result of {@link archiveWorktree} once the (irreversible) move has succeeded. + * + * The move is the point of no return: once the worktree directory lives under + * `.archive/`, the data is recoverable there regardless of what happens to the + * post-move bookkeeping. `warnings` records any best-effort post-move step that + * failed (branch not freed, README not updated) so the caller surfaces the + * `.archive/` location AND tells the operator about the residual state instead + * of mislabeling a moved worktree as an untouched no-op. + */ +export interface ArchiveOutcome { + /** The `.archive/` destination the worktree was moved to (recovery location). */ + readonly destPath: AbsoluteDirPath + /** Non-fatal post-move issues (e.g. branch still referenced, README append failed). */ + readonly warnings: ReadonlyArray<string> +} + /** * Archive a cold worktree: move it under `/.archive/`, free its branch, * and record metadata. @@ -106,15 +125,21 @@ const archiveDirPath = (repoRoot: AbsoluteDirPath): AbsoluteDirPath => * destination's parent to exist. * 2. `git -C worktree move ` — preserves dirty + untracked * work (it travels with the directory) and rewrites the gitlink to the new - * absolute path, so no `git worktree repair` is needed afterwards. - * 3. FREE the branch via `git -C branch -D ` so `mr apply` can - * re-materialize it; the commit stays reachable through the remote-tracking - * ref (guaranteed by the lossless floor's invariant 2a, checked upstream). - * 4. Append `branch, ISO(now), commit, reason` to `/.archive/README.md`. 
+ * absolute path, so no `git worktree repair` is needed afterwards. This is the + * POINT OF NO RETURN; failing here means nothing moved (caller: keep+error). + * 3. DETACH the moved worktree's HEAD (`git -C checkout --detach`) then + * FREE the branch (`git -C branch -D `) so `mr apply` can + * re-materialize it (invariant 4). Production named-branch worktrees are + * NON-DETACHED, so without the detach `git branch -D` is refused (`cannot + * delete branch used by worktree`). The commit stays reachable via the + * remote-tracking ref (lossless floor invariant 2a, checked upstream). + * 4. Append `branch, ISO(now), commit, reason` to `/.archive/README.md` + * (atomic write-temp-then-rename so a concurrent reader never sees a torn log). * - * Returns the destination path so the caller can surface a recovery hint. Any - * git/fs failure propagates so the caller can report keep+error and leave the - * original worktree intact. + * Steps 3 and 4 are BEST-EFFORT-BUT-REPORTED: a failure after the move does not + * fail the effect (the data is already safe in `.archive/`); instead it is + * recorded in {@link ArchiveOutcome.warnings}. Only a pre-move failure (step 1/2) + * propagates as an error, leaving the original worktree intact. */ export const archiveWorktree = (args: { /** The repo root in the store: `////`. */ @@ -132,7 +157,7 @@ export const archiveWorktree = (args: { /** Epoch-ms decision time; drives the archive dir name + README timestamp. */ readonly now: number }): Effect.Effect< - AbsoluteDirPath, + ArchiveOutcome, Git.GitCommandError | PlatformError.PlatformError, FileSystem.FileSystem | CommandExecutor.CommandExecutor > => @@ -152,28 +177,53 @@ export const archiveWorktree = (args: { const destParent = EffectPath.ops.parent(destPath) ?? archiveDir yield* fs.makeDirectory(destParent, { recursive: true }) - // (2) Move the worktree — dirty + untracked work travels intact, gitlink fixed. 
+ // (2) Move the worktree — dirty + untracked work travels intact, gitlink + // fixed. POINT OF NO RETURN: a failure here propagates (nothing moved). yield* Git.moveWorktree({ repoPath: args.bareRepoPath, fromPath: args.worktreePath, toPath: destPath, }) - // (3) Free the branch so `mr apply` can re-materialize it (invariant 4). - yield* Git.deleteBranch({ repoPath: args.bareRepoPath, branch: args.branch, force: true }) + // From here on the data is safe in `.archive/`. Post-move steps are + // best-effort: any failure becomes a warning, never an error. + const warnings: Array<string> = [] + + // (3) Detach the moved worktree's HEAD, then free the branch so `mr apply` + // can re-materialize it (invariant 4). The detach is required because the + // moved worktree still has the branch checked out (non-detached in prod), so + // `git branch -D` would otherwise be refused. + yield* Git.detachWorktreeHead({ worktreePath: destPath }).pipe( + Effect.flatMap(() => + Git.deleteBranch({ repoPath: args.bareRepoPath, branch: args.branch, force: true }), + ), + Effect.catchAll((error) => + Effect.sync(() => { + warnings.push( + `branch '${args.branch}' could not be freed (re-add may fail until cleaned up): ${error.message}`, + ) + }), + ), + ) - // (4) Append a metadata line to the archive README. + // (4) Append a metadata line to the archive README via an atomic write + // (write-temp-then-rename) so a concurrent reader never sees a torn log. 
const readmePath = EffectPath.ops.join( archiveDir, EffectPath.unsafe.relativeFile(ARCHIVE_README_NAME), ) - const existing = yield* fs - .readFileString(readmePath) - .pipe(Effect.catchAll(() => Effect.succeed(''))) const line = `${args.branch}\t${iso}\t${args.commit}\t${args.reason}\n` - yield* fs.writeFileString(readmePath, existing + line) + yield* fs.readFileString(readmePath).pipe( + Effect.catchAll(() => Effect.succeed('')), + Effect.flatMap((existing) => writeFileAtomic({ path: readmePath, content: existing + line })), + Effect.catchAll((error) => + Effect.sync(() => { + warnings.push(`archive README metadata not recorded: ${error.message}`) + }), + ), + ) - return destPath + return { destPath, warnings } }).pipe( Effect.withSpan('megarepo/store/gc/archive-worktree', { attributes: { 'span.label': args.branch, branch: args.branch, reason: args.reason }, @@ -210,7 +260,7 @@ export const scanArchives = (args: { // git reports worktree paths without a trailing slash; normalize so the // prefix test cannot match a sibling like `.archive-old/`. const normalized = EffectPath.unsafe.absoluteDir( - worktree.path.endsWith('/') ? worktree.path : `${worktree.path}/`, + worktree.path.endsWith('/') === true ? worktree.path : `${worktree.path}/`, ) if (normalized.startsWith(archivePrefix) === false) continue @@ -219,7 +269,7 @@ export const scanArchives = (args: { // and the full `feature/x` must be recovered, trailing slash stripped. 
const relative = normalized.slice(archivePrefix.length).replace(/\/+$/u, '') const parsed = parseArchiveDirName(relative) - if (Option.isNone(parsed)) continue + if (Option.isNone(parsed) === true) continue entries.push({ path: normalized, diff --git a/packages/@overeng/megarepo/src/lib/store-fs-atomic.unit.test.ts b/packages/@overeng/megarepo/src/lib/store-fs-atomic.unit.test.ts new file mode 100644 index 000000000..b45be404e --- /dev/null +++ b/packages/@overeng/megarepo/src/lib/store-fs-atomic.unit.test.ts @@ -0,0 +1,72 @@ +/** + * Unit tests for {@link writeFileAtomic} (decision 0010 atomicity helper). + * + * Exercises REAL filesystem writes against a scoped temp dir: + * - the happy path lands the content via write-temp-then-rename; + * - a rename failure (target path is a directory) fails the effect AND leaves + * no `.tmp-*` sibling lingering as garbage (the `tapError` cleanup branch). + */ + +import { FileSystem } from '@effect/platform' +import { NodeContext } from '@effect/platform-node' +import { describe, it } from '@effect/vitest' +import { Effect } from 'effect' +import { expect } from 'vitest' + +import { EffectPath } from '@overeng/effect-path' + +import { writeFileAtomic } from './store-fs-atomic.ts' + +describe('store-fs-atomic: writeFileAtomic', () => { + it.effect( + 'writes content atomically with no temp file left behind', + Effect.fnUntraced( + function* () { + const fs = yield* FileSystem.FileSystem + const dir = EffectPath.unsafe.absoluteDir(`${yield* fs.makeTempDirectoryScoped()}/`) + const target = EffectPath.ops.join(dir, EffectPath.unsafe.relativeFile('record.json')) + + yield* writeFileAtomic({ path: target, content: '{"v":1}\n' }) + + expect(yield* fs.readFileString(target)).toBe('{"v":1}\n') + // No `.tmp-*` sibling survives the successful rename. 
+ const remaining = yield* fs.readDirectory(dir) + expect(remaining.filter((name) => name.includes('.tmp-'))).toEqual([]) + }, + Effect.provide(NodeContext.layer), + Effect.scoped, + ), + ) + + it.effect( + 'on a rename failure (target is a directory) it fails AND removes the temp file', + Effect.fnUntraced( + function* () { + const fs = yield* FileSystem.FileSystem + const dir = EffectPath.unsafe.absoluteDir(`${yield* fs.makeTempDirectoryScoped()}/`) + const target = EffectPath.ops.join(dir, EffectPath.unsafe.relativeFile('record.json')) + + // Make the target path a NON-EMPTY directory so `rename(temp, target)` + // is refused (EISDIR: a regular file cannot replace a directory) — the temp file is written, the rename + // fails, and the cleanup branch must run. + yield* fs.makeDirectory(target, { recursive: true }) + yield* fs.writeFileString( + EffectPath.ops.join( + EffectPath.unsafe.absoluteDir(`${target}/`), + EffectPath.unsafe.relativeFile('occupant'), + ), + 'blocks the rename\n', + ) + + const result = yield* writeFileAtomic({ path: target, content: 'x' }).pipe(Effect.either) + expect(result._tag).toBe('Left') + + // The `.tmp-` sibling must not survive the failed write.
+ const remaining = yield* fs.readDirectory(dir) + expect(remaining.filter((name) => name.includes('.tmp-'))).toEqual([]) + }, + Effect.provide(NodeContext.layer), + Effect.scoped, + ), + ) +}) diff --git a/packages/@overeng/megarepo/src/lib/store-gc-config.unit.test.ts b/packages/@overeng/megarepo/src/lib/store-gc-config.unit.test.ts index 63e26cbf7..812608adc 100644 --- a/packages/@overeng/megarepo/src/lib/store-gc-config.unit.test.ts +++ b/packages/@overeng/megarepo/src/lib/store-gc-config.unit.test.ts @@ -1,10 +1,18 @@ +import { FileSystem } from '@effect/platform' +import { NodeContext } from '@effect/platform-node' +import { it as effectIt } from '@effect/vitest' +import { Effect } from 'effect' import { describe, expect, it } from 'vitest' +import { EffectPath } from '@overeng/effect-path' + import { DEFAULT_ABSENCE_GRACE_MS, DEFAULT_ARCHIVE_RETENTION_MS, DEFAULT_POST_MERGE_GRACE_MS, DEFAULT_STORE_GC_CONFIG, + GC_CONFIG_RELATIVE_PATH, + loadStoreGcConfig, mergeStoreGcConfig, } from './store-gc-config.ts' @@ -41,4 +49,68 @@ describe('store-gc-config', () => { expect(mergeStoreGcConfig({ postMergeGraceMs: 0 }).postMergeGraceMs).toBe(0) }) }) + + describe('loadStoreGcConfig', () => { + const writeConfig = (content: string) => + Effect.gen(function* () { + const fs = yield* FileSystem.FileSystem + const storeBasePath = EffectPath.unsafe.absoluteDir( + `${yield* fs.makeTempDirectoryScoped()}/`, + ) + const configPath = EffectPath.ops.join( + storeBasePath, + EffectPath.unsafe.relativeFile(GC_CONFIG_RELATIVE_PATH), + ) + const configDir = EffectPath.ops.parent(configPath)! 
+ yield* fs.makeDirectory(configDir, { recursive: true }) + yield* fs.writeFileString(configPath, content) + return storeBasePath + }) + + effectIt.effect( + 'absent file ⇒ defaults', + Effect.fnUntraced( + function* () { + const fs = yield* FileSystem.FileSystem + const storeBasePath = EffectPath.unsafe.absoluteDir( + `${yield* fs.makeTempDirectoryScoped()}/`, + ) + expect(yield* loadStoreGcConfig({ storeBasePath })).toEqual(DEFAULT_STORE_GC_CONFIG) + }, + Effect.provide(NodeContext.layer), + Effect.scoped, + ), + ) + + effectIt.effect( + 'valid override file ⇒ merged timers reflect it', + Effect.fnUntraced( + function* () { + const storeBasePath = yield* writeConfig( + JSON.stringify({ absenceGraceMs: 1234, archiveRetentionMs: 5678 }), + ) + expect(yield* loadStoreGcConfig({ storeBasePath })).toEqual({ + absenceGraceMs: 1234, + postMergeGraceMs: DEFAULT_POST_MERGE_GRACE_MS, + archiveRetentionMs: 5678, + }) + }, + Effect.provide(NodeContext.layer), + Effect.scoped, + ), + ) + + effectIt.effect( + 'corrupt file ⇒ DEFAULT_STORE_GC_CONFIG without error', + Effect.fnUntraced( + function* () { + const storeBasePath = yield* writeConfig('{ not valid json ::: }') + // Degrades to defaults rather than failing the gc path. 
+ expect(yield* loadStoreGcConfig({ storeBasePath })).toEqual(DEFAULT_STORE_GC_CONFIG) + }, + Effect.provide(NodeContext.layer), + Effect.scoped, + ), + ) + }) }) diff --git a/packages/@overeng/megarepo/src/lib/store-liveness.ts b/packages/@overeng/megarepo/src/lib/store-liveness.ts index 4c44dd856..20ec70c0e 100644 --- a/packages/@overeng/megarepo/src/lib/store-liveness.ts +++ b/packages/@overeng/megarepo/src/lib/store-liveness.ts @@ -22,6 +22,7 @@ import { readMegarepoConfig, } from './config.ts' import { LOCK_FILE_NAME, readLockFile } from './lock.ts' +import { writeFileAtomic } from './store-fs-atomic.ts' import type { MegarepoStore } from './store.ts' const REGISTRY_VERSION = 1 @@ -245,7 +246,13 @@ export const refreshWorkspaceRegistry = ({ const content = yield* Schema.encode(Schema.parseJson(StoreWorkspaceRecord, { space: 2 }))( record, ) - yield* fs.writeFileString(workspaceRecordPath({ store, workspaceRoot }), content + '\n') + // Atomic (write-temp-then-rename): a concurrent reader (e.g. an under-lock + // reconcile in another gc process) must never observe a half-written record + // and silently drop this workspace's live-set veto (decision 0010). + yield* writeFileAtomic({ + path: workspaceRecordPath({ store, workspaceRoot }), + content: content + '\n', + }) return record }).pipe( Effect.withSpan('megarepo/store/liveness/refresh-workspace', { @@ -345,7 +352,9 @@ const readRegistryRecords = ({ const content = yield* Schema.encode(Schema.parseJson(StoreWorkspaceRecord, { space: 2 }))( record, ) - yield* fs.writeFileString(recordPath, content + '\n') + // Atomic rewrite so a concurrent reader never sees a torn record and + // drops a live workspace's veto right before deletion (decision 0010). 
+ yield* writeFileAtomic({ path: recordPath, content: content + '\n' }) records.push(record) } else { records.push(parsed) diff --git a/packages/@overeng/megarepo/src/lib/store-pr-state.ts b/packages/@overeng/megarepo/src/lib/store-pr-state.ts index 183dfcf00..a7c469454 100644 --- a/packages/@overeng/megarepo/src/lib/store-pr-state.ts +++ b/packages/@overeng/megarepo/src/lib/store-pr-state.ts @@ -148,6 +148,8 @@ export const resolvePrStateForBranch = ({ if (matches.some((pr) => pr.state === 'OPEN') === true) return { state: 'open' } // Only MERGED/CLOSED remain; pick the most recent by its own timestamp. + // `matches` is non-empty (the early return above guards `length === 0`) and + // `ranked` is a 1:1 map of it, so `ranked[0]` is always defined. const ranked = matches .map((pr) => { const ts = pr.state === 'MERGED' ? isoToMs(pr.mergedAt) : isoToMs(pr.closedAt) @@ -155,8 +157,7 @@ export const resolvePrStateForBranch = ({ }) .toSorted((a, b) => (b.ts ?? -Infinity) - (a.ts ?? -Infinity)) - const winner = ranked[0] - if (winner === undefined) return PR_STATE_NONE + const winner = ranked[0]! if (winner.pr.state === 'MERGED') { return { state: 'merged', mergedAt: winner.ts } diff --git a/packages/@overeng/megarepo/src/test-utils/store-setup.ts b/packages/@overeng/megarepo/src/test-utils/store-setup.ts index 585527226..90a971382 100644 --- a/packages/@overeng/megarepo/src/test-utils/store-setup.ts +++ b/packages/@overeng/megarepo/src/test-utils/store-setup.ts @@ -422,6 +422,38 @@ export const repinWorkspace = ({ } }) +/** + * Re-materialize a fixture worktree as a NON-DETACHED `refs/heads/` + * worktree — the exact shape production creates (`createWorktree({createBranch: + * false})`), as opposed to the `--detach` worktrees `createStoreFixture` + * defaults to. Removes the detached worktree, creates the branch ref, then adds + * a worktree that has that branch checked out (so `git branch -D` is refused + * until HEAD is detached). 
Returns the (unchanged) worktree path. + */ +export const materializeNonDetachedBranchWorktree = ({ + bareRepoPath, + worktreePath, + branch, + commit, +}: { + bareRepoPath: AbsoluteDirPath + worktreePath: AbsoluteDirPath + branch: string + commit: string +}) => + Effect.gen(function* () { + const fs = yield* FileSystem.FileSystem + // Drop the detached worktree the fixture created at this path. + yield* runGitCommand(bareRepoPath, 'worktree', 'remove', '--force', worktreePath) + yield* fs + .remove(worktreePath, { recursive: true, force: true }) + .pipe(Effect.catchAll(() => Effect.void)) + // Create the branch ref and check it out in a fresh worktree (non-detached). + yield* runGitCommand(bareRepoPath, 'branch', branch, commit) + yield* runGitCommand(bareRepoPath, 'worktree', 'add', worktreePath, branch) + return worktreePath + }) + /** * Create a valid archive entry (`/.archive/--/`) * registered as a worktree of the bare repo (proper gitlink), for exercising From f94675597a6936b30c99f61df1ef73f374597108 Mon Sep 17 00:00:00 2001 From: schickling-assistant <261620128+schickling-assistant@users.noreply.github.com> Date: Sun, 14 Jun 2026 15:36:31 +0200 Subject: [PATCH 13/13] feat(megarepo): never archive a repo's default branch in cold gc (#771) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Dry-run validation (real branch CLI in isolated stores: 9/9 scenarios pass; read-only projection over the real 281 named worktrees) surfaced a hazard: a vendored dep's `main` worktree was archive-eligible via a headRefName=main PR-join false-positive while not in any recorded live set. Add a hard default-branch guard (Git.getStoreDefaultBranch reads the bare's HEAD offline; keep reason `default-branch`) before any staleness/liveness logic — belt-and-suspenders over the cross-megarepo veto. 
Decision 0012 also records the validation outcomes left as-is: stash stays repo-global, default-on stands (0006), worktree-deletion is the accepted scope (no artifact-pruning). Regression test: a merged+clean+grace-met worktree on the default branch is kept. Co-Authored-By: Claude Opus 4.8 (1M context) --- CHANGELOG.md | 1 + .../0012-never-archive-default-branch.md | 47 +++++++++++++++++++ packages/@overeng/megarepo/docs/spec.md | 6 ++- .../megarepo/src/cli/commands/store/mod.ts | 12 +++++ .../src/cli/store-gc-cold.integration.test.ts | 37 +++++++++++++++ packages/@overeng/megarepo/src/lib/git.ts | 16 +++++++ 6 files changed, 118 insertions(+), 1 deletion(-) create mode 100644 packages/@overeng/megarepo/docs/decisions/0012-never-archive-default-branch.md diff --git a/CHANGELOG.md b/CHANGELOG.md index 249201b65..18896bce1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -17,6 +17,7 @@ All notable changes to this project will be documented in this file. ### Fixed +- **@overeng/megarepo**: Cold `mr store gc` now never reclaims a repo's **default branch** (read offline from the bare's `HEAD`), independent of PR state or liveness. Dry-run validation against the real store found a vendored dependency's `main` worktree archive-eligible via a `headRefName=main` PR-join false-positive while not in any recorded live set; the guard (keep reason `default-branch`, decision 0012) closes the `main`/`master` hazard as a belt-and-suspenders complement to the cross-megarepo veto (#771). - **@overeng/megarepo**: Harden the cold named-branch `mr store gc` reclamation path against an adversarial review. (1) **Archive freed no branch for production worktrees**: a production `refs/heads/*` worktree is NON-DETACHED, so after `git worktree move` the moved worktree still has the branch checked out and `git branch -D` is REFUSED (`cannot delete branch used by worktree`), leaving the branch unfreed and a later `mr apply` re-add broken (invariant 4). 
Fixed by detaching the moved worktree's HEAD (`git checkout --detach`, new `Git.detachWorktreeHead`) before freeing the ref; covered by a new integration test using a non-detached worktree (the prior fixtures only used `--detach`, masking the defect). (2) **Liveness registry writes were non-atomic**: `refreshWorkspaceRegistry` and the under-lock reconcile rewrite used plain `writeFileString`, so a torn read during a concurrent gc reconcile could drop a workspace's live-set veto (decision 0010 hard veto); both now route through `writeFileAtomic`. (3) **Partial-archive mis-reporting**: once the move succeeded the result is now `archived` with the real `recoverPath` (post-move branch-free + README steps are best-effort-but-reported via a warning), instead of falsely reporting `error`/"left intact". The `.archive/README.md` append is now an atomic write. New tests cover the archive-time live-set veto re-check, `loadStoreGcConfig` file load + corrupt-file degradation, a CLOSED-PR archive, dry-run reap intent, the unclean-reconcile grace withholding/restart, and `writeFileAtomic` temp-cleanup on a rename failure (#771). - **@overeng/otelite**: Honor durability-before-ack — flush each export to the kernel before the 200/OK. `tokio::fs::File` buffers writes, so `write_all` alone did NOT guarantee the bytes reached the kernel before the sink acked; an independent reader (or a crash) before the next flush could miss them, contradicting R05 ("flush … before acking") and the `append_line` doc's own "durably reaching the kernel before returning" promise. This surfaced as a CI flake in the `durable_before_ack` gate (a read immediately after the 200 occasionally saw an empty file under thread contention — reproduced ~1/60 at 16 test threads). Fix: `SignalFile::append_line` / `append_json` now `flush()` after `write_all`, before returning. 
This is a flush, not an fsync — `sync_all` (physical-disk durability) stays deferred to shutdown, so the M2 "no per-export fsync under the lock" throughput decision is preserved. Verified: 0 failures over 200 × 16-thread runs (was ~1/60). - **@overeng/otelite**: Make the HTTP-JSON metrics receive path lossless, fixing two silent data-loss bugs a stress test surfaced. The upstream `opentelemetry-proto` `with-serde` deserialize — which the receiver used to BUILD the proto value the sink then re-serialized — silently drops several metric JSON shapes: a `sum`/`gauge` `NumberDataPoint` whose int64 value is the default string form (`"asInt":"7"`) lost its value entirely (captured null), and a regular `histogram` metric was dropped down to `{name,description,unit,metadata}` (its data oneof gone). Both returned HTTP 200 + bumped `counts.metrics` → a silent mis-capture that violates the lossless + "loud, never silent" contracts (decisions/0011). Fix: on the JSON metrics path, `with-serde` still runs purely as the dialect VALIDATOR (Err → 400 + `note_rejected`, gate unchanged), but on success the receiver now persists the VALIDATED RAW JSON body verbatim (re-emitted through `serde_json::Value` via the new `Sink::write_metrics_json`, counting metrics from the JSON structure) instead of the lossy proto re-serialization. Since the body is already canonical OTLP/JSON and `inspect` walks raw JSON, the JSON metrics path is now lossless for string-int64 sums/gauges, regular histograms, AND exponential histograms — the last also RESOLVING the previously-documented exp-histogram-on-JSON limitation for the receive path. Traces/logs JSON paths and all protobuf/gRPC paths are unchanged (already lossless). 
New gates (real receiver, no mocks): an HTTP-JSON round-trip of a string-int64 sum + histogram + exponential histogram all survive receive → capture → `inspect`; cross-transport equivalence extended to metrics (the same logical string-int64-sum + histogram over HTTP-JSON vs HTTP-protobuf vs gRPC flattens to equivalent `inspect` rows, the proto/gRPC fixture built natively to avoid the lossy `with-serde` source); and a loud-rejection guard that a malformed metrics JSON body still 400s + is captured nowhere. KNOWN RESIDUAL: the upstream metrics `with-serde` is more lenient than the trace one, so for metrics the JSON dialect gate is effectively structural (malformed JSON / hard field-type mismatches), tolerating some non-default dialect shapes (numeric int64 nanos, string enums) rather than rejecting them loudly — a stricter metrics dialect gate is a follow-up (#769, #772). diff --git a/packages/@overeng/megarepo/docs/decisions/0012-never-archive-default-branch.md b/packages/@overeng/megarepo/docs/decisions/0012-never-archive-default-branch.md new file mode 100644 index 000000000..1aea3e57f --- /dev/null +++ b/packages/@overeng/megarepo/docs/decisions/0012-never-archive-default-branch.md @@ -0,0 +1,47 @@ +# Never archive a repo's default branch + validation outcomes + +## Status + +accepted (from dry-run validation) + +## Context + +Manual dry-run validation drove the real branch CLI against isolated stores (all +9 scenarios passed) and projected the classifier, read-only, over the real store +(281 named worktrees). Two things emerged worth deciding/recording. + +The validation surfaced a concrete hazard: `ai/nanoid main` — a vendored +dependency's default branch — was steady-state archive-eligible, because the +PR-state join matched an old upstream PR whose `headRefName` was `main`, and the +worktree was not in any recorded live set. 
Archiving a dependency's default +branch is never wanted, and common names (`main`/`master`) are exactly the ones +prone to PR-join false positives. + +## Decision + +Add a hard **default-branch guard**: a worktree whose ref equals its repo's +default branch is NEVER reclaimed by the cold path, independent of PR state and +liveness. The default branch is read locally and offline from the bare repo's +`HEAD` symbolic ref (`Git.getStoreDefaultBranch`), so it costs no extra network. +The guard runs before any staleness/liveness logic (keep reason `default-branch`). + +## Validation outcomes (other levers — decided to leave as-is) + +- **Stash stays repo-global** (the dominant suppressor: 146/151 keeps). Per-worktree + stash would lift eligibility from 6 to ~61 worktrees (~7.9G), but it was kept + repo-global: the over-keep is conservative (never risks a stash) and per-worktree + attribution is fuzzy. Confirms decision 0004/B3 granularity intentionally. +- **Default-on stands** (decision 0006): the per-run cost (~31 fetch + ~31 gh) is + accepted even though steady-state reclaim is modest. +- **Worktree-deletion is the accepted scope**: validation showed it reclaims + ~90M–7.9G while ~445G sits in `node_modules`/`target` of legitimately-kept + worktrees. Artifact-pruning (#771's original framing) is explicitly NOT pursued; + bulk disk is handled by other means. Reaffirms [0001](0001-gc-reclaims-cold-named-worktrees.md). +- **First real run archives nothing** (14d absence-grace bootstrap) — accepted as + the deliberate slow-to-first-archive behaviour. + +## Consequences + +- One extra local `symbolic-ref` read per repo on the cold path (negligible). +- A dependency's default branch is safe even when the liveness registry is stale — + a belt-and-suspenders complement to the cross-megarepo veto. 
diff --git a/packages/@overeng/megarepo/docs/spec.md b/packages/@overeng/megarepo/docs/spec.md index d036dffd1..803dec200 100644 --- a/packages/@overeng/megarepo/docs/spec.md +++ b/packages/@overeng/megarepo/docs/spec.md @@ -632,12 +632,16 @@ mr store gc [--dry-run] [--force] [--all] ##### Cold named-branch reclamation -> Rationale and trade-offs in `docs/decisions/0001`–`0011`; domain terms in +> Rationale and trade-offs in `docs/decisions/0001`–`0012`; domain terms in > `docs/glossary.md`. A named `refs/heads/*` worktree is reclaimed only when it is **cold**, decided by layered gates evaluated in this order (each short-circuits to keep): +0. **Default-branch guard (hard).** A worktree whose ref is its repo's default + branch (read offline from the bare's `HEAD`) is never reclaimed, independent of + PR state or liveness — closes the `main`/`master` PR-join false-positive + (decision 0012). 1. **Cross-megarepo live-set veto (hard).** Present in any registered workspace's live set (`collectStoreLiveSet`, store-wide) ⇒ keep. A `repos/` symlink alone gives no protection — only a recorded `livePaths` entry does. diff --git a/packages/@overeng/megarepo/src/cli/commands/store/mod.ts b/packages/@overeng/megarepo/src/cli/commands/store/mod.ts index 21d03bad2..12aaccf4c 100644 --- a/packages/@overeng/megarepo/src/cli/commands/store/mod.ts +++ b/packages/@overeng/megarepo/src/cli/commands/store/mod.ts @@ -542,6 +542,12 @@ const coldReclaimRepo = ({ return results } + // The repo's default branch (e.g. `main`) is NEVER reclaimed, regardless of + // PR state or liveness — archiving a dependency's default branch is never + // wanted, and common names (`main`/`master`) are prone to PR-join false + // positives. Read locally from the bare's HEAD (offline). 
+ const defaultBranch = Option.getOrUndefined(yield* Git.getStoreDefaultBranch({ bareRepoPath })) + for (const target of namedWorktrees) { const { worktree } = target // Only `refs/heads/*` carries a branch identity to reclaim; tags have no @@ -551,6 +557,12 @@ const coldReclaimRepo = ({ continue } + // Default-branch guard (hard keep, before any staleness/liveness logic). + if (defaultBranch !== undefined && worktree.ref === defaultBranch) { + results.push(coldResult({ target, status: 'kept', reason: 'default-branch' })) + continue + } + // ref_mismatch gate: the store path claims `` but the worktree HEAD is // on a different branch. Archiving frees `refs/heads/`, which is NOT // the branch actually checked out — keep and surface the divergence. diff --git a/packages/@overeng/megarepo/src/cli/store-gc-cold.integration.test.ts b/packages/@overeng/megarepo/src/cli/store-gc-cold.integration.test.ts index ca3d644ab..f6bb5932a 100644 --- a/packages/@overeng/megarepo/src/cli/store-gc-cold.integration.test.ts +++ b/packages/@overeng/megarepo/src/cli/store-gc-cold.integration.test.ts @@ -245,6 +245,43 @@ describe('mr store gc — cold named-branch reclamation', () => { ), ) + it.effect( + 'default branch ⇒ kept (never archived) even when merged + clean + grace-met', + Effect.fnUntraced( + function* () { + const fs = yield* FileSystem.FileSystem + const { storePath, bareRepoPaths, worktreePaths } = yield* createStoreFixture([ + { ...REPO, branches: ['trunk'], withRemote: true }, + ]) + const bareRepoPath = bareRepoPaths[REPO_KEY]! + const worktreePath = worktreePaths[`${REPO_KEY}#trunk`]! + const commit = yield* getWorktreeCommit(worktreePath) + yield* materializeBranchRef({ bareRepoPath, branch: 'trunk', commit }) + // Make `trunk` the repo's default branch (the bare's HEAD). 
+ yield* git(bareRepoPath, 'symbolic-ref', 'HEAD', 'refs/heads/trunk') + + const cwd = yield* outsideCwd() + // Seed cold so absence grace is satisfied — proving the keep reason is the + // default-branch guard, not `absence-grace`. A merged PR would otherwise archive. + yield* seedColdObservation({ cwd, storePath }) + const { results } = yield* runGc({ + cwd, + storePath, + prRepos: [{ relativePath: REPO_RELATIVE, prs: [mergedPr('trunk', NOW - 30 * DAY_MS)] }], + }) + + const result = findByRef(results, 'trunk') + expect(result?.status).toBe('kept') + expect(result?.reason).toBe('default-branch') + // Untouched on disk; branch ref intact. + expect(yield* fs.exists(worktreePath)).toBe(true) + expect(yield* Git.refExists({ repoPath: bareRepoPath, ref: 'refs/heads/trunk' })).toBe(true) + }, + Effect.provide(NodeContext.layer), + Effect.scoped, + ), + ) + it.effect( 'merged + dirty ⇒ archived with dirt intact', Effect.fnUntraced( diff --git a/packages/@overeng/megarepo/src/lib/git.ts b/packages/@overeng/megarepo/src/lib/git.ts index bbe3f2805..21050c4ce 100644 --- a/packages/@overeng/megarepo/src/lib/git.ts +++ b/packages/@overeng/megarepo/src/lib/git.ts @@ -474,6 +474,22 @@ export const getDefaultBranch = (args: { url: string } | { repoPath: string; rem return Option.none() }) +/** + * The store bare repo's default branch, read LOCALLY from its `HEAD` symbolic ref + * (set at clone time to the remote's default). Offline — no network, unlike + * {@link getDefaultBranch} which `ls-remote`s. Returns `none` when HEAD is + * detached or unreadable. Used by cold GC to never reclaim a repo's default + * branch regardless of PR state or liveness. + */ +export const getStoreDefaultBranch = (args: { bareRepoPath: string }) => + runGitCommand({ + args: ['symbolic-ref', '--short', 'HEAD'], + cwd: args.bareRepoPath, + }).pipe( + Effect.map((out) => (out === '' ? 
Option.none() : Option.some(out))), + Effect.catchAll(() => Effect.succeed(Option.none())), + ) + /** * Resolve a ref to its commit SHA * Works with branches, tags, and commits