From 6578288b9957f6b5475604c0f3e948c75e477b22 Mon Sep 17 00:00:00 2001
From: Allen Manzano-Wight <6640236+AllenBW@users.noreply.github.com>
Date: Sat, 13 Jun 2026 20:37:00 -0400
Subject: [PATCH 01/12] code-status-bar: add usage-bar tests + harden installer
 (review #1, #4)

---
 code-status-bar/install.sh              | 41 ++++++++++++--
 code-status-bar/package.json            |  8 +++
 code-status-bar/test/usage-bar.test.cjs | 73 +++++++++++++++++++++++++
 3 files changed, 116 insertions(+), 6 deletions(-)
 create mode 100644 code-status-bar/package.json
 create mode 100644 code-status-bar/test/usage-bar.test.cjs
diff --git a/code-status-bar/install.sh b/code-status-bar/install.sh
index 12e2b87..b023a2d 100755
--- a/code-status-bar/install.sh
+++ b/code-status-bar/install.sh
@@ -12,17 +12,47 @@ DEST="$CONFIG_DIR/settings.json"
 COLORED=0
 [ "${1:-}" = "--colored" ] && COLORED=1
 
+if [ "$COLORED" -eq 1 ] && ! command -v node >/dev/null 2>&1; then
+  echo "Error: --colored needs Node on your PATH (the helper runs via node)." >&2
+  echo "Install Node, or use the default (no-flag) config." >&2
+  exit 1
+fi
+
 mkdir -p "$CONFIG_DIR"
+
+# Only treat the script's directory as a real clone if it actually contains this
+# module (both files present). Under `curl | bash`, BASH_SOURCE is unset and the path
+# falls back to $0's directory; the check stops a stray local file from being copied.
 SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]:-$0}")" 2>/dev/null && pwd || echo "")"
+LOCAL_OK=0
+if [ -n "$SCRIPT_DIR" ] && [ -f "$SCRIPT_DIR/install.sh" ] && [ -f "$SCRIPT_DIR/settings.json" ]; then
+  LOCAL_OK=1
+fi
 
-# fetch <relative-path> <destination> — prefer a local clone, fall back to download.
+# fetch <relative-path> <destination>: download (or copy from a verified clone) to a
+# temp file, validate, then move into place — so a failed fetch never leaves a broken
+# or empty config at the destination.
 fetch() {
-  local rel="$1" out="$2"
-  if [ -n "$SCRIPT_DIR" ] && [ -f "$SCRIPT_DIR/$rel" ]; then
-    cp "$SCRIPT_DIR/$rel" "$out"
+  local rel="$1" out="$2" tmp
+  tmp="$(mktemp)"
+  if [ "$LOCAL_OK" -eq 1 ] && [ -f "$SCRIPT_DIR/$rel" ]; then
+    cp "$SCRIPT_DIR/$rel" "$tmp" || { rm -f "$tmp"; exit 1; }
   else
-    curl -fsSL "$REPO_RAW/$rel" -o "$out"
+    # Abort on any download error: an interrupted transfer can leave a partial, non-empty file.
+    curl -fsSL "$REPO_RAW/$rel" -o "$tmp" || { echo "Error: could not download '$rel'; aborting (your existing config is untouched)." >&2; rm -f "$tmp"; exit 1; }
+  fi
+  if [ ! -s "$tmp" ]; then
+    echo "Error: fetched '$rel' is empty; aborting (your existing config is untouched)." >&2; rm -f "$tmp"; exit 1
   fi
+  case "$rel" in
+    *.json)
+      if command -v node >/dev/null 2>&1; then
+        node -e 'JSON.parse(require("fs").readFileSync(process.argv[1],"utf8"))' "$tmp" 2>/dev/null \
+          || { echo "Error: fetched '$rel' is not valid JSON; aborting." >&2; rm -f "$tmp"; exit 1; }
+      fi
+      ;;
+  esac
+  mv "$tmp" "$out"
 }
 
 if [ -f "$DEST" ]; then
@@ -37,7 +67,6 @@ if [ "$COLORED" -eq 1 ]; then
   fetch "settings.colored.json" "$DEST"
   echo "Installed COLORED variant -> $DEST"
   echo "Helper script           -> $SCRIPTS_DIR/usage-bar.cjs"
-  echo "(needs Node on your PATH at render time — ccstatusline already provides it)"
 else
   fetch "settings.json" "$DEST"
   echo "Installed -> $DEST"
diff --git a/code-status-bar/package.json b/code-status-bar/package.json
new file mode 100644
index 0000000..8df5cd1
--- /dev/null
+++ b/code-status-bar/package.json
@@ -0,0 +1,8 @@
+{
+  "name": "code-status-bar",
+  "version": "0.1.0",
+  "private": true,
+  "description": "Usage-limit-aware Claude Code status bar (Agentic Workflow Toolkit module 1)",
+  "engines": { "node": ">=18" },
+  "scripts": { "test": "node --test" }
+}
diff --git a/code-status-bar/test/usage-bar.test.cjs b/code-status-bar/test/usage-bar.test.cjs
new file mode 100644
index 0000000..40e7284
--- /dev/null
+++ b/code-status-bar/test/usage-bar.test.cjs
@@ -0,0 +1,73 @@
+'use strict';
+const { test } = require('node:test');
+const assert = require('node:assert');
+const cp = require('node:child_process');
+const path = require('node:path');
+
+const SCRIPT = path.resolve(__dirname, '..', 'scripts', 'usage-bar.cjs');
+const ESC = '\x1b';
+const YELLOW = `${ESC}[38;2;252;233;79m`;
+const GREEN = `${ESC}[38;2;138;226;52m`;
+const RED = `${ESC}[38;2;239;41;41m`;
+
+function run(args, payload) {
+  return cp.execFileSync('node', [SCRIPT, ...args], {
+    input: payload === undefined ? '' : JSON.stringify(payload),
+    encoding: 'utf8'
+  });
+}
+
+function rl(over) {
+  const now = Math.floor(Date.now() / 1000);
+  return {
+    rate_limits: Object.assign({
+      five_hour: { used_percentage: 72, resets_at: now + 7200 },
+      seven_day: { used_percentage: 41, resets_at: now + 432000 },
+      seven_day_opus: { used_percentage: 88, resets_at: now + 432000 }
+    }, over || {})
+  };
+}
+
+test('session at 72% renders bold yellow with label and percent', () => {
+  const out = run(['session'], rl());
+  assert.ok(out.includes(YELLOW), 'expected yellow');
+  assert.ok(out.includes('Session: '), 'expected label');
+  assert.ok(out.includes('72.0%'), 'expected percent');
+  assert.ok(out.startsWith(`${ESC}[1m`), 'expected bold prefix');
+});
+
+test('weekly at 41% is green, opus at 88% is red', () => {
+  assert.ok(run(['weekly'], rl()).includes(GREEN));
+  assert.ok(run(['opus'], rl()).includes(RED));
+});
+
+test('multiple limits are joined with a separator', () => {
+  const out = run(['weekly', 'opus'], rl());
+  assert.ok(out.includes('Weekly: '));
+  assert.ok(out.includes('Weekly Opus: '));
+  assert.ok(out.includes(' | '));
+});
+
+test('absent data renders nothing so the widget collapses', () => {
+  assert.equal(run(['session'], {}), '');
+  assert.equal(run(['session']), '');
+  assert.equal(run(['session'], rl({ five_hour: undefined })), '');
+});
+
+test('non-numeric percentage renders nothing', () => {
+  assert.equal(run(['session'], rl({ five_hour: { used_percentage: 'oops', resets_at: 0 } })), '');
+});
+
+test('thresholds: 50 -> yellow, just under -> green; 85 -> red, just under -> yellow', () => {
+  const at = (p) => run(['session'], rl({
+    five_hour: { used_percentage: p, resets_at: Math.floor(Date.now() / 1000) + 1 }
+  }));
+  assert.ok(at(50).includes(YELLOW), '50 should be yellow');
+  assert.ok(at(49.9).includes(GREEN), '49.9 should be green');
+  assert.ok(at(85).includes(RED), '85 should be red');
+  assert.ok(at(84.9).includes(YELLOW), '84.9 should be yellow');
+});
+
+test('unknown limit name renders nothing', () => {
+  assert.equal(run(['bogus'], rl()), '');
+});

From bebeeb2d09d2fcff1e49e894c11363945fd46b37 Mon Sep 17 00:00:00 2001
From: Allen Manzano-Wight <6640236+AllenBW@users.noreply.github.com>
Date: Sat, 13 Jun 2026 20:37:00 -0400
Subject: [PATCH 02/12] shift: implement v1 keep-going engine with tests
 (module 2)

---
 .gitignore                       |  1 +
 shift/PLAN.md                    | 14 +++++
 shift/README.md                  | 72 +++++++++++++++++++++++
 shift/bin/shift                  | 88 ++++++++++++++++++++++++++++
 shift/examples/queue/00-hello.md |  6 ++
 shift/hooks/shift-stop.cjs       | 98 ++++++++++++++++++++++++++++++++
 shift/lib/bounds.cjs             | 17 ++++++
 shift/lib/brief.cjs              | 27 +++++++++
 shift/lib/decision.cjs           | 18 ++++++
 shift/lib/discovery.cjs          | 35 ++++++++++++
 shift/lib/state.cjs              | 42 ++++++++++++++
 shift/package.json               |  9 +++
 shift/test/bounds.test.cjs       | 27 +++++++++
 shift/test/brief.test.cjs        | 31 ++++++++++
 shift/test/cli.test.cjs          | 49 ++++++++++++++++
 shift/test/decision.test.cjs     | 38 +++++++++++++
 shift/test/discovery.test.cjs    | 37 ++++++++++++
 shift/test/hook.test.cjs         | 95 +++++++++++++++++++++++++++++++
 shift/test/state.test.cjs        | 49 ++++++++++++++++
 19 files changed, 753 insertions(+)
 create mode 100644 shift/README.md
 create mode 100755 shift/bin/shift
 create mode 100644 shift/examples/queue/00-hello.md
 create mode 100755 shift/hooks/shift-stop.cjs
 create mode 100644 shift/lib/bounds.cjs
 create mode 100644 shift/lib/brief.cjs
 create mode 100644 shift/lib/decision.cjs
 create mode 100644 shift/lib/discovery.cjs
 create mode 100644 shift/lib/state.cjs
 create mode 100644 shift/package.json
 create mode 100644 shift/test/bounds.test.cjs
 create mode 100644 shift/test/brief.test.cjs
 create mode 100644 shift/test/cli.test.cjs
 create mode 100644 shift/test/decision.test.cjs
 create mode 100644 shift/test/discovery.test.cjs
 create mode 100644 shift/test/hook.test.cjs
 create mode 100644 shift/test/state.test.cjs

diff --git a/.gitignore b/.gitignore
index e0a2660..26ec0e5 100644
--- a/.gitignore
+++ b/.gitignore
@@ -2,3 +2,4 @@
 *.bak
 *.bak-*
 node_modules/
+.shift/
diff --git a/shift/PLAN.md b/shift/PLAN.md
index 5eddb56..d9f79bf 100644
--- a/shift/PLAN.md
+++ b/shift/PLAN.md
@@ -1018,3 +1018,17 @@ Re-run with `maxIterations: 1`; confirm the run ends on "max iterations" with pe
 - **Testing strategy (unit pure modules + integration hook/CLI + manual smoke + dry-run):** Tasks 1–7, 9. ✔
 - **No third-party deps:** all `node:` built-ins. ✔
 - **Known gaps (deferred, documented):** usage-cap data source and rate-limit termination signature → v2 (SPEC §9). Mid-bin early-stop accepted in v1, reviewer-caught; verify pass → v3.
+
+---
+
+## Implementation notes (as-built deviations)
+
+Built on branch `shift-v1`. The draft code blocks above are the design intent; these corrections were applied during implementation:
+
+- **`state.cjs` — carry `text` through the merge, strip it on save.** `mergeDiscovered` copies each bin's freshly-read `text` into the in-memory bin (the brief needs the body); `saveState` strips `text` before writing so `state.json` stays lean. Without this the fed-back brief had the instructions but not the task body — caught by the hook integration test, not the unit tests.
+- **Review fix #2 — `shift-stop.cjs` resolves the repo from the hook payload's `cwd`** (`input.cwd || process.cwd()`); a hook's process cwd isn't guaranteed to be the project root. Has a dedicated test.
+- **Review fix #3 — summary surfaces logged `Needs you:` lines**, not just blocked bins; `brief.cjs` documents the `Needs you: <detail>` convention. Has a test.
+- **Security — `bin/shift` uses `execFileSync('git', [...args])`** (argument array, no shell) for branch ops, so a config-supplied branch name can't inject shell metacharacters; added `git checkout` fallbacks for Git < 2.23.
+- **`package.json` — `"test": "node --test"`** (Node ≥18 auto-discovery; a bare `test/` arg isn't accepted) and `"engines": { "node": ">=18" }`.
+
+All 28 `shift` tests + 7 `code-status-bar` tests pass; `install.sh` verified end-to-end.
diff --git a/shift/README.md b/shift/README.md
new file mode 100644
index 0000000..5ebe712
--- /dev/null
+++ b/shift/README.md
@@ -0,0 +1,72 @@
+# shift
+
+Autonomous work-queue runner for **Claude Code** — module 2 of the [Agentic Workflow Toolkit](../). Pre-load bins of work, leave, and `shift` keeps Claude working through them past natural stop points, using its best judgment, until the queue is empty or a bound is hit. You review the output at the end.
+
+> **This is v1** — the intra-session engine (a Stop hook). It keeps a *running* session grinding the queue, bounded by a time box + max iterations. Surviving the 5-hour rate-limit wall (auto-resume) and a usage cap are **v2**. See [SPEC.md](./SPEC.md) and [PLAN.md](./PLAN.md).
+
+## How it works
+
+You drop work into source folders (hand-written briefs and/or plugin-generated plans). `shift start` discovers them, records a run in `.shift/`, and creates a `shift/<date>` branch. You open Claude Code and say "begin the shift." From then on, a **Stop hook** runs each time the agent would stop: it marks the finished bin done (or blocked, if the agent recorded a blocker in `.shift/blocked.jsonl`), picks the next pending bin, and feeds it back as the next instruction — so the agent keeps going. When the queue drains (or a bound trips, or you hit the kill switch), it lets the session stop and writes `.shift/summary.md`.
+
+The hook is safe to register globally: it no-ops in any repo that isn't an active `shift` run.
+
+## Safety model
+
+Full best-judgment autonomy on reversible, in-worktree work. By default it will **not** push, publish, send externally, or delete outside the worktree — it does the preparable part and records a `Needs you:` line instead, which the summary collects. All work lands on the `shift/<date>` branch, so review is a clean diff. Every decision is logged. Hard stops: time box, max iterations, and a kill switch (`shift stop`).
+
+## Install
+
+1. Get the files (clone the toolkit, or copy the `shift/` folder).
+2. Register the Stop hook **once** in `~/.claude/settings.json`:
+
+```json
+{
+  "hooks": {
+    "Stop": [
+      { "matcher": "", "hooks": [
+        { "type": "command", "command": "node /ABSOLUTE/PATH/TO/shift/hooks/shift-stop.cjs" }
+      ] }
+    ]
+  }
+}
+```
+
+> Verify the exact hook schema against the current Claude Code hooks docs. The engine only needs "block + feed `reason` back" and the `stop_hook_active` re-entry flag, and it resolves the repo from the hook payload's `cwd`.
+
+3. (Optional) put `shift/bin/shift` on your PATH.
+
+## Use
+
+```bash
+cd your-repo
+mkdir queue && $EDITOR queue/01-first-task.md     # one brief per file
+shift start --dry-run                              # preview the queue, branch, bounds
+shift start                                        # init run + create shift/<date> branch
+# open Claude Code here and say: "begin the shift"
+shift status                                       # check progress anytime
+shift stop                                         # stop cleanly after the current bin
+```
+
+Point at plan folders too (e.g. Superpowers output) by editing `.shift/config.json`:
+
+```json
+{
+  "sources": [
+    { "path": "queue", "kind": "briefs" },
+    { "path": "docs/superpowers/plans", "kind": "plans" }
+  ],
+  "bounds": { "maxHours": 4, "maxIterations": 30 },
+  "definitionOfDone": "Builds and tests pass; work committed on the run branch.",
+  "git": { "branch": "shift/{date}", "allowPush": false, "allowOutwardActions": false }
+}
+```
+
+When the run ends, read `.shift/summary.md` (it lists bins done/blocked and a "Needs you" section), then review the `shift/<date>` branch.
+
+## Develop
+
+```bash
+cd shift && npm test     # node --test, no dependencies
+```
+
+Pure logic lives in `lib/` (discovery, state, bounds, brief, decision) and is unit-tested; `hooks/shift-stop.cjs` is the thin I/O shell, integration-tested by driving it with crafted hook input.
diff --git a/shift/bin/shift b/shift/bin/shift
new file mode 100755
index 0000000..6d6f1d4
--- /dev/null
+++ b/shift/bin/shift
@@ -0,0 +1,88 @@
+#!/usr/bin/env node
+'use strict';
+const fs = require('node:fs');
+const path = require('node:path');
+const cp = require('node:child_process');
+const { discoverBins } = require('../lib/discovery.cjs');
+const { initState, saveState, loadState, mergeDiscovered } = require('../lib/state.cjs');
+
+function isoStamp(d) { return d.toISOString().slice(0, 19).replace(/:/g, '-'); } // UTC, e.g. 2026-06-13T20-37-00
+function dateStr(d) { return d.toISOString().substring(0, 10); } // UTC calendar date, e.g. 2026-06-13
+
+const DEFAULT_CONFIG = {
+  sources: [{ path: 'queue', kind: 'briefs' }],
+  bounds: { maxHours: 2, maxIterations: 20 },
+  definitionOfDone: 'Builds and tests pass; work committed on the run branch.',
+  git: { branch: 'shift/{date}', allowPush: false, allowOutwardActions: false }
+};
+
+function ensureBranch(cwd, branch) {
+  // execFileSync with an argument array — no shell, so a branch name from config
+  // can't inject shell metacharacters.
+  for (const args of [
+    ['switch', '-c', branch], ['switch', branch],
+    ['checkout', '-b', branch], ['checkout', branch]
+  ]) {
+    try { cp.execFileSync('git', args, { cwd, stdio: 'ignore' }); return true; } catch { /* try next */ }
+  }
+  return false;
+}
+
+function cmdStart(args) {
+  const cwd = process.cwd();
+  const dir = path.join(cwd, '.shift');
+  const now = new Date();
+  const dryRun = args.includes('--dry-run');
+
+  let config = DEFAULT_CONFIG;
+  const cfgFile = path.join(dir, 'config.json');
+  if (fs.existsSync(cfgFile)) {
+    config = { ...DEFAULT_CONFIG, ...JSON.parse(fs.readFileSync(cfgFile, 'utf8')) };
+  }
+  const branch = (config.git.branch || 'shift/{date}').replace('{date}', dateStr(now));
+  const discovered = discoverBins(config.sources, cwd);
+
+  if (dryRun) {
+    console.log('shift dry-run');
+    console.log(`branch: ${branch}`);
+    console.log(`bounds: ${JSON.stringify(config.bounds)}`);
+    console.log(`queue (${discovered.length}):`);
+    discovered.forEach((b, i) => console.log(`  ${i + 1}. ${b.id} [${b.kind}]`));
+    return;
+  }
+
+  fs.mkdirSync(dir, { recursive: true });
+  if (fs.existsSync(path.join(dir, 'STOP'))) fs.unlinkSync(path.join(dir, 'STOP'));
+  fs.writeFileSync(cfgFile, JSON.stringify(config, null, 2));
+  let state = initState({ runId: isoStamp(now), startedAt: now.toISOString(), branch });
+  state = mergeDiscovered(state, discovered);
+  saveState(dir, state);
+  fs.writeFileSync(path.join(dir, 'log.md'), `# shift log — ${state.runId}\n`);
+
+  if (!ensureBranch(cwd, branch)) {
+    console.log(`warning: could not create/switch to branch ${branch} (is this a git repo?)`);
+  }
+
+  console.log(`shift started: ${discovered.length} bins on branch ${branch}`);
+  console.log('Now open Claude Code in this repo and say: "begin the shift".');
+}
+
+function cmdStatus() {
+  const state = loadState(path.join(process.cwd(), '.shift'));
+  const c = s => state.bins.filter(b => b.status === s).length;
+  console.log(`run ${state.runId} · branch ${state.branch} · iter ${state.iterations}`);
+  console.log(`bins: ${c('done')} done · ${c('blocked')} blocked · ${c('pending')} pending`);
+}
+
+function cmdStop() {
+  const dir = path.join(process.cwd(), '.shift');
+  fs.mkdirSync(dir, { recursive: true });
+  fs.writeFileSync(path.join(dir, 'STOP'), '');
+  console.log('shift will stop cleanly after the current bin.');
+}
+
+const [, , sub, ...rest] = process.argv;
+if (sub === 'start') cmdStart(rest);
+else if (sub === 'status') cmdStatus();
+else if (sub === 'stop') cmdStop();
+else { console.log('usage: shift <start|status|stop> [--dry-run]'); process.exit(1); }
diff --git a/shift/examples/queue/00-hello.md b/shift/examples/queue/00-hello.md
new file mode 100644
index 0000000..ecfed01
--- /dev/null
+++ b/shift/examples/queue/00-hello.md
@@ -0,0 +1,6 @@
+# Add a project HELLO file
+
+Create a file `HELLO.md` at the repo root containing one sentence describing
+what this repository is. Commit it.
+
+Definition of done: `HELLO.md` exists and is committed on the run branch.
diff --git a/shift/hooks/shift-stop.cjs b/shift/hooks/shift-stop.cjs
new file mode 100755
index 0000000..243f3d5
--- /dev/null
+++ b/shift/hooks/shift-stop.cjs
@@ -0,0 +1,98 @@
+#!/usr/bin/env node
+'use strict';
+const fs = require('node:fs');
+const path = require('node:path');
+const { discoverBins } = require('../lib/discovery.cjs');
+const { loadState, saveState, mergeDiscovered, setBinStatus } = require('../lib/state.cjs');
+const { decide } = require('../lib/decision.cjs');
+
+function readStdin() { try { return fs.readFileSync(0, 'utf8'); } catch { return ''; } } // fd 0 is stdin; any read error yields ''
+
+function readBlocked(dir) {
+  try {
+    return fs.readFileSync(path.join(dir, 'blocked.jsonl'), 'utf8')
+      .split('\n').filter(Boolean)
+      .map(l => { try { return JSON.parse(l); } catch { return null; } })
+      .filter(Boolean);
+  } catch { return []; }
+}
+
+// "Needs you: <detail>" lines the agent appended to the log (non-blocking flags).
+function readNeedsYou(dir) {
+  try {
+    return fs.readFileSync(path.join(dir, 'log.md'), 'utf8')
+      .split('\n')
+      .map(l => l.match(/^Needs you:\s*(.+)$/))
+      .filter(Boolean)
+      .map(m => m[1].trim());
+  } catch { return []; }
+}
+
+function writeSummary(dir, state, reason, now) {
+  const done = state.bins.filter(b => b.status === 'done').length;
+  const blocked = state.bins.filter(b => b.status === 'blocked');
+  const pending = state.bins.filter(b => b.status === 'pending').length;
+  const mins = Math.round((now - Date.parse(state.startedAt)) / 60000);
+  const items = [
+    ...blocked.map(b => `- ${b.id}: ${b.note || 'blocked'}`),
+    ...readNeedsYou(dir).map(n => `- ${n}`)
+  ];
+  const lines = [
+    `# shift summary — ${state.runId}`, '',
+    `Ended: ${reason}`,
+    `Duration: ${mins} min · Iterations: ${state.iterations}`,
+    `Branch: ${state.branch}`,
+    `Bins: ${done} done · ${blocked.length} blocked · ${pending} pending`, '',
+    '## Needs you',
+    ...(items.length ? items : ['- (nothing flagged)'])
+  ];
+  fs.writeFileSync(path.join(dir, 'summary.md'), lines.join('\n') + '\n');
+}
+
+function main() { // Stop-hook entry point: hook JSON arrives on stdin, the decision JSON is written to stdout
+  let input = {};
+  try { input = JSON.parse(readStdin() || '{}'); } catch { input = {}; }
+
+  // Resolve the repo from the hook payload's cwd (the hook's process cwd is not
+  // guaranteed to be the project root); fall back to process.cwd().
+  const cwd = (input && typeof input.cwd === 'string' && input.cwd) ? input.cwd : process.cwd();
+  const dir = path.join(cwd, '.shift');
+  if (!fs.existsSync(path.join(dir, 'state.json'))) { process.stdout.write('{}'); return; } // no run recorded here: no-op
+
+  const config = JSON.parse(fs.readFileSync(path.join(dir, 'config.json'), 'utf8'));
+  let state = loadState(dir);
+  const now = Date.now();
+  const killSwitch = fs.existsSync(path.join(dir, 'STOP'));
+
+  // Attribute the just-finished work to the current bin: blocked if blocked.jsonl names it, else done.
+  if (state.currentBinId) {
+    const b = readBlocked(dir).find(x => x.id === state.currentBinId);
+    state = setBinStatus(state, state.currentBinId, b
+      ? { status: 'blocked', note: b.note }
+      : { status: 'done', finishedAt: new Date(now).toISOString() });
+  }
+
+  // Re-discover (picks up newly added files) and carry over statuses.
+  state = mergeDiscovered(state, discoverBins(config.sources, cwd));
+
+  const result = decide({
+    bins: state.bins, state, config, now,
+    stopHookActive: !!input.stop_hook_active, killSwitch
+  });
+
+  if (result.action === 'block') { // keep going: count the iteration and hand the next brief back as `reason`
+    state.iterations += 1;
+    state.currentBinId = result.nextBinId;
+    saveState(dir, state);
+    fs.appendFileSync(path.join(dir, 'log.md'),
+      `\n## ${new Date(now).toISOString()} — start ${result.nextBinId} (iter ${state.iterations})\n`);
+    process.stdout.write(JSON.stringify({ decision: 'block', reason: result.reason }));
+  } else { // run is over: clear the current bin, write summary.md, and emit an empty decision
+    state.currentBinId = null;
+    saveState(dir, state);
+    writeSummary(dir, state, result.reason, now);
+    process.stdout.write('{}');
+  }
+}
+
+main();
diff --git a/shift/lib/bounds.cjs b/shift/lib/bounds.cjs
new file mode 100644
index 0000000..6708260
--- /dev/null
+++ b/shift/lib/bounds.cjs
@@ -0,0 +1,17 @@
+'use strict';
+
+// now: epoch ms. Returns null (continue) or { reason } (terminate the run).
+function evaluateBounds(state, config, now) {
+  const b = (config && config.bounds) || {};
+  if (typeof b.maxIterations === 'number' && state.iterations >= b.maxIterations) {
+    return { reason: `max iterations (${b.maxIterations}) reached` };
+  }
+  if (typeof b.maxHours === 'number') {
+    if (now - Date.parse(state.startedAt) >= b.maxHours * 3_600_000) {
+      return { reason: `time box (${b.maxHours}h) reached` };
+    }
+  }
+  return null;
+}
+
+module.exports = { evaluateBounds };
diff --git a/shift/lib/brief.cjs b/shift/lib/brief.cjs
new file mode 100644
index 0000000..7a68aa2
--- /dev/null
+++ b/shift/lib/brief.cjs
@@ -0,0 +1,27 @@
+'use strict';
+
+// Render the unattended instruction + bin text fed back to the agent on `block`.
+function renderBrief(bin, config) {
+  const dod = (config && config.definitionOfDone) || 'Complete the task and commit your work.';
+  const git = (config && config.git) || {};
+  const forbidden = [];
+  if (!git.allowPush) forbidden.push('push to any remote');
+  if (!git.allowOutwardActions) forbidden.push('publish, send to external services, or delete files outside the working tree');
+  const guard = forbidden.length
+    ? `Do NOT ${forbidden.join(', or ')}; if the work needs one, treat it as a "Needs you" item (below) and continue with the rest.`
+    : '';
+  return [
+    'You are running unattended under `shift`. Complete the brief below end-to-end using your best judgment.',
+    'Do NOT ask questions — if you would normally ask, decide and record the decision in .shift/log.md.',
+    `Definition of done: ${dod}`,
+    'When finished, commit your work on the current branch.',
+    'Flag anything that needs the human (a deferred decision, an action you could not take) by appending a line to .shift/log.md as: "Needs you: <detail>" — these surface in the run summary.',
+    'If a true blocker stops you from finishing this bin, append one line to .shift/blocked.jsonl: {"id":"<bin id>","note":"<reason>"} then stop.',
+    guard,
+    '',
+    `--- BIN: ${bin.id} ---`,
+    bin.text
+  ].filter(Boolean).join('\n');
+}
+
+module.exports = { renderBrief };
diff --git a/shift/lib/decision.cjs b/shift/lib/decision.cjs
new file mode 100644
index 0000000..5a9cfe3
--- /dev/null
+++ b/shift/lib/decision.cjs
@@ -0,0 +1,18 @@
+'use strict';
+const { evaluateBounds } = require('./bounds.cjs');
+const { firstPending } = require('./state.cjs');
+const { renderBrief } = require('./brief.cjs');
+
+// ctx: { bins, state, config, now, stopHookActive, killSwitch }
+// returns { action:'allow', reason } | { action:'block', reason, nextBinId }
+function decide(ctx) {
+  const { bins, state, config, now, killSwitch } = ctx;
+  if (killSwitch) return { action: 'allow', reason: 'kill switch (.shift/STOP) present' };
+  const bound = evaluateBounds(state, config, now);
+  if (bound) return { action: 'allow', reason: bound.reason };
+  const next = firstPending(bins);
+  if (!next) return { action: 'allow', reason: 'queue empty' };
+  return { action: 'block', reason: renderBrief(next, config), nextBinId: next.id };
+}
+
+module.exports = { decide };
diff --git a/shift/lib/discovery.cjs b/shift/lib/discovery.cjs
new file mode 100644
index 0000000..49931d5
--- /dev/null
+++ b/shift/lib/discovery.cjs
@@ -0,0 +1,35 @@
+'use strict';
+const fs = require('node:fs');
+const path = require('node:path');
+const crypto = require('node:crypto');
+
+function hashText(text) { // content fingerprint: first 12 hex chars of the SHA-256 digest of `text`
+  return crypto.createHash('sha256').update(text).digest('hex').slice(0, 12);
+}
+
+function listMarkdown(dirAbs) {
+  let entries;
+  try { entries = fs.readdirSync(dirAbs, { withFileTypes: true }); }
+  catch { return []; }
+  return entries.filter(e => e.isFile() && e.name.endsWith('.md')).map(e => e.name).sort();
+}
+
+// sources: [{ path, kind }]. cwd: repo root. Returns ordered bins (source then filename).
+function discoverBins(sources, cwd) {
+  const bins = [];
+  for (const source of sources) {
+    const dirAbs = path.resolve(cwd, source.path);
+    for (const name of listMarkdown(dirAbs)) {
+      const text = fs.readFileSync(path.join(dirAbs, name), 'utf8');
+      bins.push({
+        id: path.posix.join(source.path, name),
+        hash: hashText(text),
+        kind: source.kind || 'briefs',
+        text
+      });
+    }
+  }
+  return bins;
+}
+
+module.exports = { discoverBins, hashText };
diff --git a/shift/lib/state.cjs b/shift/lib/state.cjs
new file mode 100644
index 0000000..9d10a99
--- /dev/null
+++ b/shift/lib/state.cjs
@@ -0,0 +1,42 @@
+'use strict';
+const fs = require('node:fs');
+const path = require('node:path');
+
+function statePath(dir) { return path.join(dir, 'state.json'); } // location of the run state file inside `dir`
+
+function loadState(dir) { return JSON.parse(fs.readFileSync(statePath(dir), 'utf8')); } // throws if the file is missing or not valid JSON
+
+function saveState(dir, state) {
+  fs.mkdirSync(dir, { recursive: true });
+  // Persist lean: the bin `text` is re-read from disk on each discovery pass, so
+  // keep it out of state.json (avoids bloating state with full brief/plan bodies).
+  const lean = { ...state, bins: state.bins.map(({ text, ...b }) => b) };
+  fs.writeFileSync(statePath(dir), JSON.stringify(lean, null, 2));
+}
+
+function initState({ runId, startedAt, branch }) { // fresh run state: zero iterations, no current bin, empty bin list
+  return { runId, startedAt, iterations: 0, branch, currentBinId: null, bins: [] };
+}
+
+// Merge freshly discovered bins into state, carrying over status by id+hash.
+// New or content-changed files appear as 'pending'.
+function mergeDiscovered(state, discovered) {
+  const prev = new Map(state.bins.map(b => [b.id + '@' + b.hash, b]));
+  const bins = discovered.map(d => {
+    const carried = prev.get(d.id + '@' + d.hash);
+    // Always carry the freshly-read `text` (needed to render the brief); status
+    // comes from the prior run if this id+hash was already seen.
+    return carried
+      ? { ...carried, kind: d.kind, text: d.text }
+      : { id: d.id, hash: d.hash, kind: d.kind, status: 'pending', text: d.text };
+  });
+  return { ...state, bins };
+}
+
+function firstPending(bins) { return bins.find(b => b.status === 'pending') || null; } // first bin in list order still 'pending', or null
+
+function setBinStatus(state, id, patch) { // returns a new state with `patch` merged into every bin whose id matches
+  return { ...state, bins: state.bins.map(b => (b.id === id ? { ...b, ...patch } : b)) };
+}
+
+module.exports = { statePath, loadState, saveState, initState, mergeDiscovered, firstPending, setBinStatus };
diff --git a/shift/package.json b/shift/package.json
new file mode 100644
index 0000000..52fabb4
--- /dev/null
+++ b/shift/package.json
@@ -0,0 +1,9 @@
+{
+  "name": "shift",
+  "version": "0.1.0",
+  "private": true,
+  "description": "Autonomous work-queue runner for Claude Code (Agentic Workflow Toolkit module 2)",
+  "bin": { "shift": "bin/shift" },
+  "engines": { "node": ">=18" },
+  "scripts": { "test": "node --test" }
+}
diff --git a/shift/test/bounds.test.cjs b/shift/test/bounds.test.cjs
new file mode 100644
index 0000000..b46cce3
--- /dev/null
+++ b/shift/test/bounds.test.cjs
@@ -0,0 +1,27 @@
+'use strict';
+const { test } = require('node:test');
+const assert = require('node:assert');
+const { evaluateBounds } = require('../lib/bounds.cjs');
+
+const base = { startedAt: '2026-06-13T00:00:00Z', iterations: 0 };
+const t0 = Date.parse(base.startedAt);
+
+test('returns null when within bounds', () => {
+  const cfg = { bounds: { maxHours: 2, maxIterations: 10 } };
+  assert.equal(evaluateBounds(base, cfg, t0 + 60_000), null);
+});
+
+test('terminates on max iterations', () => {
+  const cfg = { bounds: { maxHours: 2, maxIterations: 5 } };
+  assert.match(evaluateBounds({ ...base, iterations: 5 }, cfg, t0 + 1000).reason, /max iterations/);
+});
+
+test('terminates on time box', () => {
+  const cfg = { bounds: { maxHours: 1, maxIterations: 100 } };
+  assert.match(evaluateBounds(base, cfg, t0 + 3_600_001).reason, /time box/);
+});
+
+test('iterations checked before time', () => {
+  const cfg = { bounds: { maxHours: 1, maxIterations: 1 } };
+  assert.match(evaluateBounds({ ...base, iterations: 1 }, cfg, t0 + 3_600_001).reason, /max iterations/);
+});
diff --git a/shift/test/brief.test.cjs b/shift/test/brief.test.cjs
new file mode 100644
index 0000000..2212e4e
--- /dev/null
+++ b/shift/test/brief.test.cjs
@@ -0,0 +1,31 @@
+'use strict';
+const { test } = require('node:test');
+const assert = require('node:assert');
+const { renderBrief } = require('../lib/brief.cjs');
+
+const bin = { id: 'queue/01.md', text: 'Do the thing.' };
+
+test('includes the bin text, id, and definition of done', () => {
+  const out = renderBrief(bin, { definitionOfDone: 'tests pass', git: {} });
+  assert.match(out, /Do the thing\./);
+  assert.match(out, /queue\/01\.md/);
+  assert.match(out, /tests pass/);
+});
+
+test('forbids push and outward actions by default', () => {
+  const out = renderBrief(bin, { git: { allowPush: false, allowOutwardActions: false } });
+  assert.match(out, /Do NOT/);
+  assert.match(out, /push to any remote/);
+});
+
+test('omits the forbid-guard when everything is allowed', () => {
+  const out = renderBrief(bin, { git: { allowPush: true, allowOutwardActions: true } });
+  assert.doesNotMatch(out, /Do NOT push/);
+});
+
+test('always explains decision logging, the Needs-you convention, and blocker flagging', () => {
+  const out = renderBrief(bin, { git: {} });
+  assert.match(out, /\.shift\/log\.md/);
+  assert.match(out, /Needs you:/);
+  assert.match(out, /blocked\.jsonl/);
+});
diff --git a/shift/test/cli.test.cjs b/shift/test/cli.test.cjs
new file mode 100644
index 0000000..62dbad1
--- /dev/null
+++ b/shift/test/cli.test.cjs
@@ -0,0 +1,49 @@
+'use strict';
+const { test } = require('node:test');
+const assert = require('node:assert');
+const fs = require('node:fs');
+const os = require('node:os');
+const path = require('node:path');
+const cp = require('node:child_process');
+
+const CLI = path.resolve(__dirname, '..', 'bin', 'shift');
+
+function repoWithQueue() { // Fixture: temp git repo with one empty commit and a single queued bin; returns its path.
+  const cwd = fs.mkdtempSync(path.join(os.tmpdir(), 'shift-cli-'));
+  cp.execSync('git init -q', { cwd });
+  cp.execSync('git config user.email t@t.co', { cwd });
+  cp.execSync('git config user.name t', { cwd });
+  cp.execSync('git -c commit.gpgsign=false commit -q --allow-empty -m init', { cwd }); // a global commit.gpgsign=true must not break the fixture
+  fs.mkdirSync(path.join(cwd, 'queue'), { recursive: true });
+  fs.writeFileSync(path.join(cwd, 'queue', '01.md'), 'bin one');
+  return cwd;
+}
+
+function run(cwd, args) { // Runs the shift CLI in `cwd` and returns its stdout; throws on a non-zero exit.
+  return cp.execFileSync(process.execPath, [CLI, ...args], { cwd, encoding: 'utf8' }); // same Node binary as the test runner, not whatever `node` is on PATH
+}
+
+test('--dry-run lists the queue and writes nothing', () => {
+  const cwd = repoWithQueue();
+  const out = run(cwd, ['start', '--dry-run']);
+  assert.match(out, /queue\/01\.md/);
+  assert.ok(!fs.existsSync(path.join(cwd, '.shift', 'state.json')));
+});
+
+test('start writes config + state and creates the run branch', () => {
+  const cwd = repoWithQueue();
+  run(cwd, ['start']);
+  assert.ok(fs.existsSync(path.join(cwd, '.shift', 'state.json')));
+  assert.ok(fs.existsSync(path.join(cwd, '.shift', 'config.json')));
+  const branch = cp.execSync('git branch --show-current', { cwd, encoding: 'utf8' }).trim();
+  assert.match(branch, /^shift\//);
+  const state = JSON.parse(fs.readFileSync(path.join(cwd, '.shift', 'state.json'), 'utf8'));
+  assert.equal(state.bins.length, 1);
+});
+
+test('stop creates the kill switch', () => {
+  const cwd = repoWithQueue();
+  run(cwd, ['start']);
+  run(cwd, ['stop']);
+  assert.ok(fs.existsSync(path.join(cwd, '.shift', 'STOP')));
+});
diff --git a/shift/test/decision.test.cjs b/shift/test/decision.test.cjs
new file mode 100644
index 0000000..76cbd66
--- /dev/null
+++ b/shift/test/decision.test.cjs
@@ -0,0 +1,38 @@
+'use strict';
+const { test } = require('node:test');
+const assert = require('node:assert');
+const { decide } = require('../lib/decision.cjs');
+
+const cfg = { bounds: { maxHours: 2, maxIterations: 10 }, definitionOfDone: 'done', git: {} };
+const state = { startedAt: '2026-06-13T00:00:00Z', iterations: 0, currentBinId: null };
+const t0 = Date.parse(state.startedAt) + 1000;
+
+test('blocks with the first pending bin', () => {
+  const bins = [{ id: 'a', status: 'done' }, { id: 'b', status: 'pending', text: 'work b' }];
+  const r = decide({ bins, state, config: cfg, now: t0, stopHookActive: false, killSwitch: false });
+  assert.equal(r.action, 'block');
+  assert.equal(r.nextBinId, 'b');
+  assert.match(r.reason, /work b/);
+});
+
+test('allows stop when queue empty', () => {
+  const bins = [{ id: 'a', status: 'done' }];
+  const r = decide({ bins, state, config: cfg, now: t0, stopHookActive: false, killSwitch: false });
+  assert.equal(r.action, 'allow');
+  assert.match(r.reason, /queue empty/);
+});
+
+test('kill switch allows stop even with pending work', () => {
+  const bins = [{ id: 'b', status: 'pending', text: 'x' }];
+  const r = decide({ bins, state, config: cfg, now: t0, stopHookActive: false, killSwitch: true });
+  assert.equal(r.action, 'allow');
+  assert.match(r.reason, /kill switch/);
+});
+
+test('a bound (time box) allows stop even with pending work', () => {
+  const bins = [{ id: 'b', status: 'pending', text: 'x' }];
+  const late = Date.parse(state.startedAt) + 3 * 3_600_000;
+  const r = decide({ bins, state, config: cfg, now: late, stopHookActive: false, killSwitch: false });
+  assert.equal(r.action, 'allow');
+  assert.match(r.reason, /time box/);
+});
diff --git a/shift/test/discovery.test.cjs b/shift/test/discovery.test.cjs
new file mode 100644
index 0000000..3dfad7c
--- /dev/null
+++ b/shift/test/discovery.test.cjs
@@ -0,0 +1,37 @@
+'use strict';
+const { test } = require('node:test');
+const assert = require('node:assert');
+const fs = require('node:fs');
+const os = require('node:os');
+const path = require('node:path');
+const { discoverBins, hashText } = require('../lib/discovery.cjs');
+
+function tmpRepo() {
+  const d = fs.mkdtempSync(path.join(os.tmpdir(), 'shift-disc-'));
+  fs.mkdirSync(path.join(d, 'queue'), { recursive: true });
+  fs.mkdirSync(path.join(d, 'plans'), { recursive: true });
+  fs.writeFileSync(path.join(d, 'queue', '02-b.md'), 'second');
+  fs.writeFileSync(path.join(d, 'queue', '01-a.md'), 'first');
+  fs.writeFileSync(path.join(d, 'queue', 'notes.txt'), 'ignored');
+  fs.writeFileSync(path.join(d, 'plans', 'p1.md'), 'plan one');
+  return d;
+}
+
+test('discovers .md files, ordered by source then filename', () => {
+  const cwd = tmpRepo();
+  const bins = discoverBins([{ path: 'queue', kind: 'briefs' }, { path: 'plans', kind: 'plans' }], cwd);
+  assert.deepEqual(bins.map(b => b.id), ['queue/01-a.md', 'queue/02-b.md', 'plans/p1.md']);
+  assert.equal(bins[0].kind, 'briefs');
+  assert.equal(bins[2].kind, 'plans');
+  assert.equal(bins[0].text, 'first');
+});
+
+test('hash is stable for same content, differs for different content', () => {
+  assert.equal(hashText('x'), hashText('x'));
+  assert.notEqual(hashText('x'), hashText('y'));
+});
+
+test('missing source folder yields no bins (no throw)', () => {
+  const cwd = tmpRepo();
+  assert.deepEqual(discoverBins([{ path: 'does-not-exist', kind: 'briefs' }], cwd), []);
+});
diff --git a/shift/test/hook.test.cjs b/shift/test/hook.test.cjs
new file mode 100644
index 0000000..aae0144
--- /dev/null
+++ b/shift/test/hook.test.cjs
@@ -0,0 +1,95 @@
+'use strict';
+const { test } = require('node:test');
+const assert = require('node:assert');
+const fs = require('node:fs');
+const os = require('node:os');
+const path = require('node:path');
+const cp = require('node:child_process');
+
+const HOOK = path.resolve(__dirname, '..', 'hooks', 'shift-stop.cjs');
+
+function setupRun() {
+  const cwd = fs.mkdtempSync(path.join(os.tmpdir(), 'shift-hook-'));
+  fs.mkdirSync(path.join(cwd, 'queue'), { recursive: true });
+  fs.writeFileSync(path.join(cwd, 'queue', '01.md'), 'bin one');
+  fs.writeFileSync(path.join(cwd, 'queue', '02.md'), 'bin two');
+  const dir = path.join(cwd, '.shift');
+  fs.mkdirSync(dir, { recursive: true });
+  fs.writeFileSync(path.join(dir, 'config.json'), JSON.stringify({
+    sources: [{ path: 'queue', kind: 'briefs' }],
+    bounds: { maxHours: 24, maxIterations: 10 },
+    definitionOfDone: 'done', git: {}
+  }));
+  fs.writeFileSync(path.join(dir, 'state.json'), JSON.stringify({
+    runId: 'r', startedAt: new Date().toISOString(), iterations: 0,
+    branch: 'shift/x', currentBinId: null, bins: []
+  }));
+  fs.writeFileSync(path.join(dir, 'log.md'), '# log\n');
+  return { cwd, dir };
+}
+
+function runHook(cwd, input) {
+  const out = cp.execFileSync('node', [HOOK], { cwd, input: JSON.stringify(input), encoding: 'utf8' });
+  return JSON.parse(out || '{}');
+}
+
+test('no-ops (allows stop) when no .shift/state.json exists', () => {
+  const cwd = fs.mkdtempSync(path.join(os.tmpdir(), 'shift-none-'));
+  assert.deepEqual(runHook(cwd, { stop_hook_active: false }), {});
+});
+
+test('first stop blocks bin 1; second marks it done + blocks bin 2; third drains -> allow + summary', () => {
+  const { cwd, dir } = setupRun();
+  const r1 = runHook(cwd, { stop_hook_active: false });
+  assert.equal(r1.decision, 'block');
+  assert.match(r1.reason, /bin one/);
+
+  const r2 = runHook(cwd, { stop_hook_active: true });
+  assert.equal(r2.decision, 'block');
+  assert.match(r2.reason, /bin two/);
+  const s2 = JSON.parse(fs.readFileSync(path.join(dir, 'state.json'), 'utf8'));
+  assert.equal(s2.bins.find(b => b.id === 'queue/01.md').status, 'done');
+
+  const r3 = runHook(cwd, { stop_hook_active: true });
+  assert.deepEqual(r3, {});
+  assert.ok(fs.existsSync(path.join(dir, 'summary.md')));
+  assert.match(fs.readFileSync(path.join(dir, 'summary.md'), 'utf8'), /queue empty/);
+});
+
+test('blocked.jsonl marks the current bin blocked and surfaces it in the summary', () => {
+  const { cwd, dir } = setupRun();
+  runHook(cwd, { stop_hook_active: false });
+  fs.writeFileSync(path.join(dir, 'blocked.jsonl'), JSON.stringify({ id: 'queue/01.md', note: 'needs key' }) + '\n');
+  runHook(cwd, { stop_hook_active: true });
+  runHook(cwd, { stop_hook_active: true });
+  assert.match(fs.readFileSync(path.join(dir, 'summary.md'), 'utf8'), /needs key/);
+});
+
+test('logged "Needs you:" lines surface in the summary', () => {
+  const { cwd, dir } = setupRun();
+  runHook(cwd, { stop_hook_active: false });
+  fs.appendFileSync(path.join(dir, 'log.md'), '\nNeeds you: push the release tag\n');
+  runHook(cwd, { stop_hook_active: true });
+  runHook(cwd, { stop_hook_active: true });
+  assert.match(fs.readFileSync(path.join(dir, 'summary.md'), 'utf8'), /push the release tag/);
+});
+
+test('kill switch ends the run immediately', () => {
+  const { cwd, dir } = setupRun();
+  fs.writeFileSync(path.join(dir, 'STOP'), '');
+  assert.deepEqual(runHook(cwd, { stop_hook_active: false }), {});
+  assert.match(fs.readFileSync(path.join(dir, 'summary.md'), 'utf8'), /kill switch/);
+});
+
+test('resolves .shift from the hook payload cwd, not the process cwd', () => {
+  const { cwd } = setupRun();
+  const neutral = fs.mkdtempSync(path.join(os.tmpdir(), 'shift-neutral-'));
+  const out = cp.execFileSync('node', [HOOK], {
+    cwd: neutral,
+    input: JSON.stringify({ stop_hook_active: false, cwd }),
+    encoding: 'utf8'
+  });
+  const r = JSON.parse(out || '{}');
+  assert.equal(r.decision, 'block');
+  assert.match(r.reason, /bin one/);
+});
diff --git a/shift/test/state.test.cjs b/shift/test/state.test.cjs
new file mode 100644
index 0000000..e123620
--- /dev/null
+++ b/shift/test/state.test.cjs
@@ -0,0 +1,49 @@
+'use strict';
+const { test } = require('node:test');
+const assert = require('node:assert');
+const fs = require('node:fs');
+const os = require('node:os');
+const path = require('node:path');
+const { initState, saveState, loadState, mergeDiscovered, firstPending, setBinStatus } = require('../lib/state.cjs');
+
+test('init + save + load round-trips', () => {
+  const dir = fs.mkdtempSync(path.join(os.tmpdir(), 'shift-state-'));
+  const s = initState({ runId: 'r1', startedAt: '2026-06-13T00:00:00Z', branch: 'shift/x' });
+  assert.equal(s.iterations, 0);
+  assert.equal(s.currentBinId, null);
+  saveState(dir, s);
+  assert.deepEqual(loadState(dir), s);
+});
+
+test('mergeDiscovered carries status by id+hash, new files are pending', () => {
+  let s = initState({ runId: 'r', startedAt: '2026-06-13T00:00:00Z', branch: 'b' });
+  s = mergeDiscovered(s, [{ id: 'queue/a.md', hash: 'h1', kind: 'briefs' }]);
+  assert.equal(s.bins[0].status, 'pending');
+  s = setBinStatus(s, 'queue/a.md', { status: 'done' });
+  s = mergeDiscovered(s, [
+    { id: 'queue/a.md', hash: 'h1', kind: 'briefs' },
+    { id: 'queue/b.md', hash: 'h2', kind: 'briefs' }
+  ]);
+  assert.equal(s.bins.find(b => b.id === 'queue/a.md').status, 'done');
+  assert.equal(s.bins.find(b => b.id === 'queue/b.md').status, 'pending');
+});
+
+test('edited file (new hash) becomes pending again', () => {
+  let s = initState({ runId: 'r', startedAt: 't', branch: 'b' });
+  s = mergeDiscovered(s, [{ id: 'q/a.md', hash: 'h1', kind: 'briefs' }]);
+  s = setBinStatus(s, 'q/a.md', { status: 'done' });
+  s = mergeDiscovered(s, [{ id: 'q/a.md', hash: 'h2', kind: 'briefs' }]);
+  assert.equal(s.bins[0].status, 'pending');
+});
+
+test('firstPending returns first pending or null', () => {
+  let s = initState({ runId: 'r', startedAt: 't', branch: 'b' });
+  s = mergeDiscovered(s, [
+    { id: 'a', hash: '1', kind: 'briefs' },
+    { id: 'b', hash: '2', kind: 'briefs' }
+  ]);
+  s = setBinStatus(s, 'a', { status: 'done' });
+  assert.equal(firstPending(s.bins).id, 'b');
+  s = setBinStatus(s, 'b', { status: 'done' });
+  assert.equal(firstPending(s.bins), null);
+});

From f5e67ad49695372c98e1e6be28c2c45061805f49 Mon Sep 17 00:00:00 2001
From: Allen Manzano-Wight <6640236+AllenBW@users.noreply.github.com>
Date: Sat, 13 Jun 2026 22:45:57 -0400
Subject: [PATCH 03/12] shift: add v2 (headless auto-resume + usage cap) and v3
 (per-bin verify gate)

---
 shift/PLAN.md                | 11 +++++
 shift/README.md              | 62 +++++++++++++++++++-------
 shift/SPEC.md                | 13 ++++++
 shift/bin/shift              | 53 +++++++++++++++++++++-
 shift/hooks/shift-stop.cjs   | 65 +++++++++++++++++++--------
 shift/lib/bounds.cjs         |  9 +++-
 shift/lib/decision.cjs       |  6 +--
 shift/lib/outcome.cjs        | 30 +++++++++++++
 shift/lib/run-loop.cjs       | 63 ++++++++++++++++++++++++++
 shift/lib/usage.cjs          | 35 +++++++++++++++
 shift/lib/verify.cjs         | 23 ++++++++++
 shift/test/bounds.test.cjs   | 11 +++++
 shift/test/decision.test.cjs |  8 ++++
 shift/test/hook.test.cjs     | 53 ++++++++++++++++++++--
 shift/test/outcome.test.cjs  | 33 ++++++++++++++
 shift/test/run-loop.test.cjs | 86 ++++++++++++++++++++++++++++++++++++
 shift/test/usage.test.cjs    | 40 +++++++++++++++++
 shift/test/verify.test.cjs   | 25 +++++++++++
 18 files changed, 583 insertions(+), 43 deletions(-)
 create mode 100644 shift/lib/outcome.cjs
 create mode 100644 shift/lib/run-loop.cjs
 create mode 100644 shift/lib/usage.cjs
 create mode 100644 shift/lib/verify.cjs
 create mode 100644 shift/test/outcome.test.cjs
 create mode 100644 shift/test/run-loop.test.cjs
 create mode 100644 shift/test/usage.test.cjs
 create mode 100644 shift/test/verify.test.cjs

diff --git a/shift/PLAN.md b/shift/PLAN.md
index d9f79bf..387f464 100644
--- a/shift/PLAN.md
+++ b/shift/PLAN.md
@@ -1032,3 +1032,14 @@ Built on branch `shift-v1`. The draft code blocks above are the design intent; t
 - **`package.json` — `"test": "node --test"`** (Node ≥18 auto-discovery; a bare `test/` arg isn't accepted) and `"engines": { "node": ">=18" }`.
 
 All 28 `shift` tests + 7 `code-status-bar` tests pass; `install.sh` verified end-to-end.
+
+---
+
+## v2 + v3 (built on the same branch)
+
+Added after v1, same TDD discipline (52 `shift` tests total). See SPEC §13 for the design decisions.
+
+- **v3 verify gate** — `lib/verify.cjs` (injectable exec) + a gate in the Stop hook: a bin passes only if `verify.command` exits 0; a failure re-feeds the bin, along with the command's output, until `verify.maxAttempts` is reached, and the bin is then marked blocked. Tests: `verify.test.cjs` + hook gate cases.
+- **v2 usage cap** — `lib/usage.cjs` caches the hook payload's `rate_limits` to `.shift/usage.json`; `evaluateBounds` gains a `usagePercent` arg (cap on weekly %); the hook reads it from the payload and degrades gracefully when absent. Tests: `usage.test.cjs` + bounds/decision/hook cases.
+- **v2 headless runner** — `lib/outcome.cjs` (classify a spawn: completed / rate_limited / error, inferring rate-limit from cached usage since the exit signature is undocumented) + `lib/run-loop.cjs` (pure outer loop with injected effects: bounds, max-resumes backstop, wait-until-reset auto-resume) + `bin/shift run` (thin real-effects wiring). Tests: `outcome.test.cjs`, `run-loop.test.cjs`.
+- **Security** — `lib/verify.cjs` uses `spawnSync(command, { shell: true })` with the whole user-config command (not interpolated); documented inline.
diff --git a/shift/README.md b/shift/README.md
index 5ebe712..d871a96 100644
--- a/shift/README.md
+++ b/shift/README.md
@@ -1,23 +1,27 @@
 # shift
 
-Autonomous work-queue runner for **Claude Code** — module 2 of the [Agentic Workflow Toolkit](../). Pre-load bins of work, leave, and `shift` keeps Claude working through them past natural stop points, using its best judgment, until the queue is empty or a bound is hit. You review the output at the end.
+Autonomous work-queue runner for **Claude Code** — module 2 of the [Agentic Workflow Toolkit](../). Pre-load bins of work, leave, and `shift` keeps Claude working through them past natural stop points, using its best judgment, until the queue is empty or a bound is hit — surviving the 5-hour rate-limit wall by waiting for the window to reopen. You review the output at the end.
 
-> **This is v1** — the intra-session engine (a Stop hook). It keeps a *running* session grinding the queue, bounded by a time box + max iterations. Surviving the 5-hour rate-limit wall (auto-resume) and a usage cap are **v2**. See [SPEC.md](./SPEC.md) and [PLAN.md](./PLAN.md).
+See [SPEC.md](./SPEC.md) and [PLAN.md](./PLAN.md) for the design.
 
 ## How it works
 
-You drop work into source folders (hand-written briefs and/or plugin-generated plans). `shift start` discovers them, records a run in `.shift/`, and creates a `shift/<date>` branch. You open Claude Code and say "begin the shift." From then on, a **Stop hook** runs each time the agent would stop: it marks the finished bin done, picks the next pending bin, and feeds it back as the next instruction — so the agent keeps going. When the queue drains (or a bound trips, or you hit the kill switch), it lets the session stop and writes `.shift/summary.md`.
+You drop work into source folders — hand-written briefs and/or plugin-generated plans (e.g. Superpowers' plans dir). `shift start` discovers them, records a run in `.shift/`, and creates a `shift/<date>` branch. Then:
 
-The hook is safe to register globally: it no-ops in any repo that isn't an active `shift` run.
+- **Keep-going engine (Stop hook).** Each time the agent would stop, the hook marks the finished bin done, picks the next pending one, and feeds it back as the next instruction — so the session keeps working. When the queue drains (or a bound trips, or the kill switch is set) it lets the session stop and writes `.shift/summary.md`.
+- **Verify gate.** If you set a `verify.command`, each bin must pass it (e.g. `npm test`) before it counts as done; failures re-feed the bin with the output (up to `maxAttempts`), then mark it blocked. This catches "looked done but wasn't."
+- **All-day runner (`shift run`).** A headless outer loop that spawns Claude, lets the engine grind, and — when a spawn dies on the rate-limit wall — waits until the window resets and resumes. Bounded by wall-clock, max iterations, a usage cap, and a resume backstop.
+
+The hook is safe to register globally: it no-ops in any repo that isn't an active `shift` run, and resolves the repo from the hook payload's `cwd`.
 
 ## Safety model
 
-Full best-judgment autonomy on reversible, in-worktree work. By default it will **not** push, publish, send externally, or delete outside the worktree — it does the preparable part and records a `Needs you:` line instead, which the summary collects. All work lands on the `shift/<date>` branch, so review is a clean diff. Every decision is logged. Hard stops: time box, max iterations, and a kill switch (`shift stop`).
+Full best-judgment autonomy on reversible, in-worktree work. By default it will **not** push, publish, send externally, or delete outside the worktree — it does the preparable part and records a `Needs you:` line, which the summary collects. All work lands on the `shift/<date>` branch, so review is a clean diff. Every decision is logged. Hard stops: time box, max iterations, usage cap, kill switch (`shift stop`).
 
 ## Install
 
 1. Get the files (clone the toolkit, or copy the `shift/` folder).
-2. Register the Stop hook **once** in `~/.claude/settings.json`:
+2. Register the Stop hook **once** in `~/.claude/settings.json` (safe globally — no-ops outside an active run):
 
 ```json
 {
@@ -31,7 +35,7 @@ Full best-judgment autonomy on reversible, in-worktree work. By default it will
 }
 ```
 
-> Verify the exact hook schema against the current Claude Code hooks docs. The engine only needs "block + feed `reason` back" and the `stop_hook_active` re-entry flag, and it resolves the repo from the hook payload's `cwd`.
+> Verify the hook schema against the current Claude Code hooks docs. The engine needs only: "block + feed `reason` back", the `stop_hook_active` flag, the payload `cwd`, and (for the usage cap / auto-resume) the payload `rate_limits`.
 
 3. (Optional) put `shift/bin/shift` on your PATH.
 
@@ -42,12 +46,21 @@ cd your-repo
 mkdir queue && $EDITOR queue/01-first-task.md     # one brief per file
 shift start --dry-run                              # preview the queue, branch, bounds
 shift start                                        # init run + create shift/<date> branch
-# open Claude Code here and say: "begin the shift"
-shift status                                       # check progress anytime
-shift stop                                         # stop cleanly after the current bin
 ```
 
-Point at plan folders too (e.g. Superpowers output) by editing `.shift/config.json`:
+Then either:
+
+- **Interactive:** open Claude Code in the repo and say *"begin the shift"* — the Stop hook drives it while you're away (within this session).
+- **All-day / unattended:** `shift run` — the headless loop drives Claude, survives rate-limit resets, and stops on a bound.
+
+```bash
+shift status     # progress anytime
+shift stop       # stop cleanly after the current bin
+```
+
+When it ends, read `.shift/summary.md` (bins done/blocked + a "Needs you" section) and review the `shift/<date>` branch.
+
+## Configure (`.shift/config.json`)
 
 ```json
 {
@@ -55,18 +68,37 @@ Point at plan folders too (e.g. Superpowers output) by editing `.shift/config.js
     { "path": "queue", "kind": "briefs" },
     { "path": "docs/superpowers/plans", "kind": "plans" }
   ],
-  "bounds": { "maxHours": 4, "maxIterations": 30 },
+  "bounds": {
+    "maxHours": 4,
+    "maxIterations": 30,
+    "maxResumes": 12,
+    "usageCapPercent": 90,
+    "autoResumeOnReset": true
+  },
   "definitionOfDone": "Builds and tests pass; work committed on the run branch.",
+  "verify": { "command": "npm test", "maxAttempts": 2 },
+  "permissionMode": "acceptEdits",
   "git": { "branch": "shift/{date}", "allowPush": false, "allowOutwardActions": false }
 }
 ```
 
-When the run ends, read `.shift/summary.md` (it lists bins done/blocked and a "Needs you" section), then review the `shift/<date>` branch.
+- **`usageCapPercent`** — stop when weekly (7-day) usage reaches this percentage (read from the hook payload's `rate_limits`; the cap is skipped when that data is absent, e.g. non-Pro/Max).
+- **`autoResumeOnReset`** — on a rate-limit wall, `shift run` waits for the 5-hour window to reopen and resumes (never past the time box).
+- **`verify.command`** — per-bin acceptance gate; `null` disables it.
+
+### Permissions for unattended runs
+
+`shift run` invokes `claude -p --permission-mode <permissionMode>`. `acceptEdits` (the default) auto-approves file edits but **other tools (e.g. Bash) can still prompt — and a headless run can't answer prompts.** For real unattended work that runs tests/commands, either:
+
+- pre-allow the tools the work needs via `permissions.allow` in your Claude settings and set `"permissionMode": "dontAsk"`, or
+- set `"permissionMode": "bypassPermissions"` (broadest; rely on the branch-only / no-push safety model and bounds).
+
+Pick the narrowest mode that lets the work actually proceed.
 
 ## Develop
 
 ```bash
-cd shift && npm test     # node --test, no dependencies
+cd shift && npm test     # node --test, zero dependencies
 ```
 
-Pure logic lives in `lib/` (discovery, state, bounds, brief, decision) and is unit-tested; `hooks/shift-stop.cjs` is the thin I/O shell, integration-tested by driving it with crafted hook input.
+Pure logic lives in `lib/` (discovery, state, bounds, brief, decision, verify, usage, outcome, run-loop) and is unit-tested; `hooks/shift-stop.cjs` (the keep-going engine) and the `shift run` loop are integration-tested by driving them with injected effects / crafted hook input.
diff --git a/shift/SPEC.md b/shift/SPEC.md
index b250662..e81e1eb 100644
--- a/shift/SPEC.md
+++ b/shift/SPEC.md
@@ -259,3 +259,16 @@ shift/
 └─ examples/
    └─ queue/            # sample bins
 ```
+
+---
+
+## 13. Implementation status (as built — v1 + v2 + v3)
+
+All three phases are implemented on branch `shift-v1`. Notable as-built decisions:
+
+- **Rate-limit detection without the undocumented exit signature (resolves §9.2).** Research confirmed the headless rate-limit termination signature is undocumented, but the **Stop hook payload includes `rate_limits`**. So the engine caches the latest reset/usage to `.shift/usage.json`, and `lib/outcome.cjs` classifies a non-finalized, non-zero spawn as `rate_limited` by **inference** — near-limit cached usage (≥95%) + a future reset — with config-overridable stderr patterns as a fallback. No dependency on an exact exit code/message.
+- **Usage cap source (resolves §9.1).** Enforced from the hook payload's `rate_limits.seven_day.used_percentage`; absent data (non-Pro/Max, pre-first-response) degrades to "cap skipped," never an error.
+- **Verify gate (v3, resolves §9.3).** `verify.command` runs per bin; a failure re-feeds the bin, along with the command's output, until `maxAttempts` is reached, and the bin is then marked blocked — so "looked done but wasn't" is caught, not silently accepted.
+- **Permissions.** `shift run` uses `--permission-mode` (default `acceptEdits`). Truly unattended work that runs commands typically needs `dontAsk` + a `permissions.allow` list, or `bypassPermissions` — documented in the README; the branch-only/no-push model and bounds are the backstop.
+
+**New modules beyond §12:** `lib/verify.cjs`, `lib/usage.cjs`, `lib/outcome.cjs`, `lib/run-loop.cjs`; `bin/shift` gains `run`. **Tests:** 52 in `shift` (pure unit + hook/CLI/run-loop integration), all green.
diff --git a/shift/bin/shift b/shift/bin/shift
index 6d6f1d4..f0d4327 100755
--- a/shift/bin/shift
+++ b/shift/bin/shift
@@ -11,8 +11,16 @@ function dateStr(d) { return d.toISOString().slice(0, 10); }
 
 const DEFAULT_CONFIG = {
   sources: [{ path: 'queue', kind: 'briefs' }],
-  bounds: { maxHours: 2, maxIterations: 20 },
+  bounds: {
+    maxHours: 2,
+    maxIterations: 20,
+    maxResumes: 12,
+    usageCapPercent: 90,
+    autoResumeOnReset: true
+  },
   definitionOfDone: 'Builds and tests pass; work committed on the run branch.',
+  verify: { command: null, maxAttempts: 2 },
+  permissionMode: 'acceptEdits',
   git: { branch: 'shift/{date}', allowPush: false, allowOutwardActions: false }
 };
 
@@ -81,8 +89,49 @@ function cmdStop() {
   console.log('shift will stop cleanly after the current bin.');
 }
 
+// v2: headless outer loop — keeps spawning claude until the engine finalizes,
+// a bound trips, or (on a rate-limit wall) it waits for the window to reopen.
+async function cmdRun() {
+  const cwd = process.cwd();
+  const dir = path.join(cwd, '.shift');
+  if (!fs.existsSync(path.join(dir, 'state.json'))) {
+    console.log('No active run. Run `shift start` first.');
+    process.exit(1);
+  }
+  const config = JSON.parse(fs.readFileSync(path.join(dir, 'config.json'), 'utf8'));
+  const mode = config.permissionMode || 'acceptEdits';
+  const { runLoop } = require('../lib/run-loop.cjs');
+  const { readUsageCache } = require('../lib/usage.cjs');
+
+  // Clear any stale summary so finalized() reflects THIS run.
+  try { fs.unlinkSync(path.join(dir, 'summary.md')); } catch { /* none */ }
+
+  let first = true;
+  const effects = {
+    now: () => Date.now(),
+    loadState: () => loadState(dir),
+    readUsage: () => readUsageCache(dir),
+    log: (m) => console.log(`[shift] ${m}`),
+    finalized: () => fs.existsSync(path.join(dir, 'summary.md')),
+    sleepUntil: (ms) => new Promise(r => setTimeout(r, Math.max(0, ms - Date.now()))),
+    spawn: () => {
+      const args = ['-p', '--permission-mode', mode];
+      if (first) { args.push('begin the shift'); first = false; }
+      else { args.push('--continue', 'continue the shift'); }
+      return cp.spawnSync('claude', args, {
+        cwd, encoding: 'utf8', stdio: ['ignore', 'pipe', 'pipe'], maxBuffer: 64 * 1024 * 1024
+      });
+    }
+  };
+
+  const result = await runLoop({ config, effects });
+  console.log(`[shift] stopped: ${result.reason} (after ${result.spawns} spawn(s))`);
+  console.log(`[shift] review: ${path.join(dir, 'summary.md')}`);
+}
+
 const [, , sub, ...rest] = process.argv;
 if (sub === 'start') cmdStart(rest);
 else if (sub === 'status') cmdStatus();
 else if (sub === 'stop') cmdStop();
-else { console.log('usage: shift <start|status|stop> [--dry-run]'); process.exit(1); }
+else if (sub === 'run') cmdRun().catch(e => { console.error(e); process.exit(1); });
+else { console.log('usage: shift <start|run|status|stop> [--dry-run]'); process.exit(1); }
diff --git a/shift/hooks/shift-stop.cjs b/shift/hooks/shift-stop.cjs
index 243f3d5..16358a4 100755
--- a/shift/hooks/shift-stop.cjs
+++ b/shift/hooks/shift-stop.cjs
@@ -5,6 +5,8 @@ const path = require('node:path');
 const { discoverBins } = require('../lib/discovery.cjs');
 const { loadState, saveState, mergeDiscovered, setBinStatus } = require('../lib/state.cjs');
 const { decide } = require('../lib/decision.cjs');
+const { runVerify } = require('../lib/verify.cjs');
+const { writeUsageCache } = require('../lib/usage.cjs');
 
 function readStdin() { try { return fs.readFileSync(0, 'utf8'); } catch { return ''; } }
 
@@ -17,17 +19,18 @@ function readBlocked(dir) {
   } catch { return []; }
 }
 
-// "Needs you: <detail>" lines the agent appended to the log (non-blocking flags).
 function readNeedsYou(dir) {
   try {
     return fs.readFileSync(path.join(dir, 'log.md'), 'utf8')
-      .split('\n')
-      .map(l => l.match(/^Needs you:\s*(.+)$/))
-      .filter(Boolean)
-      .map(m => m[1].trim());
+      .split('\n').map(l => l.match(/^Needs you:\s*(.+)$/)).filter(Boolean).map(m => m[1].trim());
   } catch { return []; }
 }
 
+function tail(s, n) {
+  if (typeof s !== 'string') return '';
+  return s.length > n ? s.slice(s.length - n) : s;
+}
+
 function writeSummary(dir, state, reason, now) {
   const done = state.bins.filter(b => b.status === 'done').length;
   const blocked = state.bins.filter(b => b.status === 'blocked');
@@ -60,33 +63,59 @@ function main() {
   if (!fs.existsSync(path.join(dir, 'state.json'))) { process.stdout.write('{}'); return; }
 
   const config = JSON.parse(fs.readFileSync(path.join(dir, 'config.json'), 'utf8'));
-  let state = loadState(dir);
   const now = Date.now();
   const killSwitch = fs.existsSync(path.join(dir, 'STOP'));
 
-  // Attribute the just-finished work to the current bin.
-  if (state.currentBinId) {
-    const b = readBlocked(dir).find(x => x.id === state.currentBinId);
-    state = setBinStatus(state, state.currentBinId, b
-      ? { status: 'blocked', note: b.note }
-      : { status: 'done', finishedAt: new Date(now).toISOString() });
-  }
+  // Capture rate limits from the hook payload: enforce the usage cap and cache
+  // reset times for the headless runner. Absent on non-Pro/Max or pre-first-response.
+  const usagePercent = writeUsageCache(dir, input.rate_limits, Math.floor(now / 1000));
 
-  // Re-discover (picks up newly added files) and carry over statuses.
-  state = mergeDiscovered(state, discoverBins(config.sources, cwd));
+  // Re-discover (fresh text + new files) and carry over status/attempts.
+  let state = mergeDiscovered(loadState(dir), discoverBins(config.sources, cwd));
+
+  const prevBinId = state.currentBinId;
+  const verifyCmd = config.verify && config.verify.command;
+  const maxAttempts = (config.verify && config.verify.maxAttempts) || 2;
+  let retryFeedback = null;
+
+  // Attribute the just-finished work to the current bin (blocked / verify gate / done).
+  if (prevBinId) {
+    const blocked = readBlocked(dir).find(x => x.id === prevBinId);
+    if (blocked) {
+      state = setBinStatus(state, prevBinId, { status: 'blocked', note: blocked.note });
+    } else if (verifyCmd) {
+      const v = runVerify(verifyCmd, cwd);
+      if (v.ok) {
+        state = setBinStatus(state, prevBinId, { status: 'done', finishedAt: new Date(now).toISOString() });
+      } else {
+        const bin = state.bins.find(b => b.id === prevBinId) || {};
+        const attempts = (bin.attempts || 0) + 1;
+        if (attempts < maxAttempts) {
+          state = setBinStatus(state, prevBinId, { attempts }); // stays pending → re-blocked below
+          retryFeedback = `Your previous attempt failed verification (\`${verifyCmd}\`). Fix it and make it pass. Output (tail):\n${tail(v.output, 2000)}`;
+        } else {
+          state = setBinStatus(state, prevBinId, { status: 'blocked', attempts, note: `failed verification after ${attempts} attempts` });
+        }
+      }
+    } else {
+      state = setBinStatus(state, prevBinId, { status: 'done', finishedAt: new Date(now).toISOString() });
+    }
+  }
 
   const result = decide({
-    bins: state.bins, state, config, now,
+    bins: state.bins, state, config, now, usagePercent,
     stopHookActive: !!input.stop_hook_active, killSwitch
   });
 
   if (result.action === 'block') {
+    let reason = result.reason;
+    if (retryFeedback && result.nextBinId === prevBinId) reason += `\n\n${retryFeedback}`;
     state.iterations += 1;
     state.currentBinId = result.nextBinId;
     saveState(dir, state);
     fs.appendFileSync(path.join(dir, 'log.md'),
-      `\n## ${new Date(now).toISOString()} — start ${result.nextBinId} (iter ${state.iterations})\n`);
-    process.stdout.write(JSON.stringify({ decision: 'block', reason: result.reason }));
+      `\n## ${new Date(now).toISOString()} — work ${result.nextBinId} (iter ${state.iterations})\n`);
+    process.stdout.write(JSON.stringify({ decision: 'block', reason }));
   } else {
     state.currentBinId = null;
     saveState(dir, state);
diff --git a/shift/lib/bounds.cjs b/shift/lib/bounds.cjs
index 6708260..4040f42 100644
--- a/shift/lib/bounds.cjs
+++ b/shift/lib/bounds.cjs
@@ -1,11 +1,16 @@
 'use strict';
 
-// now: epoch ms. Returns null (continue) or { reason } (terminate the run).
-function evaluateBounds(state, config, now) {
+// now: epoch ms. usagePercent: latest weekly usage % (or undefined/null if unknown).
+// Returns null (continue) or { reason } (terminate the run).
+function evaluateBounds(state, config, now, usagePercent) {
   const b = (config && config.bounds) || {};
   if (typeof b.maxIterations === 'number' && state.iterations >= b.maxIterations) {
     return { reason: `max iterations (${b.maxIterations}) reached` };
   }
+  if (typeof b.usageCapPercent === 'number' && typeof usagePercent === 'number'
+      && usagePercent >= b.usageCapPercent) {
+    return { reason: `usage cap (${b.usageCapPercent}%) reached at ${usagePercent}%` };
+  }
   if (typeof b.maxHours === 'number') {
     if (now - Date.parse(state.startedAt) >= b.maxHours * 3_600_000) {
       return { reason: `time box (${b.maxHours}h) reached` };
diff --git a/shift/lib/decision.cjs b/shift/lib/decision.cjs
index 5a9cfe3..e6a985a 100644
--- a/shift/lib/decision.cjs
+++ b/shift/lib/decision.cjs
@@ -3,12 +3,12 @@ const { evaluateBounds } = require('./bounds.cjs');
 const { firstPending } = require('./state.cjs');
 const { renderBrief } = require('./brief.cjs');
 
-// ctx: { bins, state, config, now, stopHookActive, killSwitch }
+// ctx: { bins, state, config, now, usagePercent, stopHookActive, killSwitch }
 // returns { action:'allow', reason } | { action:'block', reason, nextBinId }
 function decide(ctx) {
-  const { bins, state, config, now, killSwitch } = ctx;
+  const { bins, state, config, now, usagePercent, killSwitch } = ctx;
   if (killSwitch) return { action: 'allow', reason: 'kill switch (.shift/STOP) present' };
-  const bound = evaluateBounds(state, config, now);
+  const bound = evaluateBounds(state, config, now, usagePercent);
   if (bound) return { action: 'allow', reason: bound.reason };
   const next = firstPending(bins);
   if (!next) return { action: 'allow', reason: 'queue empty' };
diff --git a/shift/lib/outcome.cjs b/shift/lib/outcome.cjs
new file mode 100644
index 0000000..07ff0e9
--- /dev/null
+++ b/shift/lib/outcome.cjs
@@ -0,0 +1,30 @@
+'use strict';
+
+// How a headless `claude -p` run signals a rate-limit stop is undocumented, so
+// classification is defensive: cached usage (near a limit with a reset still
+// ahead) is trusted first, and stderr pattern matching is only the fallback.
+const DEFAULT_PATTERNS = [/rate.?limit/i, /usage limit/i, /quota/i, /\b429\b/];
+const NEAR_LIMIT_PERCENT = 95;
+
+// ctx: { finalized, code, stderr, usage, now (ms), patterns? }
+// returns 'completed' | 'rate_limited' | 'error'
+function classifyOutcome(ctx) {
+  const { finalized, code, stderr, usage, now, patterns } = ctx;
+  if (finalized || code === 0) return 'completed'; // summary.md was written, or a clean exit
+
+  // Preferred signal: cached usage at/above the threshold with a future session reset.
+  const atLimit = (pct) => typeof pct === 'number' && pct >= NEAR_LIMIT_PERCENT;
+  if (usage) {
+    const nowSec = (typeof now === 'number' ? now : Date.now()) / 1000;
+    const resetAhead = typeof usage.sessionResetAt === 'number' && usage.sessionResetAt > nowSec;
+    const nearLimit = atLimit(usage.sessionUsedPercent) || atLimit(usage.weeklyPercent);
+    if (resetAhead && nearLimit) return 'rate_limited';
+  }
+
+  // Fallback: stderr matches one of the rate-limit patterns (caller-supplied or default).
+  const matchers = patterns || DEFAULT_PATTERNS;
+  const sawLimitText = typeof stderr === 'string' && matchers.some((re) => re.test(stderr));
+  return sawLimitText ? 'rate_limited' : 'error';
+}
+
+module.exports = { classifyOutcome, DEFAULT_PATTERNS, NEAR_LIMIT_PERCENT };
diff --git a/shift/lib/run-loop.cjs b/shift/lib/run-loop.cjs
new file mode 100644
index 0000000..41ad2b2
--- /dev/null
+++ b/shift/lib/run-loop.cjs
@@ -0,0 +1,63 @@
+'use strict';
+const { evaluateBounds } = require('./bounds.cjs');
+const { classifyOutcome } = require('./outcome.cjs');
+
+const RESET_BUFFER_MS = 60_000;
+
+// The headless outer loop (v2). Every side effect is injected through `effects`,
+// so the loop can be tested without a real `claude` process or real sleeping.
+//
+// effects: {
+//   now(): ms, loadState(): state, readUsage(): usageCache|null, log(msg),
+//   finalized(): bool,                 // did the engine write summary.md this run?
+//   spawn(n): { status, stderr },      // run claude once (n = 1-based spawn count)
+//   sleepUntil(ms): Promise<void>
+// }
+// Returns { reason, spawns }: why the loop stopped and how many spawns were run.
+async function runLoop({ config, effects }) {
+  const bounds = (config && config.bounds) || {};
+  const maxResumes = typeof bounds.maxResumes === 'number' ? bounds.maxResumes : 12; // caps total spawns
+  let spawns = 0;
+  let lastOutcome = null;
+
+  for (;;) {
+    const state = effects.loadState();
+    const now = effects.now();
+    const usage = effects.readUsage();
+
+    const bound = evaluateBounds(state, config, now, usage ? usage.weeklyPercent : undefined);
+    if (bound) return { reason: bound.reason, spawns };
+    // A finished or failed spawn is terminal: report it even when it used the last allowed spawn.
+    if (lastOutcome === 'completed') return { reason: 'run finalized by the engine', spawns };
+    if (lastOutcome === 'error') return { reason: 'run errored — stopping (see output)', spawns };
+    if (spawns >= maxResumes) return { reason: `max resumes (${maxResumes}) reached`, spawns };
+
+    if (lastOutcome === 'rate_limited') {
+      if (!bounds.autoResumeOnReset) return { reason: 'rate limited; auto-resume disabled', spawns };
+      const resetAt = usage && typeof usage.sessionResetAt === 'number' ? usage.sessionResetAt * 1000 : null; // s → ms
+      if (!resetAt) return { reason: 'rate limited but no reset time available — stopping', spawns };
+      const until = resetAt + RESET_BUFFER_MS; // wake slightly after the reset
+      if (typeof bounds.maxHours === 'number') {
+        const deadline = Date.parse(state.startedAt) + bounds.maxHours * 3_600_000;
+        if (until >= deadline) return { reason: 'rate limited; reset is past the time box — stopping', spawns };
+      }
+      effects.log(`rate limited — waiting until ${new Date(until).toISOString()}`);
+      await effects.sleepUntil(until);
+      lastOutcome = null; // next pass re-checks the bounds, then spawns again
+      continue;
+    }
+
+    spawns += 1;
+    effects.log(`spawn #${spawns}: running claude`);
+    const res = effects.spawn(spawns);
+    lastOutcome = classifyOutcome({
+      finalized: effects.finalized(),
+      code: res ? res.status : 1,
+      stderr: res ? res.stderr : '',
+      usage: effects.readUsage(),
+      now: effects.now()
+    });
+  }
+}
+
+module.exports = { runLoop, RESET_BUFFER_MS };
diff --git a/shift/lib/usage.cjs b/shift/lib/usage.cjs
new file mode 100644
index 0000000..77af349
--- /dev/null
+++ b/shift/lib/usage.cjs
@@ -0,0 +1,35 @@
+'use strict';
+const fs = require('node:fs');
+const path = require('node:path');
+
+function cachePath(dir) { return path.join(dir, 'usage.json'); }
+
+function num(v) { return Number.isFinite(v) ? v : null; } // finite numbers pass; all else → null
+
+// Persist the rate-limit section of a hook payload to <dir>/usage.json so the
+// headless runner can read usage and reset times between spawns. Returns the weekly %
+// (or null). Absent rate_limits writes nothing; partial data is cached with null fields.
+function writeUsageCache(dir, rateLimits, nowSec) {
+  if (!rateLimits || typeof rateLimits !== 'object') return null;
+  const session = rateLimits.five_hour || {};
+  const weekly = rateLimits.seven_day || {};
+  const cache = {
+    weeklyPercent: num(weekly.used_percentage),
+    sessionUsedPercent: num(session.used_percentage),
+    sessionResetAt: num(session.resets_at),
+    weeklyResetAt: num(weekly.resets_at),
+    capturedAt: num(nowSec)
+  };
+  try {
+    fs.mkdirSync(dir, { recursive: true });
+    fs.writeFileSync(cachePath(dir), JSON.stringify(cache, null, 2));
+  } catch { /* best-effort */ }
+  return cache.weeklyPercent;
+}
+
+// Parsed contents of <dir>/usage.json, or null on any read/parse failure.
+function readUsageCache(dir) {
+  try { return JSON.parse(fs.readFileSync(cachePath(dir), 'utf8')); } catch { return null; }
+}
+
+module.exports = { writeUsageCache, readUsageCache };
diff --git a/shift/lib/verify.cjs b/shift/lib/verify.cjs
new file mode 100644
index 0000000..60b22a9
--- /dev/null
+++ b/shift/lib/verify.cjs
@@ -0,0 +1,23 @@
+'use strict';
+const cp = require('node:child_process');
+
+// Execute the per-bin verification `command` in `cwd` and hand back the runner's
+// { ok: boolean, output: string }. `exec` is injectable for tests; no command → pass.
+function runVerify(command, cwd, exec) {
+  if (command) return (exec || defaultExec)(command, cwd);
+  return { ok: true, output: '' };
+}
+
+function defaultExec(command, cwd) {
+  // shell:true is intentional: `command` is the user's own config value, passed whole and never built from untrusted input.
+  // Spawn-level failures (timeout, maxBuffer overflow, bad cwd) are appended to the output so a failure is never unexplained.
+  const r = cp.spawnSync(command, {
+    cwd, shell: true, encoding: 'utf8',
+    timeout: 10 * 60 * 1000,
+    maxBuffer: 10 * 1024 * 1024
+  });
+  const failure = r.error ? `\n[verify] ${r.error.message}` : '';
+  return { ok: r.status === 0, output: `${r.stdout || ''}${r.stderr || ''}${failure}` };
+}
+
+module.exports = { runVerify };
diff --git a/shift/test/bounds.test.cjs b/shift/test/bounds.test.cjs
index b46cce3..78a5f5b 100644
--- a/shift/test/bounds.test.cjs
+++ b/shift/test/bounds.test.cjs
@@ -25,3 +25,14 @@ test('iterations checked before time', () => {
   const cfg = { bounds: { maxHours: 1, maxIterations: 1 } };
   assert.match(evaluateBounds({ ...base, iterations: 1 }, cfg, t0 + 3_600_001).reason, /max iterations/);
 });
+
+test('terminates on usage cap when usage is known', () => {
+  const cfg = { bounds: { maxHours: 8, usageCapPercent: 90 } };
+  assert.match(evaluateBounds(base, cfg, t0 + 1000, 92).reason, /usage cap/);
+});
+
+test('usage cap is ignored when usage is unknown', () => {
+  const cfg = { bounds: { maxHours: 8, usageCapPercent: 90 } };
+  assert.equal(evaluateBounds(base, cfg, t0 + 1000, undefined), null);
+  assert.equal(evaluateBounds(base, cfg, t0 + 1000, null), null);
+});
diff --git a/shift/test/decision.test.cjs b/shift/test/decision.test.cjs
index 76cbd66..9619b70 100644
--- a/shift/test/decision.test.cjs
+++ b/shift/test/decision.test.cjs
@@ -36,3 +36,11 @@ test('a bound (time box) allows stop even with pending work', () => {
   assert.equal(r.action, 'allow');
   assert.match(r.reason, /time box/);
 });
+
+test('usage cap allows stop even with pending work', () => {
+  const bins = [{ id: 'b', status: 'pending', text: 'x' }];
+  const capCfg = { bounds: { maxHours: 8, usageCapPercent: 90 }, definitionOfDone: 'd', git: {} };
+  const r = decide({ bins, state, config: capCfg, now: t0, usagePercent: 95, killSwitch: false });
+  assert.equal(r.action, 'allow');
+  assert.match(r.reason, /usage cap/);
+});
diff --git a/shift/test/hook.test.cjs b/shift/test/hook.test.cjs
index aae0144..ad2c603 100644
--- a/shift/test/hook.test.cjs
+++ b/shift/test/hook.test.cjs
@@ -8,18 +8,18 @@ const cp = require('node:child_process');
 
 const HOOK = path.resolve(__dirname, '..', 'hooks', 'shift-stop.cjs');
 
-function setupRun() {
+function setupRun(configOverride) {
   const cwd = fs.mkdtempSync(path.join(os.tmpdir(), 'shift-hook-'));
   fs.mkdirSync(path.join(cwd, 'queue'), { recursive: true });
   fs.writeFileSync(path.join(cwd, 'queue', '01.md'), 'bin one');
   fs.writeFileSync(path.join(cwd, 'queue', '02.md'), 'bin two');
   const dir = path.join(cwd, '.shift');
   fs.mkdirSync(dir, { recursive: true });
-  fs.writeFileSync(path.join(dir, 'config.json'), JSON.stringify({
+  fs.writeFileSync(path.join(dir, 'config.json'), JSON.stringify(Object.assign({
     sources: [{ path: 'queue', kind: 'briefs' }],
     bounds: { maxHours: 24, maxIterations: 10 },
     definitionOfDone: 'done', git: {}
-  }));
+  }, configOverride || {})));
   fs.writeFileSync(path.join(dir, 'state.json'), JSON.stringify({
     runId: 'r', startedAt: new Date().toISOString(), iterations: 0,
     branch: 'shift/x', currentBinId: null, bins: []
@@ -93,3 +93,50 @@ test('resolves .shift from the hook payload cwd, not the process cwd', () => {
   assert.equal(r.decision, 'block');
   assert.match(r.reason, /bin one/);
 });
+
+// ---- v3: verify gate ----
+
+test('verify gate (passing) marks bins done and drains', () => {
+  const { cwd, dir } = setupRun({ verify: { command: 'true', maxAttempts: 2 } });
+  runHook(cwd, { stop_hook_active: false }); // start bin 1
+  runHook(cwd, { stop_hook_active: true });  // verify passes -> bin1 done, start bin2
+  const s = JSON.parse(fs.readFileSync(path.join(dir, 'state.json'), 'utf8'));
+  assert.equal(s.bins.find(b => b.id === 'queue/01.md').status, 'done');
+});
+
+test('verify gate (failing) re-blocks the same bin with feedback, then blocks after maxAttempts', () => {
+  const { cwd, dir } = setupRun({ verify: { command: 'false', maxAttempts: 2 } });
+  runHook(cwd, { stop_hook_active: false });            // start bin 1
+  const r1 = runHook(cwd, { stop_hook_active: true });  // verify fails, attempt 1 < 2 -> retry SAME bin
+  assert.equal(r1.decision, 'block');
+  assert.match(r1.reason, /failed verification/);
+  assert.match(r1.reason, /bin one/);
+  let s = JSON.parse(fs.readFileSync(path.join(dir, 'state.json'), 'utf8'));
+  assert.equal(s.bins.find(b => b.id === 'queue/01.md').status, 'pending');
+  assert.equal(s.bins.find(b => b.id === 'queue/01.md').attempts, 1);
+
+  const r2 = runHook(cwd, { stop_hook_active: true });  // verify fails again, attempt 2 == max -> blocked, move on
+  assert.equal(r2.decision, 'block');
+  assert.match(r2.reason, /bin two/);
+  s = JSON.parse(fs.readFileSync(path.join(dir, 'state.json'), 'utf8'));
+  assert.equal(s.bins.find(b => b.id === 'queue/01.md').status, 'blocked');
+});
+
+// ---- v2: usage cap + cache ----
+
+test('usage cap from the hook payload ends the run and caches usage', () => {
+  const { cwd, dir } = setupRun({ bounds: { maxHours: 24, maxIterations: 10, usageCapPercent: 90 } });
+  const reset = Math.floor(Date.now() / 1000) + 3600;
+  const r = runHook(cwd, {
+    stop_hook_active: false,
+    rate_limits: {
+      five_hour: { used_percentage: 30, resets_at: reset },
+      seven_day: { used_percentage: 95, resets_at: reset }
+    }
+  });
+  assert.deepEqual(r, {});
+  assert.match(fs.readFileSync(path.join(dir, 'summary.md'), 'utf8'), /usage cap/);
+  const usage = JSON.parse(fs.readFileSync(path.join(dir, 'usage.json'), 'utf8'));
+  assert.equal(usage.weeklyPercent, 95);
+  assert.equal(usage.sessionResetAt, reset);
+});
diff --git a/shift/test/outcome.test.cjs b/shift/test/outcome.test.cjs
new file mode 100644
index 0000000..a436e6f
--- /dev/null
+++ b/shift/test/outcome.test.cjs
@@ -0,0 +1,33 @@
+'use strict';
+const { test } = require('node:test');
+const assert = require('node:assert');
+const { classifyOutcome } = require('../lib/outcome.cjs');
+
+const nowMs = 1_000_000_000_000;
+const nowSec = nowMs / 1000;
+
+test('finalized run is completed', () => {
+  assert.equal(classifyOutcome({ finalized: true, code: 1, now: nowMs }), 'completed');
+});
+
+test('clean exit (code 0) is completed', () => {
+  assert.equal(classifyOutcome({ finalized: false, code: 0, now: nowMs }), 'completed');
+});
+
+test('nonzero + near-limit usage + future reset is rate_limited', () => {
+  const usage = { sessionUsedPercent: 99, weeklyPercent: 50, sessionResetAt: nowSec + 3600 };
+  assert.equal(classifyOutcome({ finalized: false, code: 1, usage, now: nowMs }), 'rate_limited');
+});
+
+test('nonzero + rate-limit stderr is rate_limited', () => {
+  assert.equal(classifyOutcome({ finalized: false, code: 1, stderr: 'Error: rate limit exceeded', now: nowMs }), 'rate_limited');
+});
+
+test('nonzero with no signal is error', () => {
+  assert.equal(classifyOutcome({ finalized: false, code: 1, stderr: 'boom', now: nowMs }), 'error');
+});
+
+test('near-limit but reset already past is NOT rate_limited (no future window)', () => {
+  const usage = { sessionUsedPercent: 99, sessionResetAt: nowSec - 10 };
+  assert.equal(classifyOutcome({ finalized: false, code: 1, usage, stderr: 'boom', now: nowMs }), 'error');
+});
diff --git a/shift/test/run-loop.test.cjs b/shift/test/run-loop.test.cjs
new file mode 100644
index 0000000..b4c1c22
--- /dev/null
+++ b/shift/test/run-loop.test.cjs
@@ -0,0 +1,86 @@
+'use strict';
+const { test } = require('node:test');
+const assert = require('node:assert');
+const { runLoop } = require('../lib/run-loop.cjs');
+
+function makeEffects({ spawns, usage, bounds }) { // scripted fake `effects` for runLoop, plus a call recorder
+  const state = { startedAt: new Date(Date.now()).toISOString(), iterations: 0 };
+  let i = 0; // index of the next scripted spawn
+  let finalized = false; // set from the most recent scripted spawn's `finalize`
+  const calls = { sleepUntil: [], spawns: 0 };
+  const effects = {
+    now: () => Date.now(),
+    loadState: () => state,
+    readUsage: () => usage,
+    log: () => {},
+    finalized: () => finalized,
+    sleepUntil: (ms) => { calls.sleepUntil.push(ms); return Promise.resolve(); }, // records the target, never waits
+    spawn: () => {
+      calls.spawns += 1;
+      const s = spawns[i++] || { result: { status: 1, stderr: '' }, finalize: false }; // script exhausted → failing, non-finalizing spawn
+      finalized = s.finalize;
+      return s.result;
+    }
+  };
+  return { effects, calls, config: { bounds: bounds || { maxHours: 8, maxResumes: 12, autoResumeOnReset: true } } }; // default bounds when none are passed
+}
+
+test('a single finalizing spawn completes the run', async () => {
+  const { effects, calls, config } = makeEffects({
+    spawns: [{ result: { status: 0 }, finalize: true }],
+    usage: null
+  });
+  const r = await runLoop({ config, effects });
+  assert.match(r.reason, /finalized/);
+  assert.equal(r.spawns, 1);
+  assert.equal(calls.sleepUntil.length, 0);
+});
+
+test('rate-limited spawn waits for reset, then resumes and finishes', async () => {
+  const usage = { weeklyPercent: 50, sessionUsedPercent: 99, sessionResetAt: Math.floor(Date.now() / 1000) + 3600 };
+  const { effects, calls, config } = makeEffects({
+    spawns: [
+      { result: { status: 1, stderr: '' }, finalize: false }, // rate-limited (inferred from usage)
+      { result: { status: 0 }, finalize: true }               // resumes, finalizes
+    ],
+    usage
+  });
+  const r = await runLoop({ config, effects });
+  assert.match(r.reason, /finalized/);
+  assert.equal(r.spawns, 2);
+  assert.equal(calls.sleepUntil.length, 1, 'should have waited once');
+});
+
+test('rate-limited with auto-resume disabled stops', async () => {
+  const usage = { weeklyPercent: 50, sessionUsedPercent: 99, sessionResetAt: Math.floor(Date.now() / 1000) + 3600 };
+  const { effects, config } = makeEffects({
+    spawns: [{ result: { status: 1, stderr: '' }, finalize: false }],
+    usage,
+    bounds: { maxHours: 8, maxResumes: 12, autoResumeOnReset: false }
+  });
+  const r = await runLoop({ config, effects });
+  assert.match(r.reason, /auto-resume disabled/);
+  assert.equal(r.spawns, 1);
+});
+
+test('usage cap stops before any spawn', async () => {
+  const { effects, calls, config } = makeEffects({
+    spawns: [{ result: { status: 0 }, finalize: true }],
+    usage: { weeklyPercent: 95 },
+    bounds: { maxHours: 8, usageCapPercent: 90, autoResumeOnReset: true }
+  });
+  const r = await runLoop({ config, effects });
+  assert.match(r.reason, /usage cap/);
+  assert.equal(calls.spawns, 0);
+});
+
+test('maxResumes acts as a runaway backstop', async () => {
+  const { effects, config } = makeEffects({
+    spawns: [{ result: { status: 0 }, finalize: true }],
+    usage: null,
+    bounds: { maxHours: 8, maxResumes: 0, autoResumeOnReset: true }
+  });
+  const r = await runLoop({ config, effects });
+  assert.match(r.reason, /max resumes/);
+  assert.equal(r.spawns, 0);
+});
diff --git a/shift/test/usage.test.cjs b/shift/test/usage.test.cjs
new file mode 100644
index 0000000..66c3f8d
--- /dev/null
+++ b/shift/test/usage.test.cjs
@@ -0,0 +1,40 @@
+'use strict';
+const { test } = require('node:test');
+const assert = require('node:assert');
+const fs = require('node:fs');
+const os = require('node:os');
+const path = require('node:path');
+const { writeUsageCache, readUsageCache } = require('../lib/usage.cjs');
+
+function tmp() { return fs.mkdtempSync(path.join(os.tmpdir(), 'shift-usage-')); } // new 'shift-usage-*' dir under the OS temp dir per call
+
+test('write + read round-trips the full rate-limit payload', () => {
+  const dir = tmp();
+  const weekly = writeUsageCache(dir, {
+    five_hour: { used_percentage: 72, resets_at: 1000 },
+    seven_day: { used_percentage: 41, resets_at: 2000 }
+  }, 123);
+  assert.equal(weekly, 41);
+  assert.deepEqual(readUsageCache(dir), {
+    weeklyPercent: 41, sessionUsedPercent: 72, sessionResetAt: 1000, weeklyResetAt: 2000, capturedAt: 123
+  });
+});
+
+test('absent rate_limits returns null and writes nothing', () => {
+  const dir = tmp();
+  assert.equal(writeUsageCache(dir, undefined, 1), null);
+  assert.equal(readUsageCache(dir), null);
+});
+
+test('partial windows degrade to null fields', () => {
+  const dir = tmp();
+  const weekly = writeUsageCache(dir, { five_hour: { used_percentage: 60, resets_at: 5 } }, 9);
+  assert.equal(weekly, null);
+  const c = readUsageCache(dir);
+  assert.equal(c.sessionUsedPercent, 60);
+  assert.equal(c.weeklyPercent, null);
+});
+
+test('readUsageCache returns null when no cache exists', () => {
+  assert.equal(readUsageCache(tmp()), null);
+});
diff --git a/shift/test/verify.test.cjs b/shift/test/verify.test.cjs
new file mode 100644
index 0000000..09a4a90
--- /dev/null
+++ b/shift/test/verify.test.cjs
@@ -0,0 +1,25 @@
+'use strict';
+const { test } = require('node:test');
+const assert = require('node:assert');
+const os = require('node:os');
+const { runVerify } = require('../lib/verify.cjs');
+
+test('a null/empty command is a pass', () => {
+  assert.deepEqual(runVerify(null, '.'), { ok: true, output: '' });
+  assert.deepEqual(runVerify('', '.'), { ok: true, output: '' });
+});
+
+test('uses the injected exec and returns its result', () => {
+  const fake = (cmd, cwd) => ({ ok: false, output: `ran ${cmd} in ${cwd}` });
+  const r = runVerify('npm test', '/repo', fake);
+  assert.equal(r.ok, false);
+  assert.match(r.output, /ran npm test in \/repo/);
+});
+
+test('default exec: zero exit passes, non-zero fails, output captured', () => {
+  assert.equal(runVerify('true', os.tmpdir()).ok, true);
+  assert.equal(runVerify('false', os.tmpdir()).ok, false);
+  const r = runVerify('echo hi', os.tmpdir());
+  assert.equal(r.ok, true);
+  assert.match(r.output, /hi/);
+});

From e172bb0bf5870320ae1305d4e9dd2c2f0ee5c35e Mon Sep 17 00:00:00 2001
From: Allen Manzano-Wight <6640236+AllenBW@users.noreply.github.com>
Date: Mon, 15 Jun 2026 14:06:53 -0400
Subject: [PATCH 04/12] shift: one-command Stop-hook installer + list module 2
 in root README

- shift/install.sh wires the Stop hook into ~/.claude/settings.json idempotently
  (backup -> merge -> validate -> atomic move); never duplicates, updates the path
  on repo move, preserves existing hooks/settings.
- shift/lib/install.cjs: pure mergeStopHook() (tested); install.sh is a thin shell.
- shift/test/install.test.cjs: 7 tests (unit merge + live install.sh integration).
- README (root): list shift in the Modules table + candor pointer.
- shift/README: swap manual hook-wiring for the installer; resolve the hook-schema
  caveat (block/reason contract verified against the Claude Code hooks docs).
---
 README.md                   |   3 ++
 shift/README.md             |  28 +++++-----
 shift/install.sh            |  79 ++++++++++++++++++++++++++++
 shift/lib/install.cjs       |  47 +++++++++++++++++
 shift/test/install.test.cjs | 102 ++++++++++++++++++++++++++++++++++++
 5 files changed, 246 insertions(+), 13 deletions(-)
 create mode 100755 shift/install.sh
 create mode 100644 shift/lib/install.cjs
 create mode 100644 shift/test/install.test.cjs

diff --git a/README.md b/README.md
index 5c7576c..eea0110 100644
--- a/README.md
+++ b/README.md
@@ -19,9 +19,12 @@ Transparency isn't a feature bolted on the side; for agentic coding it's the who
 | Module | What it is | Targets |
 |---|---|---|
 | [**code-status-bar**](./code-status-bar) | A status line that shows usage limits, cost, context health, and git/worktree state at a glance | Claude Code (via [ccstatusline](https://github.com/sirmalloc/ccstatusline)) |
+| [**shift**](./shift) | An autonomous work-queue runner: pre-load bins of work, leave, and it keeps the agent grinding through them — past natural stop points and across rate-limit resets — leaving every decision logged and every change a reviewable commit | Claude Code (Stop hook + headless `-p`) |
 
 > **New here? Start with the [Code Status Bar](./code-status-bar).** It installs as a portable, zero-dependency default, or an [opt-in colored variant](./code-status-bar#color--static-by-default-status-driven-by-opt-in) that recolors the usage bars **green → yellow → red** as you approach each limit — so you *feel* a wall coming before you read a single number. You could build it by hand in ccstatusline's editor; this is that setup already done — one command, no configuration, and still fully editable.
 
+> **Going heads-down?** [**shift**](./shift) turns an unattended run — the *least* transparent mode there is — into an honest paper trail: you trade real-time steering for a `shift/<date>` branch, a decision log, and a "here's what I did and what needs you" summary. One command wires the hook; the safety model keeps the work on a branch and off your remotes.
+
 More to come. Each module is self-contained, declares which agent it targets, and explains *why* every piece earns its place — because justifying the real estate is part of the philosophy.
 
 ## License
diff --git a/shift/README.md b/shift/README.md
index d871a96..36fc316 100644
--- a/shift/README.md
+++ b/shift/README.md
@@ -20,24 +20,26 @@ Full best-judgment autonomy on reversible, in-worktree work. By default it will
 
 ## Install
 
-1. Get the files (clone the toolkit, or copy the `shift/` folder).
-2. Register the Stop hook **once** in `~/.claude/settings.json` (safe globally — no-ops outside an active run):
+1. Clone the toolkit (the hook runs from these files by absolute path, so it installs locally — no `curl | bash`).
+2. Wire the Stop hook into `~/.claude/settings.json` — one command, idempotent:
+
+```bash
+bash shift/install.sh
+```
+
+It merges the entry below (safe globally — the hook no-ops in any repo without an active `.shift/` run), backs up any existing settings first, and never duplicates on re-run — re-running after a `git pull` or a repo move just updates the path:
 
 ```json
-{
-  "hooks": {
-    "Stop": [
-      { "matcher": "", "hooks": [
-        { "type": "command", "command": "node /ABSOLUTE/PATH/TO/shift/hooks/shift-stop.cjs" }
-      ] }
-    ]
-  }
-}
+{ "hooks": { "Stop": [
+  { "matcher": "", "hooks": [
+    { "type": "command", "command": "node /ABSOLUTE/PATH/TO/shift/hooks/shift-stop.cjs" }
+  ] }
+] } }
 ```
 
-> Verify the hook schema against the current Claude Code hooks docs. The engine needs only: "block + feed `reason` back", the `stop_hook_active` flag, the payload `cwd`, and (for the usage cap / auto-resume) the payload `rate_limits`.
+> **Hook contract (verified against the [Claude Code hooks docs](https://code.claude.com/docs/en/hooks)).** The Stop hook returns `{"decision":"block","reason":…}` to keep the session going — the `reason` becomes the next instruction — and omits `decision` (or exits 0) to allow the stop. The usage cap and `shift run` auto-resume read the hook payload's `rate_limits` when present and **skip cleanly when it's absent** (e.g. non-Pro/Max), so the engine never depends on it.
 
-3. (Optional) put `shift/bin/shift` on your PATH.
+3. (Optional) put `shift/bin/shift` on your PATH — the installer prints the `ln -s` command.
 
 ## Use
 
diff --git a/shift/install.sh b/shift/install.sh
new file mode 100755
index 0000000..eb291d3
--- /dev/null
+++ b/shift/install.sh
@@ -0,0 +1,79 @@
+#!/usr/bin/env bash
+# shift installer — Agentic Workflow Toolkit (module 2)
+# Wires shift's Stop hook into ~/.claude/settings.json, idempotently.
+#
+# Unlike the status-bar installer, this one is LOCAL-ONLY: the hook entry points at
+# this clone's hooks/shift-stop.cjs by absolute path, so it must run from the files
+# on disk (no curl | bash). Re-running after `git pull` (or after moving the repo)
+# updates the path in place — it never duplicates the hook.
+set -euo pipefail
+
+if ! command -v node >/dev/null 2>&1; then
+  echo "Error: shift needs Node on your PATH (the hook + this installer run via node)." >&2
+  exit 1
+fi
+
+# Resolve this script's directory; the hook and the merger live next to it.
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]:-$0}")" 2>/dev/null && pwd || echo "")"
+HOOK="$SCRIPT_DIR/hooks/shift-stop.cjs"
+MERGER="$SCRIPT_DIR/lib/install.cjs"
+if [ -z "$SCRIPT_DIR" ] || [ ! -f "$HOOK" ] || [ ! -f "$MERGER" ]; then
+  echo "Error: run this from a shift clone — couldn't find hooks/shift-stop.cjs and lib/install.cjs next to install.sh." >&2
+  echo "       (clone the toolkit, then: bash shift/install.sh)" >&2
+  exit 1
+fi
+
+COMMAND="node $HOOK"
+SETTINGS_DIR="$HOME/.claude"
+DEST="$SETTINGS_DIR/settings.json"
+mkdir -p "$SETTINGS_DIR"
+
+# Merge into a temp file beside $DEST (so the final mv is a same-filesystem rename), then
+# move it into place; the EXIT trap removes the temp file if anything fails along the way.
+TMP="$(mktemp "$DEST.tmp.XXXXXX")"; trap 'rm -f "$TMP"' EXIT
+ACTION="$(node -e '
+  const fs = require("node:fs");
+  const { mergeStopHook } = require(process.argv[1]);
+  const dest = process.argv[2], command = process.argv[3], tmp = process.argv[4];
+  let settings = {};
+  if (fs.existsSync(dest)) {
+    const raw = fs.readFileSync(dest, "utf8").trim();
+    if (raw) {
+      try { settings = JSON.parse(raw); }
+      catch { console.error("Error: " + dest + " is not valid JSON; fix or move it, then re-run."); process.exit(2); }
+    }
+  }
+  const r = mergeStopHook(settings, command);
+  fs.writeFileSync(tmp, JSON.stringify(r.settings, null, 2) + "\n");
+  process.stdout.write(r.action);
+' "$MERGER" "$DEST" "$COMMAND" "$TMP")" || { rm -f "$TMP"; exit 1; }
+
+if [ ! -s "$TMP" ]; then
+  echo "Error: merge produced an empty file; aborting (your settings are untouched)." >&2
+  rm -f "$TMP"; exit 1
+fi
+
+if [ "$ACTION" = "unchanged" ]; then
+  echo "Already wired: shift Stop hook is present in $DEST (no change)."
+  rm -f "$TMP"
+else
+  if [ -f "$DEST" ]; then
+    BAK="$DEST.bak-$(date +%Y%m%d-%H%M%S)"
+    cp "$DEST" "$BAK"
+    echo "Backed up existing settings -> $BAK"
+  fi
+  mv "$TMP" "$DEST"
+  case "$ACTION" in
+    added)   echo "Installed: shift Stop hook -> $DEST" ;;
+    updated) echo "Updated: shift Stop hook path -> $DEST" ;;
+    *)       echo "Wrote: $DEST ($ACTION)" ;;
+  esac
+fi
+
+echo "  hook: $COMMAND"
+echo
+echo "Safe globally — the hook no-ops in any repo without an active .shift/ run."
+echo "Next: cd into a repo, add briefs under queue/, then: ${SCRIPT_DIR}/bin/shift start"
+echo "(optional) put it on PATH:  ln -s ${SCRIPT_DIR}/bin/shift /usr/local/bin/shift"
+echo
+echo "To remove later, delete the shift Stop entry from $DEST (or restore a .bak-* backup)."
diff --git a/shift/lib/install.cjs b/shift/lib/install.cjs
new file mode 100644
index 0000000..993e427
--- /dev/null
+++ b/shift/lib/install.cjs
@@ -0,0 +1,47 @@
+'use strict';
+// Pure logic for wiring shift's Stop hook into a Claude Code settings object.
+// The I/O (read/back-up/validate/write ~/.claude/settings.json) lives in install.sh;
+// this stays a pure function so it can be unit-tested without touching the filesystem.
+
+// A command string belongs to shift if it invokes our Stop hook script.
+function isShiftCommand(command) {
+  return typeof command === 'string' && command.includes('shift-stop.cjs');
+}
+
+function makeGroup(command) {
+  return { matcher: '', hooks: [{ type: 'command', command }] };
+}
+
+// mergeStopHook(settings, command) -> { settings, changed, action }
+//   action: 'added' (no prior shift hook) | 'updated' (path changed) | 'unchanged' (already wired).
+// Never mutates the input. An update repoints only the shift entry; its group's other hooks stay.
+function mergeStopHook(settings, command) {
+  const next = { ...(settings || {}) };
+  const hooks = { ...(next.hooks || {}) };
+  const stop = Array.isArray(hooks.Stop) ? hooks.Stop.map(g => ({ ...g })) : [];
+
+  // Find an existing group that already points at shift's hook.
+  const idx = stop.findIndex(g =>
+    Array.isArray(g.hooks) && g.hooks.some(h => isShiftCommand(h && h.command)));
+
+  let action;
+  if (idx === -1) {
+    stop.push(makeGroup(command));
+    action = 'added';
+  } else {
+    const current = stop[idx].hooks.find(h => isShiftCommand(h && h.command));
+    action = current.command === command ? 'unchanged' : 'updated';
+    if (action === 'updated') {
+      // Repo moved: repoint the shift entry; keep sibling hooks, drop stale shift duplicates.
+      stop[idx].hooks = stop[idx].hooks
+        .filter(h => h === current || !isShiftCommand(h && h.command))
+        .map(h => (h === current ? { ...h, command } : h));
+    }
+  }
+
+  hooks.Stop = stop;
+  next.hooks = hooks;
+  return { settings: next, changed: action !== 'unchanged', action };
+}
+
+module.exports = { mergeStopHook, isShiftCommand };
diff --git a/shift/test/install.test.cjs b/shift/test/install.test.cjs
new file mode 100644
index 0000000..a8a1352
--- /dev/null
+++ b/shift/test/install.test.cjs
@@ -0,0 +1,102 @@
+'use strict';
+const { test } = require('node:test');
+const assert = require('node:assert');
+const fs = require('node:fs');
+const os = require('node:os');
+const path = require('node:path');
+const cp = require('node:child_process');
+const { mergeStopHook } = require('../lib/install.cjs');
+
+const CMD = 'node /abs/path/to/shift/hooks/shift-stop.cjs';
+const INSTALL = path.resolve(__dirname, '..', 'install.sh');
+const HOOK = path.resolve(__dirname, '..', 'hooks', 'shift-stop.cjs');
+
+function runInstall(home) {
+  return cp.execFileSync('bash', [INSTALL], {
+    env: { ...process.env, HOME: home }, encoding: 'utf8'
+  });
+}
+function readSettings(home) {
+  return JSON.parse(fs.readFileSync(path.join(home, '.claude', 'settings.json'), 'utf8'));
+}
+
+test('adds the Stop hook to empty settings', () => {
+  const r = mergeStopHook({}, CMD);
+  assert.equal(r.action, 'added');
+  assert.equal(r.changed, true);
+  const groups = r.settings.hooks.Stop;
+  assert.equal(groups.length, 1);
+  assert.deepEqual(groups[0], { matcher: '', hooks: [{ type: 'command', command: CMD }] });
+});
+
+test('is idempotent — same command twice does not duplicate', () => {
+  const once = mergeStopHook({}, CMD).settings;
+  const twice = mergeStopHook(once, CMD);
+  assert.equal(twice.action, 'unchanged');
+  assert.equal(twice.changed, false);
+  assert.equal(twice.settings.hooks.Stop.length, 1);
+});
+
+test('preserves unrelated hooks and existing Stop groups', () => {
+  const existing = {
+    statusLine: { type: 'command', command: 'x' },
+    hooks: {
+      PreToolUse: [{ matcher: 'Bash', hooks: [{ type: 'command', command: 'guard' }] }],
+      Stop: [{ matcher: '', hooks: [{ type: 'command', command: 'other-stop-hook' }] }]
+    }
+  };
+  const r = mergeStopHook(existing, CMD);
+  assert.equal(r.action, 'added');
+  // unrelated settings + hooks untouched
+  assert.deepEqual(r.settings.statusLine, { type: 'command', command: 'x' });
+  assert.equal(r.settings.hooks.PreToolUse.length, 1);
+  // shift appended, the foreign Stop group kept
+  assert.equal(r.settings.hooks.Stop.length, 2);
+  assert.equal(r.settings.hooks.Stop[0].hooks[0].command, 'other-stop-hook');
+  assert.equal(r.settings.hooks.Stop[1].hooks[0].command, CMD);
+});
+
+test('updates the path when the shift hook moved', () => {
+  const old = mergeStopHook({}, 'node /old/path/shift/hooks/shift-stop.cjs').settings;
+  const r = mergeStopHook(old, CMD);
+  assert.equal(r.action, 'updated');
+  assert.equal(r.changed, true);
+  assert.equal(r.settings.hooks.Stop.length, 1);
+  assert.equal(r.settings.hooks.Stop[0].hooks[0].command, CMD);
+});
+
+test('does not mutate the input settings object', () => {
+  const input = { hooks: { Stop: [] } };
+  const snapshot = JSON.stringify(input);
+  mergeStopHook(input, CMD);
+  assert.equal(JSON.stringify(input), snapshot);
+});
+
+test('install.sh wires the hook into a fresh ~/.claude/settings.json', () => {
+  const home = fs.mkdtempSync(path.join(os.tmpdir(), 'shift-inst-'));
+  const out = runInstall(home);
+  assert.match(out, /Installed: shift Stop hook/);
+  const s = readSettings(home);
+  assert.equal(s.hooks.Stop.length, 1);
+  assert.equal(s.hooks.Stop[0].hooks[0].command, `node ${HOOK}`);
+});
+
+test('install.sh is idempotent and preserves existing settings + backs up', () => {
+  const home = fs.mkdtempSync(path.join(os.tmpdir(), 'shift-inst-'));
+  const claude = path.join(home, '.claude');
+  fs.mkdirSync(claude, { recursive: true });
+  fs.writeFileSync(path.join(claude, 'settings.json'),
+    JSON.stringify({ statusLine: { type: 'command', command: 'x' } }, null, 2));
+
+  const out1 = runInstall(home);
+  assert.match(out1, /Backed up existing settings/);
+  const s1 = readSettings(home);
+  assert.deepEqual(s1.statusLine, { type: 'command', command: 'x' }); // preserved
+  assert.equal(s1.hooks.Stop.length, 1);
+
+  const out2 = runInstall(home);
+  assert.match(out2, /Already wired/);
+  assert.equal(readSettings(home).hooks.Stop.length, 1); // no duplicate
+  const baks = fs.readdirSync(claude).filter(f => f.startsWith('settings.json.bak-'));
+  assert.equal(baks.length, 1); // unchanged run made no second backup
+});

From c7adfb889843f6c47e324fffb04b32571caf8f04 Mon Sep 17 00:00:00 2001
From: Allen Manzano-Wight <6640236+AllenBW@users.noreply.github.com>
Date: Mon, 15 Jun 2026 21:42:25 -0400
Subject: [PATCH 05/12] shift: harden the headless runner (post-smoke-audit
 fixes)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

A real `shift run` smoke confirmed headless `claude -p` honors the Stop-hook
block and drives the queue warm (resolves the SPEC §9.2 open question). A
pre-flight audit of the previously-untested runner path drove these fixes:

- No false-green: classifyOutcome returns 'completed' only when the engine
  finalized (summary.md). A code-0 exit without finalize is 'incomplete' — the
  runner resumes if the queue advanced, else stops with a 'is the Stop hook
  wired?' diagnostic. `shift run` grades on summary.md, not the exit line.
- Stale-reset guard: auto-resume stops cleanly when the cached reset time is
  already in the past (was a maxResumes-bounded busy-spin).
- Per-spawn timeout (spawnTimeoutMinutes, default 30) kills a wedged claude so
  spawnSync can't hang the runner; launch failures + kills are surfaced.
- Warn when a headless run uses a Bash-prompting permission mode.
- Dropped a spurious audit suggestion (runner writing state.iterations) that
  would have double-counted the hook's bound tracking.

63 shift tests green (pure unit + hook/CLI/run-loop/install integration).
---
 shift/README.md              |  7 +++++-
 shift/SPEC.md                | 15 ++++++++++--
 shift/bin/shift              | 29 +++++++++++++++++++++---
 shift/lib/outcome.cjs        |  7 ++++--
 shift/lib/run-loop.cjs       | 20 +++++++++++++++-
 shift/test/outcome.test.cjs  | 11 +++++++--
 shift/test/run-loop.test.cjs | 44 ++++++++++++++++++++++++++++++++++++
 7 files changed, 122 insertions(+), 11 deletions(-)

diff --git a/shift/README.md b/shift/README.md
index 36fc316..8bb45c0 100644
--- a/shift/README.md
+++ b/shift/README.md
@@ -74,6 +74,7 @@ When it ends, read `.shift/summary.md` (bins done/blocked + a "Needs you" sectio
     "maxHours": 4,
     "maxIterations": 30,
     "maxResumes": 12,
+    "spawnTimeoutMinutes": 30,
     "usageCapPercent": 90,
     "autoResumeOnReset": true
   },
@@ -85,9 +86,13 @@ When it ends, read `.shift/summary.md` (bins done/blocked + a "Needs you" sectio
 ```
 
 - **`usageCapPercent`** — stop when weekly usage reaches this (read from the hook payload's `rate_limits`; skipped when that data is absent, e.g. non-Pro/Max).
-- **`autoResumeOnReset`** — on a rate-limit wall, `shift run` waits for the 5-hour window to reopen and resumes (never past the time box).
+- **`autoResumeOnReset`** — on a rate-limit wall, `shift run` waits for the 5-hour window to reopen and resumes (never past the time box). If the cached reset time is stale/in the past it stops cleanly rather than busy-spinning.
+- **`maxResumes`** — the runner's own backstop on the number of `claude` spawns (independent of the hook-maintained `maxIterations`/`maxHours`).
+- **`spawnTimeoutMinutes`** — hard per-spawn time limit, in minutes: a wedged `claude` is killed (SIGTERM) once it is exceeded, so it can't hang the runner. Default 30.
 - **`verify.command`** — per-bin acceptance gate; `null` disables it.
 
+> A headless `shift run` grades success on `.shift/summary.md` (written only when the engine finalizes), not on the exit line: a `claude -p` that exits without finalizing is reported as *"no summary written — did NOT finalize"* with a hint to check the hook wiring, never as a false success.
+
 ### Permissions for unattended runs
 
 `shift run` invokes `claude -p --permission-mode <permissionMode>`. `acceptEdits` (the default) auto-approves file edits but **other tools (e.g. Bash) can still prompt — and a headless run can't answer prompts.** For real unattended work that runs tests/commands, either:
diff --git a/shift/SPEC.md b/shift/SPEC.md
index e81e1eb..229c6ac 100644
--- a/shift/SPEC.md
+++ b/shift/SPEC.md
@@ -269,6 +269,17 @@ All three phases are implemented on branch `shift-v1`. Notable as-built decision
 - **Rate-limit detection without the undocumented exit signature (resolves §9.2).** Research confirmed the headless rate-limit termination signature is undocumented, but the **Stop hook payload includes `rate_limits`**. So the engine caches the latest reset/usage to `.shift/usage.json`, and `lib/outcome.cjs` classifies a non-finalized, non-zero spawn as `rate_limited` by **inference** — near-limit cached usage (≥95%) + a future reset — with config-overridable stderr patterns as a fallback. No dependency on an exact exit code/message.
 - **Usage cap source (resolves §9.1).** Enforced from the hook payload's `rate_limits.seven_day.used_percentage`; absent data (non-Pro/Max, pre-first-response) degrades to "cap skipped," never an error.
 - **Verify gate (v3, resolves §9.3).** `verify.command` runs per bin; failures re-feed the bin with the output up to `maxAttempts`, then block it — so "looked done but wasn't" is caught, not silently accepted.
-- **Permissions.** `shift run` uses `--permission-mode` (default `acceptEdits`). Truly unattended work that runs commands typically needs `dontAsk` + a `permissions.allow` list, or `bypassPermissions` — documented in the README; the branch-only/no-push model and bounds are the backstop.
+- **Permissions.** `shift run` uses `--permission-mode` (default `acceptEdits`). Truly unattended work that runs commands typically needs `dontAsk` + a `permissions.allow` list, or `bypassPermissions` — documented in the README; the branch-only/no-push model and bounds are the backstop. The runner now **warns** at startup when `permissionMode` would prompt on Bash (a headless run can't answer), since that combination otherwise exits without finalizing.
 
-**New modules beyond §12:** `lib/verify.cjs`, `lib/usage.cjs`, `lib/outcome.cjs`, `lib/run-loop.cjs`; `bin/shift` gains `run`. **Tests:** 52 in `shift` (pure unit + hook/CLI/run-loop integration), all green.
+**New modules beyond §12:** `lib/verify.cjs`, `lib/usage.cjs`, `lib/outcome.cjs`, `lib/run-loop.cjs`, `lib/install.cjs`; `bin/shift` gains `run`; `install.sh` wires the Stop hook.
+
+### Smoke validation + post-smoke hardening (2026-06-15)
+
+A real bounded `shift run` smoke (2 commit-a-file bins, `bypassPermissions`) **empirically resolved the open question behind §9.2**: headless `claude -p` **does** honor the Stop hook's `{"decision":"block"}` and continues the session warm — both bins were completed and committed within a single spawn. A pre-flight audit of the (previously untested) runner path then drove four fixes:
+
+- **No false-green.** `classifyOutcome` only returns `completed` when the engine actually finalized (`summary.md` written). A `claude -p` that exits 0 without finalizing is `incomplete` — the runner **resumes** if the queue advanced, else **stops with a "is the Stop hook wired?" diagnostic** instead of reporting success. `shift run` grades on `summary.md`, not the exit line.
+- **Stale-reset guard.** Auto-resume stops cleanly when the cached reset time is already in the past (previously a `maxResumes`-bounded busy-spin).
+- **Per-spawn timeout.** `spawnTimeoutMinutes` (default 30) kills a wedged `claude` so a blocking `spawnSync` can't hang the runner; launch failures (`claude` not on PATH) and kills are now surfaced, not swallowed. *Known limitation:* the timeout SIGTERMs the `claude` process only, not any tool-subprocess grandchildren it spawned (an inherent `spawnSync` behavior) — a wedged grandchild can outlive the kill; a detached-process-group reap is a future improvement.
+- **Hook-install is required for `shift run`** and `install.sh` automates it (the bin's task text reaches the agent only via the Stop-hook block).
+
+**Tests:** 63 in `shift` (pure unit + hook/CLI/run-loop/install integration), all green.
diff --git a/shift/bin/shift b/shift/bin/shift
index f0d4327..2c5f557 100755
--- a/shift/bin/shift
+++ b/shift/bin/shift
@@ -15,6 +15,7 @@ const DEFAULT_CONFIG = {
     maxHours: 2,
     maxIterations: 20,
     maxResumes: 12,
+    spawnTimeoutMinutes: 30,
     usageCapPercent: 90,
     autoResumeOnReset: true
   },
@@ -103,6 +104,18 @@ async function cmdRun() {
   const { runLoop } = require('../lib/run-loop.cjs');
   const { readUsageCache } = require('../lib/usage.cjs');
 
+  // A headless `-p` run cannot answer permission prompts. Only bypassPermissions/dontAsk
+  // auto-approve tool calls like Bash(git commit) — anything else stalls or denies on the
+  // first command the work needs (the engine then exits without finalizing).
+  if (!['bypassPermissions', 'dontAsk'].includes(mode)) {
+    console.log(`[shift] warning: permissionMode "${mode}" prompts on tools like Bash, which a headless run can't answer.`);
+    console.log('[shift]          set "permissionMode":"dontAsk" (+ permissions.allow) or "bypassPermissions" in .shift/config.json.');
+  }
+
+  // Hard per-spawn timeout so a wedged `claude` can't hang the runner forever
+  // (spawnSync is blocking; the loop's time bounds can't interrupt it).
+  const spawnTimeoutMs = (((config.bounds && config.bounds.spawnTimeoutMinutes) || 30)) * 60_000;
+
   // Clear any stale summary so finalized() reflects THIS run.
   try { fs.unlinkSync(path.join(dir, 'summary.md')); } catch { /* none */ }
 
@@ -118,15 +131,25 @@ async function cmdRun() {
       const args = ['-p', '--permission-mode', mode];
       if (first) { args.push('begin the shift'); first = false; }
       else { args.push('--continue', 'continue the shift'); }
-      return cp.spawnSync('claude', args, {
-        cwd, encoding: 'utf8', stdio: ['ignore', 'pipe', 'pipe'], maxBuffer: 64 * 1024 * 1024
+      const res = cp.spawnSync('claude', args, {
+        cwd, encoding: 'utf8', stdio: ['ignore', 'pipe', 'pipe'],
+        maxBuffer: 64 * 1024 * 1024, timeout: spawnTimeoutMs, killSignal: 'SIGTERM'
       });
+      // Surface launch failures (claude not on PATH → ENOENT) and timeouts/kills —
+      // otherwise they classify as a bare 'error' with no diagnostics.
+      if (res && res.error) console.log(`[shift] spawn failed to run claude: ${res.error.message}`);
+      if (res && res.signal) console.log(`[shift] spawn killed by signal ${res.signal} (timeout ${spawnTimeoutMs / 60000}min?)`);
+      return res;
     }
   };
 
   const result = await runLoop({ config, effects });
   console.log(`[shift] stopped: ${result.reason} (after ${result.spawns} spawn(s))`);
-  console.log(`[shift] review: ${path.join(dir, 'summary.md')}`);
+  if (effects.finalized()) {
+    console.log(`[shift] review: ${path.join(dir, 'summary.md')}`);
+  } else {
+    console.log('[shift] no summary written — the run did NOT finalize; see the [shift] lines above. Nothing was committed by the engine.');
+  }
 }
 
 const [, , sub, ...rest] = process.argv;
diff --git a/shift/lib/outcome.cjs b/shift/lib/outcome.cjs
index 07ff0e9..9638983 100644
--- a/shift/lib/outcome.cjs
+++ b/shift/lib/outcome.cjs
@@ -7,11 +7,14 @@ const DEFAULT_PATTERNS = [/rate.?limit/i, /usage limit/i, /quota/i, /\b429\b/];
 const NEAR_LIMIT_PERCENT = 95;
 
 // ctx: { finalized, code, stderr, usage, now (ms), patterns? }
-// returns 'completed' | 'rate_limited' | 'error'
+// returns 'completed' | 'incomplete' | 'rate_limited' | 'error'
 function classifyOutcome(ctx) {
   const { finalized, code, stderr, usage, now, patterns } = ctx;
   if (finalized) return 'completed';      // the engine wrote summary.md → run is done
-  if (code === 0) return 'completed';      // clean exit without finalize (nothing left to do)
+  // A clean exit WITHOUT finalize is NOT success: claude stopped but the engine never
+  // wrote summary.md (hook not wired, or a partial stop). Caller resumes or stops — it
+  // must never be reported as 'completed' (that was a silent false-green).
+  if (code === 0) return 'incomplete';
 
   const nowSec = (typeof now === 'number' ? now : Date.now()) / 1000;
   const resetFuture = usage && typeof usage.sessionResetAt === 'number' && usage.sessionResetAt > nowSec;
diff --git a/shift/lib/run-loop.cjs b/shift/lib/run-loop.cjs
index 41ad2b2..8703b92 100644
--- a/shift/lib/run-loop.cjs
+++ b/shift/lib/run-loop.cjs
@@ -37,6 +37,10 @@ async function runLoop({ config, effects }) {
       const resetAt = usage && typeof usage.sessionResetAt === 'number' ? usage.sessionResetAt * 1000 : null;
       if (!resetAt) return { reason: 'rate limited but no reset time available — stopping', spawns };
       const until = resetAt + RESET_BUFFER_MS;
+      // The cached reset time is only refreshed by the Stop hook; a wall that kills the
+      // session before any hook fires leaves it stale. If it's already in the past,
+      // sleepUntil(past) returns instantly and we'd re-spawn in a tight loop — stop instead.
+      if (until <= now) return { reason: 'rate limited but the reset window is stale/past — stopping', spawns };
       if (typeof bounds.maxHours === 'number') {
         const deadline = Date.parse(state.startedAt) + bounds.maxHours * 3_600_000;
         if (until >= deadline) return { reason: 'rate limited; reset is past the time box — stopping', spawns };
@@ -47,16 +51,30 @@ async function runLoop({ config, effects }) {
       continue;
     }
 
+    const iterBefore = (state && typeof state.iterations === 'number') ? state.iterations : 0;
     spawns += 1;
     effects.log(`spawn #${spawns}: running claude`);
     const res = effects.spawn(spawns);
-    lastOutcome = classifyOutcome({
+    const outcome = classifyOutcome({
       finalized: effects.finalized(),
       code: res ? res.status : 1,
       stderr: res ? res.stderr : '',
       usage: effects.readUsage(),
       now: effects.now()
     });
+
+    // 'incomplete' = claude exited cleanly but the engine never finalized. If it advanced
+    // the queue (partial progress), resume to finish it; if it advanced nothing, resuming
+    // won't help — stop with a diagnostic rather than spin or report a false-green.
+    if (outcome === 'incomplete') {
+      const after = effects.loadState();
+      const iterAfter = (after && typeof after.iterations === 'number') ? after.iterations : iterBefore;
+      if (iterAfter <= iterBefore) {
+        return { reason: 'claude exited without finalizing and made no progress — is the Stop hook wired? (nothing committed)', spawns };
+      }
+      effects.log('claude exited mid-queue with progress — resuming');
+    }
+    lastOutcome = outcome;
   }
 }
 
diff --git a/shift/test/outcome.test.cjs b/shift/test/outcome.test.cjs
index a436e6f..0f6cd84 100644
--- a/shift/test/outcome.test.cjs
+++ b/shift/test/outcome.test.cjs
@@ -10,8 +10,15 @@ test('finalized run is completed', () => {
   assert.equal(classifyOutcome({ finalized: true, code: 1, now: nowMs }), 'completed');
 });
 
-test('clean exit (code 0) is completed', () => {
-  assert.equal(classifyOutcome({ finalized: false, code: 0, now: nowMs }), 'completed');
+test('finalized wins even on a clean exit', () => {
+  assert.equal(classifyOutcome({ finalized: true, code: 0, now: nowMs }), 'completed');
+});
+
+test('clean exit (code 0) WITHOUT finalize is incomplete, not completed', () => {
+  // The engine writes summary.md (finalized) on a real drain; a code-0 exit without
+  // it means claude stopped without the engine finalizing (e.g. hook not wired, or a
+  // partial stop). That must NOT read as success — it is 'incomplete' (resume/stop).
+  assert.equal(classifyOutcome({ finalized: false, code: 0, now: nowMs }), 'incomplete');
 });
 
 test('nonzero + near-limit usage + future reset is rate_limited', () => {
diff --git a/shift/test/run-loop.test.cjs b/shift/test/run-loop.test.cjs
index b4c1c22..e06cae2 100644
--- a/shift/test/run-loop.test.cjs
+++ b/shift/test/run-loop.test.cjs
@@ -19,6 +19,7 @@ function makeEffects({ spawns, usage, bounds }) {
       calls.spawns += 1;
       const s = spawns[i++] || { result: { status: 1, stderr: '' }, finalize: false };
       finalized = s.finalize;
+      if (typeof s.iterations === 'number') state.iterations = s.iterations; // simulate engine progress
       return s.result;
     }
   };
@@ -84,3 +85,46 @@ test('maxResumes acts as a runaway backstop', async () => {
   assert.match(r.reason, /max resumes/);
   assert.equal(r.spawns, 0);
 });
+
+test('incomplete spawn WITH progress resumes and finishes', async () => {
+  // spawn 1: clean exit, no finalize, but the engine advanced iterations (partial work);
+  // spawn 2: resumes and finalizes.
+  const { effects, calls, config } = makeEffects({
+    spawns: [
+      { result: { status: 0 }, finalize: false, iterations: 1 }, // progress, not done
+      { result: { status: 0 }, finalize: true, iterations: 2 }   // resume → drain
+    ],
+    usage: null
+  });
+  const r = await runLoop({ config, effects });
+  assert.match(r.reason, /finalized/);
+  assert.equal(calls.spawns, 2);
+});
+
+test('incomplete spawn WITHOUT progress stops with a hook-wiring diagnostic (no false-green)', async () => {
+  // claude exits 0 but the engine never advanced (e.g. Stop hook not wired). Must NOT
+  // report success, and must NOT keep re-spawning pointlessly.
+  const { effects, calls, config } = makeEffects({
+    spawns: [{ result: { status: 0 }, finalize: false }], // iterations stays 0
+    usage: null
+  });
+  const r = await runLoop({ config, effects });
+  assert.doesNotMatch(r.reason, /finalized/);
+  assert.match(r.reason, /no progress|hook/i);
+  assert.equal(calls.spawns, 1, 'must not spin');
+});
+
+test('rate-limited with a stale/past reset stops instead of busy-spinning', async () => {
+  // Reset time is already in the past (stale cache). sleepUntil(past) would return
+  // instantly and re-spawn forever (bounded only by maxResumes) — guard must stop.
+  const usage = { weeklyPercent: 50, sessionUsedPercent: 99, sessionResetAt: Math.floor(Date.now() / 1000) - 600 };
+  const { effects, calls, config } = makeEffects({
+    spawns: [{ result: { status: 1, stderr: 'Error: rate limit exceeded' }, finalize: false }],
+    usage,
+    bounds: { maxHours: 8, maxResumes: 12, autoResumeOnReset: true }
+  });
+  const r = await runLoop({ config, effects });
+  assert.match(r.reason, /stale|past|reset/i);
+  assert.equal(calls.spawns, 1);
+  assert.equal(calls.sleepUntil.length, 0, 'must not sleep on a past reset');
+});

From 45f2c25969b7afb2034f4db6fa2f572f76ef2e67 Mon Sep 17 00:00:00 2001
From: Allen Manzano-Wight <6640236+AllenBW@users.noreply.github.com>
Date: Tue, 16 Jun 2026 09:29:07 -0400
Subject: [PATCH 06/12] shift: live dashboard + keyboard control (shift watch)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

A headless run was a black box while running — good trail after, no visibility
during. `shift watch` is a zero-dep live TUI over .shift/ that closes that gap:

- Dashboard: progress bar, per-bin status (done/current/pending/skipped/blocked),
  elapsed, decision-log tail, Needs-you count. Redraws on an interval.
- Two-way control via a file-based channel the engine honors: [p]ause (runner
  idles, still time-boxed), [k] skip current bin (new 'skipped' status), [q] stop
  (existing kill switch), [x] close watcher.
- lib/control.cjs (signal channel) + lib/watch-model.cjs (buildModel + a PURE
  renderFrame/renderLine, so the dashboard is unit-tested without a TTY).
- bin/shift gains 'watch' and 'status --line' (one-liner for the module-1 status
  bar — surfaces shift where you're already looking).

Engine integration: Stop hook applies SKIP (marks current bin skipped, advances)
and summary now reports skipped; run-loop honors PAUSE between spawns.

77 shift tests green.
---
 shift/README.md                 |  29 ++++++-
 shift/SPEC.md                   |   6 ++
 shift/bin/shift                 |  83 +++++++++++++++++--
 shift/hooks/shift-stop.cjs      |  13 ++-
 shift/lib/control.cjs           |  41 ++++++++++
 shift/lib/run-loop.cjs          |   9 ++
 shift/lib/watch-model.cjs       | 141 ++++++++++++++++++++++++++++++++
 shift/test/control.test.cjs     |  56 +++++++++++++
 shift/test/hook.test.cjs        |  12 +++
 shift/test/run-loop.test.cjs    |  13 +++
 shift/test/watch-model.test.cjs |  84 +++++++++++++++++++
 11 files changed, 475 insertions(+), 12 deletions(-)
 create mode 100644 shift/lib/control.cjs
 create mode 100644 shift/lib/watch-model.cjs
 create mode 100644 shift/test/control.test.cjs
 create mode 100644 shift/test/watch-model.test.cjs

diff --git a/shift/README.md b/shift/README.md
index 8bb45c0..b9e42fd 100644
--- a/shift/README.md
+++ b/shift/README.md
@@ -60,7 +60,32 @@ shift status     # progress anytime
 shift stop       # stop cleanly after the current bin
 ```
 
-When it ends, read `.shift/summary.md` (bins done/blocked + a "Needs you" section) and review the `shift/<date>` branch.
+When it ends, read `.shift/summary.md` (bins done/blocked/skipped + a "Needs you" section) and review the `shift/<date>` branch.
+
+## Watch it live + steer it (`shift watch`)
+
+An unattended run is the *least* transparent mode there is — so `shift` gives you a live window into it. In a second terminal:
+
+```bash
+cd your-repo && shift watch
+```
+
+A dashboard redraws on an interval: a progress bar (`done/total`), every bin with its status (`✓` done · `▶` current · `·` pending · `⤫` skipped · `✗` blocked), elapsed time, the decision-log tail, and the "Needs you" count. Because a run is otherwise a black box, this is where you *see* it working.
+
+It's also the **control surface** — a status bar can show state but can't take input, so `watch` captures keys and writes signals the engine honors at the next stop:
+
+| key | action |
+|---|---|
+| `p` | pause / resume (the headless runner idles until you resume; still bounded by the time box) |
+| `k` | skip the current bin (marks it `skipped`, moves on — any work stays on the branch) |
+| `q` | stop the run (finalizes after the current bin — same as `shift stop`) |
+| `x` | close the watcher (the run keeps going) |
+
+Control is file-based under `.shift/` (`PAUSE` / `SKIP` / `STOP`), so it works whether the run is interactive or headless, and from any terminal in the repo.
+
+### In your status bar (module 1)
+
+For an at-a-glance signal in the [Code Status Bar](../code-status-bar), `shift status --line` prints a one-liner (`⚙ shift 2/5 · 18m · ⚑1`) — empty when no run is active. Wire it into a ccstatusline `custom-command` widget to surface shift "in the place you're already looking."
 
 ## Configure (`.shift/config.json`)
 
@@ -108,4 +133,4 @@ Pick the narrowest mode that lets the work actually proceed.
 cd shift && npm test     # node --test, zero dependencies
 ```
 
-Pure logic lives in `lib/` (discovery, state, bounds, brief, decision, verify, usage, outcome, run-loop) and is unit-tested; `hooks/shift-stop.cjs` (the keep-going engine) and the `shift run` loop are integration-tested by driving them with injected effects / crafted hook input.
+Pure logic lives in `lib/` (discovery, state, bounds, brief, decision, verify, usage, outcome, run-loop, control, watch-model) and is unit-tested — including `renderFrame`, so the dashboard is testable without a TTY; `hooks/shift-stop.cjs` (the keep-going engine) and the `shift run` loop are integration-tested by driving them with injected effects / crafted hook input. The `bin/shift watch` TUI is a thin shell over the tested `watch-model` + `control` modules.
diff --git a/shift/SPEC.md b/shift/SPEC.md
index 229c6ac..527c190 100644
--- a/shift/SPEC.md
+++ b/shift/SPEC.md
@@ -283,3 +283,9 @@ A real bounded `shift run` smoke (2 commit-a-file bins, `bypassPermissions`) **e
 - **Hook-install is required for `shift run`** and `install.sh` automates it (the bin's task text reaches the agent only via the Stop-hook block).
 
 **Tests:** 63 in `shift` (pure unit + hook/CLI/run-loop/install integration), all green.
+
+### Live visibility + control — `shift watch` (2026-06-16)
+
+The candor gap in v2 was that a headless run is opaque *while* it runs (good paper trail after, black box during). `shift watch` closes it: a zero-dependency live TUI that reads `.shift/` on an interval and renders a dashboard (progress bar, per-bin status, current bin, elapsed, decision-log tail, "Needs you"), plus **two-way control**. Since an output-only surface (a status bar) can't take input, control is a separate file-based channel under `.shift/` that the engine honors: `STOP` (existing kill switch / `q`), `PAUSE` (`p` — the runner idles, still bounded by the time box), `SKIP` (`k` — the hook marks the current bin `skipped` and advances). New status value: `skipped`. New modules: `lib/control.cjs` (signal channel) and `lib/watch-model.cjs` (`buildModel` + a **pure** `renderFrame`/`renderLine`, so the dashboard and the status-bar one-liner are unit-tested without a TTY). `bin/shift` gains `watch` and `status --line` (a one-liner for the module-1 status bar — ties the two modules together). **Tests:** 77 in `shift`, all green.
+
+*Known limitation:* `pause` and `skip` apply at the next stop-hook boundary (between bins), not mid-bin — the hook is the only point the engine re-evaluates. Mid-bin interruption would need a different mechanism.
diff --git a/shift/bin/shift b/shift/bin/shift
index 2c5f557..7b055d2 100755
--- a/shift/bin/shift
+++ b/shift/bin/shift
@@ -76,11 +76,72 @@ function cmdStart(args) {
   console.log('Now open Claude Code in this repo and say: "begin the shift".');
 }
 
-function cmdStatus() {
-  const state = loadState(path.join(process.cwd(), '.shift'));
-  const c = s => state.bins.filter(b => b.status === s).length;
-  console.log(`run ${state.runId} · branch ${state.branch} · iter ${state.iterations}`);
-  console.log(`bins: ${c('done')} done · ${c('blocked')} blocked · ${c('pending')} pending`);
+function cmdStatus(args) {
+  const dir = path.join(process.cwd(), '.shift');
+  const { buildModel, renderLine } = require('../lib/watch-model.cjs');
+  const model = buildModel({ dir, now: Date.now() });
+
+  // `shift status --line` → a one-line summary for a status bar (module 1 / ccstatusline
+  // custom-command widget pipes the session payload in; this just prints shift's line).
+  if (args && args.includes('--line')) {
+    if (model.exists && !model.finalized) process.stdout.write(renderLine(model, { color: !args.includes('--no-color') }) + '\n');
+    return;
+  }
+  if (!model.exists) { console.log('No active shift run here. Start one with `shift start`.'); return; }
+  const cs = model.counts;
+  console.log(`run ${model.runId} · branch ${model.branch} · iter ${model.iterations}${model.paused ? ' · PAUSED' : ''}`);
+  console.log(`bins: ${cs.done} done · ${cs.blocked} blocked · ${cs.skipped} skipped · ${cs.pending} pending  (${model.elapsedMin}m)`);
+}
+
+// v3: live dashboard + keyboard control. Reads .shift/ on an interval and writes
+// control signals (PAUSE/SKIP/STOP) that the engine honors. Output-only surfaces
+// (a status bar) can't take input, so this is the interactive control surface.
+function cmdWatch() {
+  const dir = path.join(process.cwd(), '.shift');
+  const { buildModel, renderFrame } = require('../lib/watch-model.cjs');
+  const { setPause, isPaused, requestSkip, requestStop } = require('../lib/control.cjs');
+  const out = process.stdout;
+  const interactive = !!(process.stdin.isTTY && out.isTTY);
+
+  let model;
+  const draw = () => {
+    model = buildModel({ dir, now: Date.now() });
+    const frame = renderFrame(model, { width: out.columns || 80, color: true });
+    if (interactive) out.write('\x1b[H\x1b[2J' + frame); // home + clear, then frame
+    else out.write(frame);
+  };
+
+  if (!interactive) { draw(); return; } // piped / non-TTY: print one frame and exit
+
+  let timer = null;
+  const cleanup = () => {
+    if (timer) clearInterval(timer);
+    try { process.stdin.setRawMode(false); } catch { /* ignore */ }
+    process.stdin.pause();
+    out.write('\x1b[?25h'); // show cursor
+  };
+
+  out.write('\x1b[?25l'); // hide cursor
+  process.stdin.setRawMode(true);
+  process.stdin.resume();
+  process.stdin.setEncoding('utf8');
+  process.stdin.on('data', (key) => {
+    if (key === 'x' || key === '\x1b' || key === '\x03') { // x / Esc / Ctrl-C
+      cleanup(); out.write('\n[shift] watcher closed — the run keeps going.\n'); process.exit(0);
+    } else if (key === 'p') {
+      setPause(dir, !isPaused(dir)); draw();
+    } else if (key === 'k') {
+      const cur = (model.bins || []).find(b => b.current);
+      if (cur) requestSkip(dir, cur.id);
+      draw();
+    } else if (key === 'q') {
+      requestStop(dir); draw();
+    }
+  });
+  process.on('SIGINT', () => { cleanup(); process.exit(0); });
+
+  draw();
+  timer = setInterval(draw, 800);
 }
 
 function cmdStop() {
@@ -103,6 +164,7 @@ async function cmdRun() {
   const mode = config.permissionMode || 'acceptEdits';
   const { runLoop } = require('../lib/run-loop.cjs');
   const { readUsageCache } = require('../lib/usage.cjs');
+  const { isPaused } = require('../lib/control.cjs');
 
   // A headless `-p` run cannot answer permission prompts. Only bypassPermissions/dontAsk
   // auto-approve tool calls like Bash(git commit) — anything else stalls or denies on the
@@ -126,6 +188,7 @@ async function cmdRun() {
     readUsage: () => readUsageCache(dir),
     log: (m) => console.log(`[shift] ${m}`),
     finalized: () => fs.existsSync(path.join(dir, 'summary.md')),
+    isPaused: () => isPaused(dir),
     sleepUntil: (ms) => new Promise(r => setTimeout(r, Math.max(0, ms - Date.now()))),
     spawn: () => {
       const args = ['-p', '--permission-mode', mode];
@@ -154,7 +217,13 @@ async function cmdRun() {
 
 const [, , sub, ...rest] = process.argv;
 if (sub === 'start') cmdStart(rest);
-else if (sub === 'status') cmdStatus();
+else if (sub === 'status') cmdStatus(rest);
+else if (sub === 'watch') cmdWatch();
 else if (sub === 'stop') cmdStop();
 else if (sub === 'run') cmdRun().catch(e => { console.error(e); process.exit(1); });
-else { console.log('usage: shift <start|run|status|stop> [--dry-run]'); process.exit(1); }
+else {
+  console.log('usage: shift <start|run|watch|status|stop> [--dry-run]');
+  console.log('  watch         live dashboard + control: [p]ause [k]skip [q]stop [x]exit');
+  console.log('  status --line one-line summary for a status bar');
+  process.exit(1);
+}
diff --git a/shift/hooks/shift-stop.cjs b/shift/hooks/shift-stop.cjs
index 16358a4..af99ed1 100755
--- a/shift/hooks/shift-stop.cjs
+++ b/shift/hooks/shift-stop.cjs
@@ -7,6 +7,7 @@ const { loadState, saveState, mergeDiscovered, setBinStatus } = require('../lib/
 const { decide } = require('../lib/decision.cjs');
 const { runVerify } = require('../lib/verify.cjs');
 const { writeUsageCache } = require('../lib/usage.cjs');
+const { readSkip, clearSkip } = require('../lib/control.cjs');
 
 function readStdin() { try { return fs.readFileSync(0, 'utf8'); } catch { return ''; } }
 
@@ -34,6 +35,7 @@ function tail(s, n) {
 function writeSummary(dir, state, reason, now) {
   const done = state.bins.filter(b => b.status === 'done').length;
   const blocked = state.bins.filter(b => b.status === 'blocked');
+  const skipped = state.bins.filter(b => b.status === 'skipped').length;
   const pending = state.bins.filter(b => b.status === 'pending').length;
   const mins = Math.round((now - Date.parse(state.startedAt)) / 60000);
   const items = [
@@ -45,7 +47,7 @@ function writeSummary(dir, state, reason, now) {
     `Ended: ${reason}`,
     `Duration: ${mins} min · Iterations: ${state.iterations}`,
     `Branch: ${state.branch}`,
-    `Bins: ${done} done · ${blocked.length} blocked · ${pending} pending`, '',
+    `Bins: ${done} done · ${blocked.length} blocked · ${skipped} skipped · ${pending} pending`, '',
     '## Needs you',
     ...(items.length ? items : ['- (nothing flagged)'])
   ];
@@ -78,10 +80,15 @@ function main() {
   const maxAttempts = (config.verify && config.verify.maxAttempts) || 2;
   let retryFeedback = null;
 
-  // Attribute the just-finished work to the current bin (blocked / verify gate / done).
+  // Attribute the just-finished work to the current bin (skipped / blocked / verify gate / done).
   if (prevBinId) {
+    const skipId = readSkip(dir);
     const blocked = readBlocked(dir).find(x => x.id === prevBinId);
-    if (blocked) {
+    if (skipId === prevBinId) {
+      // User hit [k] in `shift watch`: drop this bin and move on (work, if any, stays on the branch).
+      state = setBinStatus(state, prevBinId, { status: 'skipped', note: 'skipped by user' });
+      clearSkip(dir);
+    } else if (blocked) {
       state = setBinStatus(state, prevBinId, { status: 'blocked', note: blocked.note });
     } else if (verifyCmd) {
       const v = runVerify(verifyCmd, cwd);
diff --git a/shift/lib/control.cjs b/shift/lib/control.cjs
new file mode 100644
index 0000000..37d7d45
--- /dev/null
+++ b/shift/lib/control.cjs
@@ -0,0 +1,41 @@
+'use strict';
+const fs = require('node:fs');
+const path = require('node:path');
+
+// File-based control channel between `shift watch` (writer) and the engine
+// (reader: the Stop hook + the headless runner). Files live in .shift/:
+//   STOP   — kill switch (already honored by the hook); finalize after current bin.
+//   PAUSE  — the headless runner idles while this exists; cleared to resume.
+//   SKIP   — contains a bin id; the hook marks that bin 'skipped' and moves on.
+// Everything is best-effort and absence-means-off, so a missing dir never throws.
+
+function p(dir, name) { return path.join(dir, name); }
+function exists(file) { try { return fs.existsSync(file); } catch { return false; } }
+function touch(dir, name) {
+  try { fs.mkdirSync(dir, { recursive: true }); fs.writeFileSync(p(dir, name), ''); } catch { /* best-effort */ }
+}
+function remove(dir, name) { try { fs.unlinkSync(p(dir, name)); } catch { /* already gone */ } }
+
+function requestStop(dir) { touch(dir, 'STOP'); }
+function isStopRequested(dir) { return exists(p(dir, 'STOP')); }
+
+function setPause(dir, on) { if (on) touch(dir, 'PAUSE'); else remove(dir, 'PAUSE'); }
+function isPaused(dir) { return exists(p(dir, 'PAUSE')); }
+
+function requestSkip(dir, binId) {
+  try { fs.mkdirSync(dir, { recursive: true }); fs.writeFileSync(p(dir, 'SKIP'), String(binId || '')); }
+  catch { /* best-effort */ }
+}
+function readSkip(dir) {
+  try {
+    const v = fs.readFileSync(p(dir, 'SKIP'), 'utf8').trim();
+    return v || null;
+  } catch { return null; }
+}
+function clearSkip(dir) { remove(dir, 'SKIP'); }
+
+module.exports = {
+  requestStop, isStopRequested,
+  setPause, isPaused,
+  requestSkip, readSkip, clearSkip
+};
diff --git a/shift/lib/run-loop.cjs b/shift/lib/run-loop.cjs
index 8703b92..9e14be2 100644
--- a/shift/lib/run-loop.cjs
+++ b/shift/lib/run-loop.cjs
@@ -3,6 +3,7 @@ const { evaluateBounds } = require('./bounds.cjs');
 const { classifyOutcome } = require('./outcome.cjs');
 
 const RESET_BUFFER_MS = 60_000;
+const PAUSE_POLL_MS = 5_000;
 
 // The headless outer loop (v2). All side effects are injected so the loop is
 // fully testable without a real `claude` or real sleeping.
@@ -32,6 +33,14 @@ async function runLoop({ config, effects }) {
     if (lastOutcome === 'completed') return { reason: 'run finalized by the engine', spawns };
     if (lastOutcome === 'error') return { reason: 'run errored — stopping (see output)', spawns };
 
+    // Paused via `shift watch` ([p]): idle without spawning until resumed. Still bounded
+    // by maxHours/usage (re-checked each poll), so a forgotten pause can't run forever.
+    if (effects.isPaused && effects.isPaused()) {
+      effects.log('paused — waiting (resume with [p] in `shift watch`)');
+      await effects.sleepUntil(now + PAUSE_POLL_MS);
+      continue;
+    }
+
     if (lastOutcome === 'rate_limited') {
       if (!bounds.autoResumeOnReset) return { reason: 'rate limited; auto-resume disabled', spawns };
       const resetAt = usage && typeof usage.sessionResetAt === 'number' ? usage.sessionResetAt * 1000 : null;
diff --git a/shift/lib/watch-model.cjs b/shift/lib/watch-model.cjs
new file mode 100644
index 0000000..5eadf6e
--- /dev/null
+++ b/shift/lib/watch-model.cjs
@@ -0,0 +1,141 @@
+'use strict';
+const fs = require('node:fs');
+const path = require('node:path');
+const { loadState } = require('./state.cjs');
+const { isPaused, isStopRequested } = require('./control.cjs');
+
+// --- model -----------------------------------------------------------------
+
+function readLog(dir) {
+  let raw;
+  try { raw = fs.readFileSync(path.join(dir, 'log.md'), 'utf8'); } catch { return { recent: [], needsYou: [] }; }
+  const lines = raw.split('\n');
+  const recent = [];
+  const needsYou = [];
+  for (const line of lines) {
+    // hook writes: "## <iso> — work <id> (iter N)"
+    const m = line.match(/^##\s*(\S+)\s*—\s*(.+)$/);
+    if (m) {
+      const time = (m[1].match(/T(\d{2}:\d{2})/) || [])[1] || m[1];
+      recent.push(`${time}  ${m[2]}`);
+    }
+    const n = line.match(/^Needs you:\s*(.+)$/);
+    if (n) needsYou.push(n[1].trim());
+  }
+  return { recent: recent.slice(-6), needsYou };
+}
+
+// buildModel({ dir, now }) — read .shift/ into a plain view model. Pure of rendering.
+function buildModel({ dir, now }) {
+  let state;
+  try { state = loadState(dir); } catch { return { exists: false }; }
+
+  const bins = (state.bins || []).map(b => ({
+    id: b.id, status: b.status, commit: b.commit || null, note: b.note || null,
+    current: b.id === state.currentBinId && b.status === 'pending'
+  }));
+  const count = s => bins.filter(b => b.status === s).length;
+  const counts = {
+    done: count('done'), blocked: count('blocked'), skipped: count('skipped'),
+    pending: count('pending'), total: bins.length
+  };
+
+  const { recent, needsYou: logged } = readLog(dir);
+  const needsYou = [
+    ...bins.filter(b => b.status === 'blocked').map(b => `${b.id}: ${b.note || 'blocked'}`),
+    ...logged
+  ];
+
+  const startedMs = Date.parse(state.startedAt);
+  const elapsedMin = Number.isFinite(startedMs) ? Math.max(0, Math.round((now - startedMs) / 60000)) : 0;
+
+  return {
+    exists: true,
+    runId: state.runId, branch: state.branch, iterations: state.iterations || 0,
+    elapsedMin, paused: isPaused(dir), stopping: isStopRequested(dir),
+    finalized: fs.existsSync(path.join(dir, 'summary.md')),
+    bins, counts, recent, needsYou
+  };
+}
+
+// --- render ----------------------------------------------------------------
+
+const ANSI = {
+  reset: '\x1b[0m', bold: '\x1b[1m', dim: '\x1b[2m',
+  green: '\x1b[32m', yellow: '\x1b[33m', red: '\x1b[31m', cyan: '\x1b[36m', gray: '\x1b[90m'
+};
+function paint(color, code, s) { return color ? code + s + ANSI.reset : s; }
+
+const GLYPH = { done: '✓', blocked: '✗', skipped: '⤫', pending: '·' };
+function binGlyph(b) { return b.current ? '▶' : (GLYPH[b.status] || '·'); }
+function binColor(b) {
+  if (b.current) return ANSI.cyan;
+  return { done: ANSI.green, blocked: ANSI.red, skipped: ANSI.gray, pending: ANSI.dim }[b.status] || '';
+}
+
+function bar(done, total, width) {
+  if (total <= 0) return '';
+  const filled = Math.round((done / total) * width);
+  return '█'.repeat(filled) + '░'.repeat(Math.max(0, width - filled));
+}
+
+function pad(s, n) { s = String(s); return s.length >= n ? s.slice(0, n) : s + ' '.repeat(n - s.length); }
+
+// renderFrame(model, { width, color }) -> string. Pure.
+function renderFrame(model, opts = {}) {
+  const width = opts.width || 80;
+  const color = opts.color !== false;
+  const c = (code, s) => paint(color, code, s);
+
+  if (!model || !model.exists) {
+    return c(ANSI.dim, 'No active shift run in this directory. Start one with `shift start`.') + '\n';
+  }
+
+  const L = [];
+  const status = model.finalized
+    ? c(ANSI.green, '● finalized')
+    : model.stopping ? c(ANSI.red, '■ stopping after current bin')
+      : model.paused ? c(ANSI.yellow, '⏸ PAUSED') : c(ANSI.green, '▶ running');
+  L.push(`${c(ANSI.bold, 'shift')} ${c(ANSI.dim, '·')} ${c(ANSI.cyan, model.branch)} ${c(ANSI.dim, '·')} iter ${model.iterations}   ${status}`);
+  L.push(c(ANSI.dim, '─'.repeat(Math.min(width, 64))));
+
+  const { done, total } = { done: model.counts.done, total: model.counts.total };
+  L.push(`${c(ANSI.green, bar(done, total, 24))}  ${c(ANSI.bold, `${done}/${total}`)} bins ${c(ANSI.dim, '·')} ${model.elapsedMin}m elapsed`);
+  L.push('');
+
+  for (const b of model.bins) {
+    const g = c(binColor(b), binGlyph(b));
+    const id = c(b.current ? ANSI.cyan : (b.status === 'pending' ? ANSI.dim : ANSI.reset), pad(b.id, 28));
+    let tail = b.status;
+    if (b.current) tail = 'working  ← current';
+    else if (b.commit) tail = `done  (${b.commit.slice(0, 7)})`;
+    else if (b.note) tail = `${b.status}  — ${b.note}`;
+    L.push(` ${g} ${id} ${c(ANSI.dim, tail)}`);
+  }
+  L.push('');
+
+  if (model.recent.length) {
+    L.push(c(ANSI.dim, 'recent:'));
+    for (const r of model.recent.slice(-4)) L.push(c(ANSI.gray, `   ${r}`));
+    L.push('');
+  }
+
+  const needs = model.needsYou.length;
+  const needsLabel = needs ? c(ANSI.yellow, `Needs you: ${needs}`) : c(ANSI.dim, 'Needs you: 0');
+  const hints = `${c(ANSI.bold, '[p]')}ause  ${c(ANSI.bold, '[k]')}skip current  ${c(ANSI.bold, '[q]')}stop  ${c(ANSI.bold, '[x]')}exit watcher`;
+  L.push(`${needsLabel}   ${c(ANSI.dim, '·')}   ${hints}`);
+
+  return L.join('\n') + '\n';
+}
+
+// One-line summary for a status bar (module 1 / ccstatusline custom-command).
+function renderLine(model, opts = {}) {
+  const color = opts.color !== false;
+  const c = (code, s) => paint(color, code, s);
+  if (!model || !model.exists) return '';
+  const flag = model.finalized ? '●' : model.paused ? '⏸' : '⚙';
+  const needs = model.needsYou.length ? ` ${c(ANSI.yellow, '⚑' + model.needsYou.length)}` : '';
+  return `${flag} shift ${c(ANSI.bold, model.counts.done + '/' + model.counts.total)} ${c(ANSI.dim, model.elapsedMin + 'm')}${needs}`;
+}
+
+module.exports = { buildModel, renderFrame, renderLine };
diff --git a/shift/test/control.test.cjs b/shift/test/control.test.cjs
new file mode 100644
index 0000000..dfb17db
--- /dev/null
+++ b/shift/test/control.test.cjs
@@ -0,0 +1,56 @@
+'use strict';
+const { test } = require('node:test');
+const assert = require('node:assert');
+const fs = require('node:fs');
+const os = require('node:os');
+const path = require('node:path');
+const {
+  requestStop, isStopRequested,
+  setPause, isPaused,
+  requestSkip, readSkip, clearSkip
+} = require('../lib/control.cjs');
+
+function tmp() { return fs.mkdtempSync(path.join(os.tmpdir(), 'shift-ctl-')); }
+
+test('stop: absent by default, present after request', () => {
+  const d = tmp();
+  assert.equal(isStopRequested(d), false);
+  requestStop(d);
+  assert.equal(isStopRequested(d), true);
+  // STOP is the existing kill switch file name (engine already honors it)
+  assert.ok(fs.existsSync(path.join(d, 'STOP')));
+});
+
+test('pause: toggles on and off', () => {
+  const d = tmp();
+  assert.equal(isPaused(d), false);
+  setPause(d, true);
+  assert.equal(isPaused(d), true);
+  setPause(d, false);
+  assert.equal(isPaused(d), false);
+  setPause(d, false); // idempotent off
+  assert.equal(isPaused(d), false);
+});
+
+test('skip: records a bin id, reads it back, clears it', () => {
+  const d = tmp();
+  assert.equal(readSkip(d), null);
+  requestSkip(d, 'queue/03-build.md');
+  assert.equal(readSkip(d), 'queue/03-build.md');
+  clearSkip(d);
+  assert.equal(readSkip(d), null);
+});
+
+test('skip: reading a malformed/empty file yields null (no throw)', () => {
+  const d = tmp();
+  fs.mkdirSync(d, { recursive: true });
+  fs.writeFileSync(path.join(d, 'SKIP'), '   ');
+  assert.equal(readSkip(d), null);
+});
+
+test('all readers are safe on a missing dir', () => {
+  const d = path.join(os.tmpdir(), 'shift-ctl-missing-' + process.pid);
+  assert.equal(isStopRequested(d), false);
+  assert.equal(isPaused(d), false);
+  assert.equal(readSkip(d), null);
+});
diff --git a/shift/test/hook.test.cjs b/shift/test/hook.test.cjs
index ad2c603..17f9726 100644
--- a/shift/test/hook.test.cjs
+++ b/shift/test/hook.test.cjs
@@ -74,6 +74,18 @@ test('logged "Needs you:" lines surface in the summary', () => {
   assert.match(fs.readFileSync(path.join(dir, 'summary.md'), 'utf8'), /push the release tag/);
 });
 
+test('SKIP control marks the current bin skipped and advances to the next', () => {
+  const { cwd, dir } = setupRun();
+  runHook(cwd, { stop_hook_active: false });            // start bin 1 (current = queue/01.md)
+  fs.writeFileSync(path.join(dir, 'SKIP'), 'queue/01.md');
+  const r = runHook(cwd, { stop_hook_active: true });   // skip bin 1, block bin 2
+  assert.equal(r.decision, 'block');
+  assert.match(r.reason, /bin two/);
+  const s = JSON.parse(fs.readFileSync(path.join(dir, 'state.json'), 'utf8'));
+  assert.equal(s.bins.find(b => b.id === 'queue/01.md').status, 'skipped');
+  assert.ok(!fs.existsSync(path.join(dir, 'SKIP')), 'SKIP is consumed');
+});
+
 test('kill switch ends the run immediately', () => {
   const { cwd, dir } = setupRun();
   fs.writeFileSync(path.join(dir, 'STOP'), '');
diff --git a/shift/test/run-loop.test.cjs b/shift/test/run-loop.test.cjs
index e06cae2..4e32e61 100644
--- a/shift/test/run-loop.test.cjs
+++ b/shift/test/run-loop.test.cjs
@@ -114,6 +114,19 @@ test('incomplete spawn WITHOUT progress stops with a hook-wiring diagnostic (no
   assert.equal(calls.spawns, 1, 'must not spin');
 });
 
+test('pause idles the runner (no spawn) until unpaused, then proceeds', async () => {
+  const { effects, calls, config } = makeEffects({
+    spawns: [{ result: { status: 0 }, finalize: true }],
+    usage: null
+  });
+  let checks = 0;
+  effects.isPaused = () => checks++ < 2; // paused for the first two loop iterations
+  const r = await runLoop({ config, effects });
+  assert.match(r.reason, /finalized/);
+  assert.ok(calls.sleepUntil.length >= 2, 'idled while paused');
+  assert.equal(calls.spawns, 1, 'no spawn while paused; one after resume');
+});
+
 test('rate-limited with a stale/past reset stops instead of busy-spinning', async () => {
   // Reset time is already in the past (stale cache). sleepUntil(past) would return
   // instantly and re-spawn forever (bounded only by maxResumes) — guard must stop.
diff --git a/shift/test/watch-model.test.cjs b/shift/test/watch-model.test.cjs
new file mode 100644
index 0000000..dc93b44
--- /dev/null
+++ b/shift/test/watch-model.test.cjs
@@ -0,0 +1,84 @@
+'use strict';
+const { test } = require('node:test');
+const assert = require('node:assert');
+const fs = require('node:fs');
+const os = require('node:os');
+const path = require('node:path');
+const { buildModel, renderFrame } = require('../lib/watch-model.cjs');
+
+function fixture({ paused = false, currentBinId = 'queue/03-build.md' } = {}) {
+  const cwd = fs.mkdtempSync(path.join(os.tmpdir(), 'shift-watch-'));
+  const dir = path.join(cwd, '.shift');
+  fs.mkdirSync(dir, { recursive: true });
+  const startedAt = new Date(Date.now() - 12 * 60_000).toISOString(); // 12 min ago
+  fs.writeFileSync(path.join(dir, 'state.json'), JSON.stringify({
+    runId: '2026-06-16T00-00-00', startedAt, iterations: 7, branch: 'shift/smoke',
+    currentBinId,
+    bins: [
+      { id: 'queue/01-hello.md', status: 'done', commit: 'a1b2c3d' },
+      { id: 'queue/02-notes.md', status: 'done', commit: 'd4e5f6a' },
+      { id: 'queue/03-build.md', status: 'pending' },
+      { id: 'queue/04-test.md', status: 'pending' },
+      { id: 'queue/05-ship.md', status: 'blocked', note: 'needs API key' }
+    ]
+  }));
+  fs.writeFileSync(path.join(dir, 'log.md'),
+    '# shift log\n\n## 2026-06-16T00:05:00Z — work queue/03-build.md (iter 7)\nNeeds you: confirm the deploy target\n');
+  if (paused) fs.writeFileSync(path.join(dir, 'PAUSE'), '');
+  return dir;
+}
+
+test('buildModel reads run state and computes counts + elapsed', () => {
+  const m = buildModel({ dir: fixture(), now: Date.now() });
+  assert.equal(m.exists, true);
+  assert.equal(m.branch, 'shift/smoke');
+  assert.equal(m.iterations, 7);
+  assert.equal(m.counts.done, 2);
+  assert.equal(m.counts.blocked, 1);
+  assert.equal(m.counts.pending, 2);
+  assert.equal(m.counts.total, 5);
+  assert.ok(m.elapsedMin >= 11 && m.elapsedMin <= 13);
+});
+
+test('buildModel marks the current bin and surfaces Needs you', () => {
+  const m = buildModel({ dir: fixture(), now: Date.now() });
+  const current = m.bins.find(b => b.current);
+  assert.equal(current.id, 'queue/03-build.md');
+  assert.ok(m.needsYou.some(n => /API key/.test(n)));        // blocked note
+  assert.ok(m.needsYou.some(n => /deploy target/.test(n)));  // logged "Needs you:" line
+});
+
+test('buildModel reflects pause state', () => {
+  assert.equal(buildModel({ dir: fixture({ paused: true }), now: Date.now() }).paused, true);
+  assert.equal(buildModel({ dir: fixture({ paused: false }), now: Date.now() }).paused, false);
+});
+
+test('buildModel returns exists:false when no run is present', () => {
+  const cwd = fs.mkdtempSync(path.join(os.tmpdir(), 'shift-watch-none-'));
+  const m = buildModel({ dir: path.join(cwd, '.shift'), now: Date.now() });
+  assert.equal(m.exists, false);
+});
+
+test('renderFrame (no color) shows progress, the current bin, and control hints', () => {
+  const out = renderFrame(buildModel({ dir: fixture(), now: Date.now() }), { width: 80, color: false });
+  assert.match(out, /2\/5/);                 // progress count
+  assert.match(out, /shift\/smoke/);         // branch
+  assert.match(out, /queue\/05-ship\.md/);   // a bin row
+  assert.match(out, /needs API key/);        // blocker surfaced
+  assert.match(out, /\[q\].*stop/i);         // control hint
+  assert.match(out, /\[k\]/);                // skip hint
+  assert.match(out, /\[p\]/);                // pause hint
+});
+
+test('renderFrame shows a PAUSED banner when paused', () => {
+  const paused = renderFrame(buildModel({ dir: fixture({ paused: true }), now: Date.now() }), { color: false });
+  assert.match(paused, /PAUSED/);
+  const running = renderFrame(buildModel({ dir: fixture({ paused: false }), now: Date.now() }), { color: false });
+  assert.doesNotMatch(running, /PAUSED/);
+});
+
+test('renderFrame on no active run is a friendly message, not a crash', () => {
+  const cwd = fs.mkdtempSync(path.join(os.tmpdir(), 'shift-watch-none2-'));
+  const out = renderFrame(buildModel({ dir: path.join(cwd, '.shift'), now: Date.now() }), { color: false });
+  assert.match(out, /no active.*run/i);
+});

From 15970c6af2ed8566a9087ba4ac823937eb65a9bc Mon Sep 17 00:00:00 2001
From: Allen Manzano-Wight <6640236+AllenBW@users.noreply.github.com>
Date: Tue, 16 Jun 2026 09:38:59 -0400
Subject: [PATCH 07/12] shift watch: address adversarial-review findings
 (verdict was SHIP)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Two P2s + cheap P3s from the verification pass (no P0/P1; nothing crashed or
corrupted state):

- Terminal hygiene (P2): restore cursor + raw mode on SIGTERM/SIGHUP/exit, not
  just SIGINT/keys; wrap draw() in try/catch; idempotent cleanup; drop Esc as an
  exit key (a split arrow escape sends a lone \x1b).
- Stale SKIP (P2): consume-on-read in the hook — a skip that misses its target is
  discarded, never left to fire on a later bin.
- STOP honored while paused (P3): pause+stop no longer parks until the time box.
- Progress bar fills by resolved bins (done+blocked+skipped), so a finalized run
  shows a full bar instead of ~40% under '● finalized'.
- Atomic state.json write (temp+rename) so a redraw never reads a half-written file.
- Ellipsis on truncated bin ids.
- examples/watch-demo.cjs: zero-cost demo of the dashboard + control flow.

Tests: 79 shift (+2: stop-while-paused, stale-skip-discarded), all green.
Residual P3s documented: [k] gives no feedback when there is no current bin;
long lines wrap on narrow terminals.
---
 shift/bin/shift               | 25 +++++++++++++------
 shift/examples/watch-demo.cjs | 47 +++++++++++++++++++++++++++++++++++
 shift/hooks/shift-stop.cjs    |  2 +-
 shift/lib/run-loop.cjs        |  2 ++
 shift/lib/state.cjs           |  6 ++++-
 shift/lib/watch-model.cjs     | 12 ++++++---
 shift/test/hook.test.cjs      | 10 ++++++++
 shift/test/run-loop.test.cjs  | 13 ++++++++++
 8 files changed, 104 insertions(+), 13 deletions(-)
 create mode 100644 shift/examples/watch-demo.cjs

diff --git a/shift/bin/shift b/shift/bin/shift
index 7b055d2..e7d98d3 100755
--- a/shift/bin/shift
+++ b/shift/bin/shift
@@ -105,16 +105,21 @@ function cmdWatch() {
 
   let model;
   const draw = () => {
-    model = buildModel({ dir, now: Date.now() });
-    const frame = renderFrame(model, { width: out.columns || 80, color: true });
-    if (interactive) out.write('\x1b[H\x1b[2J' + frame); // home + clear, then frame
-    else out.write(frame);
+    try { // a transient read/write error must never wedge the terminal — retry next tick
+      model = buildModel({ dir, now: Date.now() });
+      const frame = renderFrame(model, { width: out.columns || 80, color: true });
+      if (interactive) out.write('\x1b[H\x1b[2J' + frame); // home + clear, then frame
+      else out.write(frame);
+    } catch { /* keep the watcher alive */ }
   };
 
   if (!interactive) { draw(); return; } // piped / non-TTY: print one frame and exit
 
   let timer = null;
-  const cleanup = () => {
+  let closed = false;
+  const cleanup = () => { // idempotent; ALWAYS restores the terminal
+    if (closed) return;
+    closed = true;
     if (timer) clearInterval(timer);
     try { process.stdin.setRawMode(false); } catch { /* ignore */ }
     process.stdin.pause();
@@ -126,7 +131,7 @@ function cmdWatch() {
   process.stdin.resume();
   process.stdin.setEncoding('utf8');
   process.stdin.on('data', (key) => {
-    if (key === 'x' || key === '\x1b' || key === '\x03') { // x / Esc / Ctrl-C
+    if (key === 'x' || key === '\x03') { // x / Ctrl-C (Esc omitted: a split arrow escape sends a lone \x1b)
       cleanup(); out.write('\n[shift] watcher closed — the run keeps going.\n'); process.exit(0);
     } else if (key === 'p') {
       setPause(dir, !isPaused(dir)); draw();
@@ -138,7 +143,10 @@ function cmdWatch() {
       requestStop(dir); draw();
     }
   });
-  process.on('SIGINT', () => { cleanup(); process.exit(0); });
+  // Restore on every exit path, not just the keys: a closed terminal (SIGHUP),
+  // kill (SIGTERM), or Ctrl-C must not leave the next shell with a hidden cursor + raw mode.
+  for (const sig of ['SIGINT', 'SIGTERM', 'SIGHUP']) process.on(sig, () => { cleanup(); process.exit(0); });
+  process.on('exit', cleanup);
 
   draw();
   timer = setInterval(draw, 800);
@@ -164,7 +172,7 @@ async function cmdRun() {
   const mode = config.permissionMode || 'acceptEdits';
   const { runLoop } = require('../lib/run-loop.cjs');
   const { readUsageCache } = require('../lib/usage.cjs');
-  const { isPaused } = require('../lib/control.cjs');
+  const { isPaused, isStopRequested } = require('../lib/control.cjs');
 
   // A headless `-p` run cannot answer permission prompts. Only bypassPermissions/dontAsk
   // auto-approve tool calls like Bash(git commit) — anything else stalls or denies on the
@@ -189,6 +197,7 @@ async function cmdRun() {
     log: (m) => console.log(`[shift] ${m}`),
     finalized: () => fs.existsSync(path.join(dir, 'summary.md')),
     isPaused: () => isPaused(dir),
+    isStopRequested: () => isStopRequested(dir),
     sleepUntil: (ms) => new Promise(r => setTimeout(r, Math.max(0, ms - Date.now()))),
     spawn: () => {
       const args = ['-p', '--permission-mode', mode];
diff --git a/shift/examples/watch-demo.cjs b/shift/examples/watch-demo.cjs
new file mode 100644
index 0000000..2af254e
--- /dev/null
+++ b/shift/examples/watch-demo.cjs
@@ -0,0 +1,47 @@
+#!/usr/bin/env node
+'use strict';
+// Zero-cost demo of `shift watch`: spins up a throwaway run, drives the real Stop
+// hook through it, and prints the live dashboard at each step — including a [k] skip
+// and a [q] stop — so you can see the visibility + control surface without spawning
+// a real `claude`. Run:  node shift/examples/watch-demo.cjs
+const fs = require('node:fs');
+const os = require('node:os');
+const path = require('node:path');
+const cp = require('node:child_process');
+
+const SHIFT = path.resolve(__dirname, '..');
+const { buildModel, renderFrame } = require(path.join(SHIFT, 'lib', 'watch-model.cjs'));
+const { requestSkip, requestStop } = require(path.join(SHIFT, 'lib', 'control.cjs'));
+const HOOK = path.join(SHIFT, 'hooks', 'shift-stop.cjs');
+
+const cwd = fs.mkdtempSync(path.join(os.tmpdir(), 'shift-watch-demo-'));
+const dir = path.join(cwd, '.shift');
+fs.mkdirSync(path.join(cwd, 'queue'), { recursive: true });
+fs.mkdirSync(dir, { recursive: true });
+for (const [n, t] of [['01-build.md', 'build the thing'], ['02-flaky.md', 'flaky task'], ['03-docs.md', 'write docs']]) {
+  fs.writeFileSync(path.join(cwd, 'queue', n), t);
+}
+fs.writeFileSync(path.join(dir, 'config.json'), JSON.stringify({
+  sources: [{ path: 'queue', kind: 'briefs' }],
+  bounds: { maxHours: 24, maxIterations: 10 }, definitionOfDone: 'done', git: {}
+}));
+fs.writeFileSync(path.join(dir, 'state.json'), JSON.stringify({
+  runId: 'demo', startedAt: new Date(Date.now() - 5 * 60000).toISOString(),
+  iterations: 0, branch: 'shift/demo', currentBinId: null, bins: []
+}));
+fs.writeFileSync(path.join(dir, 'log.md'), '# log\n');
+
+const fire = (active) => cp.execFileSync('node', [HOOK], { cwd, input: JSON.stringify({ stop_hook_active: active, cwd }), encoding: 'utf8' });
+const show = (label) => {
+  process.stdout.write(`\n\x1b[1m=== ${label} ===\x1b[0m\n`);
+  process.stdout.write(renderFrame(buildModel({ dir, now: Date.now() }), { width: 78, color: true }));
+};
+
+fire(false);                            show('1) run started — bin 01 working');
+fire(true);                             show('2) bin 01 done -> bin 02 working');
+requestSkip(dir, 'queue/02-flaky.md');  // you press [k] now, while bin 02 is the current bin
+fire(true);                             show('3) you pressed [k] on bin 02 -> SKIPPED, bin 03 working');
+requestStop(dir);                       show('4) you pressed [q] -> stopping banner');
+fire(true);                             show('5) bin 03 done, STOP honored -> finalized');
+process.stdout.write('\n--- .shift/summary.md ---\n' + fs.readFileSync(path.join(dir, 'summary.md'), 'utf8'));
+process.stdout.write(`\n(throwaway repo: ${cwd})\n`);
diff --git a/shift/hooks/shift-stop.cjs b/shift/hooks/shift-stop.cjs
index af99ed1..1d04399 100755
--- a/shift/hooks/shift-stop.cjs
+++ b/shift/hooks/shift-stop.cjs
@@ -83,11 +83,11 @@ function main() {
   // Attribute the just-finished work to the current bin (skipped / blocked / verify gate / done).
   if (prevBinId) {
     const skipId = readSkip(dir);
+    if (skipId) clearSkip(dir); // consume on read: a skip that misses its target is discarded, never left to fire on a later bin
     const blocked = readBlocked(dir).find(x => x.id === prevBinId);
     if (skipId === prevBinId) {
       // User hit [k] in `shift watch`: drop this bin and move on (work, if any, stays on the branch).
       state = setBinStatus(state, prevBinId, { status: 'skipped', note: 'skipped by user' });
-      clearSkip(dir);
     } else if (blocked) {
       state = setBinStatus(state, prevBinId, { status: 'blocked', note: blocked.note });
     } else if (verifyCmd) {
diff --git a/shift/lib/run-loop.cjs b/shift/lib/run-loop.cjs
index 9e14be2..4da90cd 100644
--- a/shift/lib/run-loop.cjs
+++ b/shift/lib/run-loop.cjs
@@ -36,6 +36,8 @@ async function runLoop({ config, effects }) {
     // Paused via `shift watch` ([p]): idle without spawning until resumed. Still bounded
     // by maxHours/usage (re-checked each poll), so a forgotten pause can't run forever.
     if (effects.isPaused && effects.isPaused()) {
+      // [q] stops even while paused — otherwise pause+stop would park until the time box.
+      if (effects.isStopRequested && effects.isStopRequested()) return { reason: 'stopped while paused', spawns };
       effects.log('paused — waiting (resume with [p] in `shift watch`)');
       await effects.sleepUntil(now + PAUSE_POLL_MS);
       continue;
diff --git a/shift/lib/state.cjs b/shift/lib/state.cjs
index 9d10a99..1ab2e3a 100644
--- a/shift/lib/state.cjs
+++ b/shift/lib/state.cjs
@@ -11,7 +11,11 @@ function saveState(dir, state) {
   // Persist lean: the bin `text` is re-read from disk on each discovery pass, so
   // keep it out of state.json (avoids bloating state with full brief/plan bodies).
   const lean = { ...state, bins: state.bins.map(({ text, ...b }) => b) };
-  fs.writeFileSync(statePath(dir), JSON.stringify(lean, null, 2));
+  // Write-then-rename so a concurrent reader (e.g. `shift watch`) never parses a
+  // half-written file; renameSync is atomic within the same directory.
+  const tmp = statePath(dir) + '.tmp';
+  fs.writeFileSync(tmp, JSON.stringify(lean, null, 2));
+  fs.renameSync(tmp, statePath(dir));
 }
 
 function initState({ runId, startedAt, branch }) {
diff --git a/shift/lib/watch-model.cjs b/shift/lib/watch-model.cjs
index 5eadf6e..f9cdbdd 100644
--- a/shift/lib/watch-model.cjs
+++ b/shift/lib/watch-model.cjs
@@ -79,7 +79,11 @@ function bar(done, total, width) {
   return '█'.repeat(filled) + '░'.repeat(Math.max(0, width - filled));
 }
 
-function pad(s, n) { s = String(s); return s.length >= n ? s.slice(0, n) : s + ' '.repeat(n - s.length); }
+function pad(s, n) {
+  s = String(s);
+  if (s.length > n) return s.slice(0, n - 1) + '…'; // truncate long bin ids with an ellipsis
+  return s + ' '.repeat(n - s.length);
+}
 
 // renderFrame(model, { width, color }) -> string. Pure.
 function renderFrame(model, opts = {}) {
@@ -99,8 +103,10 @@ function renderFrame(model, opts = {}) {
   L.push(`${c(ANSI.bold, 'shift')} ${c(ANSI.dim, '·')} ${c(ANSI.cyan, model.branch)} ${c(ANSI.dim, '·')} iter ${model.iterations}   ${status}`);
   L.push(c(ANSI.dim, '─'.repeat(Math.min(width, 64))));
 
-  const { done, total } = { done: model.counts.done, total: model.counts.total };
-  L.push(`${c(ANSI.green, bar(done, total, 24))}  ${c(ANSI.bold, `${done}/${total}`)} bins ${c(ANSI.dim, '·')} ${model.elapsedMin}m elapsed`);
+  const { done, blocked, skipped, total } = model.counts;
+  const resolved = done + blocked + skipped; // bar fills as the queue is dealt with (reaches full at finalize)
+  const extra = (blocked + skipped) ? c(ANSI.dim, ` (${blocked + skipped} blocked/skipped)`) : '';
+  L.push(`${c(ANSI.green, bar(resolved, total, 24))}  ${c(ANSI.bold, `${done}/${total}`)} done${extra} ${c(ANSI.dim, '·')} ${model.elapsedMin}m elapsed`);
   L.push('');
 
   for (const b of model.bins) {
diff --git a/shift/test/hook.test.cjs b/shift/test/hook.test.cjs
index 17f9726..f319f49 100644
--- a/shift/test/hook.test.cjs
+++ b/shift/test/hook.test.cjs
@@ -86,6 +86,16 @@ test('SKIP control marks the current bin skipped and advances to the next', () =
   assert.ok(!fs.existsSync(path.join(dir, 'SKIP')), 'SKIP is consumed');
 });
 
+test('a SKIP naming a non-current bin is consumed and discarded, not applied to a later bin', () => {
+  const { cwd, dir } = setupRun();
+  runHook(cwd, { stop_hook_active: false });                       // start bin 1
+  fs.writeFileSync(path.join(dir, 'SKIP'), 'queue/99-nope.md');    // stale / wrong id
+  runHook(cwd, { stop_hook_active: true });                        // bin 1 -> done (skip ignored)
+  const s = JSON.parse(fs.readFileSync(path.join(dir, 'state.json'), 'utf8'));
+  assert.equal(s.bins.find(b => b.id === 'queue/01.md').status, 'done');
+  assert.ok(!fs.existsSync(path.join(dir, 'SKIP')), 'stale SKIP is consumed, never left to fire on a later bin');
+});
+
 test('kill switch ends the run immediately', () => {
   const { cwd, dir } = setupRun();
   fs.writeFileSync(path.join(dir, 'STOP'), '');
diff --git a/shift/test/run-loop.test.cjs b/shift/test/run-loop.test.cjs
index 4e32e61..90ab4da 100644
--- a/shift/test/run-loop.test.cjs
+++ b/shift/test/run-loop.test.cjs
@@ -127,6 +127,19 @@ test('pause idles the runner (no spawn) until unpaused, then proceeds', async ()
   assert.equal(calls.spawns, 1, 'no spawn while paused; one after resume');
 });
 
+test('stop requested while paused ends the run (does not park until the time box)', async () => {
+  const { effects, calls, config } = makeEffects({
+    spawns: [{ result: { status: 0 }, finalize: true }],
+    usage: null
+  });
+  effects.isPaused = () => true;            // stays paused
+  effects.isStopRequested = () => true;     // ...but the user also hit [q]
+  const r = await runLoop({ config, effects });
+  assert.match(r.reason, /stop/i);
+  assert.equal(calls.spawns, 0);
+  assert.equal(calls.sleepUntil.length, 0, 'must not idle when a stop is pending');
+});
+
 test('rate-limited with a stale/past reset stops instead of busy-spinning', async () => {
   // Reset time is already in the past (stale cache). sleepUntil(past) would return
   // instantly and re-spawn forever (bounded only by maxResumes) — guard must stop.

From 7f44160d3cbf8053c2233c5c263b7eeafb586152 Mon Sep 17 00:00:00 2001
From: Allen Manzano-Wight <6640236+AllenBW@users.noreply.github.com>
Date: Tue, 16 Jun 2026 10:57:04 -0400
Subject: [PATCH 08/12] shift watch: per-bin + run tokens/runtime, drill-down,
 and a work record
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- Tokens (output, the honest 'work' figure — not cache-inflated total) + runtime in
  the dashboard header, status --line, and per-bin columns. Summed from the session
  transcript (transcript_path from the hook payload; usage in message.usage).
- Up/down select a bin, Enter opens a detail view (status, runtime, token breakdown,
  commit, brief), esc back.
- Work record: every finalized run appended to .shift/history.jsonl; 'shift history'
  shows per-run rows + a totals footer; 'shift history <runId>' drills into one run.
- New pure modules: transcript.cjs (window-sum usage), timeline.cjs (append-only bin
  boundaries), history.cjs (ledger append/read/aggregate). Hook attributes per-bin
  runtime+tokens and writes the history record on finalize.
- Brief now tells the agent .shift/ is append-only bookkeeping (never edit state.json).

Known limitation (SPEC §13): per-bin attribution is best-effort in fully-headless runs
— an autonomous agent can rewrite or delete files under .shift/ mid-run, and Claude Code
sandboxes hook writes to the project dir, so the boundary record can't be moved out of
the agent's reach. Run-level tokens/runtime + the history record (the hook's final
write) are authoritative.

96 shift tests, all green.
---
 shift/README.md                 |  20 ++++-
 shift/SPEC.md                   |   6 ++
 shift/bin/shift                 |  53 +++++++++--
 shift/examples/watch-demo.cjs   |  38 +++++---
 shift/hooks/shift-stop.cjs      | 102 ++++++++++++++++++---
 shift/lib/brief.cjs             |   3 +-
 shift/lib/history.cjs           |  39 ++++++++
 shift/lib/timeline.cjs          |  43 +++++++++
 shift/lib/transcript.cjs        |  41 +++++++++
 shift/lib/watch-model.cjs       | 153 +++++++++++++++++++++++++-------
 shift/test/brief.test.cjs       |   6 ++
 shift/test/history.test.cjs     |  50 +++++++++++
 shift/test/hook.test.cjs        |  41 +++++++++
 shift/test/timeline.test.cjs    |  45 ++++++++++
 shift/test/transcript.test.cjs  |  44 +++++++++
 shift/test/watch-model.test.cjs | 117 +++++++++++++++---------
 16 files changed, 691 insertions(+), 110 deletions(-)
 create mode 100644 shift/lib/history.cjs
 create mode 100644 shift/lib/timeline.cjs
 create mode 100644 shift/lib/transcript.cjs
 create mode 100644 shift/test/history.test.cjs
 create mode 100644 shift/test/timeline.test.cjs
 create mode 100644 shift/test/transcript.test.cjs

diff --git a/shift/README.md b/shift/README.md
index b9e42fd..4f1ebd6 100644
--- a/shift/README.md
+++ b/shift/README.md
@@ -70,12 +70,14 @@ An unattended run is the *least* transparent mode there is — so `shift` gives
 cd your-repo && shift watch
 ```
 
-A dashboard redraws on an interval: a progress bar (`done/total`), every bin with its status (`✓` done · `▶` current · `·` pending · `⤫` skipped · `✗` blocked), elapsed time, the decision-log tail, and the "Needs you" count. Because a run is otherwise a black box, this is where you *see* it working.
+A dashboard redraws on an interval: a progress bar, every bin with its status (`✓` done · `▶` current · `·` pending · `⤫` skipped · `✗` blocked) plus its **runtime and output tokens**, elapsed time, the run's live output-token total (`↑…out`), and the "Needs you" count. Because a run is otherwise a black box, this is where you *see* it working.
 
-It's also the **control surface** — a status bar can show state but can't take input, so `watch` captures keys and writes signals the engine honors at the next stop:
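+It's also the **control + drill-down surface** — a status bar can show state but can't take input, so `watch` captures keys: selection keys act immediately in the viewer, while control keys write signals the engine honors at the next stop: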
 
 | key | action |
 |---|---|
+| `↑` / `↓` | move the selection between bins |
+| `⏎` | open a bin's detail view (status, runtime, token breakdown in/out/cache, commit, brief); `esc` back |
 | `p` | pause / resume (the headless runner idles until you resume; still bounded by the time box) |
 | `k` | skip the current bin (marks it `skipped`, moves on — any work stays on the branch) |
 | `q` | stop the run (finalizes after the current bin — same as `shift stop`) |
@@ -83,9 +85,19 @@ It's also the **control surface** — a status bar can show state but can't take
 
 Control is file-based under `.shift/` (`PAUSE` / `SKIP` / `STOP`), so it works whether the run is interactive or headless, and from any terminal in the repo.
 
+> **Tokens are the *output* count** — the honest "work produced" figure, read from the session transcript. A warm run's `input`/cache tokens balloon with re-sent context, so the headline deliberately isn't `total` (that's in the detail view). Run-level tokens + runtime are authoritative; **per-bin** token/runtime columns are best-effort and may show `—` in a fully-headless run (an autonomous agent can rewrite `.shift/` mid-run) — see [SPEC §13](./SPEC.md).
+
+### The work record — `shift history`
+
+Every finalized run is appended to `.shift/history.jsonl`. `shift history` prints the ledger — one row per run (when, branch, runtime, output tokens, bin tally) and a **totals** footer across all runs; `shift history <runId>` drills into a single run's bins.
+
 ### In your status bar (module 1)
 
-For an at-a-glance signal in the [Code Status Bar](../code-status-bar), `shift status --line` prints a one-liner (`⚙ shift 2/5 · 18m · ⚑1`) — empty when no run is active. Wire it into a ccstatusline `custom-command` widget to surface shift "in the place you're already looking."
+For an at-a-glance signal in the [Code Status Bar](../code-status-bar), `shift status --line` prints a one-liner (`⚙ shift 2/5 · 18m · ↑412k ⚑1`) — empty when no run is active. Wire it into a ccstatusline `custom-command` widget to surface shift "in the place you're already looking."
+
+### See it without a run
+
+`node shift/examples/watch-demo.cjs` drives the real engine through a scripted run (with a synthetic transcript) and prints the dashboard at each step — tokens, a `[k]` skip, a `[q]` stop, the detail view, and the history ledger — at zero cost.
 
 ## Configure (`.shift/config.json`)
 
@@ -133,4 +145,4 @@ Pick the narrowest mode that lets the work actually proceed.
 cd shift && npm test     # node --test, zero dependencies
 ```
 
-Pure logic lives in `lib/` (discovery, state, bounds, brief, decision, verify, usage, outcome, run-loop, control, watch-model) and is unit-tested — including `renderFrame`, so the dashboard is testable without a TTY; `hooks/shift-stop.cjs` (the keep-going engine) and the `shift run` loop are integration-tested by driving them with injected effects / crafted hook input. The `bin/shift watch` TUI is a thin shell over the tested `watch-model` + `control` modules.
+Pure logic lives in `lib/` (discovery, state, bounds, brief, decision, verify, usage, outcome, run-loop, control, watch-model, transcript, timeline, history) and is unit-tested — including `renderFrame`/`renderDetail`/`renderHistory`, so the dashboard is testable without a TTY; `hooks/shift-stop.cjs` (the keep-going engine) and the `shift run` loop are integration-tested by driving them with injected effects / crafted hook input. The `bin/shift watch` TUI is a thin shell over the tested `watch-model` + `control` modules.
diff --git a/shift/SPEC.md b/shift/SPEC.md
index 527c190..df7f731 100644
--- a/shift/SPEC.md
+++ b/shift/SPEC.md
@@ -289,3 +289,9 @@ A real bounded `shift run` smoke (2 commit-a-file bins, `bypassPermissions`) **e
 The candor gap in v2 was that a headless run is opaque *while* it runs (good paper trail after, black box during). `shift watch` closes it: a zero-dependency live TUI that reads `.shift/` on an interval and renders a dashboard (progress bar, per-bin status, current bin, elapsed, decision-log tail, "Needs you"), plus **two-way control**. Since an output-only surface (a status bar) can't take input, control is a separate file-based channel under `.shift/` that the engine honors: `STOP` (existing kill switch / `q`), `PAUSE` (`p` — the runner idles, still bounded by the time box), `SKIP` (`k` — the hook marks the current bin `skipped` and advances). New status value: `skipped`. New modules: `lib/control.cjs` (signal channel) and `lib/watch-model.cjs` (`buildModel` + a **pure** `renderFrame`/`renderLine`, so the dashboard and the status-bar one-liner are unit-tested without a TTY). `bin/shift` gains `watch` and `status --line` (a one-liner for the module-1 status bar — ties the two modules together). **Tests:** 77 in `shift`, all green.
 
 *Known limitation:* `pause` and `skip` apply at the next stop-hook boundary (between bins), not mid-bin — the hook is the only point the engine re-evaluates. Mid-bin interruption would need a different mechanism.
+
+### Tokens, runtime + the work record (2026-06-16)
+
+Per the candor goal of making consumption legible: the dashboard header and `status --line` show **output tokens** (the honest "work produced" figure — not inflated by context resends / cache reads, which dominate `total`), summed from the session **transcript** (`transcript_path` from the hook payload; tokens live in `message.usage`). Each bin gets a runtime + token column; `↑/↓` selects a bin and `⏎` opens a **detail view** (status, runtime, token breakdown in/out/cache, commit, brief). Every finalized run is appended to an append-only **work record** at `.shift/history.jsonl`; `shift history` prints per-run rows + a totals footer (all runs, total time, total output tokens), and `shift history <runId>` drills into one run's bins. New modules: `lib/transcript.cjs` (sum `usage` over a `[start, end)` window — pure), `lib/timeline.cjs` (append-only bin boundaries), `lib/history.cjs` (ledger append/read/aggregate). **Tests:** 96 in `shift`, all green.
+
+*Known limitation — per-bin attribution is best-effort in fully-headless autonomous runs.* Investigation (2026-06-16) established three constraints that, together, make reliable *per-bin* token/runtime attribution impossible while a `claude -p` agent runs unattended: (1) an autonomous agent **rewrites/deletes files under `.shift/`** mid-run (observed: it rewrote `state.json` + `log.md` and deleted `config.json`/`timeline.jsonl`), clobbering the hook's per-bin stamps; (2) Claude Code **sandboxes hook file-writes to the project directory**, so the boundary record can't be relocated out-of-repo where the agent can't reach it; (3) the transcript carries no per-bin marker to reconstruct boundaries from. What **is** reliable and authoritative regardless: **run-level** output tokens + runtime, and the **work-record history** row (written as the hook's *final* action on finalize, after the agent's last turn, so it's never clobbered). Per-bin columns populate in interactive runs / when the agent leaves `.shift/` alone / in `shift/examples/watch-demo.cjs`, and show `—` otherwise. The brief now instructs the agent to treat `.shift/` as append-only bookkeeping; tightening that — or an engine-owned state store the agent can't reach — is the path to making per-bin robust.
diff --git a/shift/bin/shift b/shift/bin/shift
index e7d98d3..784695e 100755
--- a/shift/bin/shift
+++ b/shift/bin/shift
@@ -62,6 +62,7 @@ function cmdStart(args) {
 
   fs.mkdirSync(dir, { recursive: true });
   if (fs.existsSync(path.join(dir, 'STOP'))) fs.unlinkSync(path.join(dir, 'STOP'));
+  require('../lib/timeline.cjs').clearTimeline(dir); // fresh run → fresh boundary record
   fs.writeFileSync(cfgFile, JSON.stringify(config, null, 2));
   let state = initState({ runId: isoStamp(now), startedAt: now.toISOString(), branch });
   state = mergeDiscovered(state, discovered);
@@ -98,16 +99,23 @@ function cmdStatus(args) {
 // (a status bar) can't take input, so this is the interactive control surface.
 function cmdWatch() {
   const dir = path.join(process.cwd(), '.shift');
-  const { buildModel, renderFrame } = require('../lib/watch-model.cjs');
+  const { buildModel, renderFrame, renderDetail } = require('../lib/watch-model.cjs');
   const { setPause, isPaused, requestSkip, requestStop } = require('../lib/control.cjs');
   const out = process.stdout;
   const interactive = !!(process.stdin.isTTY && out.isTTY);
 
   let model;
+  let selected = -1;     // -1 = no selection yet; set to the current bin on first draw
+  let mode = 'list';     // 'list' | 'detail'
   const draw = () => {
     try { // a transient read/write error must never wedge the terminal — retry next tick
       model = buildModel({ dir, now: Date.now() });
-      const frame = renderFrame(model, { width: out.columns || 80, color: true });
+      const n = (model.bins || []).length;
+      if (selected < 0 && n) selected = Math.max(0, model.bins.findIndex(b => b.current));
+      if (selected >= n) selected = n - 1; // bins can change between draws — clamp
+      const frame = (mode === 'detail' && selected >= 0)
+        ? renderDetail(model, selected, { width: out.columns || 80, color: true })
+        : renderFrame(model, { width: out.columns || 80, color: true, selectedIndex: selected });
       if (interactive) out.write('\x1b[H\x1b[2J' + frame); // home + clear, then frame
       else out.write(frame);
     } catch { /* keep the watcher alive */ }
@@ -131,8 +139,17 @@ function cmdWatch() {
   process.stdin.resume();
   process.stdin.setEncoding('utf8');
   process.stdin.on('data', (key) => {
-    if (key === 'x' || key === '\x03') { // x / Ctrl-C (Esc omitted: a split arrow escape sends a lone \x1b)
+    const n = (model && model.bins) ? model.bins.length : 0;
+    if (key === 'x' || key === '\x03') { // x / Ctrl-C
       cleanup(); out.write('\n[shift] watcher closed — the run keeps going.\n'); process.exit(0);
+    } else if (key === '\x1b[A') {        // ↑ select up
+      if (n) selected = (selected <= 0 ? n : selected) - 1; draw();
+    } else if (key === '\x1b[B') {        // ↓ select down
+      if (n) selected = (selected + 1) % n; draw();
+    } else if (key === '\r' || key === '\n') { // ⏎ open detail
+      if (selected >= 0) mode = 'detail'; draw();
+    } else if (key === '\x1b') {          // esc back to list (lone Esc, not an arrow sequence)
+      mode = 'list'; draw();
     } else if (key === 'p') {
       setPause(dir, !isPaused(dir)); draw();
     } else if (key === 'k') {
@@ -159,6 +176,28 @@ function cmdStop() {
   console.log('shift will stop cleanly after the current bin.');
 }
 
+// The work record: every finalized run (.shift/history.jsonl). `shift history` prints the
+// ledger + totals; `shift history <runId|branch-suffix>` drills into one run's bins.
+function cmdHistory(args) {
+  const dir = path.join(process.cwd(), '.shift');
+  const { readHistory, aggregate } = require('../lib/history.cjs');
+  const { renderHistory, fmtDur, fmtTok } = require('../lib/watch-model.cjs');
+  const records = readHistory(dir);
+  const target = (args || []).find(a => !a.startsWith('-'));
+  if (target) {
+    const r = records.filter(x => x.runId === target || (x.branch || '').endsWith(target)).pop();
+    if (!r) { console.log(`No recorded run matching "${target}".`); return; }
+    const g = s => (s === 'done' ? '✓' : s === 'skipped' ? '⤫' : s === 'blocked' ? '✗' : '·');
+    console.log(`run ${r.runId} · ${r.branch} · ${r.endReason}`);
+    console.log(`  ${fmtDur(r.durationMs)} · ${fmtTok(r.tokens && r.tokens.output)} output · ${r.iterations} iters · ${r.bins.done}✓ ${r.bins.skipped}⤫ ${r.bins.blocked}✗`);
+    for (const b of (r.perBin || [])) {
+      console.log(`  ${g(b.status)} ${b.id}  ${fmtDur(b.durationMs)}  ${fmtTok(b.tokensOutput)} out  ${b.commit || ''}`);
+    }
+    return;
+  }
+  process.stdout.write(renderHistory(records, aggregate(records), { color: !!process.stdout.isTTY }));
+}
+
 // v2: headless outer loop — keeps spawning claude until the engine finalizes,
 // a bound trips, or (on a rate-limit wall) it waits for the window to reopen.
 async function cmdRun() {
@@ -228,11 +267,13 @@ const [, , sub, ...rest] = process.argv;
 if (sub === 'start') cmdStart(rest);
 else if (sub === 'status') cmdStatus(rest);
 else if (sub === 'watch') cmdWatch();
+else if (sub === 'history') cmdHistory(rest);
 else if (sub === 'stop') cmdStop();
 else if (sub === 'run') cmdRun().catch(e => { console.error(e); process.exit(1); });
 else {
-  console.log('usage: shift <start|run|watch|status|stop> [--dry-run]');
-  console.log('  watch         live dashboard + control: [p]ause [k]skip [q]stop [x]exit');
-  console.log('  status --line one-line summary for a status bar');
+  console.log('usage: shift <start|run|watch|history|status|stop> [--dry-run]');
+  console.log('  watch          live dashboard + control: ↑/↓ select · ⏎ details · [p]ause [k]skip [q]stop [x]exit');
+  console.log('  history [run]  the work record: per-run runtime/tokens + totals (or one run\'s detail)');
+  console.log('  status --line  one-line summary for a status bar');
   process.exit(1);
 }
diff --git a/shift/examples/watch-demo.cjs b/shift/examples/watch-demo.cjs
index 2af254e..11502cb 100644
--- a/shift/examples/watch-demo.cjs
+++ b/shift/examples/watch-demo.cjs
@@ -1,16 +1,18 @@
 #!/usr/bin/env node
 'use strict';
-// Zero-cost demo of `shift watch`: spins up a throwaway run, drives the real Stop
-// hook through it, and prints the live dashboard at each step — including a [k] skip
-// and a [q] stop — so you can see the visibility + control surface without spawning
-// a real `claude`. Run:  node shift/examples/watch-demo.cjs
+// Zero-cost demo of `shift watch`: spins up a throwaway run, drives the real Stop hook
+// through it with a synthetic transcript, and prints the live dashboard at each step —
+// runtime + token columns, a [k] skip, a [q] stop, and the work-record history — so you
+// can see the whole visibility + control surface without spawning a real `claude`.
+//   node shift/examples/watch-demo.cjs
 const fs = require('node:fs');
 const os = require('node:os');
 const path = require('node:path');
 const cp = require('node:child_process');
 
 const SHIFT = path.resolve(__dirname, '..');
-const { buildModel, renderFrame } = require(path.join(SHIFT, 'lib', 'watch-model.cjs'));
+const { buildModel, renderFrame, renderDetail, renderHistory } = require(path.join(SHIFT, 'lib', 'watch-model.cjs'));
+const { readHistory, aggregate } = require(path.join(SHIFT, 'lib', 'history.cjs'));
 const { requestSkip, requestStop } = require(path.join(SHIFT, 'lib', 'control.cjs'));
 const HOOK = path.join(SHIFT, 'hooks', 'shift-stop.cjs');
 
@@ -30,18 +32,28 @@ fs.writeFileSync(path.join(dir, 'state.json'), JSON.stringify({
   iterations: 0, branch: 'shift/demo', currentBinId: null, bins: []
 }));
 fs.writeFileSync(path.join(dir, 'log.md'), '# log\n');
+const T = path.join(dir, 'transcript.jsonl');
+fs.writeFileSync(T, '');
 
-const fire = (active) => cp.execFileSync('node', [HOOK], { cwd, input: JSON.stringify({ stop_hook_active: active, cwd }), encoding: 'utf8' });
+const fire = (active) => cp.execFileSync('node', [HOOK], { cwd, input: JSON.stringify({ stop_hook_active: active, cwd, transcript_path: T }), encoding: 'utf8' });
+const work = (out) => { // simulate the agent producing `out` output tokens on the current bin
+  fs.appendFileSync(T, JSON.stringify({ type: 'assistant', timestamp: new Date().toISOString(), message: { usage: { output_tokens: out, input_tokens: out * 6, cache_read_input_tokens: out * 40 } } }) + '\n');
+};
 const show = (label) => {
   process.stdout.write(`\n\x1b[1m=== ${label} ===\x1b[0m\n`);
-  process.stdout.write(renderFrame(buildModel({ dir, now: Date.now() }), { width: 78, color: true }));
+  process.stdout.write(renderFrame(buildModel({ dir, now: Date.now() }), { width: 78, color: true, selectedIndex: 0 }));
 };
 
-fire(false);                            show('1) run started — bin 01 working');
-fire(true);                             show('2) bin 01 done -> bin 02 working');
-requestSkip(dir, 'queue/02-flaky.md');  // you press [k] now, while bin 02 is the current bin
-fire(true);                             show('3) you pressed [k] on bin 02 -> SKIPPED, bin 03 working');
-requestStop(dir);                       show('4) you pressed [q] -> stopping banner');
+fire(false); work(8400);                show('1) bin 01 working — tokens climbing live');
+fire(true); work(21300);                show('2) bin 01 done (runtime + tokens) -> bin 02 working');
+requestSkip(dir, 'queue/02-flaky.md');  // you press [k] while bin 02 is current
+fire(true); work(5100);                 show('3) you pressed [k] -> bin 02 SKIPPED, bin 03 working');
+requestStop(dir);                       show('4) you pressed [q] -> stopping after current bin');
 fire(true);                             show('5) bin 03 done, STOP honored -> finalized');
-process.stdout.write('\n--- .shift/summary.md ---\n' + fs.readFileSync(path.join(dir, 'summary.md'), 'utf8'));
+
+process.stdout.write('\n\x1b[1m=== ⏎ details on bin 01 (drill-down) ===\x1b[0m\n');
+process.stdout.write(renderDetail(buildModel({ dir, now: Date.now() }), 0, { width: 78, color: true }));
+
+process.stdout.write('\n\x1b[1m=== shift history (work record across runs) ===\x1b[0m\n');
+process.stdout.write(renderHistory(readHistory(dir), aggregate(readHistory(dir)), { color: true }));
 process.stdout.write(`\n(throwaway repo: ${cwd})\n`);
diff --git a/shift/hooks/shift-stop.cjs b/shift/hooks/shift-stop.cjs
index 1d04399..f935db5 100755
--- a/shift/hooks/shift-stop.cjs
+++ b/shift/hooks/shift-stop.cjs
@@ -8,6 +8,9 @@ const { decide } = require('../lib/decision.cjs');
 const { runVerify } = require('../lib/verify.cjs');
 const { writeUsageCache } = require('../lib/usage.cjs');
 const { readSkip, clearSkip } = require('../lib/control.cjs');
+const { sumTokens } = require('../lib/transcript.cjs');
+const { appendRecord } = require('../lib/history.cjs');
+const { appendEvent, readTimeline, binWindows } = require('../lib/timeline.cjs');
 
 function readStdin() { try { return fs.readFileSync(0, 'utf8'); } catch { return ''; } }
 
@@ -32,7 +35,14 @@ function tail(s, n) {
   return s.length > n ? s.slice(s.length - n) : s;
 }
 
-function writeSummary(dir, state, reason, now) {
+function fmtTokens(n) { // compact token count for summary.md: '0', '842', '12k', '1.25M'
+  if (!n) return '0'; // 0, null, undefined and NaN all print as '0'
+  if (n >= 999500) return (n / 1e6).toFixed(2) + 'M'; // from 999500 the 'k' form would round up to '1000k'
+  if (n >= 1e3) return Math.round(n / 1e3) + 'k';
+  return String(n);
+}
+
+function writeSummary(dir, state, reason, now, runTok) {
   const done = state.bins.filter(b => b.status === 'done').length;
   const blocked = state.bins.filter(b => b.status === 'blocked');
   const skipped = state.bins.filter(b => b.status === 'skipped').length;
@@ -47,13 +57,42 @@ function writeSummary(dir, state, reason, now) {
     `Ended: ${reason}`,
     `Duration: ${mins} min · Iterations: ${state.iterations}`,
     `Branch: ${state.branch}`,
-    `Bins: ${done} done · ${blocked.length} blocked · ${skipped} skipped · ${pending} pending`, '',
-    '## Needs you',
-    ...(items.length ? items : ['- (nothing flagged)'])
+    `Bins: ${done} done · ${blocked.length} blocked · ${skipped} skipped · ${pending} pending`
   ];
+  if (runTok) lines.push(`Tokens: ${fmtTokens(runTok.output)} output · ${fmtTokens(runTok.total)} total`);
+  lines.push('', '## Needs you', ...(items.length ? items : ['- (nothing flagged)']));
   fs.writeFileSync(path.join(dir, 'summary.md'), lines.join('\n') + '\n');
 }
 
+// Writes one row for this finalized run to the work record (.shift/history.jsonl).
+// Per-bin runtime prefers the timeline window and falls back to bin.durationMs; per-bin
+// output tokens prefer the stamp on state.bins and fall back to a transcript sum.
+function appendRunRecord(dir, state, reason, now, runTok, transcriptPath) {
+  const endedAt = new Date(now).toISOString();
+  const windows = binWindows(readTimeline(dir));
+  const countOf = status => state.bins.filter(b => b.status === status).length;
+  const perBin = state.bins.map(bin => {
+    const win = windows[bin.id] || {};
+    let durationMs = bin.durationMs || null;
+    if (win.startedAt && win.finishedAt) {
+      durationMs = Math.max(0, Date.parse(win.finishedAt) - Date.parse(win.startedAt));
+    }
+    let tokensOutput = (bin.tokens && bin.tokens.output) || null;
+    if (tokensOutput == null && transcriptPath && win.startedAt) {
+      const summed = sumTokens(transcriptPath, win.startedAt, win.finishedAt || endedAt);
+      if (summed.messages > 0) tokensOutput = summed.output;
+    }
+    return { id: bin.id, status: bin.status, durationMs, tokensOutput, commit: bin.commit || null };
+  });
+  appendRecord(dir, {
+    runId: state.runId, branch: state.branch, startedAt: state.startedAt, endedAt,
+    durationMs: Math.max(0, now - Date.parse(state.startedAt)),
+    iterations: state.iterations, endReason: reason,
+    bins: { total: state.bins.length, done: countOf('done'), skipped: countOf('skipped'), blocked: countOf('blocked') },
+    tokens: { output: runTok ? runTok.output : 0, total: runTok ? runTok.total : 0 }, perBin
+  });
+}
+
 function main() {
   let input = {};
   try { input = JSON.parse(readStdin() || '{}'); } catch { input = {}; }
@@ -66,7 +105,9 @@ function main() {
 
   const config = JSON.parse(fs.readFileSync(path.join(dir, 'config.json'), 'utf8'));
   const now = Date.now();
+  const nowIso = new Date(now).toISOString();
   const killSwitch = fs.existsSync(path.join(dir, 'STOP'));
+  const payloadTranscript = (input && typeof input.transcript_path === 'string') ? input.transcript_path : null;
 
   // Capture rate limits from the hook payload: enforce the usage cap and cache
   // reset times for the headless runner. Absent on non-Pro/Max or pre-first-response.
@@ -74,39 +115,63 @@ function main() {
 
   // Re-discover (fresh text + new files) and carry over status/attempts.
   let state = mergeDiscovered(loadState(dir), discoverBins(config.sources, cwd));
+  const transcriptPath = payloadTranscript || state.transcriptPath || null;
 
   const prevBinId = state.currentBinId;
   const verifyCmd = config.verify && config.verify.command;
   const maxAttempts = (config.verify && config.verify.maxAttempts) || 2;
   let retryFeedback = null;
 
+  // When a bin finishes, attribute its runtime + tokens. The window [start, now) comes
+  // from the append-only timeline (best-effort: it lives under .shift/ too) — NOT
+  // state.json, which an autonomous agent may rewrite mid-run — and tokens are summed
+  // from the transcript. `fm` is merged into whichever terminal status the bin lands on,
+  // but the durable copy is the timeline + the history record, not these (clobberable) fields.
+  const prevStart = prevBinId ? (binWindows(readTimeline(dir))[prevBinId] || {}).startedAt : null;
+  let fm = {};
+  if (prevBinId) {
+    const tok = (transcriptPath && prevStart) ? sumTokens(transcriptPath, prevStart, nowIso) : null;
+    fm = {
+      finishedAt: nowIso,
+      durationMs: prevStart ? Math.max(0, now - Date.parse(prevStart)) : undefined,
+      tokens: tok ? { output: tok.output, input: tok.input, cacheRead: tok.cacheRead, total: tok.total } : undefined
+    };
+  }
+
   // Attribute the just-finished work to the current bin (skipped / blocked / verify gate / done).
+  let binFinished = false;
   if (prevBinId) {
     const skipId = readSkip(dir);
     if (skipId) clearSkip(dir); // consume on read: a skip that misses its target is discarded, never left to fire on a later bin
     const blocked = readBlocked(dir).find(x => x.id === prevBinId);
     if (skipId === prevBinId) {
       // User hit [k] in `shift watch`: drop this bin and move on (work, if any, stays on the branch).
-      state = setBinStatus(state, prevBinId, { status: 'skipped', note: 'skipped by user' });
+      state = setBinStatus(state, prevBinId, { status: 'skipped', note: 'skipped by user', ...fm });
+      binFinished = true;
     } else if (blocked) {
-      state = setBinStatus(state, prevBinId, { status: 'blocked', note: blocked.note });
+      state = setBinStatus(state, prevBinId, { status: 'blocked', note: blocked.note, ...fm });
+      binFinished = true;
     } else if (verifyCmd) {
       const v = runVerify(verifyCmd, cwd);
       if (v.ok) {
-        state = setBinStatus(state, prevBinId, { status: 'done', finishedAt: new Date(now).toISOString() });
+        state = setBinStatus(state, prevBinId, { status: 'done', ...fm });
+        binFinished = true;
       } else {
         const bin = state.bins.find(b => b.id === prevBinId) || {};
         const attempts = (bin.attempts || 0) + 1;
         if (attempts < maxAttempts) {
-          state = setBinStatus(state, prevBinId, { attempts }); // stays pending → re-blocked below
+          state = setBinStatus(state, prevBinId, { attempts }); // stays pending → re-blocked below (not finished yet)
           retryFeedback = `Your previous attempt failed verification (\`${verifyCmd}\`). Fix it and make it pass. Output (tail):\n${tail(v.output, 2000)}`;
         } else {
-          state = setBinStatus(state, prevBinId, { status: 'blocked', attempts, note: `failed verification after ${attempts} attempts` });
+          state = setBinStatus(state, prevBinId, { status: 'blocked', attempts, note: `failed verification after ${attempts} attempts`, ...fm });
+          binFinished = true;
         }
       }
     } else {
-      state = setBinStatus(state, prevBinId, { status: 'done', finishedAt: new Date(now).toISOString() });
+      state = setBinStatus(state, prevBinId, { status: 'done', ...fm });
+      binFinished = true;
     }
+    if (binFinished) appendEvent(dir, { t: nowIso, event: 'finish', id: prevBinId });
   }
 
   const result = decide({
@@ -114,19 +179,32 @@ function main() {
     stopHookActive: !!input.stop_hook_active, killSwitch
   });
 
+  if (transcriptPath) state.transcriptPath = transcriptPath; // so `shift watch` can live-parse tokens
+
   if (result.action === 'block') {
     let reason = result.reason;
     if (retryFeedback && result.nextBinId === prevBinId) reason += `\n\n${retryFeedback}`;
     state.iterations += 1;
     state.currentBinId = result.nextBinId;
+    // Record the bin's start the first time it becomes current (a new bin, not a verify
+    // retry of the same one). The timeline is the durable copy; state.bins.startedAt is a
+    // best-effort convenience that the agent may clobber.
+    if (result.nextBinId !== prevBinId) appendEvent(dir, { t: nowIso, event: 'start', id: result.nextBinId });
+    const nb = state.bins.find(b => b.id === result.nextBinId);
+    if (nb && !nb.startedAt) state = setBinStatus(state, result.nextBinId, { startedAt: nowIso });
     saveState(dir, state);
     fs.appendFileSync(path.join(dir, 'log.md'),
-      `\n## ${new Date(now).toISOString()} — work ${result.nextBinId} (iter ${state.iterations})\n`);
+      `\n## ${nowIso} — work ${result.nextBinId} (iter ${state.iterations})\n`);
     process.stdout.write(JSON.stringify({ decision: 'block', reason }));
   } else {
+    // First finalize only (summary.md absent) appends the work record — guards against a
+    // stray extra Stop firing after the run already finalized.
+    const alreadyFinalized = fs.existsSync(path.join(dir, 'summary.md'));
+    const runTok = transcriptPath ? sumTokens(transcriptPath, state.startedAt, nowIso) : null;
     state.currentBinId = null;
     saveState(dir, state);
-    writeSummary(dir, state, result.reason, now);
+    if (!alreadyFinalized) appendRunRecord(dir, state, result.reason, now, runTok, transcriptPath);
+    writeSummary(dir, state, result.reason, now, runTok);
     process.stdout.write('{}');
   }
 }
diff --git a/shift/lib/brief.cjs b/shift/lib/brief.cjs
index 7a68aa2..c54d19e 100644
--- a/shift/lib/brief.cjs
+++ b/shift/lib/brief.cjs
@@ -12,9 +12,10 @@ function renderBrief(bin, config) {
     : '';
   return [
     'You are running unattended under `shift`. Complete the brief below end-to-end using your best judgment.',
-    'Do NOT ask questions — if you would normally ask, decide and record the decision in .shift/log.md.',
+    'Do NOT ask questions — if you would normally ask, decide and APPEND the decision as a line to .shift/log.md.',
     `Definition of done: ${dod}`,
     'When finished, commit your work on the current branch.',
+    '`.shift/` is shift\'s own run bookkeeping. The ONLY writes you may make under it are APPENDING a line to .shift/log.md or .shift/blocked.jsonl. Never edit, overwrite, or "tidy" .shift/state.json, .shift/config.json, .shift/summary.md, and never rewrite .shift/log.md — shift maintains those itself (run progress, per-bin runtime + tokens), and changing them corrupts the run record.',
     'Flag anything that needs the human (a deferred decision, an action you could not take) by appending a line to .shift/log.md as: "Needs you: <detail>" — these surface in the run summary.',
     'If a true blocker stops you from finishing this bin, append one line to .shift/blocked.jsonl: {"id":"<bin id>","note":"<reason>"} then stop.',
     guard,
diff --git a/shift/lib/history.cjs b/shift/lib/history.cjs
new file mode 100644
index 0000000..c440e14
--- /dev/null
+++ b/shift/lib/history.cjs
@@ -0,0 +1,39 @@
+'use strict';
+const fs = require('node:fs');
+const path = require('node:path');
+
+// The shift work record: an append-only ledger of finalized runs at .shift/history.jsonl.
+// `shift start` rewrites state.json but never touches this, so it accumulates across runs.
+// One JSON line per run (totals + per-bin breakdown). Read for `shift history` + aggregates.
+
+function historyPath(dir) { return path.join(dir, 'history.jsonl'); }
+
+function appendRecord(dir, record) {
+  try {
+    fs.mkdirSync(dir, { recursive: true });
+    fs.appendFileSync(historyPath(dir), JSON.stringify(record) + '\n');
+  } catch { /* best-effort: never let a logging failure break the run */ }
+}
+
+function readHistory(dir) { // parsed ledger rows in file order; [] when the file is unreadable
+  const parseRow = line => { try { return JSON.parse(line); } catch { return null; } };
+  let text;
+  try { text = fs.readFileSync(historyPath(dir), 'utf8'); } catch { return []; }
+  const rows = text.split('\n').filter(line => line !== '').map(parseRow);
+  return rows.filter(row => Boolean(row));
+}
+
+// aggregate(records) -> { runs, durationMs, outputTokens, bins } summed across the ledger.
+function aggregate(records) {
+  const totals = { runs: 0, durationMs: 0, outputTokens: 0, bins: { total: 0, done: 0, skipped: 0, blocked: 0 } };
+  for (const run of records) {
+    totals.runs += 1;
+    totals.durationMs += (run.durationMs || 0);
+    totals.outputTokens += ((run.tokens && run.tokens.output) || 0);
+    const runBins = run.bins || {};
+    Object.keys(totals.bins).forEach(key => { totals.bins[key] += (runBins[key] || 0); });
+  }
+  return totals;
+}
+
+module.exports = { historyPath, appendRecord, readHistory, aggregate };
diff --git a/shift/lib/timeline.cjs b/shift/lib/timeline.cjs
new file mode 100644
index 0000000..90b12b8
--- /dev/null
+++ b/shift/lib/timeline.cjs
@@ -0,0 +1,43 @@
+'use strict';
+const fs = require('node:fs');
+const path = require('node:path');
+
+// An append-only record of bin boundaries (one event per line in .shift/timeline.jsonl)
+// — the source of per-bin runtime + token windows, paired with the transcript for tokens.
+//
+// Best-effort, by design: in a fully-headless autonomous run the agent may rewrite or
+// delete files under .shift/ (observed), so per-bin metrics can be lost — the run-level
+// totals + the work-record history (the hook's final write) remain authoritative
+// regardless. Writing this out-of-repo isn't an option: Claude Code sandboxes hook
+// file-writes to the project directory. See SPEC §13.
+
+function timelinePath(dir) { return path.join(dir, 'timeline.jsonl'); }
+
+function appendEvent(dir, ev) { // ev: { t: iso, event: 'start'|'finish', id }
+  try { fs.mkdirSync(dir, { recursive: true }); fs.appendFileSync(timelinePath(dir), JSON.stringify(ev) + '\n'); }
+  catch { /* best-effort */ }
+}
+
+function readTimeline(dir) { // parsed boundary events in file order; [] when the file is unreadable
+  const parseEvent = line => { try { return JSON.parse(line); } catch { return null; } };
+  let text;
+  try { text = fs.readFileSync(timelinePath(dir), 'utf8'); } catch { return []; }
+  const events = text.split('\n').filter(line => line !== '').map(parseEvent);
+  return events.filter(ev => Boolean(ev));
+}
+
+function clearTimeline(dir) { try { fs.unlinkSync(timelinePath(dir)); } catch { /* none */ } }
+
+// binWindows(events) -> { id: { startedAt, finishedAt } } — first start, last finish. Ids are untrusted file input.
+function binWindows(events) {
+  const w = Object.create(null); // no prototype, so an id like "__proto__"/"constructor" is just a key
+  for (const e of events) {
+    if (!e || !e.id) continue; // skip malformed rows and events without a bin id
+    if (!w[e.id]) w[e.id] = { startedAt: null, finishedAt: null };
+    if (e.event === 'start' && !w[e.id].startedAt) w[e.id].startedAt = e.t; // keep the first start
+    if (e.event === 'finish') w[e.id].finishedAt = e.t; // the last finish wins
+  }
+  return { ...w }; // hand back an ordinary object (own keys only) for callers and equality checks
+}
+
+module.exports = { timelinePath, appendEvent, readTimeline, clearTimeline, binWindows };
diff --git a/shift/lib/transcript.cjs b/shift/lib/transcript.cjs
new file mode 100644
index 0000000..07e5cdb
--- /dev/null
+++ b/shift/lib/transcript.cjs
@@ -0,0 +1,41 @@
+'use strict';
+const fs = require('node:fs');
+
+// Token accounting from a Claude Code transcript JSONL. Each assistant message line
+// carries message.usage { input_tokens, output_tokens, cache_read_input_tokens,
+// cache_creation_input_tokens } and a top-level ISO `timestamp` — so we can attribute
+// tokens to a bin by summing the usage of messages within that bin's [start, end) window.
+
+// sumUsage(lines, fromMs, toMs) — pure. Sums usage of assistant lines stamped in [fromMs, toMs); null = open bound.
+function sumUsage(lines, fromMs, toMs) {
+  const totals = { output: 0, input: 0, cacheRead: 0, cacheCreate: 0, total: 0, messages: 0 };
+  const inWindow = ms => Number.isFinite(ms) && !(fromMs != null && ms < fromMs) && !(toMs != null && ms >= toMs);
+  for (const line of lines) {
+    let entry;
+    try { entry = JSON.parse(line); } catch { continue; }
+    const usage = entry && entry.type === 'assistant' && entry.message && entry.message.usage;
+    if (!usage || !inWindow(Date.parse(entry.timestamp))) continue;
+    const output = usage.output_tokens || 0;
+    const input = usage.input_tokens || 0;
+    const cacheRead = usage.cache_read_input_tokens || 0;
+    const cacheCreate = usage.cache_creation_input_tokens || 0;
+    totals.output += output;
+    totals.input += input;
+    totals.cacheRead += cacheRead;
+    totals.cacheCreate += cacheCreate;
+    totals.total += output + input + cacheRead + cacheCreate; totals.messages += 1;
+  }
+  return totals;
+}
+
+function readLines(file) {
+  try { return fs.readFileSync(file, 'utf8').split('\n').filter(Boolean); }
+  catch { return []; }
+}
+
+// Convenience over a file path within an ISO window (either bound optional).
+function sumTokens(file, fromIso, toIso) {
+  return sumUsage(readLines(file), fromIso ? Date.parse(fromIso) : null, toIso ? Date.parse(toIso) : null);
+}
+
+module.exports = { sumUsage, sumTokens, readLines };
diff --git a/shift/lib/watch-model.cjs b/shift/lib/watch-model.cjs
index f9cdbdd..717117a 100644
--- a/shift/lib/watch-model.cjs
+++ b/shift/lib/watch-model.cjs
@@ -3,6 +3,8 @@ const fs = require('node:fs');
 const path = require('node:path');
 const { loadState } = require('./state.cjs');
 const { isPaused, isStopRequested } = require('./control.cjs');
+const { sumUsage, readLines } = require('./transcript.cjs');
+const { readTimeline, binWindows } = require('./timeline.cjs');
 
 // --- model -----------------------------------------------------------------
 
@@ -13,8 +15,7 @@ function readLog(dir) {
   const recent = [];
   const needsYou = [];
   for (const line of lines) {
-    // hook writes: "## <iso> — work <id> (iter N)"
-    const m = line.match(/^##\s*(\S+)\s*—\s*(.+)$/);
+    const m = line.match(/^##\s*(\S+)\s*—\s*(.+)$/); // "## <iso> — work <id> (iter N)"
     if (m) {
       const time = (m[1].match(/T(\d{2}:\d{2})/) || [])[1] || m[1];
       recent.push(`${time}  ${m[2]}`);
@@ -25,15 +26,40 @@ function readLog(dir) {
   return { recent: recent.slice(-6), needsYou };
 }
 
+function readBrief(cwd, binId) {
+  try { return fs.readFileSync(path.join(cwd, binId), 'utf8'); } catch { return ''; }
+}
+
 // buildModel({ dir, now }) — read .shift/ into a plain view model. Pure of rendering.
 function buildModel({ dir, now }) {
   let state;
   try { state = loadState(dir); } catch { return { exists: false }; }
 
-  const bins = (state.bins || []).map(b => ({
-    id: b.id, status: b.status, commit: b.commit || null, note: b.note || null,
-    current: b.id === state.currentBinId && b.status === 'pending'
-  }));
+  // Per-bin runtime comes from the timeline (append-only, best-effort boundaries), falling
+  // back to the durationMs the hook stamped on state.bins. Per-bin tokens prefer the hook's
+  // stamp and are otherwise summed from the transcript (parsed once) over the bin's window.
+  const windows = binWindows(readTimeline(dir));
+  const lines = state.transcriptPath ? readLines(state.transcriptPath) : [];
+  const startMs = b => (windows[b.id] && windows[b.id].startedAt) ? Date.parse(windows[b.id].startedAt) : null;
+  const finMs = (b, current) => {
+    const w = windows[b.id] || {};
+    if (w.finishedAt) return Date.parse(w.finishedAt);
+    return current ? now : null; // current bin: open window up to now (live)
+  };
+
+  const bins = (state.bins || []).map(b => {
+    const current = b.id === state.currentBinId && b.status === 'pending';
+    const s = startMs(b), f = finMs(b, current);
+    let durationMs = (s != null && f != null) ? Math.max(0, f - s)
+      : (typeof b.durationMs === 'number' ? b.durationMs : null);
+    let tokens = b.tokens || null;
+    let tokensOutput = (tokens && typeof tokens.output === 'number') ? tokens.output : null;
+    if (tokensOutput == null && lines.length && s != null) {
+      const t = sumUsage(lines, s, f != null ? f : null);
+      if (t.messages > 0) { tokens = { output: t.output, input: t.input, cacheRead: t.cacheRead, total: t.total }; tokensOutput = t.output; }
+    }
+    return { id: b.id, status: b.status, commit: b.commit || null, note: b.note || null, current, durationMs, tokensOutput, tokens };
+  });
   const count = s => bins.filter(b => b.status === s).length;
   const counts = {
     done: count('done'), blocked: count('blocked'), skipped: count('skipped'),
@@ -49,10 +75,20 @@ function buildModel({ dir, now }) {
   const startedMs = Date.parse(state.startedAt);
   const elapsedMin = Number.isFinite(startedMs) ? Math.max(0, Math.round((now - startedMs) / 60000)) : 0;
 
+  // Run output tokens: the transcript over [run start, now) (climbs live during a run);
+  // fall back to the sum of per-bin tokens when no transcript is known.
+  let outputTokens = bins.reduce((s, b) => s + (b.tokensOutput || 0), 0);
+  if (lines.length && Number.isFinite(startedMs)) {
+    const t = sumUsage(lines, startedMs, now);
+    if (t.messages > 0) outputTokens = t.output;
+  }
+
   return {
     exists: true,
+    cwd: path.dirname(dir),
     runId: state.runId, branch: state.branch, iterations: state.iterations || 0,
-    elapsedMin, paused: isPaused(dir), stopping: isStopRequested(dir),
+    elapsedMin, outputTokens,
+    paused: isPaused(dir), stopping: isStopRequested(dir),
     finalized: fs.existsSync(path.join(dir, 'summary.md')),
     bins, counts, recent, needsYou
   };
@@ -61,7 +97,7 @@ function buildModel({ dir, now }) {
 // --- render ----------------------------------------------------------------
 
 const ANSI = {
-  reset: '\x1b[0m', bold: '\x1b[1m', dim: '\x1b[2m',
+  reset: '\x1b[0m', bold: '\x1b[1m', dim: '\x1b[2m', inverse: '\x1b[7m',
   green: '\x1b[32m', yellow: '\x1b[33m', red: '\x1b[31m', cyan: '\x1b[36m', gray: '\x1b[90m'
 };
 function paint(color, code, s) { return color ? code + s + ANSI.reset : s; }
@@ -78,17 +114,26 @@ function bar(done, total, width) {
   const filled = Math.round((done / total) * width);
   return '█'.repeat(filled) + '░'.repeat(Math.max(0, width - filled));
 }
-
-function pad(s, n) {
-  s = String(s);
-  if (s.length > n) return s.slice(0, n - 1) + '…'; // truncate long bin ids with an ellipsis
-  return s + ' '.repeat(n - s.length);
+function pad(s, n) { s = String(s); return s.length > n ? s.slice(0, n - 1) + '…' : s + ' '.repeat(n - s.length); }
+function lpad(s, n) { s = String(s); return s.length >= n ? s : ' '.repeat(n - s.length) + s; }
+function fmtDur(ms) {
+  if (ms == null) return '—';
+  const s = Math.round(ms / 1000);
+  if (s < 60) return s + 's';
+  return Math.floor(s / 60) + 'm' + String(s % 60).padStart(2, '0') + 's';
+}
+function fmtTok(n) { // compact token count: '—' when unknown, else '842' / '12k' / '1.3M'
+  if (n == null) return '—';
+  if (n >= 999500) return (n / 1e6).toFixed(1) + 'M'; // from 999500 the 'k' form would round up to '1000k'
+  if (n >= 1e3) return Math.round(n / 1e3) + 'k';
+  return String(n);
+}
 
-// renderFrame(model, { width, color }) -> string. Pure.
+// renderFrame(model, { width, color, selectedIndex }) -> string. Pure.
 function renderFrame(model, opts = {}) {
   const width = opts.width || 80;
   const color = opts.color !== false;
+  const sel = typeof opts.selectedIndex === 'number' ? opts.selectedIndex : -1;
   const c = (code, s) => paint(color, code, s);
 
   if (!model || !model.exists) {
@@ -100,40 +145,82 @@ function renderFrame(model, opts = {}) {
     ? c(ANSI.green, '● finalized')
     : model.stopping ? c(ANSI.red, '■ stopping after current bin')
       : model.paused ? c(ANSI.yellow, '⏸ PAUSED') : c(ANSI.green, '▶ running');
-  L.push(`${c(ANSI.bold, 'shift')} ${c(ANSI.dim, '·')} ${c(ANSI.cyan, model.branch)} ${c(ANSI.dim, '·')} iter ${model.iterations}   ${status}`);
+  L.push(`${c(ANSI.bold, 'shift')} ${c(ANSI.dim, '·')} ${c(ANSI.cyan, model.branch)} ${c(ANSI.dim, '·')} iter ${model.iterations}   ${status} ${c(ANSI.dim, '·')} ${model.elapsedMin}m ${c(ANSI.dim, '·')} ${c(ANSI.bold, '↑' + fmtTok(model.outputTokens))} out`);
   L.push(c(ANSI.dim, '─'.repeat(Math.min(width, 64))));
 
   const { done, blocked, skipped, total } = model.counts;
-  const resolved = done + blocked + skipped; // bar fills as the queue is dealt with (reaches full at finalize)
+  const resolved = done + blocked + skipped; // bar reaches full at finalize
   const extra = (blocked + skipped) ? c(ANSI.dim, ` (${blocked + skipped} blocked/skipped)`) : '';
-  L.push(`${c(ANSI.green, bar(resolved, total, 24))}  ${c(ANSI.bold, `${done}/${total}`)} done${extra} ${c(ANSI.dim, '·')} ${model.elapsedMin}m elapsed`);
+  L.push(`${c(ANSI.green, bar(resolved, total, 24))}  ${c(ANSI.bold, `${done}/${total}`)} done${extra}`);
   L.push('');
 
-  for (const b of model.bins) {
+  model.bins.forEach((b, i) => {
+    const cursor = i === sel ? c(ANSI.cyan, '▸') : ' ';
     const g = c(binColor(b), binGlyph(b));
-    const id = c(b.current ? ANSI.cyan : (b.status === 'pending' ? ANSI.dim : ANSI.reset), pad(b.id, 28));
+    const id = c(b.current ? ANSI.cyan : (b.status === 'pending' ? ANSI.dim : ANSI.reset), pad(b.id, 24));
+    const dur = c(ANSI.dim, lpad(fmtDur(b.durationMs), 6));
+    const tok = c(ANSI.dim, lpad(b.tokensOutput == null ? '—' : fmtTok(b.tokensOutput), 6));
     let tail = b.status;
-    if (b.current) tail = 'working  ← current';
-    else if (b.commit) tail = `done  (${b.commit.slice(0, 7)})`;
-    else if (b.note) tail = `${b.status}  — ${b.note}`;
-    L.push(` ${g} ${id} ${c(ANSI.dim, tail)}`);
-  }
+    if (b.current) tail = 'working ← current';
+    else if (b.commit) tail = `(${b.commit.slice(0, 7)})`;
+    else if (b.note) tail = `— ${b.note}`;
+    else tail = '';
+    L.push(`${cursor}${g} ${id} ${dur} ${tok}  ${c(ANSI.dim, tail)}`);
+  });
   L.push('');
 
-  if (model.recent.length) {
-    L.push(c(ANSI.dim, 'recent:'));
-    for (const r of model.recent.slice(-4)) L.push(c(ANSI.gray, `   ${r}`));
-    L.push('');
-  }
-
   const needs = model.needsYou.length;
   const needsLabel = needs ? c(ANSI.yellow, `Needs you: ${needs}`) : c(ANSI.dim, 'Needs you: 0');
-  const hints = `${c(ANSI.bold, '[p]')}ause  ${c(ANSI.bold, '[k]')}skip current  ${c(ANSI.bold, '[q]')}stop  ${c(ANSI.bold, '[x]')}exit watcher`;
+  const nav = sel >= 0 ? `${c(ANSI.bold, '↑/↓')} select  ${c(ANSI.bold, '⏎')} details  ` : '';
+  const hints = `${nav}${c(ANSI.bold, '[p]')}ause  ${c(ANSI.bold, '[k]')}skip  ${c(ANSI.bold, '[q]')}stop  ${c(ANSI.bold, '[x]')}exit`;
   L.push(`${needsLabel}   ${c(ANSI.dim, '·')}   ${hints}`);
 
   return L.join('\n') + '\n';
 }
 
+// renderDetail(model, index, { width, color }) -> string. Drill-down for the bin at model.bins[index].
+function renderDetail(model, index, opts = {}) {
+  const color = opts.color !== false; // colour is on unless explicitly disabled
+  const width = opts.width || 80; // default frame width when none is given
+  const c = (code, s) => paint(color, code, s);
+  if (!model || !model.exists || !model.bins[index]) return renderFrame(model, opts); // nothing to drill into: show the list frame
+  const b = model.bins[index];
+  const t = b.tokens || {}; // per-kind token breakdown; missing fields render as '—' via fmtTok
+  const L = [];
+  L.push(`${c(ANSI.bold, b.id)} ${c(ANSI.dim, '·')} ${c(binColor(b), b.current ? 'working (current)' : b.status)}    ${c(ANSI.dim, '[esc] back  [k] skip  [q] stop')}`);
+  L.push(c(ANSI.dim, '─'.repeat(Math.min(width, 64))));
+  L.push(`${c(ANSI.dim, 'status  ')} ${b.current ? 'working (current)' : b.status}${b.note ? '  — ' + b.note : ''}`);
+  L.push(`${c(ANSI.dim, 'runtime ')} ${fmtDur(b.durationMs)}`);
+  L.push(`${c(ANSI.dim, 'tokens  ')} ${c(ANSI.bold, fmtTok(b.tokensOutput) + ' out')} ${c(ANSI.dim, '·')} ${fmtTok(t.input)} in ${c(ANSI.dim, '·')} ${fmtTok(t.cacheRead)} cache-read ${c(ANSI.dim, '·')} ${fmtTok(t.total)} total`);
+  L.push(`${c(ANSI.dim, 'commit  ')} ${b.commit || '—'}`);
+  L.push('');
+  L.push(c(ANSI.dim, 'brief'));
+  const brief = readBrief(model.cwd, b.id).trimEnd(); // the bin id doubles as the brief's path relative to model.cwd
+  const briefLines = brief ? brief.split('\n') : ['(brief unavailable)'];
+  for (const line of briefLines.slice(0, 14)) L.push('  ' + c(ANSI.gray, line.slice(0, width - 2))); // first 14 lines, clipped to fit the 2-space indent
+  return L.join('\n') + '\n';
+}
+
+// renderHistory(records, agg, { color }) -> string. The work record ledger: header, last 25 runs, totals row.
+function renderHistory(records, agg, opts = {}) {
+  const color = opts.color !== false; // colour is on unless explicitly disabled
+  const c = (code, s) => paint(color, code, s);
+  if (!records || !records.length) return c(ANSI.dim, 'No shift runs recorded yet. They appear here once a run finalizes.') + '\n';
+  const L = [];
+  L.push(`${c(ANSI.bold, 'shift work record')} ${c(ANSI.dim, `· ${agg.runs} run${agg.runs === 1 ? '' : 's'}`)}`);
+  L.push(c(ANSI.dim, '─'.repeat(64)));
+  L.push(c(ANSI.dim, ` ${pad('when', 17)}${pad('branch', 20)}${lpad('time', 7)} ${lpad('out', 7)}  bins`));
+  for (const r of records.slice(-25)) { // only the 25 most recent runs are listed; totals still cover agg
+    const when = (r.endedAt || r.startedAt || '').slice(0, 16).replace('T', ' '); // 'YYYY-MM-DD HH:MM'
+    const b = r.bins || {};
+    const tally = `${c(ANSI.green, (b.done || 0) + '✓')} ${c(ANSI.gray, (b.skipped || 0) + '⤫')} ${c(ANSI.red, (b.blocked || 0) + '✗')}`;
+    L.push(` ${pad(when, 17)}${c(ANSI.cyan, pad(r.branch || '', 20))}${lpad(fmtDur(r.durationMs), 7)} ${lpad(fmtTok(r.tokens && r.tokens.output), 7)}  ${tally}`);
+  }
+  L.push(c(ANSI.dim, '─'.repeat(64)));
+  L.push(`${c(ANSI.bold, 'totals')}  ${agg.runs} run${agg.runs === 1 ? '' : 's'} ${c(ANSI.dim, '·')} ${fmtDur(agg.durationMs)} ${c(ANSI.dim, '·')} ${c(ANSI.bold, fmtTok(agg.outputTokens) + ' out')} ${c(ANSI.dim, '·')} ${agg.bins.done}✓ ${agg.bins.skipped}⤫ ${agg.bins.blocked}✗`);
+  return L.join('\n') + '\n';
+}
+
 // One-line summary for a status bar (module 1 / ccstatusline custom-command).
 function renderLine(model, opts = {}) {
   const color = opts.color !== false;
@@ -141,7 +228,7 @@ function renderLine(model, opts = {}) {
   if (!model || !model.exists) return '';
   const flag = model.finalized ? '●' : model.paused ? '⏸' : '⚙';
   const needs = model.needsYou.length ? ` ${c(ANSI.yellow, '⚑' + model.needsYou.length)}` : '';
-  return `${flag} shift ${c(ANSI.bold, model.counts.done + '/' + model.counts.total)} ${c(ANSI.dim, model.elapsedMin + 'm')}${needs}`;
+  return `${flag} shift ${c(ANSI.bold, model.counts.done + '/' + model.counts.total)} ${c(ANSI.dim, model.elapsedMin + 'm')} ${c(ANSI.dim, '↑' + fmtTok(model.outputTokens))}${needs}`;
 }
 
-module.exports = { buildModel, renderFrame, renderLine };
+module.exports = { buildModel, renderFrame, renderDetail, renderHistory, renderLine, fmtDur, fmtTok };
diff --git a/shift/test/brief.test.cjs b/shift/test/brief.test.cjs
index 2212e4e..4f18435 100644
--- a/shift/test/brief.test.cjs
+++ b/shift/test/brief.test.cjs
@@ -29,3 +29,9 @@ test('always explains decision logging, the Needs-you convention, and blocker fl
   assert.match(out, /Needs you:/);
   assert.match(out, /blocked\.jsonl/);
 });
+
+test('guards .shift/ bookkeeping: append-only, never edit state.json (so the hook owns per-bin stats)', () => {
+  const out = renderBrief(bin, { git: {} });
+  assert.match(out, /Never edit.*state\.json/i);
+  assert.match(out, /append/i);
+});
diff --git a/shift/test/history.test.cjs b/shift/test/history.test.cjs
new file mode 100644
index 0000000..1e859b3
--- /dev/null
+++ b/shift/test/history.test.cjs
@@ -0,0 +1,50 @@
+'use strict';
+const { test } = require('node:test');
+const assert = require('node:assert');
+const fs = require('node:fs');
+const os = require('node:os');
+const path = require('node:path');
+const { appendRecord, readHistory, aggregate } = require('../lib/history.cjs');
+
+function tmp() { return fs.mkdtempSync(path.join(os.tmpdir(), 'shift-hist-')); }
+const rec = (runId, over = {}) => Object.assign({
+  runId, branch: 'shift/x', startedAt: '2026-06-16T00:00:00Z', endedAt: '2026-06-16T00:30:00Z',
+  durationMs: 30 * 60000, iterations: 4, endReason: 'queue empty',
+  bins: { total: 3, done: 2, skipped: 1, blocked: 0 }, tokens: { output: 1000, total: 50000 }
+}, over);
+
+test('append then read round-trips records in order', () => {
+  const d = tmp();
+  appendRecord(d, rec('r1'));
+  appendRecord(d, rec('r2'));
+  const h = readHistory(d);
+  assert.equal(h.length, 2);
+  assert.deepEqual(h.map(r => r.runId), ['r1', 'r2']);
+  assert.ok(fs.existsSync(path.join(d, 'history.jsonl')));
+});
+
+test('readHistory tolerates a malformed line', () => {
+  const d = tmp();
+  appendRecord(d, rec('r1'));
+  fs.appendFileSync(path.join(d, 'history.jsonl'), 'not json\n');
+  appendRecord(d, rec('r2'));
+  assert.deepEqual(readHistory(d).map(r => r.runId), ['r1', 'r2']);
+});
+
+test('readHistory on a fresh dir is empty (no throw)', () => {
+  assert.deepEqual(readHistory(tmp()), []);
+});
+
+test('aggregate totals runs, duration, output tokens, and bins', () => {
+  const recs = [
+    rec('r1', { durationMs: 10 * 60000, tokens: { output: 1000, total: 1 }, bins: { total: 2, done: 2, skipped: 0, blocked: 0 } }),
+    rec('r2', { durationMs: 20 * 60000, tokens: { output: 3000, total: 1 }, bins: { total: 5, done: 3, skipped: 1, blocked: 1 } })
+  ];
+  const a = aggregate(recs);
+  assert.equal(a.runs, 2);
+  assert.equal(a.durationMs, 30 * 60000);
+  assert.equal(a.outputTokens, 4000);
+  assert.equal(a.bins.done, 5);
+  assert.equal(a.bins.skipped, 1);
+  assert.equal(a.bins.blocked, 1);
+});
diff --git a/shift/test/hook.test.cjs b/shift/test/hook.test.cjs
index f319f49..cf0def7 100644
--- a/shift/test/hook.test.cjs
+++ b/shift/test/hook.test.cjs
@@ -144,6 +144,47 @@ test('verify gate (failing) re-blocks the same bin with feedback, then blocks af
   assert.equal(s.bins.find(b => b.id === 'queue/01.md').status, 'blocked');
 });
 
+// ---- watch: per-bin tokens/runtime + work-record history ----
+
+test('records per-bin tokens + runtime from the transcript and appends a history record', () => {
+  const { cwd, dir } = setupRun();
+  const tpath = path.join(dir, 'transcript.jsonl');
+  const asst = (ts, output) => JSON.stringify({
+    type: 'assistant', timestamp: ts,
+    message: { role: 'assistant', usage: { output_tokens: output, input_tokens: 10, cache_read_input_tokens: 0, cache_creation_input_tokens: 0 } }
+  });
+
+  runHook(cwd, { stop_hook_active: false, transcript_path: tpath }); // start bin 1
+  // Use bin 1's recorded startedAt as the message timestamp so it lands in [start, now).
+  const started = JSON.parse(fs.readFileSync(path.join(dir, 'state.json'), 'utf8'))
+    .bins.find(b => b.id === 'queue/01.md').startedAt;
+  assert.ok(started, 'bin 1 got a startedAt when it became current');
+  fs.writeFileSync(tpath, asst(started, 500) + '\n');
+
+  runHook(cwd, { stop_hook_active: true, transcript_path: tpath }); // finish bin 1, start bin 2
+  const b1 = JSON.parse(fs.readFileSync(path.join(dir, 'state.json'), 'utf8')).bins.find(b => b.id === 'queue/01.md');
+  assert.equal(b1.status, 'done');
+  assert.equal(b1.tokens.output, 500, 'bin 1 output tokens attributed from the transcript window');
+  assert.equal(typeof b1.durationMs, 'number');
+
+  runHook(cwd, { stop_hook_active: true, transcript_path: tpath }); // finish bin 2, drain -> finalize
+  const hist = fs.readFileSync(path.join(dir, 'history.jsonl'), 'utf8').trim().split('\n').map(JSON.parse);
+  assert.equal(hist.length, 1, 'one history record appended on finalize');
+  assert.equal(hist[0].bins.done, 2);
+  assert.ok(hist[0].tokens.output >= 500, 'run output tokens recorded');
+  assert.equal(hist[0].perBin.length, 2);
+});
+
+test('history is append-only across runs and not duplicated by a stray extra stop', () => {
+  const { cwd, dir } = setupRun();
+  runHook(cwd, { stop_hook_active: false });
+  runHook(cwd, { stop_hook_active: true });
+  runHook(cwd, { stop_hook_active: true }); // drain -> finalize (appends record 1)
+  runHook(cwd, { stop_hook_active: true }); // stray extra stop -> summary already exists -> no 2nd append
+  const hist = fs.readFileSync(path.join(dir, 'history.jsonl'), 'utf8').trim().split('\n').filter(Boolean);
+  assert.equal(hist.length, 1, 'no duplicate history record from a repeated finalize');
+});
+
 // ---- v2: usage cap + cache ----
 
 test('usage cap from the hook payload ends the run and caches usage', () => {
diff --git a/shift/test/timeline.test.cjs b/shift/test/timeline.test.cjs
new file mode 100644
index 0000000..9b6d59c
--- /dev/null
+++ b/shift/test/timeline.test.cjs
@@ -0,0 +1,45 @@
+'use strict';
+const { test } = require('node:test');
+const assert = require('node:assert');
+const fs = require('node:fs');
+const os = require('node:os');
+const path = require('node:path');
+const { appendEvent, readTimeline, clearTimeline, timelinePath, binWindows } = require('../lib/timeline.cjs');
+
+function dir() { return fs.mkdtempSync(path.join(os.tmpdir(), 'shift-tl-')); }
+
+test('timeline file lives at .shift/timeline.jsonl', () => {
+  const d = dir();
+  assert.equal(timelinePath(d), path.join(d, 'timeline.jsonl'));
+});
+
+test('append + read round-trips events; clear removes them', () => {
+  const d = dir();
+  appendEvent(d, { t: '2026-06-16T00:00:00Z', event: 'start', id: 'a' });
+  appendEvent(d, { t: '2026-06-16T00:01:00Z', event: 'finish', id: 'a' });
+  assert.equal(readTimeline(d).length, 2);
+  clearTimeline(d);
+  assert.deepEqual(readTimeline(d), []);
+});
+
+test('readTimeline on a fresh dir is empty and tolerates malformed lines', () => {
+  const d = dir();
+  assert.deepEqual(readTimeline(d), []);
+  appendEvent(d, { t: 't', event: 'start', id: 'a' });
+  fs.appendFileSync(timelinePath(d), 'garbage\n');
+  assert.equal(readTimeline(d).length, 1);
+});
+
+test('binWindows takes first start and last finish per bin', () => {
+  const events = [
+    { t: 't1', event: 'start', id: 'a' },
+    { t: 't1b', event: 'start', id: 'a' },
+    { t: 't2', event: 'finish', id: 'a' },
+    { t: 't3', event: 'start', id: 'b' }
+  ];
+  const w = binWindows(events);
+  assert.equal(w.a.startedAt, 't1');
+  assert.equal(w.a.finishedAt, 't2');
+  assert.equal(w.b.startedAt, 't3');
+  assert.equal(w.b.finishedAt, null);
+});
diff --git a/shift/test/transcript.test.cjs b/shift/test/transcript.test.cjs
new file mode 100644
index 0000000..903485c
--- /dev/null
+++ b/shift/test/transcript.test.cjs
@@ -0,0 +1,44 @@
+'use strict';
+const { test } = require('node:test');
+const assert = require('node:assert');
+const { sumUsage } = require('../lib/transcript.cjs');
+
+// Build a transcript line like Claude Code writes (assistant message with usage).
+function asst(tsIso, usage) {
+  return JSON.stringify({ type: 'assistant', timestamp: tsIso, message: { role: 'assistant', usage } });
+}
+const U = (output, input = 0, cacheRead = 0, cacheCreate = 0) =>
+  ({ output_tokens: output, input_tokens: input, cache_read_input_tokens: cacheRead, cache_creation_input_tokens: cacheCreate });
+
+const lines = [
+  JSON.stringify({ type: 'user', timestamp: '2026-06-16T00:00:00Z', message: {} }),     // ignored (not assistant)
+  asst('2026-06-16T00:01:00Z', U(100, 2000, 5000, 300)),                                  // in window A
+  asst('2026-06-16T00:02:00Z', U(50, 1000, 6000, 0)),                                     // in window A
+  asst('2026-06-16T00:10:00Z', U(999, 1, 1, 1)),                                          // window B
+  '{ not json',                                                                            // malformed → skipped
+  JSON.stringify({ type: 'assistant', timestamp: '2026-06-16T00:12:00Z', message: {} })   // assistant w/o usage → skipped
+];
+
+test('sums output/input/cache for assistant messages, ignores non-assistant + malformed', () => {
+  const all = sumUsage(lines, null, null);
+  assert.equal(all.output, 100 + 50 + 999);
+  assert.equal(all.input, 2000 + 1000 + 1);
+  assert.equal(all.cacheRead, 5000 + 6000 + 1);
+  assert.equal(all.messages, 3);
+  assert.equal(all.total, all.output + all.input + all.cacheRead + all.cacheCreate);
+});
+
+test('windows by [from, to): includes from, excludes to', () => {
+  const from = Date.parse('2026-06-16T00:00:30Z');
+  const to = Date.parse('2026-06-16T00:09:00Z');
+  const win = sumUsage(lines, from, to);
+  assert.equal(win.output, 150);   // only the two window-A messages
+  assert.equal(win.messages, 2);
+});
+
+test('empty / no-match window yields zeros, never throws', () => {
+  const z = sumUsage([], 0, 1);
+  assert.equal(z.output, 0);
+  assert.equal(z.total, 0);
+  assert.equal(z.messages, 0);
+});
diff --git a/shift/test/watch-model.test.cjs b/shift/test/watch-model.test.cjs
index dc93b44..4feb318 100644
--- a/shift/test/watch-model.test.cjs
+++ b/shift/test/watch-model.test.cjs
@@ -4,22 +4,25 @@ const assert = require('node:assert');
 const fs = require('node:fs');
 const os = require('node:os');
 const path = require('node:path');
-const { buildModel, renderFrame } = require('../lib/watch-model.cjs');
+const { buildModel, renderFrame, renderDetail, renderHistory } = require('../lib/watch-model.cjs');
+const { aggregate } = require('../lib/history.cjs');
 
 function fixture({ paused = false, currentBinId = 'queue/03-build.md' } = {}) {
   const cwd = fs.mkdtempSync(path.join(os.tmpdir(), 'shift-watch-'));
   const dir = path.join(cwd, '.shift');
   fs.mkdirSync(dir, { recursive: true });
-  const startedAt = new Date(Date.now() - 12 * 60_000).toISOString(); // 12 min ago
+  fs.mkdirSync(path.join(cwd, 'queue'), { recursive: true });
+  fs.writeFileSync(path.join(cwd, 'queue', '03-build.md'), '# Build the thing\n\nCompile and commit.\n');
+  const startedAt = new Date(Date.now() - 12 * 60_000).toISOString();
   fs.writeFileSync(path.join(dir, 'state.json'), JSON.stringify({
     runId: '2026-06-16T00-00-00', startedAt, iterations: 7, branch: 'shift/smoke',
     currentBinId,
     bins: [
-      { id: 'queue/01-hello.md', status: 'done', commit: 'a1b2c3d' },
-      { id: 'queue/02-notes.md', status: 'done', commit: 'd4e5f6a' },
-      { id: 'queue/03-build.md', status: 'pending' },
+      { id: 'queue/01-hello.md', status: 'done', commit: 'a1b2c3d', durationMs: 68000, tokens: { output: 84000, input: 1000, cacheRead: 50000, total: 135000 } },
+      { id: 'queue/02-notes.md', status: 'done', commit: 'd4e5f6a', durationMs: 161000, tokens: { output: 213000, input: 2000, cacheRead: 90000, total: 305000 } },
+      { id: 'queue/03-build.md', status: 'pending', startedAt },
       { id: 'queue/04-test.md', status: 'pending' },
-      { id: 'queue/05-ship.md', status: 'blocked', note: 'needs API key' }
+      { id: 'queue/05-ship.md', status: 'blocked', note: 'needs API key', durationMs: 52000, tokens: { output: 31000, input: 500, cacheRead: 0, total: 31500 } }
     ]
   }));
   fs.writeFileSync(path.join(dir, 'log.md'),
@@ -28,57 +31,89 @@ function fixture({ paused = false, currentBinId = 'queue/03-build.md' } = {}) {
   return dir;
 }
 
-test('buildModel reads run state and computes counts + elapsed', () => {
+test('buildModel reads per-bin runtime + tokens and a run output-token total', () => {
   const m = buildModel({ dir: fixture(), now: Date.now() });
-  assert.equal(m.exists, true);
-  assert.equal(m.branch, 'shift/smoke');
-  assert.equal(m.iterations, 7);
   assert.equal(m.counts.done, 2);
   assert.equal(m.counts.blocked, 1);
-  assert.equal(m.counts.pending, 2);
-  assert.equal(m.counts.total, 5);
-  assert.ok(m.elapsedMin >= 11 && m.elapsedMin <= 13);
+  const b1 = m.bins.find(b => b.id === 'queue/01-hello.md');
+  assert.equal(b1.durationMs, 68000);
+  assert.equal(b1.tokensOutput, 84000);
+  // no transcriptPath in fixture -> run output tokens = sum of recorded per-bin output
+  assert.equal(m.outputTokens, 84000 + 213000 + 31000);
 });
 
 test('buildModel marks the current bin and surfaces Needs you', () => {
   const m = buildModel({ dir: fixture(), now: Date.now() });
-  const current = m.bins.find(b => b.current);
-  assert.equal(current.id, 'queue/03-build.md');
-  assert.ok(m.needsYou.some(n => /API key/.test(n)));        // blocked note
-  assert.ok(m.needsYou.some(n => /deploy target/.test(n)));  // logged "Needs you:" line
+  assert.equal(m.bins.find(b => b.current).id, 'queue/03-build.md');
+  assert.ok(m.needsYou.some(n => /API key/.test(n)));
+  assert.ok(m.needsYou.some(n => /deploy target/.test(n)));
 });
 
-test('buildModel reflects pause state', () => {
+test('buildModel reflects pause + exists:false when no run', () => {
   assert.equal(buildModel({ dir: fixture({ paused: true }), now: Date.now() }).paused, true);
-  assert.equal(buildModel({ dir: fixture({ paused: false }), now: Date.now() }).paused, false);
+  const none = fs.mkdtempSync(path.join(os.tmpdir(), 'shift-watch-none-'));
+  assert.equal(buildModel({ dir: path.join(none, '.shift'), now: Date.now() }).exists, false);
 });
 
-test('buildModel returns exists:false when no run is present', () => {
-  const cwd = fs.mkdtempSync(path.join(os.tmpdir(), 'shift-watch-none-'));
-  const m = buildModel({ dir: path.join(cwd, '.shift'), now: Date.now() });
-  assert.equal(m.exists, false);
+test('renderFrame shows progress, the token header, runtime/token columns, and nav hints', () => {
+  const m = buildModel({ dir: fixture(), now: Date.now() });
+  const out = renderFrame(m, { width: 80, color: false, selectedIndex: 2 });
+  assert.match(out, /2\/5/);                  // progress
+  assert.match(out, /shift\/smoke/);          // branch
+  assert.match(out, /↑\d+k out/);             // run output-token header
+  assert.match(out, /queue\/05-ship\.md/);    // bin row
+  assert.match(out, /needs API key/);         // blocker note
+  assert.match(out, /1m08s/);                 // bin 1 runtime column
+  assert.match(out, /84k/);                   // bin 1 token column
+  assert.match(out, /▸/);                     // selection cursor (selectedIndex)
+  assert.match(out, /select/);                // nav hint shown when selecting
+  assert.match(out, /\[q\].*stop/i);
+});
+
+test('renderFrame PAUSED banner toggles', () => {
+  assert.match(renderFrame(buildModel({ dir: fixture({ paused: true }), now: Date.now() }), { color: false }), /PAUSED/);
+  assert.doesNotMatch(renderFrame(buildModel({ dir: fixture({ paused: false }), now: Date.now() }), { color: false }), /PAUSED/);
+});
+
+test('renderDetail shows the bin brief + token breakdown', () => {
+  const m = buildModel({ dir: fixture(), now: Date.now() });
+  const idx = m.bins.findIndex(b => b.id === 'queue/01-hello.md');
+  const out = renderDetail(m, idx, { color: false });
+  assert.match(out, /queue\/01-hello\.md/);
+  assert.match(out, /84k out/);          // token breakdown
+  assert.match(out, /cache-read/);
+  assert.match(out, /1m08s/);            // runtime
+  assert.match(out, /a1b2c3d/);          // commit
+});
+
+test('renderDetail reads the brief file for the current bin', () => {
+  const m = buildModel({ dir: fixture(), now: Date.now() });
+  const idx = m.bins.findIndex(b => b.id === 'queue/03-build.md');
+  const out = renderDetail(m, idx, { color: false });
+  assert.match(out, /brief/);
+  assert.match(out, /Build the thing/); // read from queue/03-build.md
 });
 
-test('renderFrame (no color) shows progress, the current bin, and control hints', () => {
-  const out = renderFrame(buildModel({ dir: fixture(), now: Date.now() }), { width: 80, color: false });
-  assert.match(out, /2\/5/);                 // progress count
-  assert.match(out, /shift\/smoke/);         // branch
-  assert.match(out, /queue\/05-ship\.md/);   // a bin row
-  assert.match(out, /needs API key/);        // blocker surfaced
-  assert.match(out, /\[q\].*stop/i);         // control hint
-  assert.match(out, /\[k\]/);                // skip hint
-  assert.match(out, /\[p\]/);                // pause hint
+test('renderFrame on no active run is a friendly message', () => {
+  const none = fs.mkdtempSync(path.join(os.tmpdir(), 'shift-watch-none2-'));
+  assert.match(renderFrame(buildModel({ dir: path.join(none, '.shift'), now: Date.now() }), { color: false }), /no active.*run/i);
 });
 
-test('renderFrame shows a PAUSED banner when paused', () => {
-  const paused = renderFrame(buildModel({ dir: fixture({ paused: true }), now: Date.now() }), { color: false });
-  assert.match(paused, /PAUSED/);
-  const running = renderFrame(buildModel({ dir: fixture({ paused: false }), now: Date.now() }), { color: false });
-  assert.doesNotMatch(running, /PAUSED/);
+test('renderHistory shows per-run rows and a totals footer', () => {
+  const records = [
+    { runId: 'r1', branch: 'shift/a', endedAt: '2026-06-16T01:00:00Z', durationMs: 600000, iterations: 3, tokens: { output: 120000, total: 1 }, bins: { total: 2, done: 2, skipped: 0, blocked: 0 } },
+    { runId: 'r2', branch: 'shift/b', endedAt: '2026-06-16T02:00:00Z', durationMs: 1200000, iterations: 5, tokens: { output: 340000, total: 1 }, bins: { total: 5, done: 3, skipped: 1, blocked: 1 } }
+  ];
+  const out = renderHistory(records, aggregate(records), { color: false });
+  assert.match(out, /work record/);
+  assert.match(out, /shift\/a/);
+  assert.match(out, /shift\/b/);
+  assert.match(out, /totals/);
+  assert.match(out, /2 runs/);
+  assert.match(out, /460k out/);  // 120k + 340k aggregate output
+  assert.match(out, /5✓/);        // aggregate done
 });
 
-test('renderFrame on no active run is a friendly message, not a crash', () => {
-  const cwd = fs.mkdtempSync(path.join(os.tmpdir(), 'shift-watch-none2-'));
-  const out = renderFrame(buildModel({ dir: path.join(cwd, '.shift'), now: Date.now() }), { color: false });
-  assert.match(out, /no active.*run/i);
+test('renderHistory with no records is a friendly message', () => {
+  assert.match(renderHistory([], aggregate([]), { color: false }), /No shift runs recorded/i);
 });

From 5044dae6093f5cc8e252f128979dd4551b14ec29 Mon Sep 17 00:00:00 2001
From: Allen Manzano-Wight <6640236+AllenBW@users.noreply.github.com>
Date: Tue, 16 Jun 2026 11:58:47 -0400
Subject: [PATCH 09/12] shift: move engine state out of the repo so the agent
 can't usurp it (real per-bin fix)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Root cause (verified): a headless autonomous agent rewrites .shift/state.json to mark
bins done itself — bypassing the keep-going engine so the hook never drives the queue
or records per-bin boundaries. A probe hook disproved a sandbox: a Stop hook can write
anywhere (~/.local/state, /tmp, env-provided). So the fix is an engine-owned store
OUTSIDE the repo, where the agent (which only works in the repo) can't reach it.

- lib/store.cjs: engineDir(cwd) = $XDG_STATE_HOME/shift/<sha256(realpath(cwd))>
  (realpath so /tmp == /private/tmp; full-path hash so siblings don't collide;
  SHIFT_STATE_DIR override). mkdir -p.
- state.json, usage.json, history.jsonl, timeline now live in engineDir; the hook +
  bin/shift + watch-model read/write there. config.json stays user-editable in .shift/
  and is snapshotted into engineDir so a deletion can't break a run; summary/log/control
  stay in .shift/. A stray agent-written .shift/state.json is simply ignored.
- Always emit a timeline 'start' per bin (binWindows dedupes) so every bin has a window.

Validated: a real bypassPermissions run now records per-bin runtime+tokens for every
bin (35s/7k, 13s/2k) + a full history row. 99 shift tests green.
---
 shift/README.md                 | 10 ++--
 shift/SPEC.md                   |  6 ++-
 shift/bin/shift                 | 25 ++++++----
 shift/hooks/shift-stop.cjs      | 41 +++++++++-------
 shift/lib/store.cjs             | 40 ++++++++++++++++
 shift/lib/timeline.cjs          | 25 ++++------
 shift/lib/watch-model.cjs       | 10 ++--
 shift/test/cli.test.cjs         | 17 ++++---
 shift/test/hook.test.cjs        | 83 +++++++++++++++++++--------------
 shift/test/timeline.test.cjs    | 62 ++++++++++++++++--------
 shift/test/watch-model.test.cjs |  5 +-
 11 files changed, 212 insertions(+), 112 deletions(-)
 create mode 100644 shift/lib/store.cjs

diff --git a/shift/README.md b/shift/README.md
index 4f1ebd6..83f01f4 100644
--- a/shift/README.md
+++ b/shift/README.md
@@ -85,11 +85,15 @@ It's also the **control + drill-down surface** — a status bar can show state b
 
 Control is file-based under `.shift/` (`PAUSE` / `SKIP` / `STOP`), so it works whether the run is interactive or headless, and from any terminal in the repo.
 
-> **Tokens are the *output* count** — the honest "work produced" figure, read from the session transcript. A warm run's `input`/cache tokens balloon with re-sent context, so the headline deliberately isn't `total` (that's in the detail view). Run-level tokens + runtime are authoritative; **per-bin** token/runtime columns are best-effort and may show `—` in a fully-headless run (an autonomous agent can rewrite `.shift/` mid-run) — see [SPEC §13](./SPEC.md).
+> **Tokens are the *output* count** — the honest "work produced" figure, read from the session transcript. A warm run's `input`/cache tokens balloon with re-sent context, so the headline deliberately isn't `total` (that's in the detail view). Both run-level and per-bin tokens/runtime are reliable, including in fully-headless runs: the engine's state lives **outside the repo** (see below), so an autonomous agent can't corrupt it.
+
+### Where state lives (and why)
+
+Shift keeps the engine's authoritative state — run state, timeline, usage, and the work-record history — **outside the repo**, under `$XDG_STATE_HOME/shift/<hash-of-repo-path>/` (or `~/.local/state/shift/…`). The reason is candor-meets-reality: an autonomous agent will rewrite or delete files it finds in the repo (it was caught marking bins done in `.shift/state.json` itself), so the engine puts its state where the agent — which only works inside the repo — can't reach it. `.shift/` in your repo holds only what you and the agent legitimately touch: `config.json` (you edit it), `summary.md` (you read it), `log.md`/`blocked.jsonl` (the agent appends), and the control signals. Override the location with `SHIFT_STATE_DIR`.
 
 ### The work record — `shift history`
 
-Every finalized run is appended to `.shift/history.jsonl`. `shift history` prints the ledger — one row per run (when, branch, runtime, output tokens, bin tally) and a **totals** footer across all runs; `shift history <runId>` drills into a single run's bins.
+Every finalized run is appended to an append-only ledger in the engine state dir. `shift history` prints it — one row per run (when, branch, runtime, output tokens, bin tally) and a **totals** footer across all runs; `shift history <runId>` drills into a single run's bins.
 
 ### In your status bar (module 1)
 
@@ -145,4 +149,4 @@ Pick the narrowest mode that lets the work actually proceed.
 cd shift && npm test     # node --test, zero dependencies
 ```
 
-Pure logic lives in `lib/` (discovery, state, bounds, brief, decision, verify, usage, outcome, run-loop, control, watch-model, transcript, timeline, history) and is unit-tested — including `renderFrame`/`renderDetail`/`renderHistory`, so the dashboard is testable without a TTY; `hooks/shift-stop.cjs` (the keep-going engine) and the `shift run` loop are integration-tested by driving them with injected effects / crafted hook input. The `bin/shift watch` TUI is a thin shell over the tested `watch-model` + `control` modules.
+Pure logic lives in `lib/` (discovery, state, bounds, brief, decision, verify, usage, outcome, run-loop, control, watch-model, transcript, timeline, history, store) and is unit-tested — including `renderFrame`/`renderDetail`/`renderHistory`, so the dashboard is testable without a TTY; `hooks/shift-stop.cjs` (the keep-going engine) and the `shift run` loop are integration-tested by driving them with injected effects / crafted hook input. The `bin/shift watch` TUI is a thin shell over the tested `watch-model` + `control` modules.
diff --git a/shift/SPEC.md b/shift/SPEC.md
index df7f731..132ec97 100644
--- a/shift/SPEC.md
+++ b/shift/SPEC.md
@@ -292,6 +292,8 @@ The candor gap in v2 was that a headless run is opaque *while* it runs (good pap
 
 ### Tokens, runtime + the work record (2026-06-16)
 
-Per the candor goal of making consumption legible: the dashboard header and `status --line` show **output tokens** (the honest "work produced" figure — not inflated by context resends / cache reads, which dominate `total`), summed from the session **transcript** (`transcript_path` from the hook payload; tokens live in `message.usage`). Each bin gets a runtime + token column; `↑/↓` selects a bin and `⏎` opens a **detail view** (status, runtime, token breakdown in/out/cache, commit, brief). Every finalized run is appended to an append-only **work record** at `.shift/history.jsonl`; `shift history` prints per-run rows + a totals footer (all runs, total time, total output tokens), and `shift history <runId>` drills into one run's bins. New modules: `lib/transcript.cjs` (sum `usage` over a `[start, end)` window — pure), `lib/timeline.cjs` (append-only bin boundaries), `lib/history.cjs` (ledger append/read/aggregate). **Tests:** 96 in `shift`, all green.
+Per the candor goal of making consumption legible: the dashboard header and `status --line` show **output tokens** (the honest "work produced" figure — not inflated by context resends / cache reads, which dominate `total`), summed from the session **transcript** (`transcript_path` from the hook payload; tokens live in `message.usage`). Each bin gets a runtime + token column; `↑/↓` selects a bin and `⏎` opens a **detail view** (status, runtime, token breakdown in/out/cache, commit, brief). Every finalized run is appended to an append-only **work record**; `shift history` prints per-run rows + a totals footer (all runs, total time, total output tokens), and `shift history <runId>` drills into one run's bins. New modules: `lib/transcript.cjs` (sum `usage` over a `[start, end)` window — pure), `lib/timeline.cjs` (append-only bin boundaries), `lib/history.cjs` (ledger append/read/aggregate).
 
-*Known limitation — per-bin attribution is best-effort in fully-headless autonomous runs.* Investigation (2026-06-16) established three constraints that, together, make reliable *per-bin* token/runtime attribution impossible while a `claude -p` agent runs unattended: (1) an autonomous agent **rewrites/deletes files under `.shift/`** mid-run (observed: it rewrote `state.json` + `log.md` and deleted `config.json`/`timeline.jsonl`), clobbering the hook's per-bin stamps; (2) Claude Code **sandboxes hook file-writes to the project directory**, so the boundary record can't be relocated out-of-repo where the agent can't reach it; (3) the transcript carries no per-bin marker to reconstruct boundaries from. What **is** reliable and authoritative regardless: **run-level** output tokens + runtime, and the **work-record history** row (written as the hook's *final* action on finalize, after the agent's last turn, so it's never clobbered). Per-bin columns populate in interactive runs / when the agent leaves `.shift/` alone / in `shift/examples/watch-demo.cjs`, and show `—` otherwise. The brief now instructs the agent to treat `.shift/` as append-only bookkeeping; tightening that — or an engine-owned state store the agent can't reach — is the path to making per-bin robust.
+### Engine state moved out of the repo — per-bin attribution made robust (2026-06-16)
+
+The first cut of per-bin attribution was unreliable headless, and the investigation found the true cause: an autonomous agent **rewrites `.shift/state.json` to mark bins done itself** (and rewrites `log.md`, deletes `config.json`/`timeline.jsonl`) — usurping the keep-going engine so the hook never drives the queue and records no boundaries. A probe hook (one real `claude -p` run) then **disproved a sandbox**: a Stop hook can write anywhere, including `~/.local/state` and `/tmp`. So the fix is an **engine-owned store outside the working repo**, in `lib/store.cjs`: `engineDir(cwd)` = `$XDG_STATE_HOME/shift/<sha256(realpath(cwd))>` (canonicalized so `/tmp` and `/private/tmp` agree; full-path hash so siblings don't collide; `SHIFT_STATE_DIR` overrides). `state.json`, `usage.json`, `history.jsonl`, and the timeline now live there — the hook owns them and the agent (which only operates inside the repo) can't see or touch them. `.shift/` keeps only what the user/agent legitimately use: `config.json` (user-edited, also snapshotted into the engine dir so a deletion can't break a run), `summary.md` (user-read), `log.md`/`blocked.jsonl` (agent-appended), and `STOP`/`PAUSE`/`SKIP` (control). The engine is also robust if the agent *does* still write a stray `.shift/state.json` — that file is simply ignored. **Validated:** a real fully-headless `bypassPermissions` run now records per-bin runtime + tokens for every bin (e.g. `35s · 7k`, `13s · 2k`) and a complete history row. **Tests:** 99 in `shift`, all green.
diff --git a/shift/bin/shift b/shift/bin/shift
index 784695e..ad4bb85 100755
--- a/shift/bin/shift
+++ b/shift/bin/shift
@@ -5,6 +5,7 @@ const path = require('node:path');
 const cp = require('node:child_process');
 const { discoverBins } = require('../lib/discovery.cjs');
 const { initState, saveState, loadState, mergeDiscovered } = require('../lib/state.cjs');
+const { engineDir } = require('../lib/store.cjs');
 
 function isoStamp(d) { return d.toISOString().replace(/[:.]/g, '-').slice(0, 19); }
 function dateStr(d) { return d.toISOString().slice(0, 10); }
@@ -61,12 +62,16 @@ function cmdStart(args) {
   }
 
   fs.mkdirSync(dir, { recursive: true });
+  const edir = engineDir(cwd); // engine state lives out of the repo so the agent can't usurp it
   if (fs.existsSync(path.join(dir, 'STOP'))) fs.unlinkSync(path.join(dir, 'STOP'));
-  require('../lib/timeline.cjs').clearTimeline(dir); // fresh run → fresh boundary record
-  fs.writeFileSync(cfgFile, JSON.stringify(config, null, 2));
+  try { fs.unlinkSync(path.join(dir, 'summary.md')); } catch { /* none */ }
+  require('../lib/timeline.cjs').clearTimeline(cwd); // fresh run → fresh boundary record
+  try { fs.unlinkSync(path.join(edir, 'usage.json')); } catch { /* none */ } // stale usage from a prior run
+  fs.writeFileSync(cfgFile, JSON.stringify(config, null, 2));                 // .shift/config.json (user-editable)
+  fs.writeFileSync(path.join(edir, 'config.json'), JSON.stringify(config, null, 2)); // snapshot the agent can't delete
   let state = initState({ runId: isoStamp(now), startedAt: now.toISOString(), branch });
   state = mergeDiscovered(state, discovered);
-  saveState(dir, state);
+  saveState(edir, state);
   fs.writeFileSync(path.join(dir, 'log.md'), `# shift log — ${state.runId}\n`);
 
   if (!ensureBranch(cwd, branch)) {
@@ -179,10 +184,10 @@ function cmdStop() {
 // The work record: every finalized run (.shift/history.jsonl). `shift history` prints the
 // ledger + totals; `shift history <runId|branch-suffix>` drills into one run's bins.
 function cmdHistory(args) {
-  const dir = path.join(process.cwd(), '.shift');
+  const edir = engineDir(process.cwd());
   const { readHistory, aggregate } = require('../lib/history.cjs');
   const { renderHistory, fmtDur, fmtTok } = require('../lib/watch-model.cjs');
-  const records = readHistory(dir);
+  const records = readHistory(edir);
   const target = (args || []).find(a => !a.startsWith('-'));
   if (target) {
     const r = records.filter(x => x.runId === target || (x.branch || '').endsWith(target)).pop();
@@ -203,11 +208,13 @@ function cmdHistory(args) {
 async function cmdRun() {
   const cwd = process.cwd();
   const dir = path.join(cwd, '.shift');
-  if (!fs.existsSync(path.join(dir, 'state.json'))) {
+  const edir = engineDir(cwd);
+  if (!fs.existsSync(path.join(edir, 'state.json'))) {
     console.log('No active run. Run `shift start` first.');
     process.exit(1);
   }
-  const config = JSON.parse(fs.readFileSync(path.join(dir, 'config.json'), 'utf8'));
+  const cfgFile = fs.existsSync(path.join(edir, 'config.json')) ? path.join(edir, 'config.json') : path.join(dir, 'config.json');
+  const config = JSON.parse(fs.readFileSync(cfgFile, 'utf8'));
   const mode = config.permissionMode || 'acceptEdits';
   const { runLoop } = require('../lib/run-loop.cjs');
   const { readUsageCache } = require('../lib/usage.cjs');
@@ -231,8 +238,8 @@ async function cmdRun() {
   let first = true;
   const effects = {
     now: () => Date.now(),
-    loadState: () => loadState(dir),
-    readUsage: () => readUsageCache(dir),
+    loadState: () => loadState(edir),
+    readUsage: () => readUsageCache(edir),
     log: (m) => console.log(`[shift] ${m}`),
     finalized: () => fs.existsSync(path.join(dir, 'summary.md')),
     isPaused: () => isPaused(dir),
diff --git a/shift/hooks/shift-stop.cjs b/shift/hooks/shift-stop.cjs
index f935db5..03ad77e 100755
--- a/shift/hooks/shift-stop.cjs
+++ b/shift/hooks/shift-stop.cjs
@@ -11,6 +11,7 @@ const { readSkip, clearSkip } = require('../lib/control.cjs');
 const { sumTokens } = require('../lib/transcript.cjs');
 const { appendRecord } = require('../lib/history.cjs');
 const { appendEvent, readTimeline, binWindows } = require('../lib/timeline.cjs');
+const { engineDir } = require('../lib/store.cjs');
 
 function readStdin() { try { return fs.readFileSync(0, 'utf8'); } catch { return ''; } }
 
@@ -67,11 +68,11 @@ function writeSummary(dir, state, reason, now, runTok) {
 // Append this run to the work record (.shift/history.jsonl). One row per finalized run.
 // Per-bin metrics come from the timeline (boundaries) + transcript (tokens) so they
 // survive even if the agent rewrote state.json mid-run.
-function appendRunRecord(dir, state, reason, now, runTok, transcriptPath) {
+function appendRunRecord(edir, cwd, state, reason, now, runTok, transcriptPath) {
   const tally = s => state.bins.filter(b => b.status === s).length;
-  const windows = binWindows(readTimeline(dir));
+  const windows = binWindows(readTimeline(cwd));
   const nowIso = new Date(now).toISOString();
-  appendRecord(dir, {
+  appendRecord(edir, {
     runId: state.runId, branch: state.branch,
     startedAt: state.startedAt, endedAt: nowIso,
     durationMs: Math.max(0, now - Date.parse(state.startedAt)),
@@ -100,10 +101,14 @@ function main() {
   // Resolve the repo from the hook payload's cwd (the hook's process cwd is not
   // guaranteed to be the project root); fall back to process.cwd().
   const cwd = (input && typeof input.cwd === 'string' && input.cwd) ? input.cwd : process.cwd();
-  const dir = path.join(cwd, '.shift');
-  if (!fs.existsSync(path.join(dir, 'state.json'))) { process.stdout.write('{}'); return; }
-
-  const config = JSON.parse(fs.readFileSync(path.join(dir, 'config.json'), 'utf8'));
+  const dir = path.join(cwd, '.shift');           // user/agent-facing: config, summary, log, control
+  const edir = engineDir(cwd);                    // engine-owned, out of the agent's reach: state, usage, history, timeline
+  if (!fs.existsSync(path.join(edir, 'state.json'))) { process.stdout.write('{}'); return; }
+
+  // config is snapshotted into the engine dir at `shift start`; prefer that (the agent
+  // can't delete it) and fall back to the repo copy.
+  const cfgFile = fs.existsSync(path.join(edir, 'config.json')) ? path.join(edir, 'config.json') : path.join(dir, 'config.json');
+  const config = JSON.parse(fs.readFileSync(cfgFile, 'utf8'));
   const now = Date.now();
   const nowIso = new Date(now).toISOString();
   const killSwitch = fs.existsSync(path.join(dir, 'STOP'));
@@ -111,10 +116,10 @@ function main() {
 
   // Capture rate limits from the hook payload: enforce the usage cap and cache
   // reset times for the headless runner. Absent on non-Pro/Max or pre-first-response.
-  const usagePercent = writeUsageCache(dir, input.rate_limits, Math.floor(now / 1000));
+  const usagePercent = writeUsageCache(edir, input.rate_limits, Math.floor(now / 1000));
 
   // Re-discover (fresh text + new files) and carry over status/attempts.
-  let state = mergeDiscovered(loadState(dir), discoverBins(config.sources, cwd));
+  let state = mergeDiscovered(loadState(edir), discoverBins(config.sources, cwd));
   const transcriptPath = payloadTranscript || state.transcriptPath || null;
 
   const prevBinId = state.currentBinId;
@@ -127,7 +132,7 @@ function main() {
   // agent may rewrite mid-run — and tokens are summed from the transcript (also outside
   // the repo). `fm` is merged into whichever terminal status the bin lands on, but the
   // durable copy is the timeline + the history record, not these (clobberable) fields.
-  const prevStart = prevBinId ? (binWindows(readTimeline(dir))[prevBinId] || {}).startedAt : null;
+  const prevStart = prevBinId ? (binWindows(readTimeline(cwd))[prevBinId] || {}).startedAt : null;
   let fm = {};
   if (prevBinId) {
     const tok = (transcriptPath && prevStart) ? sumTokens(transcriptPath, prevStart, nowIso) : null;
@@ -171,7 +176,7 @@ function main() {
       state = setBinStatus(state, prevBinId, { status: 'done', ...fm });
       binFinished = true;
     }
-    if (binFinished) appendEvent(dir, { t: nowIso, event: 'finish', id: prevBinId });
+    if (binFinished) appendEvent(cwd, { t: nowIso, event: 'finish', id: prevBinId });
   }
 
   const result = decide({
@@ -186,13 +191,13 @@ function main() {
     if (retryFeedback && result.nextBinId === prevBinId) reason += `\n\n${retryFeedback}`;
     state.iterations += 1;
     state.currentBinId = result.nextBinId;
-    // Record the bin's start the first time it becomes current (a new bin, not a verify
-    // retry of the same one). The timeline is the durable copy; state.bins.startedAt is a
-    // best-effort convenience that the agent may clobber.
-    if (result.nextBinId !== prevBinId) appendEvent(dir, { t: nowIso, event: 'start', id: result.nextBinId });
+    // Record the bin's start. binWindows keeps the FIRST start per bin, so re-emitting on
+    // a verify retry (or after the agent clobbers state.json so prevBinId looks unchanged)
+    // is harmless — and unconditionally appending guarantees every bin has a start event.
+    appendEvent(cwd, { t: nowIso, event: 'start', id: result.nextBinId });
     const nb = state.bins.find(b => b.id === result.nextBinId);
     if (nb && !nb.startedAt) state = setBinStatus(state, result.nextBinId, { startedAt: nowIso });
-    saveState(dir, state);
+    saveState(edir, state);
     fs.appendFileSync(path.join(dir, 'log.md'),
       `\n## ${nowIso} — work ${result.nextBinId} (iter ${state.iterations})\n`);
     process.stdout.write(JSON.stringify({ decision: 'block', reason }));
@@ -202,8 +207,8 @@ function main() {
     const alreadyFinalized = fs.existsSync(path.join(dir, 'summary.md'));
     const runTok = transcriptPath ? sumTokens(transcriptPath, state.startedAt, nowIso) : null;
     state.currentBinId = null;
-    saveState(dir, state);
-    if (!alreadyFinalized) appendRunRecord(dir, state, result.reason, now, runTok, transcriptPath);
+    saveState(edir, state);
+    if (!alreadyFinalized) appendRunRecord(edir, cwd, state, result.reason, now, runTok, transcriptPath);
     writeSummary(dir, state, result.reason, now, runTok);
     process.stdout.write('{}');
   }
diff --git a/shift/lib/store.cjs b/shift/lib/store.cjs
new file mode 100644
index 0000000..3087964
--- /dev/null
+++ b/shift/lib/store.cjs
@@ -0,0 +1,40 @@
+'use strict';
+const fs = require('node:fs');
+const os = require('node:os');
+const path = require('node:path');
+const crypto = require('node:crypto');
+
+// Where shift keeps the engine's AUTHORITATIVE mutable state — state.json, timeline,
+// usage cache, work-record history. It lives OUTSIDE the working repo, keyed by the
+// repo's canonical path, because an autonomous agent rewrites/deletes files it finds
+// under .shift/ (observed: it marked bins done in state.json itself, usurping the engine
+// and erasing per-bin boundaries). A Stop hook is NOT sandboxed (verified it can write
+// ~/.local/state), so the hook owns this dir while the agent — which only operates inside
+// the repo — can't reach it.
+//
+// .shift/ in the repo keeps only what the user or agent legitimately touches: config.json
+// (user-edited), summary.md (user-read), log.md / blocked.jsonl (agent-appended), and the
+// control signals (STOP/PAUSE/SKIP, written by `shift watch`).
+//
+// Two rules keep the hook (writer) and watch/history (readers) on the same path:
+//   1. realpathSync — macOS /tmp is a symlink to /private/tmp; the hook payload cwd is
+//      already canonical, so readers must canonicalize too.
+//   2. hash the FULL canonical path (keying on a path prefix collides for sibling temp dirs); only the digest is truncated, to 16 hex chars.
+// SHIFT_STATE_DIR overrides the base (tests; also a valid explicit override).
+
+function base() { // SHIFT_STATE_DIR verbatim, else <XDG_STATE_HOME or ~/.local/state>/shift
+  const { SHIFT_STATE_DIR: override, XDG_STATE_HOME: xdgState } = process.env;
+  return override || path.join(xdgState || path.join(os.homedir(), '.local', 'state'), 'shift');
+}
+function canonical(cwd) { // absolute, symlink-resolved form of cwd
+  try { return fs.realpathSync(path.resolve(cwd)); } catch { return path.resolve(cwd); } // realpath failed: fall back to the absolute path as-is
+}
+
+// engineDir(cwd) -> the out-of-repo state directory for the repo rooted at cwd (created on demand, owner-only).
+function engineDir(cwd) {
+  const dir = path.join(base(), crypto.createHash('sha256').update(canonical(cwd)).digest('hex').slice(0, 16)); // <base>/<16 hex chars of sha256(canonical cwd)>
+  try { fs.mkdirSync(dir, { recursive: true, mode: 0o700 }); } catch { /* best-effort */ } // 0700 on newly created dirs: keep engine state private to this user
+  return dir; // returned even when the mkdir above failed
+}
+
+module.exports = { engineDir };
diff --git a/shift/lib/timeline.cjs b/shift/lib/timeline.cjs
index 90b12b8..e8c5f4a 100644
--- a/shift/lib/timeline.cjs
+++ b/shift/lib/timeline.cjs
@@ -1,32 +1,27 @@
 'use strict';
 const fs = require('node:fs');
 const path = require('node:path');
+const { engineDir } = require('./store.cjs');
 
-// An append-only record of bin boundaries (one event per line in .shift/timeline.jsonl)
-// — the source of per-bin runtime + token windows, paired with the transcript for tokens.
-//
-// Best-effort, by design: in a fully-headless autonomous run the agent may rewrite or
-// delete files under .shift/ (observed), so per-bin metrics can be lost — the run-level
-// totals + the work-record history (the hook's final write) remain authoritative
-// regardless. Writing this out-of-repo isn't an option: Claude Code sandboxes hook
-// file-writes to the project directory. See SPEC §13.
+// Append-only record of bin boundaries (one event per line) — the source of per-bin
+// runtime + token windows, paired with the transcript for tokens. Lives in the engine's
+// out-of-repo state dir (see store.cjs) so the agent can't delete or rewrite it.
 
-function timelinePath(dir) { return path.join(dir, 'timeline.jsonl'); }
+function timelinePath(cwd) { return path.join(engineDir(cwd), 'timeline.jsonl'); }
 
-function appendEvent(dir, ev) { // ev: { t: iso, event: 'start'|'finish', id }
-  try { fs.mkdirSync(dir, { recursive: true }); fs.appendFileSync(timelinePath(dir), JSON.stringify(ev) + '\n'); }
-  catch { /* best-effort */ }
+function appendEvent(cwd, ev) { // ev: { t: iso, event: 'start'|'finish', id }
+  try { fs.appendFileSync(timelinePath(cwd), JSON.stringify(ev) + '\n'); } catch { /* best-effort */ }
 }
 
-function readTimeline(dir) {
+function readTimeline(cwd) {
   let raw;
-  try { raw = fs.readFileSync(timelinePath(dir), 'utf8'); } catch { return []; }
+  try { raw = fs.readFileSync(timelinePath(cwd), 'utf8'); } catch { return []; }
   return raw.split('\n').filter(Boolean)
     .map(l => { try { return JSON.parse(l); } catch { return null; } })
     .filter(Boolean);
 }
 
-function clearTimeline(dir) { try { fs.unlinkSync(timelinePath(dir)); } catch { /* none */ } }
+function clearTimeline(cwd) { try { fs.unlinkSync(timelinePath(cwd)); } catch { /* none */ } }
 
 // binWindows(events) -> { id: { startedAt, finishedAt } } — first start, last finish.
 function binWindows(events) {
diff --git a/shift/lib/watch-model.cjs b/shift/lib/watch-model.cjs
index 717117a..aee1a30 100644
--- a/shift/lib/watch-model.cjs
+++ b/shift/lib/watch-model.cjs
@@ -5,6 +5,7 @@ const { loadState } = require('./state.cjs');
 const { isPaused, isStopRequested } = require('./control.cjs');
 const { sumUsage, readLines } = require('./transcript.cjs');
 const { readTimeline, binWindows } = require('./timeline.cjs');
+const { engineDir } = require('./store.cjs');
 
 // --- model -----------------------------------------------------------------
 
@@ -30,15 +31,18 @@ function readBrief(cwd, binId) {
   try { return fs.readFileSync(path.join(cwd, binId), 'utf8'); } catch { return ''; }
 }
 
-// buildModel({ dir, now }) — read .shift/ into a plain view model. Pure of rendering.
+// buildModel({ dir, now }) — read the run into a plain view model. `dir` is the repo's
+// .shift/ (log, control, summary); the engine state (state.json) lives out-of-repo.
 function buildModel({ dir, now }) {
+  const cwd = path.dirname(dir);
+  const edir = engineDir(cwd);
   let state;
-  try { state = loadState(dir); } catch { return { exists: false }; }
+  try { state = loadState(edir); } catch { return { exists: false }; }
 
   // Per-bin runtime + tokens are derived from the timeline (agent-proof boundaries) and
   // the transcript (parsed once), so they survive a state.json the agent rewrote. We fall
   // back to any stamps the hook left on state.bins when no timeline/transcript is present.
-  const windows = binWindows(readTimeline(dir));
+  const windows = binWindows(readTimeline(path.dirname(dir))); // timeline keyed by repo cwd, not .shift
   const lines = state.transcriptPath ? readLines(state.transcriptPath) : [];
   const startMs = b => (windows[b.id] && windows[b.id].startedAt) ? Date.parse(windows[b.id].startedAt) : null;
   const finMs = (b, current) => {
diff --git a/shift/test/cli.test.cjs b/shift/test/cli.test.cjs
index 62dbad1..d1a10d5 100644
--- a/shift/test/cli.test.cjs
+++ b/shift/test/cli.test.cjs
@@ -7,6 +7,10 @@ const path = require('node:path');
 const cp = require('node:child_process');
 
 const CLI = path.resolve(__dirname, '..', 'bin', 'shift');
+// Engine state lives out of the repo; pin its base to a tmp dir for the test process + CLI.
+const STATE_BASE = fs.mkdtempSync(path.join(os.tmpdir(), 'shift-cli-base-'));
+process.env.SHIFT_STATE_DIR = STATE_BASE;
+const { engineDir } = require('../lib/store.cjs');
 
 function repoWithQueue() {
   const cwd = fs.mkdtempSync(path.join(os.tmpdir(), 'shift-cli-'));
@@ -20,24 +24,25 @@ function repoWithQueue() {
 }
 
 function run(cwd, args) {
-  return cp.execFileSync('node', [CLI, ...args], { cwd, encoding: 'utf8' });
+  return cp.execFileSync('node', [CLI, ...args], { cwd, encoding: 'utf8', env: { ...process.env, SHIFT_STATE_DIR: STATE_BASE } });
 }
 
 test('--dry-run lists the queue and writes nothing', () => {
   const cwd = repoWithQueue();
   const out = run(cwd, ['start', '--dry-run']);
   assert.match(out, /queue\/01\.md/);
-  assert.ok(!fs.existsSync(path.join(cwd, '.shift', 'state.json')));
+  assert.ok(!fs.existsSync(path.join(engineDir(cwd), 'state.json')));
 });
 
-test('start writes config + state and creates the run branch', () => {
+test('start writes config (repo) + state (engine dir) and creates the run branch', () => {
   const cwd = repoWithQueue();
   run(cwd, ['start']);
-  assert.ok(fs.existsSync(path.join(cwd, '.shift', 'state.json')));
-  assert.ok(fs.existsSync(path.join(cwd, '.shift', 'config.json')));
+  assert.ok(fs.existsSync(path.join(cwd, '.shift', 'config.json')), 'config stays in the repo (user-editable)');
+  assert.ok(fs.existsSync(path.join(engineDir(cwd), 'state.json')), 'engine state lives out of the repo');
+  assert.ok(!fs.existsSync(path.join(cwd, '.shift', 'state.json')), 'no state.json in the repo for the agent to clobber');
   const branch = cp.execSync('git branch --show-current', { cwd, encoding: 'utf8' }).trim();
   assert.match(branch, /^shift\//);
-  const state = JSON.parse(fs.readFileSync(path.join(cwd, '.shift', 'state.json'), 'utf8'));
+  const state = JSON.parse(fs.readFileSync(path.join(engineDir(cwd), 'state.json'), 'utf8'));
   assert.equal(state.bins.length, 1);
 });
 
diff --git a/shift/test/hook.test.cjs b/shift/test/hook.test.cjs
index cf0def7..c6c7dee 100644
--- a/shift/test/hook.test.cjs
+++ b/shift/test/hook.test.cjs
@@ -7,39 +7,50 @@ const path = require('node:path');
 const cp = require('node:child_process');
 
 const HOOK = path.resolve(__dirname, '..', 'hooks', 'shift-stop.cjs');
+// Engine state lives out of the repo; point its base at a tmp dir so tests never touch
+// ~/.local/state, and so the test process's engineDir() matches the spawned hook's.
+const STATE_BASE = fs.mkdtempSync(path.join(os.tmpdir(), 'shift-statebase-'));
+process.env.SHIFT_STATE_DIR = STATE_BASE;
+const { engineDir } = require('../lib/store.cjs');
 
 function setupRun(configOverride) {
   const cwd = fs.mkdtempSync(path.join(os.tmpdir(), 'shift-hook-'));
   fs.mkdirSync(path.join(cwd, 'queue'), { recursive: true });
   fs.writeFileSync(path.join(cwd, 'queue', '01.md'), 'bin one');
   fs.writeFileSync(path.join(cwd, 'queue', '02.md'), 'bin two');
-  const dir = path.join(cwd, '.shift');
+  const dir = path.join(cwd, '.shift');          // repo-side: log, summary, control
+  const edir = engineDir(cwd);                   // out-of-repo: state, config snapshot, history, usage
   fs.mkdirSync(dir, { recursive: true });
-  fs.writeFileSync(path.join(dir, 'config.json'), JSON.stringify(Object.assign({
+  const config = JSON.stringify(Object.assign({
     sources: [{ path: 'queue', kind: 'briefs' }],
     bounds: { maxHours: 24, maxIterations: 10 },
     definitionOfDone: 'done', git: {}
-  }, configOverride || {})));
-  fs.writeFileSync(path.join(dir, 'state.json'), JSON.stringify({
+  }, configOverride || {}));
+  fs.writeFileSync(path.join(edir, 'config.json'), config);
+  fs.writeFileSync(path.join(edir, 'state.json'), JSON.stringify({
     runId: 'r', startedAt: new Date().toISOString(), iterations: 0,
     branch: 'shift/x', currentBinId: null, bins: []
   }));
   fs.writeFileSync(path.join(dir, 'log.md'), '# log\n');
-  return { cwd, dir };
+  return { cwd, dir, edir };
 }
 
 function runHook(cwd, input) {
-  const out = cp.execFileSync('node', [HOOK], { cwd, input: JSON.stringify(input), encoding: 'utf8' });
+  const out = cp.execFileSync('node', [HOOK], {
+    cwd, input: JSON.stringify(input), encoding: 'utf8',
+    env: { ...process.env, SHIFT_STATE_DIR: STATE_BASE }
+  });
   return JSON.parse(out || '{}');
 }
+const readState = edir => JSON.parse(fs.readFileSync(path.join(edir, 'state.json'), 'utf8'));
 
-test('no-ops (allows stop) when no .shift/state.json exists', () => {
+test('no-ops (allows stop) when there is no active run', () => {
   const cwd = fs.mkdtempSync(path.join(os.tmpdir(), 'shift-none-'));
   assert.deepEqual(runHook(cwd, { stop_hook_active: false }), {});
 });
 
 test('first stop blocks bin 1; second marks it done + blocks bin 2; third drains -> allow + summary', () => {
-  const { cwd, dir } = setupRun();
+  const { cwd, dir, edir } = setupRun();
   const r1 = runHook(cwd, { stop_hook_active: false });
   assert.equal(r1.decision, 'block');
   assert.match(r1.reason, /bin one/);
@@ -47,8 +58,7 @@ test('first stop blocks bin 1; second marks it done + blocks bin 2; third drains
   const r2 = runHook(cwd, { stop_hook_active: true });
   assert.equal(r2.decision, 'block');
   assert.match(r2.reason, /bin two/);
-  const s2 = JSON.parse(fs.readFileSync(path.join(dir, 'state.json'), 'utf8'));
-  assert.equal(s2.bins.find(b => b.id === 'queue/01.md').status, 'done');
+  assert.equal(readState(edir).bins.find(b => b.id === 'queue/01.md').status, 'done');
 
   const r3 = runHook(cwd, { stop_hook_active: true });
   assert.deepEqual(r3, {});
@@ -75,24 +85,22 @@ test('logged "Needs you:" lines surface in the summary', () => {
 });
 
 test('SKIP control marks the current bin skipped and advances to the next', () => {
-  const { cwd, dir } = setupRun();
+  const { cwd, dir, edir } = setupRun();
   runHook(cwd, { stop_hook_active: false });            // start bin 1 (current = queue/01.md)
   fs.writeFileSync(path.join(dir, 'SKIP'), 'queue/01.md');
   const r = runHook(cwd, { stop_hook_active: true });   // skip bin 1, block bin 2
   assert.equal(r.decision, 'block');
   assert.match(r.reason, /bin two/);
-  const s = JSON.parse(fs.readFileSync(path.join(dir, 'state.json'), 'utf8'));
-  assert.equal(s.bins.find(b => b.id === 'queue/01.md').status, 'skipped');
+  assert.equal(readState(edir).bins.find(b => b.id === 'queue/01.md').status, 'skipped');
   assert.ok(!fs.existsSync(path.join(dir, 'SKIP')), 'SKIP is consumed');
 });
 
 test('a SKIP naming a non-current bin is consumed and discarded, not applied to a later bin', () => {
-  const { cwd, dir } = setupRun();
+  const { cwd, dir, edir } = setupRun();
   runHook(cwd, { stop_hook_active: false });                       // start bin 1
   fs.writeFileSync(path.join(dir, 'SKIP'), 'queue/99-nope.md');    // stale / wrong id
   runHook(cwd, { stop_hook_active: true });                        // bin 1 -> done (skip ignored)
-  const s = JSON.parse(fs.readFileSync(path.join(dir, 'state.json'), 'utf8'));
-  assert.equal(s.bins.find(b => b.id === 'queue/01.md').status, 'done');
+  assert.equal(readState(edir).bins.find(b => b.id === 'queue/01.md').status, 'done');
   assert.ok(!fs.existsSync(path.join(dir, 'SKIP')), 'stale SKIP is consumed, never left to fire on a later bin');
 });
 
@@ -103,51 +111,58 @@ test('kill switch ends the run immediately', () => {
   assert.match(fs.readFileSync(path.join(dir, 'summary.md'), 'utf8'), /kill switch/);
 });
 
-test('resolves .shift from the hook payload cwd, not the process cwd', () => {
+test('resolves the repo from the hook payload cwd, not the process cwd', () => {
   const { cwd } = setupRun();
   const neutral = fs.mkdtempSync(path.join(os.tmpdir(), 'shift-neutral-'));
   const out = cp.execFileSync('node', [HOOK], {
     cwd: neutral,
     input: JSON.stringify({ stop_hook_active: false, cwd }),
-    encoding: 'utf8'
+    encoding: 'utf8',
+    env: { ...process.env, SHIFT_STATE_DIR: STATE_BASE }
   });
   const r = JSON.parse(out || '{}');
   assert.equal(r.decision, 'block');
   assert.match(r.reason, /bin one/);
 });
 
+test('engine state lives OUTSIDE the repo (agent cannot reach it)', () => {
+  const { cwd: repoRoot, edir: stateDir } = setupRun();
+  runHook(repoRoot, { stop_hook_active: false }); // a single stop is enough for the hook to save state
+  assert.ok(fs.existsSync(path.join(stateDir, 'state.json')), 'state.json is in the engine dir');
+  assert.ok(!fs.existsSync(path.join(repoRoot, '.shift', 'state.json')), 'state.json is NOT in the repo .shift/');
+  assert.ok(!stateDir.startsWith(repoRoot), 'the engine dir is outside the working repo');
+});
+
 // ---- v3: verify gate ----
 
 test('verify gate (passing) marks bins done and drains', () => {
-  const { cwd, dir } = setupRun({ verify: { command: 'true', maxAttempts: 2 } });
+  const { cwd, edir } = setupRun({ verify: { command: 'true', maxAttempts: 2 } });
   runHook(cwd, { stop_hook_active: false }); // start bin 1
   runHook(cwd, { stop_hook_active: true });  // verify passes -> bin1 done, start bin2
-  const s = JSON.parse(fs.readFileSync(path.join(dir, 'state.json'), 'utf8'));
-  assert.equal(s.bins.find(b => b.id === 'queue/01.md').status, 'done');
+  assert.equal(readState(edir).bins.find(b => b.id === 'queue/01.md').status, 'done');
 });
 
 test('verify gate (failing) re-blocks the same bin with feedback, then blocks after maxAttempts', () => {
-  const { cwd, dir } = setupRun({ verify: { command: 'false', maxAttempts: 2 } });
+  const { cwd, edir } = setupRun({ verify: { command: 'false', maxAttempts: 2 } });
   runHook(cwd, { stop_hook_active: false });            // start bin 1
   const r1 = runHook(cwd, { stop_hook_active: true });  // verify fails, attempt 1 < 2 -> retry SAME bin
   assert.equal(r1.decision, 'block');
   assert.match(r1.reason, /failed verification/);
   assert.match(r1.reason, /bin one/);
-  let s = JSON.parse(fs.readFileSync(path.join(dir, 'state.json'), 'utf8'));
+  let s = readState(edir);
   assert.equal(s.bins.find(b => b.id === 'queue/01.md').status, 'pending');
   assert.equal(s.bins.find(b => b.id === 'queue/01.md').attempts, 1);
 
   const r2 = runHook(cwd, { stop_hook_active: true });  // verify fails again, attempt 2 == max -> blocked, move on
   assert.equal(r2.decision, 'block');
   assert.match(r2.reason, /bin two/);
-  s = JSON.parse(fs.readFileSync(path.join(dir, 'state.json'), 'utf8'));
-  assert.equal(s.bins.find(b => b.id === 'queue/01.md').status, 'blocked');
+  assert.equal(readState(edir).bins.find(b => b.id === 'queue/01.md').status, 'blocked');
 });
 
 // ---- watch: per-bin tokens/runtime + work-record history ----
 
 test('records per-bin tokens + runtime from the transcript and appends a history record', () => {
-  const { cwd, dir } = setupRun();
+  const { cwd, dir, edir } = setupRun();
   const tpath = path.join(dir, 'transcript.jsonl');
   const asst = (ts, output) => JSON.stringify({
     type: 'assistant', timestamp: ts,
@@ -155,20 +170,18 @@ test('records per-bin tokens + runtime from the transcript and appends a history
   });
 
   runHook(cwd, { stop_hook_active: false, transcript_path: tpath }); // start bin 1
-  // Use bin 1's recorded startedAt as the message timestamp so it lands in [start, now).
-  const started = JSON.parse(fs.readFileSync(path.join(dir, 'state.json'), 'utf8'))
-    .bins.find(b => b.id === 'queue/01.md').startedAt;
+  const started = readState(edir).bins.find(b => b.id === 'queue/01.md').startedAt;
   assert.ok(started, 'bin 1 got a startedAt when it became current');
   fs.writeFileSync(tpath, asst(started, 500) + '\n');
 
   runHook(cwd, { stop_hook_active: true, transcript_path: tpath }); // finish bin 1, start bin 2
-  const b1 = JSON.parse(fs.readFileSync(path.join(dir, 'state.json'), 'utf8')).bins.find(b => b.id === 'queue/01.md');
+  const b1 = readState(edir).bins.find(b => b.id === 'queue/01.md');
   assert.equal(b1.status, 'done');
   assert.equal(b1.tokens.output, 500, 'bin 1 output tokens attributed from the transcript window');
   assert.equal(typeof b1.durationMs, 'number');
 
   runHook(cwd, { stop_hook_active: true, transcript_path: tpath }); // finish bin 2, drain -> finalize
-  const hist = fs.readFileSync(path.join(dir, 'history.jsonl'), 'utf8').trim().split('\n').map(JSON.parse);
+  const hist = fs.readFileSync(path.join(edir, 'history.jsonl'), 'utf8').trim().split('\n').map(JSON.parse);
   assert.equal(hist.length, 1, 'one history record appended on finalize');
   assert.equal(hist[0].bins.done, 2);
   assert.ok(hist[0].tokens.output >= 500, 'run output tokens recorded');
@@ -176,19 +189,19 @@ test('records per-bin tokens + runtime from the transcript and appends a history
 });
 
 test('history is append-only across runs and not duplicated by a stray extra stop', () => {
-  const { cwd, dir } = setupRun();
+  const { cwd, edir } = setupRun();
   runHook(cwd, { stop_hook_active: false });
   runHook(cwd, { stop_hook_active: true });
   runHook(cwd, { stop_hook_active: true }); // drain -> finalize (appends record 1)
   runHook(cwd, { stop_hook_active: true }); // stray extra stop -> summary already exists -> no 2nd append
-  const hist = fs.readFileSync(path.join(dir, 'history.jsonl'), 'utf8').trim().split('\n').filter(Boolean);
+  const hist = fs.readFileSync(path.join(edir, 'history.jsonl'), 'utf8').trim().split('\n').filter(Boolean);
   assert.equal(hist.length, 1, 'no duplicate history record from a repeated finalize');
 });
 
 // ---- v2: usage cap + cache ----
 
 test('usage cap from the hook payload ends the run and caches usage', () => {
-  const { cwd, dir } = setupRun({ bounds: { maxHours: 24, maxIterations: 10, usageCapPercent: 90 } });
+  const { cwd, dir, edir } = setupRun({ bounds: { maxHours: 24, maxIterations: 10, usageCapPercent: 90 } });
   const reset = Math.floor(Date.now() / 1000) + 3600;
   const r = runHook(cwd, {
     stop_hook_active: false,
@@ -199,7 +212,7 @@ test('usage cap from the hook payload ends the run and caches usage', () => {
   });
   assert.deepEqual(r, {});
   assert.match(fs.readFileSync(path.join(dir, 'summary.md'), 'utf8'), /usage cap/);
-  const usage = JSON.parse(fs.readFileSync(path.join(dir, 'usage.json'), 'utf8'));
+  const usage = JSON.parse(fs.readFileSync(path.join(edir, 'usage.json'), 'utf8'));
   assert.equal(usage.weeklyPercent, 95);
   assert.equal(usage.sessionResetAt, reset);
 });
diff --git a/shift/test/timeline.test.cjs b/shift/test/timeline.test.cjs
index 9b6d59c..cb15b66 100644
--- a/shift/test/timeline.test.cjs
+++ b/shift/test/timeline.test.cjs
@@ -4,40 +4,62 @@ const assert = require('node:assert');
 const fs = require('node:fs');
 const os = require('node:os');
 const path = require('node:path');
+
+// Keep the out-of-repo timeline in a tmp base so tests never touch ~/.local/state.
+process.env.SHIFT_STATE_DIR = fs.mkdtempSync(path.join(os.tmpdir(), 'shift-statebase-'));
 const { appendEvent, readTimeline, clearTimeline, timelinePath, binWindows } = require('../lib/timeline.cjs');
+const { engineDir } = require('../lib/store.cjs');
+
+function repo() { return fs.mkdtempSync(path.join(os.tmpdir(), 'shift-repo-')); }
 
-function dir() { return fs.mkdtempSync(path.join(os.tmpdir(), 'shift-tl-')); }
+test('timeline lives OUTSIDE the repo (agent-proof), under the state base', () => {
+  const repoDir = repo();
+  const file = timelinePath(repoDir); // where the timeline for this repo is stored
+  assert.ok(file.startsWith(process.env.SHIFT_STATE_DIR), 'under the state base');
+  assert.ok(!file.startsWith(path.resolve(repoDir)), 'NOT inside the working repo');
+});
+
+test('append + read round-trips; clear removes', () => {
+  const repoDir = repo();
+  const events = [{ t: '2026-06-16T00:00:00Z', event: 'start', id: 'a' }, { t: '2026-06-16T00:01:00Z', event: 'finish', id: 'a' }];
+  for (const ev of events) appendEvent(repoDir, ev); // one start + one finish for the same bin
+  assert.equal(readTimeline(repoDir).length, events.length);
+  clearTimeline(repoDir);
+  assert.deepEqual(readTimeline(repoDir), []);
+});
 
-test('timeline file lives at .shift/timeline.jsonl', () => {
-  const d = dir();
-  assert.equal(timelinePath(d), path.join(d, 'timeline.jsonl'));
+test('distinct repos get distinct timelines (no key collision)', () => {
+  const a = repo(), b = repo();
+  appendEvent(a, { t: 't', event: 'start', id: 'x' });
+  assert.equal(readTimeline(a).length, 1);
+  assert.equal(readTimeline(b).length, 0);
 });
 
-test('append + read round-trips events; clear removes them', () => {
-  const d = dir();
-  appendEvent(d, { t: '2026-06-16T00:00:00Z', event: 'start', id: 'a' });
-  appendEvent(d, { t: '2026-06-16T00:01:00Z', event: 'finish', id: 'a' });
-  assert.equal(readTimeline(d).length, 2);
-  clearTimeline(d);
-  assert.deepEqual(readTimeline(d), []);
+test('the key is canonical: /tmp and /private/tmp resolve to the same store (macOS symlink)', () => {
+  // realpath collapses the symlink, so a reader using either form agrees with the hook.
+  const real = fs.realpathSync(repo());
+  if (real.startsWith('/private/')) {
+    const aliased = real.replace(/^\/private/, '');
+    assert.equal(engineDir(aliased), engineDir(real), '/tmp alias must map to the same store as /private/tmp');
+  } else {
+    assert.ok(true); // not on a /private symlink platform; nothing to assert
+  }
 });
 
-test('readTimeline on a fresh dir is empty and tolerates malformed lines', () => {
-  const d = dir();
-  assert.deepEqual(readTimeline(d), []);
-  appendEvent(d, { t: 't', event: 'start', id: 'a' });
-  fs.appendFileSync(timelinePath(d), 'garbage\n');
-  assert.equal(readTimeline(d).length, 1);
+test('readTimeline tolerates malformed lines', () => {
+  const c = repo();
+  appendEvent(c, { t: 't', event: 'start', id: 'a' });
+  fs.appendFileSync(timelinePath(c), 'garbage\n');
+  assert.equal(readTimeline(c).length, 1);
 });
 
 test('binWindows takes first start and last finish per bin', () => {
-  const events = [
+  const w = binWindows([
     { t: 't1', event: 'start', id: 'a' },
     { t: 't1b', event: 'start', id: 'a' },
     { t: 't2', event: 'finish', id: 'a' },
     { t: 't3', event: 'start', id: 'b' }
-  ];
-  const w = binWindows(events);
+  ]);
   assert.equal(w.a.startedAt, 't1');
   assert.equal(w.a.finishedAt, 't2');
   assert.equal(w.b.startedAt, 't3');
diff --git a/shift/test/watch-model.test.cjs b/shift/test/watch-model.test.cjs
index 4feb318..7409e7f 100644
--- a/shift/test/watch-model.test.cjs
+++ b/shift/test/watch-model.test.cjs
@@ -4,8 +4,11 @@ const assert = require('node:assert');
 const fs = require('node:fs');
 const os = require('node:os');
 const path = require('node:path');
+// Out-of-repo timeline base → tmp (fixtures have no timeline → per-bin falls back to state.bins).
+process.env.SHIFT_STATE_DIR = fs.mkdtempSync(path.join(os.tmpdir(), 'shift-wmbase-'));
 const { buildModel, renderFrame, renderDetail, renderHistory } = require('../lib/watch-model.cjs');
 const { aggregate } = require('../lib/history.cjs');
+const { engineDir } = require('../lib/store.cjs');
 
 function fixture({ paused = false, currentBinId = 'queue/03-build.md' } = {}) {
   const cwd = fs.mkdtempSync(path.join(os.tmpdir(), 'shift-watch-'));
@@ -14,7 +17,7 @@ function fixture({ paused = false, currentBinId = 'queue/03-build.md' } = {}) {
   fs.mkdirSync(path.join(cwd, 'queue'), { recursive: true });
   fs.writeFileSync(path.join(cwd, 'queue', '03-build.md'), '# Build the thing\n\nCompile and commit.\n');
   const startedAt = new Date(Date.now() - 12 * 60_000).toISOString();
-  fs.writeFileSync(path.join(dir, 'state.json'), JSON.stringify({
+  fs.writeFileSync(path.join(engineDir(cwd), 'state.json'), JSON.stringify({
     runId: '2026-06-16T00-00-00', startedAt, iterations: 7, branch: 'shift/smoke',
     currentBinId,
     bins: [

From 6b58697bb897d2e7ba2b9469a1317f9a155ab603 Mon Sep 17 00:00:00 2001
From: Allen Manzano-Wight <6640236+AllenBW@users.noreply.github.com>
Date: Tue, 16 Jun 2026 12:08:09 -0400
Subject: [PATCH 10/12] shift: fixes from adversarial verification of the
 state-relocation refactor
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Verification of 5044dae surfaced four issues (core path-agreement was already correct):

- P1 cross-run leak: cmdStart cleared STOP/summary/usage/timeline but NOT the other
  repo-side control/blocker signals — a stale PAUSE made `shift start && shift run` a
  multi-hour no-op, a stale SKIP silently skipped a bin, a stale blocked.jsonl re-blocked
  one. Now scrubs PAUSE/SKIP/blocked.jsonl/summary.md too. + regression test (cli.test).
- P1 broken demo: examples/watch-demo.cjs still seeded/read repo-side state after the
  relocation → 6 empty frames. Migrated to engineDir (seed state + snapshot config there,
  readHistory(edir)); now renders real per-bin data + history.
- P3 stale prompt: brief.cjs no longer tells the agent not to touch .shift/state.json
  (gone from the repo); guards the real repo-side surface + notes engine state is external.
  Test updated.
- P3 stale comments: history.cjs / shift-stop.cjs / bin/shift now say history lives in the
  engine dir, not .shift/.

100 shift tests green; demo verified end-to-end.
---
 shift/bin/shift               |  9 +++++++--
 shift/examples/watch-demo.cjs | 14 ++++++++++----
 shift/hooks/shift-stop.cjs    |  2 +-
 shift/lib/brief.cjs           |  2 +-
 shift/lib/history.cjs         |  7 ++++---
 shift/test/brief.test.cjs     |  7 +++++--
 shift/test/cli.test.cjs       | 16 ++++++++++++++++
 7 files changed, 44 insertions(+), 13 deletions(-)

diff --git a/shift/bin/shift b/shift/bin/shift
index ad4bb85..e49c601 100755
--- a/shift/bin/shift
+++ b/shift/bin/shift
@@ -64,7 +64,12 @@ function cmdStart(args) {
   fs.mkdirSync(dir, { recursive: true });
   const edir = engineDir(cwd); // engine state lives out of the repo so the agent can't usurp it
   if (fs.existsSync(path.join(dir, 'STOP'))) fs.unlinkSync(path.join(dir, 'STOP'));
-  try { fs.unlinkSync(path.join(dir, 'summary.md')); } catch { /* none */ }
+  // Scrub every stale control/blocker signal so a prior run can't corrupt this one:
+  // PAUSE → run idle-polls forever; SKIP → a bin is silently skipped; blocked.jsonl →
+  // a bin is re-blocked with last run's note. All live repo-side, beside STOP.
+  for (const f of ['PAUSE', 'SKIP', 'blocked.jsonl', 'summary.md']) {
+    try { fs.unlinkSync(path.join(dir, f)); } catch { /* none */ }
+  }
   require('../lib/timeline.cjs').clearTimeline(cwd); // fresh run → fresh boundary record
   try { fs.unlinkSync(path.join(edir, 'usage.json')); } catch { /* none */ } // stale usage from a prior run
   fs.writeFileSync(cfgFile, JSON.stringify(config, null, 2));                 // .shift/config.json (user-editable)
@@ -181,7 +186,7 @@ function cmdStop() {
   console.log('shift will stop cleanly after the current bin.');
 }
 
-// The work record: every finalized run (.shift/history.jsonl). `shift history` prints the
+// The work record: every finalized run (<engineDir>/history.jsonl, out-of-repo). `shift history` prints the
 // ledger + totals; `shift history <runId|branch-suffix>` drills into one run's bins.
 function cmdHistory(args) {
   const edir = engineDir(process.cwd());
diff --git a/shift/examples/watch-demo.cjs b/shift/examples/watch-demo.cjs
index 11502cb..2535a30 100644
--- a/shift/examples/watch-demo.cjs
+++ b/shift/examples/watch-demo.cjs
@@ -14,20 +14,26 @@ const SHIFT = path.resolve(__dirname, '..');
 const { buildModel, renderFrame, renderDetail, renderHistory } = require(path.join(SHIFT, 'lib', 'watch-model.cjs'));
 const { readHistory, aggregate } = require(path.join(SHIFT, 'lib', 'history.cjs'));
 const { requestSkip, requestStop } = require(path.join(SHIFT, 'lib', 'control.cjs'));
+const { engineDir } = require(path.join(SHIFT, 'lib', 'store.cjs'));
 const HOOK = path.join(SHIFT, 'hooks', 'shift-stop.cjs');
 
 const cwd = fs.mkdtempSync(path.join(os.tmpdir(), 'shift-watch-demo-'));
 const dir = path.join(cwd, '.shift');
+// Authoritative engine state (state.json, history, timeline) lives OUT of the repo,
+// keyed by the canonical cwd — same dir the hook writes and watch/history read.
+const edir = engineDir(cwd);
 fs.mkdirSync(path.join(cwd, 'queue'), { recursive: true });
 fs.mkdirSync(dir, { recursive: true });
 for (const [n, t] of [['01-build.md', 'build the thing'], ['02-flaky.md', 'flaky task'], ['03-docs.md', 'write docs']]) {
   fs.writeFileSync(path.join(cwd, 'queue', n), t);
 }
-fs.writeFileSync(path.join(dir, 'config.json'), JSON.stringify({
+const config = JSON.stringify({
   sources: [{ path: 'queue', kind: 'briefs' }],
   bounds: { maxHours: 24, maxIterations: 10 }, definitionOfDone: 'done', git: {}
-}));
-fs.writeFileSync(path.join(dir, 'state.json'), JSON.stringify({
+});
+fs.writeFileSync(path.join(dir, 'config.json'), config);          // user-editable copy in the repo
+fs.writeFileSync(path.join(edir, 'config.json'), config);         // engine snapshot (what cmdStart does)
+fs.writeFileSync(path.join(edir, 'state.json'), JSON.stringify({  // engine-owned, out of the agent's reach
   runId: 'demo', startedAt: new Date(Date.now() - 5 * 60000).toISOString(),
   iterations: 0, branch: 'shift/demo', currentBinId: null, bins: []
 }));
@@ -55,5 +61,5 @@ process.stdout.write('\n\x1b[1m=== ⏎ details on bin 01 (drill-down) ===\x1b[0m
 process.stdout.write(renderDetail(buildModel({ dir, now: Date.now() }), 0, { width: 78, color: true }));
 
 process.stdout.write('\n\x1b[1m=== shift history (work record across runs) ===\x1b[0m\n');
-process.stdout.write(renderHistory(readHistory(dir), aggregate(readHistory(dir)), { color: true }));
+process.stdout.write(renderHistory(readHistory(edir), aggregate(readHistory(edir)), { color: true }));
 process.stdout.write(`\n(throwaway repo: ${cwd})\n`);
diff --git a/shift/hooks/shift-stop.cjs b/shift/hooks/shift-stop.cjs
index 03ad77e..143be03 100755
--- a/shift/hooks/shift-stop.cjs
+++ b/shift/hooks/shift-stop.cjs
@@ -65,7 +65,7 @@ function writeSummary(dir, state, reason, now, runTok) {
   fs.writeFileSync(path.join(dir, 'summary.md'), lines.join('\n') + '\n');
 }
 
-// Append this run to the work record (.shift/history.jsonl). One row per finalized run.
+// Append this run to the work record (<engineDir>/history.jsonl, out-of-repo). One row per finalized run.
 // Per-bin metrics come from the timeline (boundaries) + transcript (tokens) so they
 // survive even if the agent rewrote state.json mid-run.
 function appendRunRecord(edir, cwd, state, reason, now, runTok, transcriptPath) {
diff --git a/shift/lib/brief.cjs b/shift/lib/brief.cjs
index c54d19e..02584c3 100644
--- a/shift/lib/brief.cjs
+++ b/shift/lib/brief.cjs
@@ -15,7 +15,7 @@ function renderBrief(bin, config) {
     'Do NOT ask questions — if you would normally ask, decide and APPEND the decision as a line to .shift/log.md.',
     `Definition of done: ${dod}`,
     'When finished, commit your work on the current branch.',
-    '`.shift/` is shift\'s own run bookkeeping. The ONLY writes you may make under it are APPENDING a line to .shift/log.md or .shift/blocked.jsonl. Never edit, overwrite, or "tidy" .shift/state.json, .shift/config.json, .shift/summary.md, and never rewrite .shift/log.md — shift maintains those itself (run progress, per-bin runtime + tokens), and changing them corrupts the run record.',
+    '`.shift/` is shift\'s own run bookkeeping. The ONLY writes you may make under it are APPENDING a line to .shift/log.md or .shift/blocked.jsonl. Never edit, overwrite, or "tidy" .shift/config.json or .shift/summary.md, and never rewrite .shift/log.md — shift maintains those itself (run progress, per-bin runtime + tokens), and changing them corrupts the run record. (Authoritative engine state — run progress, usage, timeline, history — lives outside the repo and is maintained by shift; you do not need to touch it.)',
     'Flag anything that needs the human (a deferred decision, an action you could not take) by appending a line to .shift/log.md as: "Needs you: <detail>" — these surface in the run summary.',
     'If a true blocker stops you from finishing this bin, append one line to .shift/blocked.jsonl: {"id":"<bin id>","note":"<reason>"} then stop.',
     guard,
diff --git a/shift/lib/history.cjs b/shift/lib/history.cjs
index c440e14..d69f994 100644
--- a/shift/lib/history.cjs
+++ b/shift/lib/history.cjs
@@ -2,9 +2,10 @@
 const fs = require('node:fs');
 const path = require('node:path');
 
-// The shift work record: an append-only ledger of finalized runs at .shift/history.jsonl.
-// `shift start` rewrites state.json but never touches this, so it accumulates across runs.
-// One JSON line per run (totals + per-bin breakdown). Read for `shift history` + aggregates.
+// The shift work record: an append-only ledger of finalized runs at <engineDir>/history.jsonl
+// (out-of-repo, alongside state.json/usage.json/timeline.jsonl — see store.cjs). `shift start`
+// resets the engine state but never touches this, so it accumulates across runs. All callers
+// pass the engineDir. One JSON line per run (totals + per-bin breakdown). Read for `shift history`.
 
 function historyPath(dir) { return path.join(dir, 'history.jsonl'); }
 
diff --git a/shift/test/brief.test.cjs b/shift/test/brief.test.cjs
index 4f18435..d86f6e4 100644
--- a/shift/test/brief.test.cjs
+++ b/shift/test/brief.test.cjs
@@ -30,8 +30,11 @@ test('always explains decision logging, the Needs-you convention, and blocker fl
   assert.match(out, /blocked\.jsonl/);
 });
 
-test('guards .shift/ bookkeeping: append-only, never edit state.json (so the hook owns per-bin stats)', () => {
+test('guards .shift/ bookkeeping: append-only, never edit config.json (so the hook owns the run record)', () => {
   const out = renderBrief(bin, { git: {} });
-  assert.match(out, /Never edit.*state\.json/i);
+  // state.json now lives out-of-repo (engineDir, store.cjs) and is beyond the agent's reach,
+  // so the prompt no longer names it; it guards the repo-side surface the agent CAN touch.
+  assert.match(out, /Never edit.*config\.json/i);
+  assert.doesNotMatch(out, /state\.json/);
   assert.match(out, /append/i);
 });
diff --git a/shift/test/cli.test.cjs b/shift/test/cli.test.cjs
index d1a10d5..35fe54f 100644
--- a/shift/test/cli.test.cjs
+++ b/shift/test/cli.test.cjs
@@ -52,3 +52,19 @@ test('stop creates the kill switch', () => {
   run(cwd, ['stop']);
   assert.ok(fs.existsSync(path.join(cwd, '.shift', 'STOP')));
 });
+
+test('a second `shift start` scrubs stale control/blocker signals from the prior run', () => {
+  const cwd = repoWithQueue();
+  run(cwd, ['start']);
+  const dir = path.join(cwd, '.shift');
+  // Simulate residue from a prior run: a stale skip, pause, blocker, kill switch, summary.
+  fs.writeFileSync(path.join(dir, 'STOP'), '');
+  fs.writeFileSync(path.join(dir, 'PAUSE'), '');
+  fs.writeFileSync(path.join(dir, 'SKIP'), 'queue/01.md');
+  fs.writeFileSync(path.join(dir, 'blocked.jsonl'), JSON.stringify({ id: 'queue/01.md', note: 'stale' }) + '\n');
+  fs.writeFileSync(path.join(dir, 'summary.md'), '# stale\n');
+  run(cwd, ['start']);
+  for (const f of ['STOP', 'PAUSE', 'SKIP', 'blocked.jsonl', 'summary.md']) {
+    assert.ok(!fs.existsSync(path.join(dir, f)), `${f} must not survive a fresh start (would corrupt the new run)`);
+  }
+});

From 7451d7991714bb2424c156d00a50c93ec52d1477 Mon Sep 17 00:00:00 2001
From: Allen Manzano-Wight <6640236+AllenBW@users.noreply.github.com>
Date: Tue, 16 Jun 2026 14:29:03 -0400
Subject: [PATCH 11/12] ci: run both modules' tests on push + PR (GitHub
 Actions)

---
 .github/workflows/test.yml | 23 +++++++++++++++++++++++
 1 file changed, 23 insertions(+)
 create mode 100644 .github/workflows/test.yml

diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
new file mode 100644
index 0000000..92dfa12
--- /dev/null
+++ b/.github/workflows/test.yml
@@ -0,0 +1,23 @@
+name: tests
+
+on:
+  push:
+    branches: [main]
+  pull_request:
+
+jobs:
+  test:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+      - uses: actions/setup-node@v4
+        with:
+          node-version: '20'
+      # Both modules are zero-dependency (Node built-in test runner), so there's
+      # nothing to install — just run each module's test script.
+      - name: shift tests
+        run: npm test
+        working-directory: shift
+      - name: code-status-bar tests
+        run: npm test
+        working-directory: code-status-bar

From 2521ff6104c02be68823b91309f594d1d433a795 Mon Sep 17 00:00:00 2001
From: Allen Manzano-Wight <6640236+AllenBW@users.noreply.github.com>
Date: Tue, 16 Jun 2026 16:49:29 -0400
Subject: [PATCH 12/12] shift: fill test-coverage gaps before merge (+18 tests,
 118 total)

Adversarial coverage audit found the CLI surface had zero integration coverage and
several agent-proof contracts were untested. Added:

- CLI (cli.test): status (plain/PAUSED/no-run), status --line (the finalize-suppression
  gate + color), history <runId> drill-down + branch-suffix + no-match, unknown-subcommand
  usage/exit, config shallow-merge, history-preserved-across-restart.
- Agent-proof contracts (hook.test): a planted repo-side .shift/state.json is ignored;
  config falls back to the repo copy when the engine snapshot is gone; per-bin tokens
  recover from the transcript window when state.bins was clobbered.
- watch-model: transcript-derived per-bin/run tokens, the current-bin open window
  (live runtime/tokens), finalized read from .shift/summary.md while state is out-of-repo.
- store.test (new): engineDir basename = first 16 hex chars of sha256(realpath), idempotent,
  sibling-collision-resistant, SHIFT_STATE_DIR/XDG base precedence.
- brief: per-git-flag forbid-guard combinations.
- Extracted moveSelection/clampSelection from cmdWatch into watch-model (pure, unit-tested).

118 shift tests, all green.
---
 shift/bin/shift                 |  8 +--
 shift/lib/watch-model.cjs       | 16 +++++-
 shift/test/brief.test.cjs       | 10 ++++
 shift/test/cli.test.cjs         | 89 +++++++++++++++++++++++++++++++++
 shift/test/hook.test.cjs        | 45 +++++++++++++++++
 shift/test/store.test.cjs       | 53 ++++++++++++++++++++
 shift/test/watch-model.test.cjs | 73 ++++++++++++++++++++++++++-
 7 files changed, 288 insertions(+), 6 deletions(-)
 create mode 100644 shift/test/store.test.cjs

diff --git a/shift/bin/shift b/shift/bin/shift
index e49c601..40d8b83 100755
--- a/shift/bin/shift
+++ b/shift/bin/shift
@@ -109,7 +109,7 @@ function cmdStatus(args) {
 // (a status bar) can't take input, so this is the interactive control surface.
 function cmdWatch() {
   const dir = path.join(process.cwd(), '.shift');
-  const { buildModel, renderFrame, renderDetail } = require('../lib/watch-model.cjs');
+  const { buildModel, renderFrame, renderDetail, moveSelection, clampSelection } = require('../lib/watch-model.cjs');
   const { setPause, isPaused, requestSkip, requestStop } = require('../lib/control.cjs');
   const out = process.stdout;
   const interactive = !!(process.stdin.isTTY && out.isTTY);
@@ -122,7 +122,7 @@ function cmdWatch() {
       model = buildModel({ dir, now: Date.now() });
       const n = (model.bins || []).length;
       if (selected < 0 && n) selected = Math.max(0, model.bins.findIndex(b => b.current));
-      if (selected >= n) selected = n - 1; // bins can change between draws — clamp
+      else selected = clampSelection(selected, n); // bins can change between draws — keep it valid
       const frame = (mode === 'detail' && selected >= 0)
         ? renderDetail(model, selected, { width: out.columns || 80, color: true })
         : renderFrame(model, { width: out.columns || 80, color: true, selectedIndex: selected });
@@ -153,9 +153,9 @@ function cmdWatch() {
     if (key === 'x' || key === '\x03') { // x / Ctrl-C
       cleanup(); out.write('\n[shift] watcher closed — the run keeps going.\n'); process.exit(0);
     } else if (key === '\x1b[A') {        // ↑ select up
-      if (n) selected = (selected <= 0 ? n : selected) - 1; draw();
+      selected = moveSelection(selected, n, 'up'); draw();
     } else if (key === '\x1b[B') {        // ↓ select down
-      if (n) selected = (selected + 1) % n; draw();
+      selected = moveSelection(selected, n, 'down'); draw();
     } else if (key === '\r' || key === '\n') { // ⏎ open detail
       if (selected >= 0) mode = 'detail'; draw();
     } else if (key === '\x1b') {          // esc back to list (lone Esc, not an arrow sequence)
diff --git a/shift/lib/watch-model.cjs b/shift/lib/watch-model.cjs
index aee1a30..a6b3fae 100644
--- a/shift/lib/watch-model.cjs
+++ b/shift/lib/watch-model.cjs
@@ -235,4 +235,18 @@ function renderLine(model, opts = {}) {
   return `${flag} shift ${c(ANSI.bold, model.counts.done + '/' + model.counts.total)} ${c(ANSI.dim, model.elapsedMin + 'm')} ${c(ANSI.dim, '↑' + fmtTok(model.outputTokens))}${needs}`;
 }
 
-module.exports = { buildModel, renderFrame, renderDetail, renderHistory, renderLine, fmtDur, fmtTok };
+// Pure selection arithmetic for the `shift watch` TUI (n = bin count). Extracted so the
+// off-by-one-prone wrap/clamp cases are unit-testable without a TTY.
+function moveSelection(sel, n, dir) {
+  if (n <= 0) return -1;                             // no bins → -1 (nothing selected)
+  if (dir === 'up') return (sel <= 0 ? n : sel) - 1; // step back; from 0 or a negative sel, wrap to the last bin
+  if (dir === 'down') return (sel + 1) % n;          // step forward; from the last bin, wrap to the first
+  return sel;                                        // any other dir leaves the selection unchanged
+}
+function clampSelection(sel, n) { // keep a selection valid when the bin list grows/shrinks
+  if (n <= 0) return -1;          // empty list → -1 (nothing selectable)
+  if (sel < 0) return 0;          // negative selection with bins present → first bin
+  return sel >= n ? n - 1 : sel;  // past the end → last bin; otherwise unchanged
+}
+
+module.exports = { buildModel, renderFrame, renderDetail, renderHistory, renderLine, fmtDur, fmtTok, moveSelection, clampSelection };
diff --git a/shift/test/brief.test.cjs b/shift/test/brief.test.cjs
index d86f6e4..47ef892 100644
--- a/shift/test/brief.test.cjs
+++ b/shift/test/brief.test.cjs
@@ -30,6 +30,16 @@ test('always explains decision logging, the Needs-you convention, and blocker fl
   assert.match(out, /blocked\.jsonl/);
 });
 
+test('the forbid-guard reflects each git flag combination independently', () => {
+  const pushOnly = renderBrief(bin, { git: { allowPush: false, allowOutwardActions: true } });
+  assert.match(pushOnly, /Do NOT push to any remote/);
+  assert.doesNotMatch(pushOnly, /publish, send to external/);
+
+  const outwardOnly = renderBrief(bin, { git: { allowPush: true, allowOutwardActions: false } });
+  assert.match(outwardOnly, /Do NOT publish, send to external/);
+  assert.doesNotMatch(outwardOnly, /push to any remote/);
+});
+
 test('guards .shift/ bookkeeping: append-only, never edit config.json (so the hook owns the run record)', () => {
   const out = renderBrief(bin, { git: {} });
   // state.json now lives out-of-repo (engineDir, store.cjs) and is beyond the agent's reach,
diff --git a/shift/test/cli.test.cjs b/shift/test/cli.test.cjs
index 35fe54f..04acb79 100644
--- a/shift/test/cli.test.cjs
+++ b/shift/test/cli.test.cjs
@@ -68,3 +68,92 @@ test('a second `shift start` scrubs stale control/blocker signals from the prior
     assert.ok(!fs.existsSync(path.join(dir, f)), `${f} must not survive a fresh start (would corrupt the new run)`);
   }
 });
+
+const { appendRecord } = require('../lib/history.cjs');
+
+function runSafe(cwd, args) { // capture output + exit code even on non-zero exit
+  try { return { out: run(cwd, args), code: 0 }; }
+  catch (e) { return { out: (e.stdout || '') + (e.stderr || ''), code: e.status }; }
+}
+
+test('status (plain) shows the run + bins lines, a PAUSED suffix, and a no-run message', () => {
+  const cwd = repoWithQueue();
+  run(cwd, ['start']);
+  const out = run(cwd, ['status']);
+  assert.match(out, /run .* · branch shift\/.* · iter 0/);
+  assert.match(out, /bins: .*done.*blocked.*skipped.*pending.*\(\dm\)/);
+  fs.writeFileSync(path.join(cwd, '.shift', 'PAUSE'), '');
+  assert.match(run(cwd, ['status']), /· PAUSED/);
+  const fresh = fs.mkdtempSync(path.join(os.tmpdir(), 'shift-norun-'));
+  assert.match(run(fresh, ['status']), /No active shift run here/);
+});
+
+test('status --line prints a line while running and suppresses it once finalized', () => {
+  const cwd = repoWithQueue();
+  run(cwd, ['start']);
+  const line = run(cwd, ['status', '--line']);
+  assert.match(line, /⚙ shift/);
+  assert.match(line, /\x1b\[/, 'default is colored');
+  const plain = run(cwd, ['status', '--line', '--no-color']);
+  assert.match(plain, /⚙ shift \d+\/\d+/);
+  assert.doesNotMatch(plain, /\x1b\[/, '--no-color strips ANSI');
+  fs.writeFileSync(path.join(cwd, '.shift', 'summary.md'), '# done\n'); // finalize
+  assert.equal(run(cwd, ['status', '--line']).trim(), '', 'status-bar line vanishes once finalized');
+});
+
+test('history <runId> drills into one run; a branch suffix resolves; unknown -> message', () => {
+  const cwd = repoWithQueue();
+  run(cwd, ['start']);
+  const edir = engineDir(cwd);
+  const rec = (runId, branch, perBin) => ({
+    runId, branch, startedAt: '2026-06-16T00:00:00Z', endedAt: '2026-06-16T00:10:00Z',
+    durationMs: 600000, iterations: 2, endReason: 'queue empty',
+    bins: { total: 2, done: 1, skipped: 1, blocked: 0 }, tokens: { output: 1000, total: 5000 }, perBin
+  });
+  appendRecord(edir, rec('R1', 'shift/alpha', [
+    { id: 'queue/01.md', status: 'done', durationMs: 60000, tokensOutput: 500, commit: 'abc1234def' },
+    { id: 'queue/02.md', status: 'skipped', durationMs: null, tokensOutput: null, commit: null }
+  ]));
+  appendRecord(edir, rec('R2', 'shift/beta', [
+    { id: 'queue/01.md', status: 'blocked', durationMs: 1000, tokensOutput: 9, commit: null }
+  ]));
+
+  const r1 = run(cwd, ['history', 'R1']);
+  assert.match(r1, /run R1 · shift\/alpha/);
+  assert.match(r1, /✓ queue\/01\.md/);   // done glyph
+  assert.match(r1, /⤫ queue\/02\.md/);   // skipped glyph
+  assert.match(r1, /abc1234/);           // commit short sha
+  assert.match(run(cwd, ['history', 'beta']), /✗ queue\/01\.md/); // branch-suffix → R2's blocked bin
+  assert.match(run(cwd, ['history', 'does-not-exist']), /No recorded run matching/);
+});
+
+test('unknown subcommand prints usage and exits non-zero', () => {
+  const r = runSafe(repoWithQueue(), ['bogus']);
+  assert.equal(r.code, 1);
+  assert.match(r.out, /usage: shift <start\|run\|watch\|history\|status\|stop>/);
+});
+
+test('start shallow-merges a partial .shift/config.json over the defaults', () => {
+  const cwd = repoWithQueue();
+  fs.mkdirSync(path.join(cwd, '.shift'), { recursive: true });
+  fs.writeFileSync(path.join(cwd, '.shift', 'config.json'),
+    JSON.stringify({ definitionOfDone: 'custom DoD', git: { branch: 'shift/custom' } }));
+  run(cwd, ['start']);
+  const repoCfg = JSON.parse(fs.readFileSync(path.join(cwd, '.shift', 'config.json'), 'utf8'));
+  const snapCfg = JSON.parse(fs.readFileSync(path.join(engineDir(cwd), 'config.json'), 'utf8'));
+  assert.equal(repoCfg.definitionOfDone, 'custom DoD');  // user override wins
+  assert.equal(repoCfg.permissionMode, 'acceptEdits');   // unspecified default survives
+  assert.equal(repoCfg.git.branch, 'shift/custom');       // shallow merge: user git object replaces default git
+  assert.deepEqual(repoCfg, snapCfg);                     // repo copy + engine snapshot are identical
+});
+
+test('a second `shift start` preserves the work record while resetting run state', () => {
+  const cwd = repoWithQueue();
+  run(cwd, ['start']);
+  const edir = engineDir(cwd);
+  fs.appendFileSync(path.join(edir, 'history.jsonl'), JSON.stringify({ runId: 'PRIOR', bins: {} }) + '\n');
+  fs.writeFileSync(path.join(edir, 'usage.json'), '{"weeklyPercent":50}');
+  run(cwd, ['start']);
+  assert.match(fs.readFileSync(path.join(edir, 'history.jsonl'), 'utf8'), /PRIOR/, 'history is append-only across runs');
+  assert.ok(!fs.existsSync(path.join(edir, 'usage.json')), 'stale usage is cleared on a fresh start');
+});
diff --git a/shift/test/hook.test.cjs b/shift/test/hook.test.cjs
index c6c7dee..5b613e6 100644
--- a/shift/test/hook.test.cjs
+++ b/shift/test/hook.test.cjs
@@ -198,6 +198,51 @@ test('history is append-only across runs and not duplicated by a stray extra sto
   assert.equal(hist.length, 1, 'no duplicate history record from a repeated finalize');
 });
 
+test('a planted repo-side .shift/state.json is ignored — the engine drives from out-of-repo state', () => {
+  const { cwd, dir } = setupRun();
+  // a confused/hostile agent writes a repo-side state.json claiming everything is done
+  fs.writeFileSync(path.join(dir, 'state.json'), JSON.stringify({
+    runId: 'r', startedAt: new Date().toISOString(), iterations: 9, branch: 'shift/x',
+    currentBinId: null, bins: [{ id: 'queue/01.md', status: 'done' }, { id: 'queue/02.md', status: 'done' }]
+  }));
+  const r = runHook(cwd, { stop_hook_active: false });
+  assert.equal(r.decision, 'block');     // still blocks bin 1 from the real (engine-dir) state
+  assert.match(r.reason, /bin one/);
+});
+
+test('config falls back to the repo .shift/config.json when the engine snapshot is absent', () => {
+  const { cwd, dir, edir } = setupRun();
+  fs.unlinkSync(path.join(edir, 'config.json')); // no engine snapshot → must fall back to repo copy
+  fs.writeFileSync(path.join(dir, 'config.json'), JSON.stringify({
+    sources: [{ path: 'queue', kind: 'briefs' }], bounds: { maxHours: 24, maxIterations: 10 },
+    definitionOfDone: 'done', git: {}
+  }));
+  const r = runHook(cwd, { stop_hook_active: false });
+  assert.equal(r.decision, 'block');
+  assert.match(r.reason, /bin one/);
+});
+
+test('history per-bin tokens fall back to the transcript window when state.bins was clobbered', () => {
+  const { cwd, dir, edir } = setupRun();
+  const tpath = path.join(dir, 'transcript.jsonl');
+  const asst = (ts, out) => JSON.stringify({ type: 'assistant', timestamp: ts, message: { usage: { output_tokens: out, input_tokens: 1 } } });
+
+  runHook(cwd, { stop_hook_active: false, transcript_path: tpath }); // start bin 1
+  const started = readState(edir).bins.find(b => b.id === 'queue/01.md').startedAt;
+  fs.writeFileSync(tpath, asst(started, 700) + '\n');
+  runHook(cwd, { stop_hook_active: true, transcript_path: tpath });  // finish bin 1 (tokens=700), start bin 2
+
+  // simulate the agent clobbering state: strip every bin's recorded tokens
+  const s = readState(edir);
+  s.bins = s.bins.map(({ tokens, ...rest }) => rest);
+  fs.writeFileSync(path.join(edir, 'state.json'), JSON.stringify(s));
+
+  runHook(cwd, { stop_hook_active: true, transcript_path: tpath });  // finish bin 2, drain -> finalize
+  const hist = fs.readFileSync(path.join(edir, 'history.jsonl'), 'utf8').trim().split('\n').map(JSON.parse);
+  const b1 = hist[0].perBin.find(p => p.id === 'queue/01.md');
+  assert.equal(b1.tokensOutput, 700, 'recovered from the timeline window + transcript, not from state.bins');
+});
+
 // ---- v2: usage cap + cache ----
 
 test('usage cap from the hook payload ends the run and caches usage', () => {
diff --git a/shift/test/store.test.cjs b/shift/test/store.test.cjs
new file mode 100644
index 0000000..557fe4c
--- /dev/null
+++ b/shift/test/store.test.cjs
@@ -0,0 +1,53 @@
+'use strict';
+const { test } = require('node:test');
+const assert = require('node:assert');
+const fs = require('node:fs');
+const os = require('node:os');
+const path = require('node:path');
+const crypto = require('node:crypto');
+
+const BASE = fs.mkdtempSync(path.join(os.tmpdir(), 'shift-store-'));
+process.env.SHIFT_STATE_DIR = BASE;
+const { engineDir } = require('../lib/store.cjs');
+
+function repo() { return fs.mkdtempSync(path.join(os.tmpdir(), 'shift-storerepo-')); }
+
+test('engineDir basename is the 16-hex sha256 of the realpath, under the state base', () => {
+  const c = repo();
+  const d = engineDir(c);
+  assert.equal(path.dirname(d), BASE);
+  const expected = crypto.createHash('sha256').update(fs.realpathSync(c)).digest('hex').slice(0, 16);
+  assert.equal(path.basename(d), expected);
+  assert.match(path.basename(d), /^[0-9a-f]{16}$/);
+});
+
+test('engineDir is idempotent and lives outside the repo', () => {
+  const c = repo();
+  assert.equal(engineDir(c), engineDir(c));
+  assert.ok(!engineDir(c).startsWith(path.resolve(c)), 'not inside the working repo');
+});
+
+test('sibling repos sharing a basename get distinct engine dirs (full-path hash, no prefix collision)', () => {
+  const parentA = fs.mkdtempSync(path.join(os.tmpdir(), 'shift-collide-aaaa-'));
+  const parentB = fs.mkdtempSync(path.join(os.tmpdir(), 'shift-collide-bbbb-'));
+  fs.mkdirSync(path.join(parentA, 'repo')); fs.mkdirSync(path.join(parentB, 'repo'));
+  assert.notEqual(engineDir(path.join(parentA, 'repo')), engineDir(path.join(parentB, 'repo')));
+});
+
+test('base resolution: SHIFT_STATE_DIR wins; else XDG_STATE_HOME/shift; (homedir/.local/state/shift is the documented default)', () => {
+  const c = repo();
+  // SHIFT_STATE_DIR (set above) takes precedence
+  assert.ok(engineDir(c).startsWith(BASE));
+  // when SHIFT_STATE_DIR is unset, XDG_STATE_HOME is used
+  const savedShift = process.env.SHIFT_STATE_DIR;
+  const savedXdg = process.env.XDG_STATE_HOME;
+  const xdg = fs.mkdtempSync(path.join(os.tmpdir(), 'shift-xdg-'));
+  try {
+    delete process.env.SHIFT_STATE_DIR;
+    process.env.XDG_STATE_HOME = xdg;
+    assert.ok(engineDir(c).startsWith(path.join(xdg, 'shift') + path.sep), 'XDG_STATE_HOME/shift base');
+  } finally {
+    process.env.SHIFT_STATE_DIR = savedShift;
+    if (savedXdg === undefined) delete process.env.XDG_STATE_HOME; else process.env.XDG_STATE_HOME = savedXdg;
+  }
+});
diff --git a/shift/test/watch-model.test.cjs b/shift/test/watch-model.test.cjs
index 7409e7f..2344a64 100644
--- a/shift/test/watch-model.test.cjs
+++ b/shift/test/watch-model.test.cjs
@@ -6,9 +6,20 @@ const os = require('node:os');
 const path = require('node:path');
 // Out-of-repo timeline base → tmp (fixtures have no timeline → per-bin falls back to state.bins).
 process.env.SHIFT_STATE_DIR = fs.mkdtempSync(path.join(os.tmpdir(), 'shift-wmbase-'));
-const { buildModel, renderFrame, renderDetail, renderHistory } = require('../lib/watch-model.cjs');
+const { buildModel, renderFrame, renderDetail, renderHistory, renderLine, moveSelection, clampSelection } = require('../lib/watch-model.cjs');
 const { aggregate } = require('../lib/history.cjs');
 const { engineDir } = require('../lib/store.cjs');
+const { appendEvent } = require('../lib/timeline.cjs');
+
+// A bare run: state in the engine dir, .shift/ for log; caller adds timeline/transcript.
+function bareRun(state) {
+  const cwd = fs.mkdtempSync(path.join(os.tmpdir(), 'shift-wm2-'));
+  const dir = path.join(cwd, '.shift');
+  fs.mkdirSync(dir, { recursive: true });
+  fs.writeFileSync(path.join(engineDir(cwd), 'state.json'), JSON.stringify(state));
+  fs.writeFileSync(path.join(dir, 'log.md'), '# log\n');
+  return { cwd, dir };
+}
 
 function fixture({ paused = false, currentBinId = 'queue/03-build.md' } = {}) {
   const cwd = fs.mkdtempSync(path.join(os.tmpdir(), 'shift-watch-'));
@@ -120,3 +131,63 @@ test('renderHistory shows per-run rows and a totals footer', () => {
 test('renderHistory with no records is a friendly message', () => {
   assert.match(renderHistory([], aggregate([]), { color: false }), /No shift runs recorded/i);
 });
+
+test('buildModel derives per-bin + run tokens from the transcript when state has none', () => {
+  const { cwd, dir } = bareRun({
+    runId: 'r', startedAt: '2026-06-15T23:59:00.000Z', iterations: 1, branch: 'shift/x',
+    currentBinId: null, transcriptPath: path.join(/* set below */ os.tmpdir(), 'x'),
+    bins: [{ id: 'queue/01.md', status: 'done' }] // no tokens / no durationMs in state
+  });
+  const tpath = path.join(dir, 'transcript.jsonl');
+  fs.writeFileSync(tpath, JSON.stringify({ type: 'assistant', timestamp: '2026-06-16T00:01:00.000Z', message: { usage: { output_tokens: 4200, input_tokens: 10, cache_read_input_tokens: 5 } } }) + '\n');
+  // point state at the real transcript + lay down the timeline boundaries (keyed by cwd)
+  const sp = path.join(engineDir(cwd), 'state.json');
+  const s = JSON.parse(fs.readFileSync(sp, 'utf8')); s.transcriptPath = tpath; fs.writeFileSync(sp, JSON.stringify(s));
+  appendEvent(cwd, { t: '2026-06-16T00:00:00.000Z', event: 'start', id: 'queue/01.md' });
+  appendEvent(cwd, { t: '2026-06-16T00:02:00.000Z', event: 'finish', id: 'queue/01.md' });
+
+  const m = buildModel({ dir, now: Date.parse('2026-06-16T00:05:00.000Z') });
+  const b = m.bins.find(x => x.id === 'queue/01.md');
+  assert.equal(b.tokensOutput, 4200, 'per-bin tokens from the transcript window [start, finish)');
+  assert.equal(b.durationMs, 120000, 'runtime from the timeline window (2m)');
+  assert.equal(m.outputTokens, 4200, 'run output tokens from the transcript over [run start, now)');
+});
+
+test('buildModel gives the CURRENT bin an open window (start..now) for live runtime + tokens', () => {
+  const { cwd, dir } = bareRun({
+    runId: 'r', startedAt: '2026-06-16T00:00:00.000Z', iterations: 1, branch: 'shift/x',
+    currentBinId: 'queue/01.md', bins: [{ id: 'queue/01.md', status: 'pending' }]
+  });
+  const tpath = path.join(dir, 'transcript.jsonl');
+  fs.writeFileSync(tpath, JSON.stringify({ type: 'assistant', timestamp: '2026-06-16T00:03:00.000Z', message: { usage: { output_tokens: 900, input_tokens: 1 } } }) + '\n');
+  const sp = path.join(engineDir(cwd), 'state.json');
+  const s = JSON.parse(fs.readFileSync(sp, 'utf8')); s.transcriptPath = tpath; fs.writeFileSync(sp, JSON.stringify(s));
+  appendEvent(cwd, { t: '2026-06-16T00:00:00.000Z', event: 'start', id: 'queue/01.md' }); // started, not finished
+
+  const m = buildModel({ dir, now: Date.parse('2026-06-16T00:05:00.000Z') });
+  const b = m.bins.find(x => x.id === 'queue/01.md');
+  assert.equal(b.current, true);
+  assert.equal(b.durationMs, 300000, 'open window start..now = 5m');
+  assert.equal(b.tokensOutput, 900, 'tokens summed up to now (open window)');
+});
+
+test('buildModel reads finalized from .shift/summary.md while state lives out-of-repo', () => {
+  const dir = fixture();
+  fs.writeFileSync(path.join(dir, 'summary.md'), '# done\n');
+  const m = buildModel({ dir, now: Date.now() });
+  assert.equal(m.finalized, true);
+  assert.match(renderFrame(m, { color: false }), /finalized/);
+  assert.ok(renderLine(m, { color: false }).startsWith('●'));
+});
+
+test('moveSelection wraps; clampSelection keeps a selection valid as the list changes', () => {
+  assert.equal(moveSelection(0, 5, 'up'), 4);    // wrap to last
+  assert.equal(moveSelection(4, 5, 'down'), 0);  // wrap to first
+  assert.equal(moveSelection(2, 5, 'up'), 1);
+  assert.equal(moveSelection(2, 5, 'down'), 3);
+  assert.equal(moveSelection(0, 0, 'down'), -1); // no bins
+  assert.equal(clampSelection(4, 3), 2);         // list shrank
+  assert.equal(clampSelection(-1, 3), 0);
+  assert.equal(clampSelection(1, 3), 1);
+  assert.equal(clampSelection(2, 0), -1);        // empty
+});