From 6578288b9957f6b5475604c0f3e948c75e477b22 Mon Sep 17 00:00:00 2001 From: Allen Manzano-Wight <6640236+AllenBW@users.noreply.github.com> Date: Sat, 13 Jun 2026 20:37:00 -0400 Subject: [PATCH 01/12] code-status-bar: add usage-bar tests + harden installer (review #1, #4) --- code-status-bar/install.sh | 41 ++++++++++++-- code-status-bar/package.json | 8 +++ code-status-bar/test/usage-bar.test.cjs | 73 +++++++++++++++++++++++++ 3 files changed, 116 insertions(+), 6 deletions(-) create mode 100644 code-status-bar/package.json create mode 100644 code-status-bar/test/usage-bar.test.cjs diff --git a/code-status-bar/install.sh b/code-status-bar/install.sh index 12e2b87..b023a2d 100755 --- a/code-status-bar/install.sh +++ b/code-status-bar/install.sh @@ -12,17 +12,47 @@ DEST="$CONFIG_DIR/settings.json" COLORED=0 [ "${1:-}" = "--colored" ] && COLORED=1 +if [ "$COLORED" -eq 1 ] && ! command -v node >/dev/null 2>&1; then + echo "Error: --colored needs Node on your PATH (the helper runs via node)." >&2 + echo "Install Node, or use the default (no-flag) config." >&2 + exit 1 +fi + mkdir -p "$CONFIG_DIR" + +# Only treat the script's directory as a real clone if it actually contains this +# module (both files present). When run via `curl | bash`, BASH_SOURCE is unset and +# this stays 0, so we always download instead of copying a stray local file. SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]:-$0}")" 2>/dev/null && pwd || echo "")" +LOCAL_OK=0 +if [ -n "$SCRIPT_DIR" ] && [ -f "$SCRIPT_DIR/install.sh" ] && [ -f "$SCRIPT_DIR/settings.json" ]; then + LOCAL_OK=1 +fi -# fetch — prefer a local clone, fall back to download. +# fetch : download (or copy from a verified clone) to a +# temp file, validate, then move into place — so a failed fetch never leaves a broken +# or empty config at the destination. 
fetch() { - local rel="$1" out="$2" - if [ -n "$SCRIPT_DIR" ] && [ -f "$SCRIPT_DIR/$rel" ]; then - cp "$SCRIPT_DIR/$rel" "$out" + local rel="$1" out="$2" tmp + tmp="$(mktemp)" + if [ "$LOCAL_OK" -eq 1 ] && [ -f "$SCRIPT_DIR/$rel" ]; then + cp "$SCRIPT_DIR/$rel" "$tmp" else - curl -fsSL "$REPO_RAW/$rel" -o "$out" + curl -fsSL "$REPO_RAW/$rel" -o "$tmp" + fi + if [ ! -s "$tmp" ]; then + echo "Error: fetched '$rel' is empty; aborting (your existing config is untouched)." >&2 + rm -f "$tmp"; exit 1 fi + case "$rel" in + *.json) + if command -v node >/dev/null 2>&1; then + node -e 'JSON.parse(require("fs").readFileSync(process.argv[1],"utf8"))' "$tmp" 2>/dev/null \ + || { echo "Error: fetched '$rel' is not valid JSON; aborting." >&2; rm -f "$tmp"; exit 1; } + fi + ;; + esac + mv "$tmp" "$out" } if [ -f "$DEST" ]; then @@ -37,7 +67,6 @@ if [ "$COLORED" -eq 1 ]; then fetch "settings.colored.json" "$DEST" echo "Installed COLORED variant -> $DEST" echo "Helper script -> $SCRIPTS_DIR/usage-bar.cjs" - echo "(needs Node on your PATH at render time — ccstatusline already provides it)" else fetch "settings.json" "$DEST" echo "Installed -> $DEST" diff --git a/code-status-bar/package.json b/code-status-bar/package.json new file mode 100644 index 0000000..8df5cd1 --- /dev/null +++ b/code-status-bar/package.json @@ -0,0 +1,8 @@ +{ + "name": "code-status-bar", + "version": "0.1.0", + "private": true, + "description": "Usage-limit-aware Claude Code status bar (Agentic Workflow Toolkit module 1)", + "engines": { "node": ">=18" }, + "scripts": { "test": "node --test" } +} diff --git a/code-status-bar/test/usage-bar.test.cjs b/code-status-bar/test/usage-bar.test.cjs new file mode 100644 index 0000000..40e7284 --- /dev/null +++ b/code-status-bar/test/usage-bar.test.cjs @@ -0,0 +1,73 @@ +'use strict'; +const { test } = require('node:test'); +const assert = require('node:assert'); +const cp = require('node:child_process'); +const path = require('node:path'); + +const SCRIPT = 
path.resolve(__dirname, '..', 'scripts', 'usage-bar.cjs'); +const ESC = '\x1b'; +const YELLOW = `${ESC}[38;2;252;233;79m`; +const GREEN = `${ESC}[38;2;138;226;52m`; +const RED = `${ESC}[38;2;239;41;41m`; + +function run(args, payload) { + return cp.execFileSync('node', [SCRIPT, ...args], { + input: payload === undefined ? '' : JSON.stringify(payload), + encoding: 'utf8' + }); +} + +function rl(over) { + const now = Math.floor(Date.now() / 1000); + return { + rate_limits: Object.assign({ + five_hour: { used_percentage: 72, resets_at: now + 7200 }, + seven_day: { used_percentage: 41, resets_at: now + 432000 }, + seven_day_opus: { used_percentage: 88, resets_at: now + 432000 } + }, over || {}) + }; +} + +test('session at 72% renders bold yellow with label and percent', () => { + const out = run(['session'], rl()); + assert.ok(out.includes(YELLOW), 'expected yellow'); + assert.ok(out.includes('Session: '), 'expected label'); + assert.ok(out.includes('72.0%'), 'expected percent'); + assert.ok(out.startsWith(`${ESC}[1m`), 'expected bold prefix'); +}); + +test('weekly at 41% is green, opus at 88% is red', () => { + assert.ok(run(['weekly'], rl()).includes(GREEN)); + assert.ok(run(['opus'], rl()).includes(RED)); +}); + +test('multiple limits are joined with a separator', () => { + const out = run(['weekly', 'opus'], rl()); + assert.ok(out.includes('Weekly: ')); + assert.ok(out.includes('Weekly Opus: ')); + assert.ok(out.includes(' | ')); +}); + +test('absent data renders nothing so the widget collapses', () => { + assert.equal(run(['session'], {}), ''); + assert.equal(run(['session']), ''); + assert.equal(run(['session'], rl({ five_hour: undefined })), ''); +}); + +test('non-numeric percentage renders nothing', () => { + assert.equal(run(['session'], rl({ five_hour: { used_percentage: 'oops', resets_at: 0 } })), ''); +}); + +test('thresholds: 50 -> yellow, just under -> green; 85 -> red, just under -> yellow', () => { + const at = (p) => run(['session'], rl({ + five_hour: 
{ used_percentage: p, resets_at: Math.floor(Date.now() / 1000) + 1 } + })); + assert.ok(at(50).includes(YELLOW), '50 should be yellow'); + assert.ok(at(49.9).includes(GREEN), '49.9 should be green'); + assert.ok(at(85).includes(RED), '85 should be red'); + assert.ok(at(84.9).includes(YELLOW), '84.9 should be yellow'); +}); + +test('unknown limit name renders nothing', () => { + assert.equal(run(['bogus'], rl()), ''); +}); From bebeeb2d09d2fcff1e49e894c11363945fd46b37 Mon Sep 17 00:00:00 2001 From: Allen Manzano-Wight <6640236+AllenBW@users.noreply.github.com> Date: Sat, 13 Jun 2026 20:37:00 -0400 Subject: [PATCH 02/12] shift: implement v1 keep-going engine with tests (module 2) --- .gitignore | 1 + shift/PLAN.md | 14 +++++ shift/README.md | 72 +++++++++++++++++++++++ shift/bin/shift | 88 ++++++++++++++++++++++++++++ shift/examples/queue/00-hello.md | 6 ++ shift/hooks/shift-stop.cjs | 98 ++++++++++++++++++++++++++++++++ shift/lib/bounds.cjs | 17 ++++++ shift/lib/brief.cjs | 27 +++++++++ shift/lib/decision.cjs | 18 ++++++ shift/lib/discovery.cjs | 35 ++++++++++++ shift/lib/state.cjs | 42 ++++++++++++++ shift/package.json | 9 +++ shift/test/bounds.test.cjs | 27 +++++++++ shift/test/brief.test.cjs | 31 ++++++++++ shift/test/cli.test.cjs | 49 ++++++++++++++++ shift/test/decision.test.cjs | 38 +++++++++++++ shift/test/discovery.test.cjs | 37 ++++++++++++ shift/test/hook.test.cjs | 95 +++++++++++++++++++++++++++++++ shift/test/state.test.cjs | 49 ++++++++++++++++ 19 files changed, 753 insertions(+) create mode 100644 shift/README.md create mode 100755 shift/bin/shift create mode 100644 shift/examples/queue/00-hello.md create mode 100755 shift/hooks/shift-stop.cjs create mode 100644 shift/lib/bounds.cjs create mode 100644 shift/lib/brief.cjs create mode 100644 shift/lib/decision.cjs create mode 100644 shift/lib/discovery.cjs create mode 100644 shift/lib/state.cjs create mode 100644 shift/package.json create mode 100644 shift/test/bounds.test.cjs create mode 100644 
shift/test/brief.test.cjs create mode 100644 shift/test/cli.test.cjs create mode 100644 shift/test/decision.test.cjs create mode 100644 shift/test/discovery.test.cjs create mode 100644 shift/test/hook.test.cjs create mode 100644 shift/test/state.test.cjs diff --git a/.gitignore b/.gitignore index e0a2660..26ec0e5 100644 --- a/.gitignore +++ b/.gitignore @@ -2,3 +2,4 @@ *.bak *.bak-* node_modules/ +.shift/ diff --git a/shift/PLAN.md b/shift/PLAN.md index 5eddb56..d9f79bf 100644 --- a/shift/PLAN.md +++ b/shift/PLAN.md @@ -1018,3 +1018,17 @@ Re-run with `maxIterations: 1`; confirm the run ends on "max iterations" with pe - **Testing strategy (unit pure modules + integration hook/CLI + manual smoke + dry-run):** Tasks 1–7, 9. ✔ - **No third-party deps:** all `node:` built-ins. ✔ - **Known gaps (deferred, documented):** usage-cap data source and rate-limit termination signature → v2 (SPEC §9). Mid-bin early-stop accepted in v1, reviewer-caught; verify pass → v3. + +--- + +## Implementation notes (as-built deviations) + +Built on branch `shift-v1`. The draft code blocks above are the design intent; these corrections were applied during implementation: + +- **`state.cjs` — carry `text` through the merge, strip it on save.** `mergeDiscovered` copies each bin's freshly-read `text` into the in-memory bin (the brief needs the body); `saveState` strips `text` before writing so `state.json` stays lean. Without this the fed-back brief had the instructions but not the task body — caught by the hook integration test, not the unit tests. +- **Review fix #2 — `shift-stop.cjs` resolves the repo from the hook payload's `cwd`** (`input.cwd || process.cwd()`); a hook's process cwd isn't guaranteed to be the project root. Has a dedicated test. +- **Review fix #3 — summary surfaces logged `Needs you:` lines**, not just blocked bins; `brief.cjs` documents the `Needs you: ` convention. Has a test. 
+- **Security — `bin/shift` uses `execFileSync('git', [...args])`** (argument array, no shell) for branch ops, so a config-supplied branch name can't inject shell metacharacters; added `git checkout` fallbacks for Git < 2.23. +- **`package.json` — `"test": "node --test"`** (Node ≥18 auto-discovery; a bare `test/` arg isn't accepted) and `"engines": { "node": ">=18" }`. + +All 28 `shift` tests + 7 `code-status-bar` tests pass; `install.sh` verified end-to-end. diff --git a/shift/README.md b/shift/README.md new file mode 100644 index 0000000..5ebe712 --- /dev/null +++ b/shift/README.md @@ -0,0 +1,72 @@ +# shift + +Autonomous work-queue runner for **Claude Code** — module 2 of the [Agentic Workflow Toolkit](../). Pre-load bins of work, leave, and `shift` keeps Claude working through them past natural stop points, using its best judgment, until the queue is empty or a bound is hit. You review the output at the end. + +> **This is v1** — the intra-session engine (a Stop hook). It keeps a *running* session grinding the queue, bounded by a time box + max iterations. Surviving the 5-hour rate-limit wall (auto-resume) and a usage cap are **v2**. See [SPEC.md](./SPEC.md) and [PLAN.md](./PLAN.md). + +## How it works + +You drop work into source folders (hand-written briefs and/or plugin-generated plans). `shift start` discovers them, records a run in `.shift/`, and creates a `shift/<date>` branch. You open Claude Code and say "begin the shift." From then on, a **Stop hook** runs each time the agent would stop: it marks the finished bin done, picks the next pending bin, and feeds it back as the next instruction — so the agent keeps going. When the queue drains (or a bound trips, or you hit the kill switch), it lets the session stop and writes `.shift/summary.md`. + +The hook is safe to register globally: it no-ops in any repo that isn't an active `shift` run. + +## Safety model + +Full best-judgment autonomy on reversible, in-worktree work. 
By default it will **not** push, publish, send externally, or delete outside the worktree — it does the preparable part and records a `Needs you:` line instead, which the summary collects. All work lands on the `shift/<date>` branch, so review is a clean diff. Every decision is logged. Hard stops: time box, max iterations, and a kill switch (`shift stop`). + +## Install + +1. Get the files (clone the toolkit, or copy the `shift/` folder). +2. Register the Stop hook **once** in `~/.claude/settings.json`: + +```json +{ + "hooks": { + "Stop": [ + { "matcher": "", "hooks": [ + { "type": "command", "command": "node /ABSOLUTE/PATH/TO/shift/hooks/shift-stop.cjs" } + ] } + ] + } +} +``` + +> Verify the exact hook schema against the current Claude Code hooks docs. The engine only needs "block + feed `reason` back" and the `stop_hook_active` re-entry flag, and it resolves the repo from the hook payload's `cwd`. + +3. (Optional) put `shift/bin/shift` on your PATH. + +## Use + +```bash +cd your-repo +mkdir queue && $EDITOR queue/01-first-task.md # one brief per file +shift start --dry-run # preview the queue, branch, bounds +shift start # init run + create shift/<date> branch +# open Claude Code here and say: "begin the shift" +shift status # check progress anytime +shift stop # stop cleanly after the current bin +``` + +Point at plan folders too (e.g. Superpowers output) by editing `.shift/config.json`: + +```json +{ + "sources": [ + { "path": "queue", "kind": "briefs" }, + { "path": "docs/superpowers/plans", "kind": "plans" } + ], + "bounds": { "maxHours": 4, "maxIterations": 30 }, + "definitionOfDone": "Builds and tests pass; work committed on the run branch.", + "git": { "branch": "shift/{date}", "allowPush": false, "allowOutwardActions": false } +} +``` + +When the run ends, read `.shift/summary.md` (it lists bins done/blocked and a "Needs you" section), then review the `shift/<date>` branch. 
+ +## Develop + +```bash +cd shift && npm test # node --test, no dependencies +``` + +Pure logic lives in `lib/` (discovery, state, bounds, brief, decision) and is unit-tested; `hooks/shift-stop.cjs` is the thin I/O shell, integration-tested by driving it with crafted hook input. diff --git a/shift/bin/shift b/shift/bin/shift new file mode 100755 index 0000000..6d6f1d4 --- /dev/null +++ b/shift/bin/shift @@ -0,0 +1,88 @@ +#!/usr/bin/env node +'use strict'; +const fs = require('node:fs'); +const path = require('node:path'); +const cp = require('node:child_process'); +const { discoverBins } = require('../lib/discovery.cjs'); +const { initState, saveState, loadState, mergeDiscovered } = require('../lib/state.cjs'); + +function isoStamp(d) { return d.toISOString().replace(/[:.]/g, '-').slice(0, 19); } +function dateStr(d) { return d.toISOString().slice(0, 10); } + +const DEFAULT_CONFIG = { + sources: [{ path: 'queue', kind: 'briefs' }], + bounds: { maxHours: 2, maxIterations: 20 }, + definitionOfDone: 'Builds and tests pass; work committed on the run branch.', + git: { branch: 'shift/{date}', allowPush: false, allowOutwardActions: false } +}; + +function ensureBranch(cwd, branch) { + // execFileSync with an argument array — no shell, so a branch name from config + // can't inject shell metacharacters. 
+ for (const args of [ + ['switch', '-c', branch], ['switch', branch], + ['checkout', '-b', branch], ['checkout', branch] + ]) { + try { cp.execFileSync('git', args, { cwd, stdio: 'ignore' }); return true; } catch { /* try next */ } + } + return false; +} + +function cmdStart(args) { + const cwd = process.cwd(); + const dir = path.join(cwd, '.shift'); + const now = new Date(); + const dryRun = args.includes('--dry-run'); + + let config = DEFAULT_CONFIG; + const cfgFile = path.join(dir, 'config.json'); + if (fs.existsSync(cfgFile)) { + config = { ...DEFAULT_CONFIG, ...JSON.parse(fs.readFileSync(cfgFile, 'utf8')) }; + } + const branch = (config.git.branch || 'shift/{date}').replace('{date}', dateStr(now)); + const discovered = discoverBins(config.sources, cwd); + + if (dryRun) { + console.log('shift dry-run'); + console.log(`branch: ${branch}`); + console.log(`bounds: ${JSON.stringify(config.bounds)}`); + console.log(`queue (${discovered.length}):`); + discovered.forEach((b, i) => console.log(` ${i + 1}. 
${b.id} [${b.kind}]`)); + return; + } + + fs.mkdirSync(dir, { recursive: true }); + if (fs.existsSync(path.join(dir, 'STOP'))) fs.unlinkSync(path.join(dir, 'STOP')); + fs.writeFileSync(cfgFile, JSON.stringify(config, null, 2)); + let state = initState({ runId: isoStamp(now), startedAt: now.toISOString(), branch }); + state = mergeDiscovered(state, discovered); + saveState(dir, state); + fs.writeFileSync(path.join(dir, 'log.md'), `# shift log — ${state.runId}\n`); + + if (!ensureBranch(cwd, branch)) { + console.log(`warning: could not create/switch to branch ${branch} (is this a git repo?)`); + } + + console.log(`shift started: ${discovered.length} bins on branch ${branch}`); + console.log('Now open Claude Code in this repo and say: "begin the shift".'); +} + +function cmdStatus() { + const state = loadState(path.join(process.cwd(), '.shift')); + const c = s => state.bins.filter(b => b.status === s).length; + console.log(`run ${state.runId} · branch ${state.branch} · iter ${state.iterations}`); + console.log(`bins: ${c('done')} done · ${c('blocked')} blocked · ${c('pending')} pending`); +} + +function cmdStop() { + const dir = path.join(process.cwd(), '.shift'); + fs.mkdirSync(dir, { recursive: true }); + fs.writeFileSync(path.join(dir, 'STOP'), ''); + console.log('shift will stop cleanly after the current bin.'); +} + +const [, , sub, ...rest] = process.argv; +if (sub === 'start') cmdStart(rest); +else if (sub === 'status') cmdStatus(); +else if (sub === 'stop') cmdStop(); +else { console.log('usage: shift <start|status|stop> [--dry-run]'); process.exit(1); } diff --git a/shift/examples/queue/00-hello.md b/shift/examples/queue/00-hello.md new file mode 100644 index 0000000..ecfed01 --- /dev/null +++ b/shift/examples/queue/00-hello.md @@ -0,0 +1,6 @@ +# Add a project HELLO file + +Create a file `HELLO.md` at the repo root containing one sentence describing +what this repository is. Commit it. + +Definition of done: `HELLO.md` exists and is committed on the run branch. 
diff --git a/shift/hooks/shift-stop.cjs b/shift/hooks/shift-stop.cjs new file mode 100755 index 0000000..243f3d5 --- /dev/null +++ b/shift/hooks/shift-stop.cjs @@ -0,0 +1,98 @@ +#!/usr/bin/env node +'use strict'; +const fs = require('node:fs'); +const path = require('node:path'); +const { discoverBins } = require('../lib/discovery.cjs'); +const { loadState, saveState, mergeDiscovered, setBinStatus } = require('../lib/state.cjs'); +const { decide } = require('../lib/decision.cjs'); + +function readStdin() { try { return fs.readFileSync(0, 'utf8'); } catch { return ''; } } + +function readBlocked(dir) { + try { + return fs.readFileSync(path.join(dir, 'blocked.jsonl'), 'utf8') + .split('\n').filter(Boolean) + .map(l => { try { return JSON.parse(l); } catch { return null; } }) + .filter(Boolean); + } catch { return []; } +} + +// "Needs you: " lines the agent appended to the log (non-blocking flags). +function readNeedsYou(dir) { + try { + return fs.readFileSync(path.join(dir, 'log.md'), 'utf8') + .split('\n') + .map(l => l.match(/^Needs you:\s*(.+)$/)) + .filter(Boolean) + .map(m => m[1].trim()); + } catch { return []; } +} + +function writeSummary(dir, state, reason, now) { + const done = state.bins.filter(b => b.status === 'done').length; + const blocked = state.bins.filter(b => b.status === 'blocked'); + const pending = state.bins.filter(b => b.status === 'pending').length; + const mins = Math.round((now - Date.parse(state.startedAt)) / 60000); + const items = [ + ...blocked.map(b => `- ${b.id}: ${b.note || 'blocked'}`), + ...readNeedsYou(dir).map(n => `- ${n}`) + ]; + const lines = [ + `# shift summary — ${state.runId}`, '', + `Ended: ${reason}`, + `Duration: ${mins} min · Iterations: ${state.iterations}`, + `Branch: ${state.branch}`, + `Bins: ${done} done · ${blocked.length} blocked · ${pending} pending`, '', + '## Needs you', + ...(items.length ? 
items : ['- (nothing flagged)']) + ]; + fs.writeFileSync(path.join(dir, 'summary.md'), lines.join('\n') + '\n'); +} + +function main() { + let input = {}; + try { input = JSON.parse(readStdin() || '{}'); } catch { input = {}; } + + // Resolve the repo from the hook payload's cwd (the hook's process cwd is not + // guaranteed to be the project root); fall back to process.cwd(). + const cwd = (input && typeof input.cwd === 'string' && input.cwd) ? input.cwd : process.cwd(); + const dir = path.join(cwd, '.shift'); + if (!fs.existsSync(path.join(dir, 'state.json'))) { process.stdout.write('{}'); return; } + + const config = JSON.parse(fs.readFileSync(path.join(dir, 'config.json'), 'utf8')); + let state = loadState(dir); + const now = Date.now(); + const killSwitch = fs.existsSync(path.join(dir, 'STOP')); + + // Attribute the just-finished work to the current bin. + if (state.currentBinId) { + const b = readBlocked(dir).find(x => x.id === state.currentBinId); + state = setBinStatus(state, state.currentBinId, b + ? { status: 'blocked', note: b.note } + : { status: 'done', finishedAt: new Date(now).toISOString() }); + } + + // Re-discover (picks up newly added files) and carry over statuses. 
+ state = mergeDiscovered(state, discoverBins(config.sources, cwd)); + + const result = decide({ + bins: state.bins, state, config, now, + stopHookActive: !!input.stop_hook_active, killSwitch + }); + + if (result.action === 'block') { + state.iterations += 1; + state.currentBinId = result.nextBinId; + saveState(dir, state); + fs.appendFileSync(path.join(dir, 'log.md'), + `\n## ${new Date(now).toISOString()} — start ${result.nextBinId} (iter ${state.iterations})\n`); + process.stdout.write(JSON.stringify({ decision: 'block', reason: result.reason })); + } else { + state.currentBinId = null; + saveState(dir, state); + writeSummary(dir, state, result.reason, now); + process.stdout.write('{}'); + } +} + +main(); diff --git a/shift/lib/bounds.cjs b/shift/lib/bounds.cjs new file mode 100644 index 0000000..6708260 --- /dev/null +++ b/shift/lib/bounds.cjs @@ -0,0 +1,17 @@ +'use strict'; + +// now: epoch ms. Returns null (continue) or { reason } (terminate the run). +function evaluateBounds(state, config, now) { + const b = (config && config.bounds) || {}; + if (typeof b.maxIterations === 'number' && state.iterations >= b.maxIterations) { + return { reason: `max iterations (${b.maxIterations}) reached` }; + } + if (typeof b.maxHours === 'number') { + if (now - Date.parse(state.startedAt) >= b.maxHours * 3_600_000) { + return { reason: `time box (${b.maxHours}h) reached` }; + } + } + return null; +} + +module.exports = { evaluateBounds }; diff --git a/shift/lib/brief.cjs b/shift/lib/brief.cjs new file mode 100644 index 0000000..7a68aa2 --- /dev/null +++ b/shift/lib/brief.cjs @@ -0,0 +1,27 @@ +'use strict'; + +// Render the unattended instruction + bin text fed back to the agent on `block`. 
+function renderBrief(bin, config) { + const dod = (config && config.definitionOfDone) || 'Complete the task and commit your work.'; + const git = (config && config.git) || {}; + const forbidden = []; + if (!git.allowPush) forbidden.push('push to any remote'); + if (!git.allowOutwardActions) forbidden.push('publish, send to external services, or delete files outside the working tree'); + const guard = forbidden.length + ? `Do NOT ${forbidden.join(', or ')}; if the work needs one, treat it as a "Needs you" item (below) and continue with the rest.` + : ''; + return [ + 'You are running unattended under `shift`. Complete the brief below end-to-end using your best judgment.', + 'Do NOT ask questions — if you would normally ask, decide and record the decision in .shift/log.md.', + `Definition of done: ${dod}`, + 'When finished, commit your work on the current branch.', + 'Flag anything that needs the human (a deferred decision, an action you could not take) by appending a line to .shift/log.md as: "Needs you: <what you need>" — these surface in the run summary.', + `If a true blocker stops you from finishing this bin, append one line to .shift/blocked.jsonl: {"id":${JSON.stringify(bin.id)},"note":"<why you are blocked>"} then stop.`, + guard, + '', + `--- BIN: ${bin.id} ---`, + bin.text + ].filter(Boolean).join('\n'); +} + +module.exports = { renderBrief }; diff --git a/shift/lib/decision.cjs b/shift/lib/decision.cjs new file mode 100644 index 0000000..5a9cfe3 --- /dev/null +++ b/shift/lib/decision.cjs @@ -0,0 +1,18 @@ +'use strict'; +const { evaluateBounds } = require('./bounds.cjs'); +const { firstPending } = require('./state.cjs'); +const { renderBrief } = require('./brief.cjs'); + +// ctx: { bins, state, config, now, stopHookActive, killSwitch } +// returns { action:'allow', reason } | { action:'block', reason, nextBinId } +function decide(ctx) { + const { bins, state, config, now, killSwitch } = ctx; + if (killSwitch) return { action: 'allow', reason: 'kill switch (.shift/STOP) present' }; + const bound = 
evaluateBounds(state, config, now); + if (bound) return { action: 'allow', reason: bound.reason }; + const next = firstPending(bins); + if (!next) return { action: 'allow', reason: 'queue empty' }; + return { action: 'block', reason: renderBrief(next, config), nextBinId: next.id }; +} + +module.exports = { decide }; diff --git a/shift/lib/discovery.cjs b/shift/lib/discovery.cjs new file mode 100644 index 0000000..49931d5 --- /dev/null +++ b/shift/lib/discovery.cjs @@ -0,0 +1,35 @@ +'use strict'; +const fs = require('node:fs'); +const path = require('node:path'); +const crypto = require('node:crypto'); + +function hashText(text) { + return crypto.createHash('sha256').update(text).digest('hex').slice(0, 12); +} + +function listMarkdown(dirAbs) { + let entries; + try { entries = fs.readdirSync(dirAbs, { withFileTypes: true }); } + catch { return []; } + return entries.filter(e => e.isFile() && e.name.endsWith('.md')).map(e => e.name).sort(); +} + +// sources: [{ path, kind }]. cwd: repo root. Returns ordered bins (source then filename). 
+function discoverBins(sources, cwd) { + const bins = []; + for (const source of sources) { + const dirAbs = path.resolve(cwd, source.path); + for (const name of listMarkdown(dirAbs)) { + const text = fs.readFileSync(path.join(dirAbs, name), 'utf8'); + bins.push({ + id: path.posix.join(source.path, name), + hash: hashText(text), + kind: source.kind || 'briefs', + text + }); + } + } + return bins; +} + +module.exports = { discoverBins, hashText }; diff --git a/shift/lib/state.cjs b/shift/lib/state.cjs new file mode 100644 index 0000000..9d10a99 --- /dev/null +++ b/shift/lib/state.cjs @@ -0,0 +1,42 @@ +'use strict'; +const fs = require('node:fs'); +const path = require('node:path'); + +function statePath(dir) { return path.join(dir, 'state.json'); } + +function loadState(dir) { return JSON.parse(fs.readFileSync(statePath(dir), 'utf8')); } + +function saveState(dir, state) { + fs.mkdirSync(dir, { recursive: true }); + // Persist lean: the bin `text` is re-read from disk on each discovery pass, so + // keep it out of state.json (avoids bloating state with full brief/plan bodies). + const lean = { ...state, bins: state.bins.map(({ text, ...b }) => b) }; + fs.writeFileSync(statePath(dir), JSON.stringify(lean, null, 2)); +} + +function initState({ runId, startedAt, branch }) { + return { runId, startedAt, iterations: 0, branch, currentBinId: null, bins: [] }; +} + +// Merge freshly discovered bins into state, carrying over status by id+hash. +// New or content-changed files appear as 'pending'. +function mergeDiscovered(state, discovered) { + const prev = new Map(state.bins.map(b => [b.id + '@' + b.hash, b])); + const bins = discovered.map(d => { + const carried = prev.get(d.id + '@' + d.hash); + // Always carry the freshly-read `text` (needed to render the brief); status + // comes from the prior run if this id+hash was already seen. + return carried + ? 
{ ...carried, kind: d.kind, text: d.text } + : { id: d.id, hash: d.hash, kind: d.kind, status: 'pending', text: d.text }; + }); + return { ...state, bins }; +} + +function firstPending(bins) { return bins.find(b => b.status === 'pending') || null; } + +function setBinStatus(state, id, patch) { + return { ...state, bins: state.bins.map(b => (b.id === id ? { ...b, ...patch } : b)) }; +} + +module.exports = { statePath, loadState, saveState, initState, mergeDiscovered, firstPending, setBinStatus }; diff --git a/shift/package.json b/shift/package.json new file mode 100644 index 0000000..52fabb4 --- /dev/null +++ b/shift/package.json @@ -0,0 +1,9 @@ +{ + "name": "shift", + "version": "0.1.0", + "private": true, + "description": "Autonomous work-queue runner for Claude Code (Agentic Workflow Toolkit module 2)", + "bin": { "shift": "bin/shift" }, + "engines": { "node": ">=18" }, + "scripts": { "test": "node --test" } +} diff --git a/shift/test/bounds.test.cjs b/shift/test/bounds.test.cjs new file mode 100644 index 0000000..b46cce3 --- /dev/null +++ b/shift/test/bounds.test.cjs @@ -0,0 +1,27 @@ +'use strict'; +const { test } = require('node:test'); +const assert = require('node:assert'); +const { evaluateBounds } = require('../lib/bounds.cjs'); + +const base = { startedAt: '2026-06-13T00:00:00Z', iterations: 0 }; +const t0 = Date.parse(base.startedAt); + +test('returns null when within bounds', () => { + const cfg = { bounds: { maxHours: 2, maxIterations: 10 } }; + assert.equal(evaluateBounds(base, cfg, t0 + 60_000), null); +}); + +test('terminates on max iterations', () => { + const cfg = { bounds: { maxHours: 2, maxIterations: 5 } }; + assert.match(evaluateBounds({ ...base, iterations: 5 }, cfg, t0 + 1000).reason, /max iterations/); +}); + +test('terminates on time box', () => { + const cfg = { bounds: { maxHours: 1, maxIterations: 100 } }; + assert.match(evaluateBounds(base, cfg, t0 + 3_600_001).reason, /time box/); +}); + +test('iterations checked before time', () => { 
+ const cfg = { bounds: { maxHours: 1, maxIterations: 1 } }; + assert.match(evaluateBounds({ ...base, iterations: 1 }, cfg, t0 + 3_600_001).reason, /max iterations/); +}); diff --git a/shift/test/brief.test.cjs b/shift/test/brief.test.cjs new file mode 100644 index 0000000..2212e4e --- /dev/null +++ b/shift/test/brief.test.cjs @@ -0,0 +1,31 @@ +'use strict'; +const { test } = require('node:test'); +const assert = require('node:assert'); +const { renderBrief } = require('../lib/brief.cjs'); + +const bin = { id: 'queue/01.md', text: 'Do the thing.' }; + +test('includes the bin text, id, and definition of done', () => { + const out = renderBrief(bin, { definitionOfDone: 'tests pass', git: {} }); + assert.match(out, /Do the thing\./); + assert.match(out, /queue\/01\.md/); + assert.match(out, /tests pass/); +}); + +test('forbids push and outward actions by default', () => { + const out = renderBrief(bin, { git: { allowPush: false, allowOutwardActions: false } }); + assert.match(out, /Do NOT/); + assert.match(out, /push to any remote/); +}); + +test('omits the forbid-guard when everything is allowed', () => { + const out = renderBrief(bin, { git: { allowPush: true, allowOutwardActions: true } }); + assert.doesNotMatch(out, /Do NOT push/); +}); + +test('always explains decision logging, the Needs-you convention, and blocker flagging', () => { + const out = renderBrief(bin, { git: {} }); + assert.match(out, /\.shift\/log\.md/); + assert.match(out, /Needs you:/); + assert.match(out, /blocked\.jsonl/); +}); diff --git a/shift/test/cli.test.cjs b/shift/test/cli.test.cjs new file mode 100644 index 0000000..62dbad1 --- /dev/null +++ b/shift/test/cli.test.cjs @@ -0,0 +1,49 @@ +'use strict'; +const { test } = require('node:test'); +const assert = require('node:assert'); +const fs = require('node:fs'); +const os = require('node:os'); +const path = require('node:path'); +const cp = require('node:child_process'); + +const CLI = path.resolve(__dirname, '..', 'bin', 'shift'); + 
+function repoWithQueue() { + const cwd = fs.mkdtempSync(path.join(os.tmpdir(), 'shift-cli-')); + cp.execSync('git init -q', { cwd }); + cp.execSync('git config user.email t@t.co', { cwd }); + cp.execSync('git config user.name t', { cwd }); + cp.execSync('git commit -q --allow-empty -m init', { cwd }); + fs.mkdirSync(path.join(cwd, 'queue'), { recursive: true }); + fs.writeFileSync(path.join(cwd, 'queue', '01.md'), 'bin one'); + return cwd; +} + +function run(cwd, args) { + return cp.execFileSync('node', [CLI, ...args], { cwd, encoding: 'utf8' }); +} + +test('--dry-run lists the queue and writes nothing', () => { + const cwd = repoWithQueue(); + const out = run(cwd, ['start', '--dry-run']); + assert.match(out, /queue\/01\.md/); + assert.ok(!fs.existsSync(path.join(cwd, '.shift', 'state.json'))); +}); + +test('start writes config + state and creates the run branch', () => { + const cwd = repoWithQueue(); + run(cwd, ['start']); + assert.ok(fs.existsSync(path.join(cwd, '.shift', 'state.json'))); + assert.ok(fs.existsSync(path.join(cwd, '.shift', 'config.json'))); + const branch = cp.execSync('git branch --show-current', { cwd, encoding: 'utf8' }).trim(); + assert.match(branch, /^shift\//); + const state = JSON.parse(fs.readFileSync(path.join(cwd, '.shift', 'state.json'), 'utf8')); + assert.equal(state.bins.length, 1); +}); + +test('stop creates the kill switch', () => { + const cwd = repoWithQueue(); + run(cwd, ['start']); + run(cwd, ['stop']); + assert.ok(fs.existsSync(path.join(cwd, '.shift', 'STOP'))); +}); diff --git a/shift/test/decision.test.cjs b/shift/test/decision.test.cjs new file mode 100644 index 0000000..76cbd66 --- /dev/null +++ b/shift/test/decision.test.cjs @@ -0,0 +1,38 @@ +'use strict'; +const { test } = require('node:test'); +const assert = require('node:assert'); +const { decide } = require('../lib/decision.cjs'); + +const cfg = { bounds: { maxHours: 2, maxIterations: 10 }, definitionOfDone: 'done', git: {} }; +const state = { startedAt: 
'2026-06-13T00:00:00Z', iterations: 0, currentBinId: null }; +const t0 = Date.parse(state.startedAt) + 1000; + +test('blocks with the first pending bin', () => { + const bins = [{ id: 'a', status: 'done' }, { id: 'b', status: 'pending', text: 'work b' }]; + const r = decide({ bins, state, config: cfg, now: t0, stopHookActive: false, killSwitch: false }); + assert.equal(r.action, 'block'); + assert.equal(r.nextBinId, 'b'); + assert.match(r.reason, /work b/); +}); + +test('allows stop when queue empty', () => { + const bins = [{ id: 'a', status: 'done' }]; + const r = decide({ bins, state, config: cfg, now: t0, stopHookActive: false, killSwitch: false }); + assert.equal(r.action, 'allow'); + assert.match(r.reason, /queue empty/); +}); + +test('kill switch allows stop even with pending work', () => { + const bins = [{ id: 'b', status: 'pending', text: 'x' }]; + const r = decide({ bins, state, config: cfg, now: t0, stopHookActive: false, killSwitch: true }); + assert.equal(r.action, 'allow'); + assert.match(r.reason, /kill switch/); +}); + +test('a bound (time box) allows stop even with pending work', () => { + const bins = [{ id: 'b', status: 'pending', text: 'x' }]; + const late = Date.parse(state.startedAt) + 3 * 3_600_000; + const r = decide({ bins, state, config: cfg, now: late, stopHookActive: false, killSwitch: false }); + assert.equal(r.action, 'allow'); + assert.match(r.reason, /time box/); +}); diff --git a/shift/test/discovery.test.cjs b/shift/test/discovery.test.cjs new file mode 100644 index 0000000..3dfad7c --- /dev/null +++ b/shift/test/discovery.test.cjs @@ -0,0 +1,37 @@ +'use strict'; +const { test } = require('node:test'); +const assert = require('node:assert'); +const fs = require('node:fs'); +const os = require('node:os'); +const path = require('node:path'); +const { discoverBins, hashText } = require('../lib/discovery.cjs'); + +function tmpRepo() { + const d = fs.mkdtempSync(path.join(os.tmpdir(), 'shift-disc-')); + fs.mkdirSync(path.join(d, 
'queue'), { recursive: true }); + fs.mkdirSync(path.join(d, 'plans'), { recursive: true }); + fs.writeFileSync(path.join(d, 'queue', '02-b.md'), 'second'); + fs.writeFileSync(path.join(d, 'queue', '01-a.md'), 'first'); + fs.writeFileSync(path.join(d, 'queue', 'notes.txt'), 'ignored'); + fs.writeFileSync(path.join(d, 'plans', 'p1.md'), 'plan one'); + return d; +} + +test('discovers .md files, ordered by source then filename', () => { + const cwd = tmpRepo(); + const bins = discoverBins([{ path: 'queue', kind: 'briefs' }, { path: 'plans', kind: 'plans' }], cwd); + assert.deepEqual(bins.map(b => b.id), ['queue/01-a.md', 'queue/02-b.md', 'plans/p1.md']); + assert.equal(bins[0].kind, 'briefs'); + assert.equal(bins[2].kind, 'plans'); + assert.equal(bins[0].text, 'first'); +}); + +test('hash is stable for same content, differs for different content', () => { + assert.equal(hashText('x'), hashText('x')); + assert.notEqual(hashText('x'), hashText('y')); +}); + +test('missing source folder yields no bins (no throw)', () => { + const cwd = tmpRepo(); + assert.deepEqual(discoverBins([{ path: 'does-not-exist', kind: 'briefs' }], cwd), []); +}); diff --git a/shift/test/hook.test.cjs b/shift/test/hook.test.cjs new file mode 100644 index 0000000..aae0144 --- /dev/null +++ b/shift/test/hook.test.cjs @@ -0,0 +1,95 @@ +'use strict'; +const { test } = require('node:test'); +const assert = require('node:assert'); +const fs = require('node:fs'); +const os = require('node:os'); +const path = require('node:path'); +const cp = require('node:child_process'); + +const HOOK = path.resolve(__dirname, '..', 'hooks', 'shift-stop.cjs'); + +function setupRun() { + const cwd = fs.mkdtempSync(path.join(os.tmpdir(), 'shift-hook-')); + fs.mkdirSync(path.join(cwd, 'queue'), { recursive: true }); + fs.writeFileSync(path.join(cwd, 'queue', '01.md'), 'bin one'); + fs.writeFileSync(path.join(cwd, 'queue', '02.md'), 'bin two'); + const dir = path.join(cwd, '.shift'); + fs.mkdirSync(dir, { recursive: true 
}); + fs.writeFileSync(path.join(dir, 'config.json'), JSON.stringify({ + sources: [{ path: 'queue', kind: 'briefs' }], + bounds: { maxHours: 24, maxIterations: 10 }, + definitionOfDone: 'done', git: {} + })); + fs.writeFileSync(path.join(dir, 'state.json'), JSON.stringify({ + runId: 'r', startedAt: new Date().toISOString(), iterations: 0, + branch: 'shift/x', currentBinId: null, bins: [] + })); + fs.writeFileSync(path.join(dir, 'log.md'), '# log\n'); + return { cwd, dir }; +} + +function runHook(cwd, input) { + const out = cp.execFileSync('node', [HOOK], { cwd, input: JSON.stringify(input), encoding: 'utf8' }); + return JSON.parse(out || '{}'); +} + +test('no-ops (allows stop) when no .shift/state.json exists', () => { + const cwd = fs.mkdtempSync(path.join(os.tmpdir(), 'shift-none-')); + assert.deepEqual(runHook(cwd, { stop_hook_active: false }), {}); +}); + +test('first stop blocks bin 1; second marks it done + blocks bin 2; third drains -> allow + summary', () => { + const { cwd, dir } = setupRun(); + const r1 = runHook(cwd, { stop_hook_active: false }); + assert.equal(r1.decision, 'block'); + assert.match(r1.reason, /bin one/); + + const r2 = runHook(cwd, { stop_hook_active: true }); + assert.equal(r2.decision, 'block'); + assert.match(r2.reason, /bin two/); + const s2 = JSON.parse(fs.readFileSync(path.join(dir, 'state.json'), 'utf8')); + assert.equal(s2.bins.find(b => b.id === 'queue/01.md').status, 'done'); + + const r3 = runHook(cwd, { stop_hook_active: true }); + assert.deepEqual(r3, {}); + assert.ok(fs.existsSync(path.join(dir, 'summary.md'))); + assert.match(fs.readFileSync(path.join(dir, 'summary.md'), 'utf8'), /queue empty/); +}); + +test('blocked.jsonl marks the current bin blocked and surfaces it in the summary', () => { + const { cwd, dir } = setupRun(); + runHook(cwd, { stop_hook_active: false }); + fs.writeFileSync(path.join(dir, 'blocked.jsonl'), JSON.stringify({ id: 'queue/01.md', note: 'needs key' }) + '\n'); + runHook(cwd, { stop_hook_active: 
true }); + runHook(cwd, { stop_hook_active: true }); + assert.match(fs.readFileSync(path.join(dir, 'summary.md'), 'utf8'), /needs key/); +}); + +test('logged "Needs you:" lines surface in the summary', () => { + const { cwd, dir } = setupRun(); + runHook(cwd, { stop_hook_active: false }); + fs.appendFileSync(path.join(dir, 'log.md'), '\nNeeds you: push the release tag\n'); + runHook(cwd, { stop_hook_active: true }); + runHook(cwd, { stop_hook_active: true }); + assert.match(fs.readFileSync(path.join(dir, 'summary.md'), 'utf8'), /push the release tag/); +}); + +test('kill switch ends the run immediately', () => { + const { cwd, dir } = setupRun(); + fs.writeFileSync(path.join(dir, 'STOP'), ''); + assert.deepEqual(runHook(cwd, { stop_hook_active: false }), {}); + assert.match(fs.readFileSync(path.join(dir, 'summary.md'), 'utf8'), /kill switch/); +}); + +test('resolves .shift from the hook payload cwd, not the process cwd', () => { + const { cwd } = setupRun(); + const neutral = fs.mkdtempSync(path.join(os.tmpdir(), 'shift-neutral-')); + const out = cp.execFileSync('node', [HOOK], { + cwd: neutral, + input: JSON.stringify({ stop_hook_active: false, cwd }), + encoding: 'utf8' + }); + const r = JSON.parse(out || '{}'); + assert.equal(r.decision, 'block'); + assert.match(r.reason, /bin one/); +}); diff --git a/shift/test/state.test.cjs b/shift/test/state.test.cjs new file mode 100644 index 0000000..e123620 --- /dev/null +++ b/shift/test/state.test.cjs @@ -0,0 +1,49 @@ +'use strict'; +const { test } = require('node:test'); +const assert = require('node:assert'); +const fs = require('node:fs'); +const os = require('node:os'); +const path = require('node:path'); +const { initState, saveState, loadState, mergeDiscovered, firstPending, setBinStatus } = require('../lib/state.cjs'); + +test('init + save + load round-trips', () => { + const dir = fs.mkdtempSync(path.join(os.tmpdir(), 'shift-state-')); + const s = initState({ runId: 'r1', startedAt: '2026-06-13T00:00:00Z', 
branch: 'shift/x' }); + assert.equal(s.iterations, 0); + assert.equal(s.currentBinId, null); + saveState(dir, s); + assert.deepEqual(loadState(dir), s); +}); + +test('mergeDiscovered carries status by id+hash, new files are pending', () => { + let s = initState({ runId: 'r', startedAt: '2026-06-13T00:00:00Z', branch: 'b' }); + s = mergeDiscovered(s, [{ id: 'queue/a.md', hash: 'h1', kind: 'briefs' }]); + assert.equal(s.bins[0].status, 'pending'); + s = setBinStatus(s, 'queue/a.md', { status: 'done' }); + s = mergeDiscovered(s, [ + { id: 'queue/a.md', hash: 'h1', kind: 'briefs' }, + { id: 'queue/b.md', hash: 'h2', kind: 'briefs' } + ]); + assert.equal(s.bins.find(b => b.id === 'queue/a.md').status, 'done'); + assert.equal(s.bins.find(b => b.id === 'queue/b.md').status, 'pending'); +}); + +test('edited file (new hash) becomes pending again', () => { + let s = initState({ runId: 'r', startedAt: 't', branch: 'b' }); + s = mergeDiscovered(s, [{ id: 'q/a.md', hash: 'h1', kind: 'briefs' }]); + s = setBinStatus(s, 'q/a.md', { status: 'done' }); + s = mergeDiscovered(s, [{ id: 'q/a.md', hash: 'h2', kind: 'briefs' }]); + assert.equal(s.bins[0].status, 'pending'); +}); + +test('firstPending returns first pending or null', () => { + let s = initState({ runId: 'r', startedAt: 't', branch: 'b' }); + s = mergeDiscovered(s, [ + { id: 'a', hash: '1', kind: 'briefs' }, + { id: 'b', hash: '2', kind: 'briefs' } + ]); + s = setBinStatus(s, 'a', { status: 'done' }); + assert.equal(firstPending(s.bins).id, 'b'); + s = setBinStatus(s, 'b', { status: 'done' }); + assert.equal(firstPending(s.bins), null); +}); From f5e67ad49695372c98e1e6be28c2c45061805f49 Mon Sep 17 00:00:00 2001 From: Allen Manzano-Wight <6640236+AllenBW@users.noreply.github.com> Date: Sat, 13 Jun 2026 22:45:57 -0400 Subject: [PATCH 03/12] shift: add v2 (headless auto-resume + usage cap) and v3 (per-bin verify gate) --- shift/PLAN.md | 11 +++++ shift/README.md | 62 +++++++++++++++++++------- shift/SPEC.md | 13 ++++++ 
shift/bin/shift | 53 +++++++++++++++++++++- shift/hooks/shift-stop.cjs | 65 +++++++++++++++++++-------- shift/lib/bounds.cjs | 9 +++- shift/lib/decision.cjs | 6 +-- shift/lib/outcome.cjs | 30 +++++++++++++ shift/lib/run-loop.cjs | 63 ++++++++++++++++++++++++++ shift/lib/usage.cjs | 35 +++++++++++++++ shift/lib/verify.cjs | 23 ++++++++++ shift/test/bounds.test.cjs | 11 +++++ shift/test/decision.test.cjs | 8 ++++ shift/test/hook.test.cjs | 53 ++++++++++++++++++++-- shift/test/outcome.test.cjs | 33 ++++++++++++++ shift/test/run-loop.test.cjs | 86 ++++++++++++++++++++++++++++++++++++ shift/test/usage.test.cjs | 40 +++++++++++++++++ shift/test/verify.test.cjs | 25 +++++++++++ 18 files changed, 583 insertions(+), 43 deletions(-) create mode 100644 shift/lib/outcome.cjs create mode 100644 shift/lib/run-loop.cjs create mode 100644 shift/lib/usage.cjs create mode 100644 shift/lib/verify.cjs create mode 100644 shift/test/outcome.test.cjs create mode 100644 shift/test/run-loop.test.cjs create mode 100644 shift/test/usage.test.cjs create mode 100644 shift/test/verify.test.cjs diff --git a/shift/PLAN.md b/shift/PLAN.md index d9f79bf..387f464 100644 --- a/shift/PLAN.md +++ b/shift/PLAN.md @@ -1032,3 +1032,14 @@ Built on branch `shift-v1`. The draft code blocks above are the design intent; t - **`package.json` — `"test": "node --test"`** (Node ≥18 auto-discovery; a bare `test/` arg isn't accepted) and `"engines": { "node": ">=18" }`. All 28 `shift` tests + 7 `code-status-bar` tests pass; `install.sh` verified end-to-end. + +--- + +## v2 + v3 (built on the same branch) + +Added after v1, same TDD discipline (52 `shift` tests total). See SPEC §13 for the design decisions. + +- **v3 verify gate** — `lib/verify.cjs` (injectable exec) + a gate in the Stop hook: a bin passes only if `verify.command` exits 0; failures re-feed the bin with the output up to `verify.maxAttempts`, then block it. Tests: `verify.test.cjs` + hook gate cases. 
+- **v2 usage cap** — `lib/usage.cjs` caches the hook payload's `rate_limits` to `.shift/usage.json`; `evaluateBounds` gains a `usagePercent` arg (cap on weekly %); the hook reads it from the payload and degrades gracefully when absent. Tests: `usage.test.cjs` + bounds/decision/hook cases. +- **v2 headless runner** — `lib/outcome.cjs` (classify a spawn: completed / rate_limited / error, inferring rate-limit from cached usage since the exit signature is undocumented) + `lib/run-loop.cjs` (pure outer loop with injected effects: bounds, max-resumes backstop, wait-until-reset auto-resume) + `bin/shift run` (thin real-effects wiring). Tests: `outcome.test.cjs`, `run-loop.test.cjs`. +- **Security** — `lib/verify.cjs` uses `spawnSync(command, { shell: true })` with the whole user-config command (not interpolated); documented inline. diff --git a/shift/README.md b/shift/README.md index 5ebe712..d871a96 100644 --- a/shift/README.md +++ b/shift/README.md @@ -1,23 +1,27 @@ # shift -Autonomous work-queue runner for **Claude Code** — module 2 of the [Agentic Workflow Toolkit](../). Pre-load bins of work, leave, and `shift` keeps Claude working through them past natural stop points, using its best judgment, until the queue is empty or a bound is hit. You review the output at the end. +Autonomous work-queue runner for **Claude Code** — module 2 of the [Agentic Workflow Toolkit](../). Pre-load bins of work, leave, and `shift` keeps Claude working through them past natural stop points, using its best judgment, until the queue is empty or a bound is hit — surviving the 5-hour rate-limit wall by waiting for the window to reopen. You review the output at the end. -> **This is v1** — the intra-session engine (a Stop hook). It keeps a *running* session grinding the queue, bounded by a time box + max iterations. Surviving the 5-hour rate-limit wall (auto-resume) and a usage cap are **v2**. See [SPEC.md](./SPEC.md) and [PLAN.md](./PLAN.md). 
+See [SPEC.md](./SPEC.md) and [PLAN.md](./PLAN.md) for the design. ## How it works -You drop work into source folders (hand-written briefs and/or plugin-generated plans). `shift start` discovers them, records a run in `.shift/`, and creates a `shift/` branch. You open Claude Code and say "begin the shift." From then on, a **Stop hook** runs each time the agent would stop: it marks the finished bin done, picks the next pending bin, and feeds it back as the next instruction — so the agent keeps going. When the queue drains (or a bound trips, or you hit the kill switch), it lets the session stop and writes `.shift/summary.md`. +You drop work into source folders — hand-written briefs and/or plugin-generated plans (e.g. Superpowers' plans dir). `shift start` discovers them, records a run in `.shift/`, and creates a `shift/` branch. Then: -The hook is safe to register globally: it no-ops in any repo that isn't an active `shift` run. +- **Keep-going engine (Stop hook).** Each time the agent would stop, the hook marks the finished bin done, picks the next pending one, and feeds it back as the next instruction — so the session keeps working. When the queue drains (or a bound trips, or the kill switch is set) it lets the session stop and writes `.shift/summary.md`. +- **Verify gate.** If you set a `verify.command`, each bin must pass it (e.g. `npm test`) before it counts as done; failures re-feed the bin with the output (up to `maxAttempts`), then mark it blocked. This catches "looked done but wasn't." +- **All-day runner (`shift run`).** A headless outer loop that spawns Claude, lets the engine grind, and — when a spawn dies on the rate-limit wall — waits until the window resets and resumes. Bounded by wall-clock, max iterations, a usage cap, and a resume backstop. + +The hook is safe to register globally: it no-ops in any repo that isn't an active `shift` run, and resolves the repo from the hook payload's `cwd`. 
## Safety model -Full best-judgment autonomy on reversible, in-worktree work. By default it will **not** push, publish, send externally, or delete outside the worktree — it does the preparable part and records a `Needs you:` line instead, which the summary collects. All work lands on the `shift/` branch, so review is a clean diff. Every decision is logged. Hard stops: time box, max iterations, and a kill switch (`shift stop`). +Full best-judgment autonomy on reversible, in-worktree work. By default it will **not** push, publish, send externally, or delete outside the worktree — it does the preparable part and records a `Needs you:` line, which the summary collects. All work lands on the `shift/` branch, so review is a clean diff. Every decision is logged. Hard stops: time box, max iterations, usage cap, kill switch (`shift stop`). ## Install 1. Get the files (clone the toolkit, or copy the `shift/` folder). -2. Register the Stop hook **once** in `~/.claude/settings.json`: +2. Register the Stop hook **once** in `~/.claude/settings.json` (safe globally — no-ops outside an active run): ```json { @@ -31,7 +35,7 @@ Full best-judgment autonomy on reversible, in-worktree work. By default it will } ``` -> Verify the exact hook schema against the current Claude Code hooks docs. The engine only needs "block + feed `reason` back" and the `stop_hook_active` re-entry flag, and it resolves the repo from the hook payload's `cwd`. +> Verify the hook schema against the current Claude Code hooks docs. The engine needs only: "block + feed `reason` back", the `stop_hook_active` flag, the payload `cwd`, and (for the usage cap / auto-resume) the payload `rate_limits`. 3. (Optional) put `shift/bin/shift` on your PATH. 
@@ -42,12 +46,21 @@ cd your-repo mkdir queue && $EDITOR queue/01-first-task.md # one brief per file shift start --dry-run # preview the queue, branch, bounds shift start # init run + create shift/ branch -# open Claude Code here and say: "begin the shift" -shift status # check progress anytime -shift stop # stop cleanly after the current bin ``` -Point at plan folders too (e.g. Superpowers output) by editing `.shift/config.json`: +Then either: + +- **Interactive:** open Claude Code in the repo and say *"begin the shift"* — the Stop hook drives it while you're away (within this session). +- **All-day / unattended:** `shift run` — the headless loop drives Claude, survives rate-limit resets, and stops on a bound. + +```bash +shift status # progress anytime +shift stop # stop cleanly after the current bin +``` + +When it ends, read `.shift/summary.md` (bins done/blocked + a "Needs you" section) and review the `shift/` branch. + +## Configure (`.shift/config.json`) ```json { @@ -55,18 +68,37 @@ Point at plan folders too (e.g. Superpowers output) by editing `.shift/config.js { "path": "queue", "kind": "briefs" }, { "path": "docs/superpowers/plans", "kind": "plans" } ], - "bounds": { "maxHours": 4, "maxIterations": 30 }, + "bounds": { + "maxHours": 4, + "maxIterations": 30, + "maxResumes": 12, + "usageCapPercent": 90, + "autoResumeOnReset": true + }, "definitionOfDone": "Builds and tests pass; work committed on the run branch.", + "verify": { "command": "npm test", "maxAttempts": 2 }, + "permissionMode": "acceptEdits", "git": { "branch": "shift/{date}", "allowPush": false, "allowOutwardActions": false } } ``` -When the run ends, read `.shift/summary.md` (it lists bins done/blocked and a "Needs you" section), then review the `shift/` branch. +- **`usageCapPercent`** — stop when weekly usage reaches this (read from the hook payload's `rate_limits`; skipped when that data is absent, e.g. non-Pro/Max). 
+- **`autoResumeOnReset`** — on a rate-limit wall, `shift run` waits for the 5-hour window to reopen and resumes (never past the time box). +- **`verify.command`** — per-bin acceptance gate; `null` disables it. + +### Permissions for unattended runs + +`shift run` invokes `claude -p --permission-mode <mode>`. `acceptEdits` (the default) auto-approves file edits but **other tools (e.g. Bash) can still prompt — and a headless run can't answer prompts.** For real unattended work that runs tests/commands, either: + +- pre-allow the tools the work needs via `permissions.allow` in your Claude settings and set `"permissionMode": "dontAsk"`, or +- set `"permissionMode": "bypassPermissions"` (broadest; rely on the branch-only / no-push safety model and bounds). + +Pick the narrowest mode that lets the work actually proceed. ## Develop ```bash -cd shift && npm test # node --test, no dependencies +cd shift && npm test # node --test, zero dependencies ``` -Pure logic lives in `lib/` (discovery, state, bounds, brief, decision) and is unit-tested; `hooks/shift-stop.cjs` is the thin I/O shell, integration-tested by driving it with crafted hook input. +Pure logic lives in `lib/` (discovery, state, bounds, brief, decision, verify, usage, outcome, run-loop) and is unit-tested; `hooks/shift-stop.cjs` (the keep-going engine) and the `shift run` loop are integration-tested by driving them with injected effects / crafted hook input. diff --git a/shift/SPEC.md b/shift/SPEC.md index b250662..e81e1eb 100644 --- a/shift/SPEC.md +++ b/shift/SPEC.md @@ -259,3 +259,16 @@ shift/ └─ examples/ └─ queue/ # sample bins ``` + +--- + +## 13. Implementation status (as built — v1 + v2 + v3) + +All three phases are implemented on branch `shift-v1`. Notable as-built decisions: + +- **Rate-limit detection without the undocumented exit signature (resolves §9.2).** Research confirmed the headless rate-limit termination signature is undocumented, but the **Stop hook payload includes `rate_limits`**. 
So the engine caches the latest reset/usage to `.shift/usage.json`, and `lib/outcome.cjs` classifies a non-finalized, non-zero spawn as `rate_limited` by **inference** — near-limit cached usage (≥95%) + a future reset — with config-overridable stderr patterns as a fallback. No dependency on an exact exit code/message. +- **Usage cap source (resolves §9.1).** Enforced from the hook payload's `rate_limits.seven_day.used_percentage`; absent data (non-Pro/Max, pre-first-response) degrades to "cap skipped," never an error. +- **Verify gate (v3, resolves §9.3).** `verify.command` runs per bin; failures re-feed the bin with the output up to `maxAttempts`, then block it — so "looked done but wasn't" is caught, not silently accepted. +- **Permissions.** `shift run` uses `--permission-mode` (default `acceptEdits`). Truly unattended work that runs commands typically needs `dontAsk` + a `permissions.allow` list, or `bypassPermissions` — documented in the README; the branch-only/no-push model and bounds are the backstop. + +**New modules beyond §12:** `lib/verify.cjs`, `lib/usage.cjs`, `lib/outcome.cjs`, `lib/run-loop.cjs`; `bin/shift` gains `run`. **Tests:** 52 in `shift` (pure unit + hook/CLI/run-loop integration), all green. 
diff --git a/shift/bin/shift b/shift/bin/shift index 6d6f1d4..f0d4327 100755 --- a/shift/bin/shift +++ b/shift/bin/shift @@ -11,8 +11,16 @@ function dateStr(d) { return d.toISOString().slice(0, 10); } const DEFAULT_CONFIG = { sources: [{ path: 'queue', kind: 'briefs' }], - bounds: { maxHours: 2, maxIterations: 20 }, + bounds: { + maxHours: 2, + maxIterations: 20, + maxResumes: 12, + usageCapPercent: 90, + autoResumeOnReset: true + }, definitionOfDone: 'Builds and tests pass; work committed on the run branch.', + verify: { command: null, maxAttempts: 2 }, + permissionMode: 'acceptEdits', git: { branch: 'shift/{date}', allowPush: false, allowOutwardActions: false } }; @@ -81,8 +89,49 @@ function cmdStop() { console.log('shift will stop cleanly after the current bin.'); } +// v2: headless outer loop — keeps spawning claude until the engine finalizes, +// a bound trips, or (on a rate-limit wall) it waits for the window to reopen. +async function cmdRun() { + const cwd = process.cwd(); + const dir = path.join(cwd, '.shift'); + if (!fs.existsSync(path.join(dir, 'state.json'))) { + console.log('No active run. Run `shift start` first.'); + process.exit(1); + } + const config = JSON.parse(fs.readFileSync(path.join(dir, 'config.json'), 'utf8')); + const mode = config.permissionMode || 'acceptEdits'; + const { runLoop } = require('../lib/run-loop.cjs'); + const { readUsageCache } = require('../lib/usage.cjs'); + + // Clear any stale summary so finalized() reflects THIS run. 
+ try { fs.unlinkSync(path.join(dir, 'summary.md')); } catch { /* none */ } + + let first = true; + const effects = { + now: () => Date.now(), + loadState: () => loadState(dir), + readUsage: () => readUsageCache(dir), + log: (m) => console.log(`[shift] ${m}`), + finalized: () => fs.existsSync(path.join(dir, 'summary.md')), + sleepUntil: (ms) => new Promise(r => setTimeout(r, Math.max(0, ms - Date.now()))), + spawn: () => { + const args = ['-p', '--permission-mode', mode]; + if (first) { args.push('begin the shift'); first = false; } + else { args.push('--continue', 'continue the shift'); } + return cp.spawnSync('claude', args, { + cwd, encoding: 'utf8', stdio: ['ignore', 'pipe', 'pipe'], maxBuffer: 64 * 1024 * 1024 + }); + } + }; + + const result = await runLoop({ config, effects }); + console.log(`[shift] stopped: ${result.reason} (after ${result.spawns} spawn(s))`); + console.log(`[shift] review: ${path.join(dir, 'summary.md')}`); +} + const [, , sub, ...rest] = process.argv; if (sub === 'start') cmdStart(rest); else if (sub === 'status') cmdStatus(); else if (sub === 'stop') cmdStop(); -else { console.log('usage: shift [--dry-run]'); process.exit(1); } +else if (sub === 'run') cmdRun().catch(e => { console.error(e); process.exit(1); }); +else { console.log('usage: shift [--dry-run]'); process.exit(1); } diff --git a/shift/hooks/shift-stop.cjs b/shift/hooks/shift-stop.cjs index 243f3d5..16358a4 100755 --- a/shift/hooks/shift-stop.cjs +++ b/shift/hooks/shift-stop.cjs @@ -5,6 +5,8 @@ const path = require('node:path'); const { discoverBins } = require('../lib/discovery.cjs'); const { loadState, saveState, mergeDiscovered, setBinStatus } = require('../lib/state.cjs'); const { decide } = require('../lib/decision.cjs'); +const { runVerify } = require('../lib/verify.cjs'); +const { writeUsageCache } = require('../lib/usage.cjs'); function readStdin() { try { return fs.readFileSync(0, 'utf8'); } catch { return ''; } } @@ -17,17 +19,18 @@ function readBlocked(dir) { } 
catch { return []; } } -// "Needs you: " lines the agent appended to the log (non-blocking flags). function readNeedsYou(dir) { try { return fs.readFileSync(path.join(dir, 'log.md'), 'utf8') - .split('\n') - .map(l => l.match(/^Needs you:\s*(.+)$/)) - .filter(Boolean) - .map(m => m[1].trim()); + .split('\n').map(l => l.match(/^Needs you:\s*(.+)$/)).filter(Boolean).map(m => m[1].trim()); } catch { return []; } } +function tail(s, n) { + if (typeof s !== 'string') return ''; + return s.length > n ? s.slice(s.length - n) : s; +} + function writeSummary(dir, state, reason, now) { const done = state.bins.filter(b => b.status === 'done').length; const blocked = state.bins.filter(b => b.status === 'blocked'); @@ -60,33 +63,59 @@ function main() { if (!fs.existsSync(path.join(dir, 'state.json'))) { process.stdout.write('{}'); return; } const config = JSON.parse(fs.readFileSync(path.join(dir, 'config.json'), 'utf8')); - let state = loadState(dir); const now = Date.now(); const killSwitch = fs.existsSync(path.join(dir, 'STOP')); - // Attribute the just-finished work to the current bin. - if (state.currentBinId) { - const b = readBlocked(dir).find(x => x.id === state.currentBinId); - state = setBinStatus(state, state.currentBinId, b - ? { status: 'blocked', note: b.note } - : { status: 'done', finishedAt: new Date(now).toISOString() }); - } + // Capture rate limits from the hook payload: enforce the usage cap and cache + // reset times for the headless runner. Absent on non-Pro/Max or pre-first-response. + const usagePercent = writeUsageCache(dir, input.rate_limits, Math.floor(now / 1000)); - // Re-discover (picks up newly added files) and carry over statuses. - state = mergeDiscovered(state, discoverBins(config.sources, cwd)); + // Re-discover (fresh text + new files) and carry over status/attempts. 
+ let state = mergeDiscovered(loadState(dir), discoverBins(config.sources, cwd)); + + const prevBinId = state.currentBinId; + const verifyCmd = config.verify && config.verify.command; + const maxAttempts = (config.verify && config.verify.maxAttempts) || 2; + let retryFeedback = null; + + // Attribute the just-finished work to the current bin (blocked / verify gate / done). + if (prevBinId) { + const blocked = readBlocked(dir).find(x => x.id === prevBinId); + if (blocked) { + state = setBinStatus(state, prevBinId, { status: 'blocked', note: blocked.note }); + } else if (verifyCmd) { + const v = runVerify(verifyCmd, cwd); + if (v.ok) { + state = setBinStatus(state, prevBinId, { status: 'done', finishedAt: new Date(now).toISOString() }); + } else { + const bin = state.bins.find(b => b.id === prevBinId) || {}; + const attempts = (bin.attempts || 0) + 1; + if (attempts < maxAttempts) { + state = setBinStatus(state, prevBinId, { attempts }); // stays pending → re-blocked below + retryFeedback = `Your previous attempt failed verification (\`${verifyCmd}\`). Fix it and make it pass. 
Output (tail):\n${tail(v.output, 2000)}`; + } else { + state = setBinStatus(state, prevBinId, { status: 'blocked', attempts, note: `failed verification after ${attempts} attempts` }); + } + } + } else { + state = setBinStatus(state, prevBinId, { status: 'done', finishedAt: new Date(now).toISOString() }); + } + } const result = decide({ - bins: state.bins, state, config, now, + bins: state.bins, state, config, now, usagePercent, stopHookActive: !!input.stop_hook_active, killSwitch }); if (result.action === 'block') { + let reason = result.reason; + if (retryFeedback && result.nextBinId === prevBinId) reason += `\n\n${retryFeedback}`; state.iterations += 1; state.currentBinId = result.nextBinId; saveState(dir, state); fs.appendFileSync(path.join(dir, 'log.md'), - `\n## ${new Date(now).toISOString()} — start ${result.nextBinId} (iter ${state.iterations})\n`); - process.stdout.write(JSON.stringify({ decision: 'block', reason: result.reason })); + `\n## ${new Date(now).toISOString()} — work ${result.nextBinId} (iter ${state.iterations})\n`); + process.stdout.write(JSON.stringify({ decision: 'block', reason })); } else { state.currentBinId = null; saveState(dir, state); diff --git a/shift/lib/bounds.cjs b/shift/lib/bounds.cjs index 6708260..4040f42 100644 --- a/shift/lib/bounds.cjs +++ b/shift/lib/bounds.cjs @@ -1,11 +1,16 @@ 'use strict'; -// now: epoch ms. Returns null (continue) or { reason } (terminate the run). -function evaluateBounds(state, config, now) { +// now: epoch ms. usagePercent: latest weekly usage % (or undefined/null if unknown). +// Returns null (continue) or { reason } (terminate the run). 
+function evaluateBounds(state, config, now, usagePercent) { const b = (config && config.bounds) || {}; if (typeof b.maxIterations === 'number' && state.iterations >= b.maxIterations) { return { reason: `max iterations (${b.maxIterations}) reached` }; } + if (typeof b.usageCapPercent === 'number' && typeof usagePercent === 'number' + && usagePercent >= b.usageCapPercent) { + return { reason: `usage cap (${b.usageCapPercent}%) reached at ${usagePercent}%` }; + } if (typeof b.maxHours === 'number') { if (now - Date.parse(state.startedAt) >= b.maxHours * 3_600_000) { return { reason: `time box (${b.maxHours}h) reached` }; diff --git a/shift/lib/decision.cjs b/shift/lib/decision.cjs index 5a9cfe3..e6a985a 100644 --- a/shift/lib/decision.cjs +++ b/shift/lib/decision.cjs @@ -3,12 +3,12 @@ const { evaluateBounds } = require('./bounds.cjs'); const { firstPending } = require('./state.cjs'); const { renderBrief } = require('./brief.cjs'); -// ctx: { bins, state, config, now, stopHookActive, killSwitch } +// ctx: { bins, state, config, now, usagePercent, stopHookActive, killSwitch } // returns { action:'allow', reason } | { action:'block', reason, nextBinId } function decide(ctx) { - const { bins, state, config, now, killSwitch } = ctx; + const { bins, state, config, now, usagePercent, killSwitch } = ctx; if (killSwitch) return { action: 'allow', reason: 'kill switch (.shift/STOP) present' }; - const bound = evaluateBounds(state, config, now); + const bound = evaluateBounds(state, config, now, usagePercent); if (bound) return { action: 'allow', reason: bound.reason }; const next = firstPending(bins); if (!next) return { action: 'allow', reason: 'queue empty' }; diff --git a/shift/lib/outcome.cjs b/shift/lib/outcome.cjs new file mode 100644 index 0000000..07ff0e9 --- /dev/null +++ b/shift/lib/outcome.cjs @@ -0,0 +1,30 @@ +'use strict'; + +// The rate-limit termination signature of a headless `claude -p` run is not +// documented, so we classify defensively: prefer inference 
// Stderr fragments that suggest a rate-limit / quota stop. They are only a
// fallback: cached usage data (near the limit + a reset still ahead) is
// preferred because the termination signature of a headless run is undocumented.
const DEFAULT_PATTERNS = [/rate.?limit/i, /usage limit/i, /quota/i, /\b429\b/];
// A session or weekly usage percentage at or above this counts as "near the limit".
const NEAR_LIMIT_PERCENT = 95;

/**
 * Classify how a single headless run ended.
 *
 * @param {object} ctx
 * @param {boolean} ctx.finalized  true when the engine finalized the run
 * @param {number|null} ctx.code   exit status of the process
 * @param {string|Buffer} [ctx.stderr]  captured stderr (string or Buffer)
 * @param {object|null} [ctx.usage]  cached usage: sessionUsedPercent,
 *   weeklyPercent (percentages) and sessionResetAt (epoch seconds)
 * @param {number} [ctx.now]       epoch ms; defaults to Date.now()
 * @param {RegExp[]} [ctx.patterns]  overrides DEFAULT_PATTERNS
 * @returns {'completed'|'rate_limited'|'error'}
 */
function classifyOutcome(ctx) {
  const { finalized, code, stderr, usage, now, patterns } = ctx;
  if (finalized) return 'completed'; // the engine finalized -> run is done
  if (code === 0) return 'completed'; // clean exit without finalize (nothing left to do)

  // Preferred signal: usage is near a limit AND the session window has not reset yet.
  const nowSec = (typeof now === 'number' ? now : Date.now()) / 1000;
  const resetFuture = Boolean(usage)
    && typeof usage.sessionResetAt === 'number'
    && usage.sessionResetAt > nowSec;
  const nearLimit = Boolean(usage) && (
    (typeof usage.sessionUsedPercent === 'number' && usage.sessionUsedPercent >= NEAR_LIMIT_PERCENT) ||
    (typeof usage.weeklyPercent === 'number' && usage.weeklyPercent >= NEAR_LIMIT_PERCENT)
  );
  if (resetFuture && nearLimit) return 'rate_limited';

  // Fallback signal: stderr text. child_process hands back a Buffer when no
  // encoding was requested, so accept both forms instead of skipping silently.
  let text = null;
  if (typeof stderr === 'string') text = stderr;
  else if (Buffer.isBuffer(stderr)) text = stderr.toString('utf8');

  const pats = Array.isArray(patterns) ? patterns : DEFAULT_PATTERNS;
  if (text !== null) {
    const matched = pats.some((p) => {
      // /g and /y regexes remember lastIndex between .test() calls; reset it so
      // a reused pattern classifies identical output identically every time.
      if (p instanceof RegExp) p.lastIndex = 0;
      return p.test(text);
    });
    if (matched) return 'rate_limited';
  }

  return 'error';
}
/**
 * The headless outer loop. Spawns the agent, classifies how each spawn ended,
 * and either stops or waits for the rate-limit window to reopen and resumes.
 * Every side effect is injected through `effects`, so the loop can be driven in
 * tests without a real process or real sleeping.
 *
 * effects: {
 *   now(): ms, loadState(): state, readUsage(): usageCache|null, log(msg),
 *   finalized(): bool,               // did the engine finalize during this run?
 *   spawn(n): { status, stderr },    // run the agent once (n = 1-based spawn count)
 *   sleepUntil(ms): Promise
 * }
 *
 * @param {{ config: object, effects: object }} args
 * @returns {Promise<{ reason: string, spawns: number }>} why the loop stopped
 *   and how many times the agent was spawned.
 */
async function runLoop({ config, effects }) {
  const bounds = (config && config.bounds) || {};
  const maxResumes = typeof bounds.maxResumes === 'number' ? bounds.maxResumes : 12;
  let spawns = 0;
  let lastOutcome = null;

  for (;;) {
    // Report a terminal outcome of the previous spawn BEFORE looking at bounds.
    // Otherwise a run that finalized (or errored) on the last permitted spawn
    // would be reported as "max resumes reached" and hide what really happened.
    if (lastOutcome === 'completed') return { reason: 'run finalized by the engine', spawns };
    if (lastOutcome === 'error') return { reason: 'run errored — stopping (see output)', spawns };

    const state = effects.loadState();
    const now = effects.now();
    const usage = effects.readUsage();

    const bound = evaluateBounds(state, config, now, usage ? usage.weeklyPercent : undefined);
    if (bound) return { reason: bound.reason, spawns };
    if (spawns >= maxResumes) return { reason: `max resumes (${maxResumes}) reached`, spawns };

    if (lastOutcome === 'rate_limited') {
      if (!bounds.autoResumeOnReset) return { reason: 'rate limited; auto-resume disabled', spawns };
      // sessionResetAt is cached in epoch seconds; the loop works in ms.
      const resetAt = usage && typeof usage.sessionResetAt === 'number' ? usage.sessionResetAt * 1000 : null;
      if (!resetAt) return { reason: 'rate limited but no reset time available — stopping', spawns };
      const until = resetAt + RESET_BUFFER_MS;
      // A reset time that is already behind us means the cache is stale: sleeping
      // would return immediately and the loop would respawn straight into the
      // same wall until maxResumes. Stop cleanly instead of busy-spinning.
      if (until <= now) {
        return { reason: 'rate limited but the cached reset time is already past — stopping', spawns };
      }
      if (typeof bounds.maxHours === 'number') {
        const deadline = Date.parse(state.startedAt) + bounds.maxHours * 3_600_000;
        if (until >= deadline) return { reason: 'rate limited; reset is past the time box — stopping', spawns };
      }
      effects.log(`rate limited — waiting until ${new Date(until).toISOString()}`);
      await effects.sleepUntil(until);
      lastOutcome = null;
      continue; // re-evaluate bounds with fresh state/usage before spawning again
    }

    spawns += 1;
    effects.log(`spawn #${spawns}: running claude`);
    const res = effects.spawn(spawns);
    lastOutcome = classifyOutcome({
      finalized: effects.finalized(),
      code: res ? res.status : 1, // a missing result is treated as a failed spawn
      stderr: res ? res.stderr : '',
      usage: effects.readUsage(),
      now: effects.now()
    });
  }
}
/**
 * Run a per-bin verification command in `cwd`.
 *
 * @param {string|null|undefined} command  shell command; null/empty means "no gate" and passes
 * @param {string} cwd                     directory to run the command in
 * @param {(command: string, cwd: string) => { ok: boolean, output: string }} [exec]
 *   injectable executor (for tests); defaults to defaultExec
 * @returns {{ ok: boolean, output: string }}
 */
function runVerify(command, cwd, exec) {
  if (!command) return { ok: true, output: '' };
  return (exec || defaultExec)(command, cwd);
}

/**
 * Default executor: runs `command` through the shell and captures stdout + stderr.
 * `ok` is true only for exit status 0. When the command could not be launched,
 * timed out, overflowed the buffer or was killed by a signal, a short
 * explanatory line is appended to `output` so the failure is not silent.
 */
function defaultExec(command, cwd) {
  // shell:true is intentional — `command` is the user's own config value (e.g.
  // "npm test && npm run build") and is passed as a whole, not interpolated into
  // a larger string. It is never built from untrusted input.
  const r = cp.spawnSync(command, {
    cwd, shell: true, encoding: 'utf8',
    timeout: 10 * 60 * 1000,
    maxBuffer: 10 * 1024 * 1024
  });
  let output = `${r.stdout || ''}${r.stderr || ''}`;
  if (r.error) {
    // spawnSync reports launch failures, timeouts and maxBuffer overflows here;
    // status is null in these cases, so without this note the caller would only
    // see a bare failure with empty or truncated output.
    output += `\n[verify] command did not complete: ${r.error.message}`;
  } else if (r.signal) {
    output += `\n[verify] command was terminated by signal ${r.signal}`;
  }
  return { ok: r.status === 0, output };
}
const capCfg = { bounds: { maxHours: 8, usageCapPercent: 90 }, definitionOfDone: 'd', git: {} }; + const r = decide({ bins, state, config: capCfg, now: t0, usagePercent: 95, killSwitch: false }); + assert.equal(r.action, 'allow'); + assert.match(r.reason, /usage cap/); +}); diff --git a/shift/test/hook.test.cjs b/shift/test/hook.test.cjs index aae0144..ad2c603 100644 --- a/shift/test/hook.test.cjs +++ b/shift/test/hook.test.cjs @@ -8,18 +8,18 @@ const cp = require('node:child_process'); const HOOK = path.resolve(__dirname, '..', 'hooks', 'shift-stop.cjs'); -function setupRun() { +function setupRun(configOverride) { const cwd = fs.mkdtempSync(path.join(os.tmpdir(), 'shift-hook-')); fs.mkdirSync(path.join(cwd, 'queue'), { recursive: true }); fs.writeFileSync(path.join(cwd, 'queue', '01.md'), 'bin one'); fs.writeFileSync(path.join(cwd, 'queue', '02.md'), 'bin two'); const dir = path.join(cwd, '.shift'); fs.mkdirSync(dir, { recursive: true }); - fs.writeFileSync(path.join(dir, 'config.json'), JSON.stringify({ + fs.writeFileSync(path.join(dir, 'config.json'), JSON.stringify(Object.assign({ sources: [{ path: 'queue', kind: 'briefs' }], bounds: { maxHours: 24, maxIterations: 10 }, definitionOfDone: 'done', git: {} - })); + }, configOverride || {}))); fs.writeFileSync(path.join(dir, 'state.json'), JSON.stringify({ runId: 'r', startedAt: new Date().toISOString(), iterations: 0, branch: 'shift/x', currentBinId: null, bins: [] @@ -93,3 +93,50 @@ test('resolves .shift from the hook payload cwd, not the process cwd', () => { assert.equal(r.decision, 'block'); assert.match(r.reason, /bin one/); }); + +// ---- v3: verify gate ---- + +test('verify gate (passing) marks bins done and drains', () => { + const { cwd, dir } = setupRun({ verify: { command: 'true', maxAttempts: 2 } }); + runHook(cwd, { stop_hook_active: false }); // start bin 1 + runHook(cwd, { stop_hook_active: true }); // verify passes -> bin1 done, start bin2 + const s = JSON.parse(fs.readFileSync(path.join(dir, 
'state.json'), 'utf8')); + assert.equal(s.bins.find(b => b.id === 'queue/01.md').status, 'done'); +}); + +test('verify gate (failing) re-blocks the same bin with feedback, then blocks after maxAttempts', () => { + const { cwd, dir } = setupRun({ verify: { command: 'false', maxAttempts: 2 } }); + runHook(cwd, { stop_hook_active: false }); // start bin 1 + const r1 = runHook(cwd, { stop_hook_active: true }); // verify fails, attempt 1 < 2 -> retry SAME bin + assert.equal(r1.decision, 'block'); + assert.match(r1.reason, /failed verification/); + assert.match(r1.reason, /bin one/); + let s = JSON.parse(fs.readFileSync(path.join(dir, 'state.json'), 'utf8')); + assert.equal(s.bins.find(b => b.id === 'queue/01.md').status, 'pending'); + assert.equal(s.bins.find(b => b.id === 'queue/01.md').attempts, 1); + + const r2 = runHook(cwd, { stop_hook_active: true }); // verify fails again, attempt 2 == max -> blocked, move on + assert.equal(r2.decision, 'block'); + assert.match(r2.reason, /bin two/); + s = JSON.parse(fs.readFileSync(path.join(dir, 'state.json'), 'utf8')); + assert.equal(s.bins.find(b => b.id === 'queue/01.md').status, 'blocked'); +}); + +// ---- v2: usage cap + cache ---- + +test('usage cap from the hook payload ends the run and caches usage', () => { + const { cwd, dir } = setupRun({ bounds: { maxHours: 24, maxIterations: 10, usageCapPercent: 90 } }); + const reset = Math.floor(Date.now() / 1000) + 3600; + const r = runHook(cwd, { + stop_hook_active: false, + rate_limits: { + five_hour: { used_percentage: 30, resets_at: reset }, + seven_day: { used_percentage: 95, resets_at: reset } + } + }); + assert.deepEqual(r, {}); + assert.match(fs.readFileSync(path.join(dir, 'summary.md'), 'utf8'), /usage cap/); + const usage = JSON.parse(fs.readFileSync(path.join(dir, 'usage.json'), 'utf8')); + assert.equal(usage.weeklyPercent, 95); + assert.equal(usage.sessionResetAt, reset); +}); diff --git a/shift/test/outcome.test.cjs b/shift/test/outcome.test.cjs new file mode 100644 
index 0000000..a436e6f --- /dev/null +++ b/shift/test/outcome.test.cjs @@ -0,0 +1,33 @@ +'use strict'; +const { test } = require('node:test'); +const assert = require('node:assert'); +const { classifyOutcome } = require('../lib/outcome.cjs'); + +const nowMs = 1_000_000_000_000; +const nowSec = nowMs / 1000; + +test('finalized run is completed', () => { + assert.equal(classifyOutcome({ finalized: true, code: 1, now: nowMs }), 'completed'); +}); + +test('clean exit (code 0) is completed', () => { + assert.equal(classifyOutcome({ finalized: false, code: 0, now: nowMs }), 'completed'); +}); + +test('nonzero + near-limit usage + future reset is rate_limited', () => { + const usage = { sessionUsedPercent: 99, weeklyPercent: 50, sessionResetAt: nowSec + 3600 }; + assert.equal(classifyOutcome({ finalized: false, code: 1, usage, now: nowMs }), 'rate_limited'); +}); + +test('nonzero + rate-limit stderr is rate_limited', () => { + assert.equal(classifyOutcome({ finalized: false, code: 1, stderr: 'Error: rate limit exceeded', now: nowMs }), 'rate_limited'); +}); + +test('nonzero with no signal is error', () => { + assert.equal(classifyOutcome({ finalized: false, code: 1, stderr: 'boom', now: nowMs }), 'error'); +}); + +test('near-limit but reset already past is NOT rate_limited (no future window)', () => { + const usage = { sessionUsedPercent: 99, sessionResetAt: nowSec - 10 }; + assert.equal(classifyOutcome({ finalized: false, code: 1, usage, stderr: 'boom', now: nowMs }), 'error'); +}); diff --git a/shift/test/run-loop.test.cjs b/shift/test/run-loop.test.cjs new file mode 100644 index 0000000..b4c1c22 --- /dev/null +++ b/shift/test/run-loop.test.cjs @@ -0,0 +1,86 @@ +'use strict'; +const { test } = require('node:test'); +const assert = require('node:assert'); +const { runLoop } = require('../lib/run-loop.cjs'); + +function makeEffects({ spawns, usage, bounds }) { + const state = { startedAt: new Date(Date.now()).toISOString(), iterations: 0 }; + let i = 0; + let finalized = 
false; + const calls = { sleepUntil: [], spawns: 0 }; + const effects = { + now: () => Date.now(), + loadState: () => state, + readUsage: () => usage, + log: () => {}, + finalized: () => finalized, + sleepUntil: (ms) => { calls.sleepUntil.push(ms); return Promise.resolve(); }, + spawn: () => { + calls.spawns += 1; + const s = spawns[i++] || { result: { status: 1, stderr: '' }, finalize: false }; + finalized = s.finalize; + return s.result; + } + }; + return { effects, calls, config: { bounds: bounds || { maxHours: 8, maxResumes: 12, autoResumeOnReset: true } } }; +} + +test('a single finalizing spawn completes the run', async () => { + const { effects, calls, config } = makeEffects({ + spawns: [{ result: { status: 0 }, finalize: true }], + usage: null + }); + const r = await runLoop({ config, effects }); + assert.match(r.reason, /finalized/); + assert.equal(r.spawns, 1); + assert.equal(calls.sleepUntil.length, 0); +}); + +test('rate-limited spawn waits for reset, then resumes and finishes', async () => { + const usage = { weeklyPercent: 50, sessionUsedPercent: 99, sessionResetAt: Math.floor(Date.now() / 1000) + 3600 }; + const { effects, calls, config } = makeEffects({ + spawns: [ + { result: { status: 1, stderr: '' }, finalize: false }, // rate-limited (inferred from usage) + { result: { status: 0 }, finalize: true } // resumes, finalizes + ], + usage + }); + const r = await runLoop({ config, effects }); + assert.match(r.reason, /finalized/); + assert.equal(r.spawns, 2); + assert.equal(calls.sleepUntil.length, 1, 'should have waited once'); +}); + +test('rate-limited with auto-resume disabled stops', async () => { + const usage = { weeklyPercent: 50, sessionUsedPercent: 99, sessionResetAt: Math.floor(Date.now() / 1000) + 3600 }; + const { effects, config } = makeEffects({ + spawns: [{ result: { status: 1, stderr: '' }, finalize: false }], + usage, + bounds: { maxHours: 8, maxResumes: 12, autoResumeOnReset: false } + }); + const r = await runLoop({ config, effects 
}); + assert.match(r.reason, /auto-resume disabled/); + assert.equal(r.spawns, 1); +}); + +test('usage cap stops before any spawn', async () => { + const { effects, calls, config } = makeEffects({ + spawns: [{ result: { status: 0 }, finalize: true }], + usage: { weeklyPercent: 95 }, + bounds: { maxHours: 8, usageCapPercent: 90, autoResumeOnReset: true } + }); + const r = await runLoop({ config, effects }); + assert.match(r.reason, /usage cap/); + assert.equal(calls.spawns, 0); +}); + +test('maxResumes acts as a runaway backstop', async () => { + const { effects, config } = makeEffects({ + spawns: [{ result: { status: 0 }, finalize: true }], + usage: null, + bounds: { maxHours: 8, maxResumes: 0, autoResumeOnReset: true } + }); + const r = await runLoop({ config, effects }); + assert.match(r.reason, /max resumes/); + assert.equal(r.spawns, 0); +}); diff --git a/shift/test/usage.test.cjs b/shift/test/usage.test.cjs new file mode 100644 index 0000000..66c3f8d --- /dev/null +++ b/shift/test/usage.test.cjs @@ -0,0 +1,40 @@ +'use strict'; +const { test } = require('node:test'); +const assert = require('node:assert'); +const fs = require('node:fs'); +const os = require('node:os'); +const path = require('node:path'); +const { writeUsageCache, readUsageCache } = require('../lib/usage.cjs'); + +function tmp() { return fs.mkdtempSync(path.join(os.tmpdir(), 'shift-usage-')); } + +test('write + read round-trips the full rate-limit payload', () => { + const dir = tmp(); + const weekly = writeUsageCache(dir, { + five_hour: { used_percentage: 72, resets_at: 1000 }, + seven_day: { used_percentage: 41, resets_at: 2000 } + }, 123); + assert.equal(weekly, 41); + assert.deepEqual(readUsageCache(dir), { + weeklyPercent: 41, sessionUsedPercent: 72, sessionResetAt: 1000, weeklyResetAt: 2000, capturedAt: 123 + }); +}); + +test('absent rate_limits returns null and writes nothing', () => { + const dir = tmp(); + assert.equal(writeUsageCache(dir, undefined, 1), null); + 
assert.equal(readUsageCache(dir), null); +}); + +test('partial windows degrade to null fields', () => { + const dir = tmp(); + const weekly = writeUsageCache(dir, { five_hour: { used_percentage: 60, resets_at: 5 } }, 9); + assert.equal(weekly, null); + const c = readUsageCache(dir); + assert.equal(c.sessionUsedPercent, 60); + assert.equal(c.weeklyPercent, null); +}); + +test('readUsageCache returns null when no cache exists', () => { + assert.equal(readUsageCache(tmp()), null); +}); diff --git a/shift/test/verify.test.cjs b/shift/test/verify.test.cjs new file mode 100644 index 0000000..09a4a90 --- /dev/null +++ b/shift/test/verify.test.cjs @@ -0,0 +1,25 @@ +'use strict'; +const { test } = require('node:test'); +const assert = require('node:assert'); +const os = require('node:os'); +const { runVerify } = require('../lib/verify.cjs'); + +test('a null/empty command is a pass', () => { + assert.deepEqual(runVerify(null, '.'), { ok: true, output: '' }); + assert.deepEqual(runVerify('', '.'), { ok: true, output: '' }); +}); + +test('uses the injected exec and returns its result', () => { + const fake = (cmd, cwd) => ({ ok: false, output: `ran ${cmd} in ${cwd}` }); + const r = runVerify('npm test', '/repo', fake); + assert.equal(r.ok, false); + assert.match(r.output, /ran npm test in \/repo/); +}); + +test('default exec: zero exit passes, non-zero fails, output captured', () => { + assert.equal(runVerify('true', os.tmpdir()).ok, true); + assert.equal(runVerify('false', os.tmpdir()).ok, false); + const r = runVerify('echo hi', os.tmpdir()); + assert.equal(r.ok, true); + assert.match(r.output, /hi/); +}); From e172bb0bf5870320ae1305d4e9dd2c2f0ee5c35e Mon Sep 17 00:00:00 2001 From: Allen Manzano-Wight <6640236+AllenBW@users.noreply.github.com> Date: Mon, 15 Jun 2026 14:06:53 -0400 Subject: [PATCH 04/12] shift: one-command Stop-hook installer + list module 2 in root README - shift/install.sh wires the Stop hook into ~/.claude/settings.json idempotently (backup -> merge -> 
validate -> atomic move); never duplicates, updates the path on repo move, preserves existing hooks/settings. - shift/lib/install.cjs: pure mergeStopHook() (tested); install.sh is a thin shell. - shift/test/install.test.cjs: 7 tests (unit merge + live install.sh integration). - README (root): list shift in the Modules table + candor pointer. - shift/README: swap manual hook-wiring for the installer; resolve the hook-schema caveat (block/reason contract verified against the Claude Code hooks docs). --- README.md | 3 ++ shift/README.md | 28 +++++----- shift/install.sh | 79 ++++++++++++++++++++++++++++ shift/lib/install.cjs | 47 +++++++++++++++++ shift/test/install.test.cjs | 102 ++++++++++++++++++++++++++++++++++++ 5 files changed, 246 insertions(+), 13 deletions(-) create mode 100755 shift/install.sh create mode 100644 shift/lib/install.cjs create mode 100644 shift/test/install.test.cjs diff --git a/README.md b/README.md index 5c7576c..eea0110 100644 --- a/README.md +++ b/README.md @@ -19,9 +19,12 @@ Transparency isn't a feature bolted on the side; for agentic coding it's the who | Module | What it is | Targets | |---|---|---| | [**code-status-bar**](./code-status-bar) | A status line that shows usage limits, cost, context health, and git/worktree state at a glance | Claude Code (via [ccstatusline](https://github.com/sirmalloc/ccstatusline)) | +| [**shift**](./shift) | An autonomous work-queue runner: pre-load bins of work, leave, and it keeps the agent grinding through them — past natural stop points and across rate-limit resets — leaving every decision logged and every change a reviewable commit | Claude Code (Stop hook + headless `-p`) | > **New here? 
Start with the [Code Status Bar](./code-status-bar).** It installs as a portable, zero-dependency default, or an [opt-in colored variant](./code-status-bar#color--static-by-default-status-driven-by-opt-in) that recolors the usage bars **green → yellow → red** as you approach each limit — so you *feel* a wall coming before you read a single number. You could build it by hand in ccstatusline's editor; this is that setup already done — one command, no configuration, and still fully editable. +> **Going heads-down?** [**shift**](./shift) turns an unattended run — the *least* transparent mode there is — into an honest paper trail: you trade real-time steering for a `shift/` branch, a decision log, and a "here's what I did and what needs you" summary. One command wires the hook; the safety model keeps the work on a branch and off your remotes. + More to come. Each module is self-contained, declares which agent it targets, and explains *why* every piece earns its place — because justifying the real estate is part of the philosophy. ## License diff --git a/shift/README.md b/shift/README.md index d871a96..36fc316 100644 --- a/shift/README.md +++ b/shift/README.md @@ -20,24 +20,26 @@ Full best-judgment autonomy on reversible, in-worktree work. By default it will ## Install -1. Get the files (clone the toolkit, or copy the `shift/` folder). -2. Register the Stop hook **once** in `~/.claude/settings.json` (safe globally — no-ops outside an active run): +1. Clone the toolkit (the hook runs from these files by absolute path, so it installs locally — no `curl | bash`). +2. 
Wire the Stop hook into `~/.claude/settings.json` — one command, idempotent: + +```bash +bash shift/install.sh +``` + +It merges the entry below (safe globally — the hook no-ops in any repo without an active `.shift/` run), backs up any existing settings first, and never duplicates on re-run — re-running after a `git pull` or a repo move just updates the path: ```json -{ - "hooks": { - "Stop": [ - { "matcher": "", "hooks": [ - { "type": "command", "command": "node /ABSOLUTE/PATH/TO/shift/hooks/shift-stop.cjs" } - ] } - ] - } -} +{ "hooks": { "Stop": [ + { "matcher": "", "hooks": [ + { "type": "command", "command": "node /ABSOLUTE/PATH/TO/shift/hooks/shift-stop.cjs" } + ] } +] } } ``` -> Verify the hook schema against the current Claude Code hooks docs. The engine needs only: "block + feed `reason` back", the `stop_hook_active` flag, the payload `cwd`, and (for the usage cap / auto-resume) the payload `rate_limits`. +> **Hook contract (verified against the [Claude Code hooks docs](https://code.claude.com/docs/en/hooks)).** The Stop hook returns `{"decision":"block","reason":…}` to keep the session going — the `reason` becomes the next instruction — and omits `decision` (or exits 0) to allow the stop. The usage cap and `shift run` auto-resume read the hook payload's `rate_limits` when present and **skip cleanly when it's absent** (e.g. non-Pro/Max), so the engine never depends on it. -3. (Optional) put `shift/bin/shift` on your PATH. +3. (Optional) put `shift/bin/shift` on your PATH — the installer prints the `ln -s` command. ## Use diff --git a/shift/install.sh b/shift/install.sh new file mode 100755 index 0000000..eb291d3 --- /dev/null +++ b/shift/install.sh @@ -0,0 +1,79 @@ +#!/usr/bin/env bash +# shift installer — Agentic Workflow Toolkit (module 2) +# Wires shift's Stop hook into ~/.claude/settings.json, idempotently. 
+# +# Unlike the status-bar installer, this one is LOCAL-ONLY: the hook entry points at +# this clone's hooks/shift-stop.cjs by absolute path, so it must run from the files +# on disk (no curl | bash). Re-running after `git pull` (or after moving the repo) +# updates the path in place — it never duplicates the hook. +set -euo pipefail + +if ! command -v node >/dev/null 2>&1; then + echo "Error: shift needs Node on your PATH (the hook + this installer run via node)." >&2 + exit 1 +fi + +# Resolve this script's directory; the hook lives next to it under hooks/. +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]:-$0}")" 2>/dev/null && pwd || echo "")" +HOOK="$SCRIPT_DIR/hooks/shift-stop.cjs" +MERGER="$SCRIPT_DIR/lib/install.cjs" +if [ -z "$SCRIPT_DIR" ] || [ ! -f "$HOOK" ] || [ ! -f "$MERGER" ]; then + echo "Error: run this from a shift clone — couldn't find hooks/shift-stop.cjs next to install.sh." >&2 + echo " (clone the toolkit, then: bash shift/install.sh)" >&2 + exit 1 +fi + +COMMAND="node $HOOK" +SETTINGS_DIR="$HOME/.claude" +DEST="$SETTINGS_DIR/settings.json" +mkdir -p "$SETTINGS_DIR" + +# Compute the merged settings into a temp file via the unit-tested merger, then +# move it into place — a failed merge never leaves a broken settings.json behind. +TMP="$(mktemp)" +ACTION="$(node -e ' + const fs = require("node:fs"); + const { mergeStopHook } = require(process.argv[1]); + const dest = process.argv[2], command = process.argv[3], tmp = process.argv[4]; + let settings = {}; + if (fs.existsSync(dest)) { + const raw = fs.readFileSync(dest, "utf8").trim(); + if (raw) { + try { settings = JSON.parse(raw); } + catch { console.error("Error: " + dest + " is not valid JSON; fix or move it, then re-run."); process.exit(2); } + } + } + const r = mergeStopHook(settings, command); + fs.writeFileSync(tmp, JSON.stringify(r.settings, null, 2) + "\n"); + process.stdout.write(r.action); +' "$MERGER" "$DEST" "$COMMAND" "$TMP")" || { rm -f "$TMP"; exit 1; } + +if [ ! 
-s "$TMP" ]; then + echo "Error: merge produced an empty file; aborting (your settings are untouched)." >&2 + rm -f "$TMP"; exit 1 +fi + +if [ "$ACTION" = "unchanged" ]; then + echo "Already wired: shift Stop hook is present in $DEST (no change)." + rm -f "$TMP" +else + if [ -f "$DEST" ]; then + BAK="$DEST.bak-$(date +%Y%m%d-%H%M%S)" + cp "$DEST" "$BAK" + echo "Backed up existing settings -> $BAK" + fi + mv "$TMP" "$DEST" + case "$ACTION" in + added) echo "Installed: shift Stop hook -> $DEST" ;; + updated) echo "Updated: shift Stop hook path -> $DEST" ;; + *) echo "Wrote: $DEST ($ACTION)" ;; + esac +fi + +echo " hook: $COMMAND" +echo +echo "Safe globally — the hook no-ops in any repo without an active .shift/ run." +echo "Next: cd into a repo, add briefs under queue/, then: ${SCRIPT_DIR}/bin/shift start" +echo "(optional) put it on PATH: ln -s ${SCRIPT_DIR}/bin/shift /usr/local/bin/shift" +echo +echo "To remove later, delete the shift Stop entry from $DEST (restore a .bak-* backup)." diff --git a/shift/lib/install.cjs b/shift/lib/install.cjs new file mode 100644 index 0000000..993e427 --- /dev/null +++ b/shift/lib/install.cjs @@ -0,0 +1,47 @@ +'use strict'; +// Pure logic for wiring shift's Stop hook into a Claude Code settings object. +// The I/O (read/back-up/validate/write ~/.claude/settings.json) lives in install.sh; +// this stays a pure function so it can be unit-tested without touching the filesystem. + +// A command string belongs to shift if it invokes our Stop hook script. +function isShiftCommand(command) { + return typeof command === 'string' && command.includes('shift-stop.cjs'); +} + +function makeGroup(command) { + return { matcher: '', hooks: [{ type: 'command', command }] }; +} + +// mergeStopHook(settings, command) -> { settings, changed, action } +// action: 'added' (no prior shift hook) | 'updated' (path changed) | 'unchanged' (already wired). +// Never mutates the input; returns a fresh deep-ish copy of the parts it touches. 
/**
 * Wire shift's Stop hook into a Claude Code settings object without touching
 * anything else in it.
 *
 * @param {object|null|undefined} settings  parsed settings (treated as {} when absent)
 * @param {string} command                  the full hook command to install
 * @returns {{ settings: object, changed: boolean, action: 'added'|'updated'|'unchanged' }}
 *   'added'     – no shift hook was present, a new Stop group was appended
 *   'updated'   – a shift hook was present with a different command (e.g. repo moved)
 *   'unchanged' – the exact command is already wired
 * Never mutates the input; the objects on the path it edits are copied.
 */
function mergeStopHook(settings, command) {
  const next = { ...(settings || {}) };
  const hooks = { ...(next.hooks || {}) };
  const stop = Array.isArray(hooks.Stop) ? hooks.Stop.map(g => ({ ...g })) : [];

  const ownsShiftHook = (h) => isShiftCommand(h && h.command);

  // Find an existing group that already points at shift's hook.
  const idx = stop.findIndex(g => Array.isArray(g.hooks) && g.hooks.some(ownsShiftHook));

  let action;
  if (idx === -1) {
    stop.push(makeGroup(command));
    action = 'added';
  } else {
    const group = stop[idx];
    const current = group.hooks.find(ownsShiftHook);
    if (current.command === command) {
      action = 'unchanged';
    } else {
      // Repo moved: repoint the shift entry in place. Only shift's own entry is
      // rewritten — any other hooks sharing the group, and any extra fields on
      // the group or the hook, are kept. Duplicate shift entries inside the
      // group collapse to a single one.
      const rewritten = [];
      let replaced = false;
      for (const h of group.hooks) {
        if (!ownsShiftHook(h)) {
          rewritten.push(h);
        } else if (!replaced) {
          rewritten.push({ ...h, command });
          replaced = true;
        }
      }
      stop[idx] = { ...group, hooks: rewritten };
      action = 'updated';
    }
  }

  hooks.Stop = stop;
  next.hooks = hooks;
  return { settings: next, changed: action !== 'unchanged', action };
}
assert.equal(r.action, 'added'); + assert.equal(r.changed, true); + const groups = r.settings.hooks.Stop; + assert.equal(groups.length, 1); + assert.deepEqual(groups[0], { matcher: '', hooks: [{ type: 'command', command: CMD }] }); +}); + +test('is idempotent — same command twice does not duplicate', () => { + const once = mergeStopHook({}, CMD).settings; + const twice = mergeStopHook(once, CMD); + assert.equal(twice.action, 'unchanged'); + assert.equal(twice.changed, false); + assert.equal(twice.settings.hooks.Stop.length, 1); +}); + +test('preserves unrelated hooks and existing Stop groups', () => { + const existing = { + statusLine: { type: 'command', command: 'x' }, + hooks: { + PreToolUse: [{ matcher: 'Bash', hooks: [{ type: 'command', command: 'guard' }] }], + Stop: [{ matcher: '', hooks: [{ type: 'command', command: 'other-stop-hook' }] }] + } + }; + const r = mergeStopHook(existing, CMD); + assert.equal(r.action, 'added'); + // unrelated settings + hooks untouched + assert.deepEqual(r.settings.statusLine, { type: 'command', command: 'x' }); + assert.equal(r.settings.hooks.PreToolUse.length, 1); + // shift appended, the foreign Stop group kept + assert.equal(r.settings.hooks.Stop.length, 2); + assert.equal(r.settings.hooks.Stop[0].hooks[0].command, 'other-stop-hook'); + assert.equal(r.settings.hooks.Stop[1].hooks[0].command, CMD); +}); + +test('updates the path when the shift hook moved', () => { + const old = mergeStopHook({}, 'node /old/path/shift/hooks/shift-stop.cjs').settings; + const r = mergeStopHook(old, CMD); + assert.equal(r.action, 'updated'); + assert.equal(r.changed, true); + assert.equal(r.settings.hooks.Stop.length, 1); + assert.equal(r.settings.hooks.Stop[0].hooks[0].command, CMD); +}); + +test('does not mutate the input settings object', () => { + const input = { hooks: { Stop: [] } }; + const snapshot = JSON.stringify(input); + mergeStopHook(input, CMD); + assert.equal(JSON.stringify(input), snapshot); +}); + +test('install.sh wires the 
hook into a fresh ~/.claude/settings.json', () => { + const home = fs.mkdtempSync(path.join(os.tmpdir(), 'shift-inst-')); + const out = runInstall(home); + assert.match(out, /Installed: shift Stop hook/); + const s = readSettings(home); + assert.equal(s.hooks.Stop.length, 1); + assert.equal(s.hooks.Stop[0].hooks[0].command, `node ${HOOK}`); +}); + +test('install.sh is idempotent and preserves existing settings + backs up', () => { + const home = fs.mkdtempSync(path.join(os.tmpdir(), 'shift-inst-')); + const claude = path.join(home, '.claude'); + fs.mkdirSync(claude, { recursive: true }); + fs.writeFileSync(path.join(claude, 'settings.json'), + JSON.stringify({ statusLine: { type: 'command', command: 'x' } }, null, 2)); + + const out1 = runInstall(home); + assert.match(out1, /Backed up existing settings/); + const s1 = readSettings(home); + assert.deepEqual(s1.statusLine, { type: 'command', command: 'x' }); // preserved + assert.equal(s1.hooks.Stop.length, 1); + + const out2 = runInstall(home); + assert.match(out2, /Already wired/); + assert.equal(readSettings(home).hooks.Stop.length, 1); // no duplicate + const baks = fs.readdirSync(claude).filter(f => f.startsWith('settings.json.bak-')); + assert.equal(baks.length, 1); // unchanged run made no second backup +}); From c7adfb889843f6c47e324fffb04b32571caf8f04 Mon Sep 17 00:00:00 2001 From: Allen Manzano-Wight <6640236+AllenBW@users.noreply.github.com> Date: Mon, 15 Jun 2026 21:42:25 -0400 Subject: [PATCH 05/12] shift: harden the headless runner (post-smoke-audit fixes) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A real `shift run` smoke confirmed headless `claude -p` honors the Stop-hook block and drives the queue warm (resolves the SPEC §9.2 open question). A pre-flight audit of the previously-untested runner path drove these fixes: - No false-green: classifyOutcome returns 'completed' only when the engine finalized (summary.md). 
A code-0 exit without finalize is 'incomplete' — the runner resumes if the queue advanced, else stops with a 'is the Stop hook wired?' diagnostic. `shift run` grades on summary.md, not the exit line. - Stale-reset guard: auto-resume stops cleanly when the cached reset time is already in the past (was a maxResumes-bounded busy-spin). - Per-spawn timeout (spawnTimeoutMinutes, default 30) kills a wedged claude so spawnSync can't hang the runner; launch failures + kills are surfaced. - Warn when a headless run uses a Bash-prompting permission mode. - Dropped a spurious audit suggestion (runner writing state.iterations) that would have double-counted the hook's bound tracking. 63 shift tests green (pure unit + hook/CLI/run-loop/install integration). --- shift/README.md | 7 +++++- shift/SPEC.md | 15 ++++++++++-- shift/bin/shift | 29 +++++++++++++++++++++--- shift/lib/outcome.cjs | 7 ++++-- shift/lib/run-loop.cjs | 20 +++++++++++++++- shift/test/outcome.test.cjs | 11 +++++++-- shift/test/run-loop.test.cjs | 44 ++++++++++++++++++++++++++++++++++++ 7 files changed, 122 insertions(+), 11 deletions(-) diff --git a/shift/README.md b/shift/README.md index 36fc316..8bb45c0 100644 --- a/shift/README.md +++ b/shift/README.md @@ -74,6 +74,7 @@ When it ends, read `.shift/summary.md` (bins done/blocked + a "Needs you" sectio "maxHours": 4, "maxIterations": 30, "maxResumes": 12, + "spawnTimeoutMinutes": 30, "usageCapPercent": 90, "autoResumeOnReset": true }, @@ -85,9 +86,13 @@ When it ends, read `.shift/summary.md` (bins done/blocked + a "Needs you" sectio ``` - **`usageCapPercent`** — stop when weekly usage reaches this (read from the hook payload's `rate_limits`; skipped when that data is absent, e.g. non-Pro/Max). -- **`autoResumeOnReset`** — on a rate-limit wall, `shift run` waits for the 5-hour window to reopen and resumes (never past the time box). 
+- **`autoResumeOnReset`** — on a rate-limit wall, `shift run` waits for the 5-hour window to reopen and resumes (never past the time box). If the cached reset time is stale/in the past it stops cleanly rather than busy-spinning. +- **`maxResumes`** — the runner's own backstop on the number of `claude` spawns (independent of the hook-maintained `maxIterations`/`maxHours`). +- **`spawnTimeoutMinutes`** — hard per-spawn wall: a wedged `claude` is killed (SIGTERM) so it can't hang the runner. Default 30. - **`verify.command`** — per-bin acceptance gate; `null` disables it. +> A headless `shift run` grades success on `.shift/summary.md` (written only when the engine finalizes), not on the exit line: a `claude -p` that exits without finalizing is reported as *"no summary written — did NOT finalize"* with a hint to check the hook wiring, never as a false success. + ### Permissions for unattended runs `shift run` invokes `claude -p --permission-mode `. `acceptEdits` (the default) auto-approves file edits but **other tools (e.g. Bash) can still prompt — and a headless run can't answer prompts.** For real unattended work that runs tests/commands, either: diff --git a/shift/SPEC.md b/shift/SPEC.md index e81e1eb..229c6ac 100644 --- a/shift/SPEC.md +++ b/shift/SPEC.md @@ -269,6 +269,17 @@ All three phases are implemented on branch `shift-v1`. Notable as-built decision - **Rate-limit detection without the undocumented exit signature (resolves §9.2).** Research confirmed the headless rate-limit termination signature is undocumented, but the **Stop hook payload includes `rate_limits`**. So the engine caches the latest reset/usage to `.shift/usage.json`, and `lib/outcome.cjs` classifies a non-finalized, non-zero spawn as `rate_limited` by **inference** — near-limit cached usage (≥95%) + a future reset — with config-overridable stderr patterns as a fallback. No dependency on an exact exit code/message. 
- **Usage cap source (resolves §9.1).** Enforced from the hook payload's `rate_limits.seven_day.used_percentage`; absent data (non-Pro/Max, pre-first-response) degrades to "cap skipped," never an error. - **Verify gate (v3, resolves §9.3).** `verify.command` runs per bin; failures re-feed the bin with the output up to `maxAttempts`, then block it — so "looked done but wasn't" is caught, not silently accepted. -- **Permissions.** `shift run` uses `--permission-mode` (default `acceptEdits`). Truly unattended work that runs commands typically needs `dontAsk` + a `permissions.allow` list, or `bypassPermissions` — documented in the README; the branch-only/no-push model and bounds are the backstop. +- **Permissions.** `shift run` uses `--permission-mode` (default `acceptEdits`). Truly unattended work that runs commands typically needs `dontAsk` + a `permissions.allow` list, or `bypassPermissions` — documented in the README; the branch-only/no-push model and bounds are the backstop. The runner now **warns** at startup when `permissionMode` would prompt on Bash (a headless run can't answer), since that combination otherwise exits without finalizing. -**New modules beyond §12:** `lib/verify.cjs`, `lib/usage.cjs`, `lib/outcome.cjs`, `lib/run-loop.cjs`; `bin/shift` gains `run`. **Tests:** 52 in `shift` (pure unit + hook/CLI/run-loop integration), all green. +**New modules beyond §12:** `lib/verify.cjs`, `lib/usage.cjs`, `lib/outcome.cjs`, `lib/run-loop.cjs`, `lib/install.cjs`; `bin/shift` gains `run`; `install.sh` wires the Stop hook. + +### Smoke validation + post-smoke hardening (2026-06-15) + +A real bounded `shift run` smoke (2 commit-a-file bins, `bypassPermissions`) **empirically resolved the open question behind §9.2**: headless `claude -p` **does** honor the Stop hook's `{"decision":"block"}` and continues the session warm — both bins were completed and committed within a single spawn. 
A pre-flight audit of the (previously untested) runner path then drove four fixes: + +- **No false-green.** `classifyOutcome` only returns `completed` when the engine actually finalized (`summary.md` written). A `claude -p` that exits 0 without finalizing is `incomplete` — the runner **resumes** if the queue advanced, else **stops with a "is the Stop hook wired?" diagnostic** instead of reporting success. `shift run` grades on `summary.md`, not the exit line. +- **Stale-reset guard.** Auto-resume stops cleanly when the cached reset time is already in the past (previously a `maxResumes`-bounded busy-spin). +- **Per-spawn timeout.** `spawnTimeoutMinutes` (default 30) kills a wedged `claude` so a blocking `spawnSync` can't hang the runner; launch failures (`claude` not on PATH) and kills are now surfaced, not swallowed. *Known limitation:* the timeout SIGTERMs the `claude` process only, not any tool-subprocess grandchildren it spawned (an inherent `spawnSync` behavior) — a wedged grandchild can outlive the kill; a detached-process-group reap is a future improvement. +- **Hook-install is required for `shift run`** and `install.sh` automates it (the bin's task text reaches the agent only via the Stop-hook block). + +**Tests:** 63 in `shift` (pure unit + hook/CLI/run-loop/install integration), all green. diff --git a/shift/bin/shift b/shift/bin/shift index f0d4327..2c5f557 100755 --- a/shift/bin/shift +++ b/shift/bin/shift @@ -15,6 +15,7 @@ const DEFAULT_CONFIG = { maxHours: 2, maxIterations: 20, maxResumes: 12, + spawnTimeoutMinutes: 30, usageCapPercent: 90, autoResumeOnReset: true }, @@ -103,6 +104,18 @@ async function cmdRun() { const { runLoop } = require('../lib/run-loop.cjs'); const { readUsageCache } = require('../lib/usage.cjs'); + // A headless `-p` run cannot answer permission prompts. 
Only bypassPermissions/dontAsk + // auto-approve tool calls like Bash(git commit) — anything else stalls or denies on the + // first command the work needs (the engine then exits without finalizing). + if (!['bypassPermissions', 'dontAsk'].includes(mode)) { + console.log(`[shift] warning: permissionMode "${mode}" prompts on tools like Bash, which a headless run can't answer.`); + console.log('[shift] set "permissionMode":"dontAsk" (+ permissions.allow) or "bypassPermissions" in .shift/config.json.'); + } + + // Hard per-spawn timeout so a wedged `claude` can't hang the runner forever + // (spawnSync is blocking; the loop's time bounds can't interrupt it). + const spawnTimeoutMs = (((config.bounds && config.bounds.spawnTimeoutMinutes) || 30)) * 60_000; + // Clear any stale summary so finalized() reflects THIS run. try { fs.unlinkSync(path.join(dir, 'summary.md')); } catch { /* none */ } @@ -118,15 +131,25 @@ async function cmdRun() { const args = ['-p', '--permission-mode', mode]; if (first) { args.push('begin the shift'); first = false; } else { args.push('--continue', 'continue the shift'); } - return cp.spawnSync('claude', args, { - cwd, encoding: 'utf8', stdio: ['ignore', 'pipe', 'pipe'], maxBuffer: 64 * 1024 * 1024 + const res = cp.spawnSync('claude', args, { + cwd, encoding: 'utf8', stdio: ['ignore', 'pipe', 'pipe'], + maxBuffer: 64 * 1024 * 1024, timeout: spawnTimeoutMs, killSignal: 'SIGTERM' }); + // Surface launch failures (claude not on PATH → ENOENT) and timeouts/kills — + // otherwise they classify as a bare 'error' with no diagnostics. 
+ if (res && res.error) console.log(`[shift] spawn failed to run claude: ${res.error.message}`); + if (res && res.signal) console.log(`[shift] spawn killed by signal ${res.signal} (timeout ${spawnTimeoutMs / 60000}min?)`); + return res; } }; const result = await runLoop({ config, effects }); console.log(`[shift] stopped: ${result.reason} (after ${result.spawns} spawn(s))`); - console.log(`[shift] review: ${path.join(dir, 'summary.md')}`); + if (effects.finalized()) { + console.log(`[shift] review: ${path.join(dir, 'summary.md')}`); + } else { + console.log('[shift] no summary written — the run did NOT finalize; see the [shift] lines above. Nothing was committed by the engine.'); + } } const [, , sub, ...rest] = process.argv; diff --git a/shift/lib/outcome.cjs b/shift/lib/outcome.cjs index 07ff0e9..9638983 100644 --- a/shift/lib/outcome.cjs +++ b/shift/lib/outcome.cjs @@ -7,11 +7,14 @@ const DEFAULT_PATTERNS = [/rate.?limit/i, /usage limit/i, /quota/i, /\b429\b/]; const NEAR_LIMIT_PERCENT = 95; // ctx: { finalized, code, stderr, usage, now (ms), patterns? } -// returns 'completed' | 'rate_limited' | 'error' +// returns 'completed' | 'incomplete' | 'rate_limited' | 'error' function classifyOutcome(ctx) { const { finalized, code, stderr, usage, now, patterns } = ctx; if (finalized) return 'completed'; // the engine wrote summary.md → run is done - if (code === 0) return 'completed'; // clean exit without finalize (nothing left to do) + // A clean exit WITHOUT finalize is NOT success: claude stopped but the engine never + // wrote summary.md (hook not wired, or a partial stop). Caller resumes or stops — it + // must never be reported as 'completed' (that was a silent false-green). + if (code === 0) return 'incomplete'; const nowSec = (typeof now === 'number' ? 
now : Date.now()) / 1000; const resetFuture = usage && typeof usage.sessionResetAt === 'number' && usage.sessionResetAt > nowSec; diff --git a/shift/lib/run-loop.cjs b/shift/lib/run-loop.cjs index 41ad2b2..8703b92 100644 --- a/shift/lib/run-loop.cjs +++ b/shift/lib/run-loop.cjs @@ -37,6 +37,10 @@ async function runLoop({ config, effects }) { const resetAt = usage && typeof usage.sessionResetAt === 'number' ? usage.sessionResetAt * 1000 : null; if (!resetAt) return { reason: 'rate limited but no reset time available — stopping', spawns }; const until = resetAt + RESET_BUFFER_MS; + // The cached reset time is only refreshed by the Stop hook; a wall that kills the + // session before any hook fires leaves it stale. If it's already in the past, + // sleepUntil(past) returns instantly and we'd re-spawn in a tight loop — stop instead. + if (until <= now) return { reason: 'rate limited but the reset window is stale/past — stopping', spawns }; if (typeof bounds.maxHours === 'number') { const deadline = Date.parse(state.startedAt) + bounds.maxHours * 3_600_000; if (until >= deadline) return { reason: 'rate limited; reset is past the time box — stopping', spawns }; @@ -47,16 +51,30 @@ async function runLoop({ config, effects }) { continue; } + const iterBefore = (state && typeof state.iterations === 'number') ? state.iterations : 0; spawns += 1; effects.log(`spawn #${spawns}: running claude`); const res = effects.spawn(spawns); - lastOutcome = classifyOutcome({ + const outcome = classifyOutcome({ finalized: effects.finalized(), code: res ? res.status : 1, stderr: res ? res.stderr : '', usage: effects.readUsage(), now: effects.now() }); + + // 'incomplete' = claude exited cleanly but the engine never finalized. If it advanced + // the queue (partial progress), resume to finish it; if it advanced nothing, resuming + // won't help — stop with a diagnostic rather than spin or report a false-green. 
+ if (outcome === 'incomplete') { + const after = effects.loadState(); + const iterAfter = (after && typeof after.iterations === 'number') ? after.iterations : iterBefore; + if (iterAfter <= iterBefore) { + return { reason: 'claude exited without finalizing and made no progress — is the Stop hook wired? (nothing committed)', spawns }; + } + effects.log('claude exited mid-queue with progress — resuming'); + } + lastOutcome = outcome; } } diff --git a/shift/test/outcome.test.cjs b/shift/test/outcome.test.cjs index a436e6f..0f6cd84 100644 --- a/shift/test/outcome.test.cjs +++ b/shift/test/outcome.test.cjs @@ -10,8 +10,15 @@ test('finalized run is completed', () => { assert.equal(classifyOutcome({ finalized: true, code: 1, now: nowMs }), 'completed'); }); -test('clean exit (code 0) is completed', () => { - assert.equal(classifyOutcome({ finalized: false, code: 0, now: nowMs }), 'completed'); +test('finalized wins even on a clean exit', () => { + assert.equal(classifyOutcome({ finalized: true, code: 0, now: nowMs }), 'completed'); +}); + +test('clean exit (code 0) WITHOUT finalize is incomplete, not completed', () => { + // The engine writes summary.md (finalized) on a real drain; a code-0 exit without + // it means claude stopped without the engine finalizing (e.g. hook not wired, or a + // partial stop). That must NOT read as success — it is 'incomplete' (resume/stop). 
+ assert.equal(classifyOutcome({ finalized: false, code: 0, now: nowMs }), 'incomplete'); }); test('nonzero + near-limit usage + future reset is rate_limited', () => { diff --git a/shift/test/run-loop.test.cjs b/shift/test/run-loop.test.cjs index b4c1c22..e06cae2 100644 --- a/shift/test/run-loop.test.cjs +++ b/shift/test/run-loop.test.cjs @@ -19,6 +19,7 @@ function makeEffects({ spawns, usage, bounds }) { calls.spawns += 1; const s = spawns[i++] || { result: { status: 1, stderr: '' }, finalize: false }; finalized = s.finalize; + if (typeof s.iterations === 'number') state.iterations = s.iterations; // simulate engine progress return s.result; } }; @@ -84,3 +85,46 @@ test('maxResumes acts as a runaway backstop', async () => { assert.match(r.reason, /max resumes/); assert.equal(r.spawns, 0); }); + +test('incomplete spawn WITH progress resumes and finishes', async () => { + // spawn 1: clean exit, no finalize, but the engine advanced iterations (partial work); + // spawn 2: resumes and finalizes. + const { effects, calls, config } = makeEffects({ + spawns: [ + { result: { status: 0 }, finalize: false, iterations: 1 }, // progress, not done + { result: { status: 0 }, finalize: true, iterations: 2 } // resume → drain + ], + usage: null + }); + const r = await runLoop({ config, effects }); + assert.match(r.reason, /finalized/); + assert.equal(calls.spawns, 2); +}); + +test('incomplete spawn WITHOUT progress stops with a hook-wiring diagnostic (no false-green)', async () => { + // claude exits 0 but the engine never advanced (e.g. Stop hook not wired). Must NOT + // report success, and must NOT keep re-spawning pointlessly. 
+ const { effects, calls, config } = makeEffects({ + spawns: [{ result: { status: 0 }, finalize: false }], // iterations stays 0 + usage: null + }); + const r = await runLoop({ config, effects }); + assert.doesNotMatch(r.reason, /finalized/); + assert.match(r.reason, /no progress|hook/i); + assert.equal(calls.spawns, 1, 'must not spin'); +}); + +test('rate-limited with a stale/past reset stops instead of busy-spinning', async () => { + // Reset time is already in the past (stale cache). sleepUntil(past) would return + // instantly and re-spawn forever (bounded only by maxResumes) — guard must stop. + const usage = { weeklyPercent: 50, sessionUsedPercent: 99, sessionResetAt: Math.floor(Date.now() / 1000) - 600 }; + const { effects, calls, config } = makeEffects({ + spawns: [{ result: { status: 1, stderr: 'Error: rate limit exceeded' }, finalize: false }], + usage, + bounds: { maxHours: 8, maxResumes: 12, autoResumeOnReset: true } + }); + const r = await runLoop({ config, effects }); + assert.match(r.reason, /stale|past|reset/i); + assert.equal(calls.spawns, 1); + assert.equal(calls.sleepUntil.length, 0, 'must not sleep on a past reset'); +}); From 45f2c25969b7afb2034f4db6fa2f572f76ef2e67 Mon Sep 17 00:00:00 2001 From: Allen Manzano-Wight <6640236+AllenBW@users.noreply.github.com> Date: Tue, 16 Jun 2026 09:29:07 -0400 Subject: [PATCH 06/12] shift: live dashboard + keyboard control (shift watch) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A headless run was a black box while running — good trail after, no visibility during. `shift watch` is a zero-dep live TUI over .shift/ that closes that gap: - Dashboard: progress bar, per-bin status (done/current/pending/skipped/blocked), elapsed, decision-log tail, Needs-you count. Redraws on an interval. 
- Two-way control via a file-based channel the engine honors: [p]ause (runner idles, still time-boxed), [k] skip current bin (new 'skipped' status), [q] stop (existing kill switch), [x] close watcher. - lib/control.cjs (signal channel) + lib/watch-model.cjs (buildModel + a PURE renderFrame/renderLine, so the dashboard is unit-tested without a TTY). - bin/shift gains 'watch' and 'status --line' (one-liner for the module-1 status bar — surfaces shift where you're already looking). Engine integration: Stop hook applies SKIP (marks current bin skipped, advances) and summary now reports skipped; run-loop honors PAUSE between spawns. 77 shift tests green. --- shift/README.md | 29 ++++++- shift/SPEC.md | 6 ++ shift/bin/shift | 83 +++++++++++++++++-- shift/hooks/shift-stop.cjs | 13 ++- shift/lib/control.cjs | 41 ++++++++++ shift/lib/run-loop.cjs | 9 ++ shift/lib/watch-model.cjs | 141 ++++++++++++++++++++++++++++++++ shift/test/control.test.cjs | 56 +++++++++++++ shift/test/hook.test.cjs | 12 +++ shift/test/run-loop.test.cjs | 13 +++ shift/test/watch-model.test.cjs | 84 +++++++++++++++++++ 11 files changed, 475 insertions(+), 12 deletions(-) create mode 100644 shift/lib/control.cjs create mode 100644 shift/lib/watch-model.cjs create mode 100644 shift/test/control.test.cjs create mode 100644 shift/test/watch-model.test.cjs diff --git a/shift/README.md b/shift/README.md index 8bb45c0..b9e42fd 100644 --- a/shift/README.md +++ b/shift/README.md @@ -60,7 +60,32 @@ shift status # progress anytime shift stop # stop cleanly after the current bin ``` -When it ends, read `.shift/summary.md` (bins done/blocked + a "Needs you" section) and review the `shift/` branch. +When it ends, read `.shift/summary.md` (bins done/blocked/skipped + a "Needs you" section) and review the `shift/` branch. + +## Watch it live + steer it (`shift watch`) + +An unattended run is the *least* transparent mode there is — so `shift` gives you a live window into it. 
In a second terminal: + +```bash +cd your-repo && shift watch +``` + +A dashboard redraws on an interval: a progress bar (`done/total`), every bin with its status (`✓` done · `▶` current · `·` pending · `⤫` skipped · `✗` blocked), elapsed time, the decision-log tail, and the "Needs you" count. Because a run is otherwise a black box, this is where you *see* it working. + +It's also the **control surface** — a status bar can show state but can't take input, so `watch` captures keys and writes signals the engine honors at the next stop: + +| key | action | +|---|---| +| `p` | pause / resume (the headless runner idles until you resume; still bounded by the time box) | +| `k` | skip the current bin (marks it `skipped`, moves on — any work stays on the branch) | +| `q` | stop the run (finalizes after the current bin — same as `shift stop`) | +| `x` | close the watcher (the run keeps going) | + +Control is file-based under `.shift/` (`PAUSE` / `SKIP` / `STOP`), so it works whether the run is interactive or headless, and from any terminal in the repo. + +### In your status bar (module 1) + +For an at-a-glance signal in the [Code Status Bar](../code-status-bar), `shift status --line` prints a one-liner (`⚙ shift 2/5 · 18m · ⚑1`) — empty when no run is active. Wire it into a ccstatusline `custom-command` widget to surface shift "in the place you're already looking." ## Configure (`.shift/config.json`) @@ -108,4 +133,4 @@ Pick the narrowest mode that lets the work actually proceed. cd shift && npm test # node --test, zero dependencies ``` -Pure logic lives in `lib/` (discovery, state, bounds, brief, decision, verify, usage, outcome, run-loop) and is unit-tested; `hooks/shift-stop.cjs` (the keep-going engine) and the `shift run` loop are integration-tested by driving them with injected effects / crafted hook input. 
+Pure logic lives in `lib/` (discovery, state, bounds, brief, decision, verify, usage, outcome, run-loop, control, watch-model) and is unit-tested — including `renderFrame`, so the dashboard is testable without a TTY; `hooks/shift-stop.cjs` (the keep-going engine) and the `shift run` loop are integration-tested by driving them with injected effects / crafted hook input. The `bin/shift watch` TUI is a thin shell over the tested `watch-model` + `control` modules. diff --git a/shift/SPEC.md b/shift/SPEC.md index 229c6ac..527c190 100644 --- a/shift/SPEC.md +++ b/shift/SPEC.md @@ -283,3 +283,9 @@ A real bounded `shift run` smoke (2 commit-a-file bins, `bypassPermissions`) **e - **Hook-install is required for `shift run`** and `install.sh` automates it (the bin's task text reaches the agent only via the Stop-hook block). **Tests:** 63 in `shift` (pure unit + hook/CLI/run-loop/install integration), all green. + +### Live visibility + control — `shift watch` (2026-06-16) + +The candor gap in v2 was that a headless run is opaque *while* it runs (good paper trail after, black box during). `shift watch` closes it: a zero-dependency live TUI that reads `.shift/` on an interval and renders a dashboard (progress bar, per-bin status, current bin, elapsed, decision-log tail, "Needs you"), plus **two-way control**. Since an output-only surface (a status bar) can't take input, control is a separate file-based channel under `.shift/` that the engine honors: `STOP` (existing kill switch / `q`), `PAUSE` (`p` — the runner idles, still bounded by the time box), `SKIP` (`k` — the hook marks the current bin `skipped` and advances). New status value: `skipped`. New modules: `lib/control.cjs` (signal channel) and `lib/watch-model.cjs` (`buildModel` + a **pure** `renderFrame`/`renderLine`, so the dashboard and the status-bar one-liner are unit-tested without a TTY). `bin/shift` gains `watch` and `status --line` (a one-liner for the module-1 status bar — ties the two modules together). 
**Tests:** 77 in `shift`, all green. + +*Known limitation:* `pause` and `skip` apply at the next stop-hook boundary (between bins), not mid-bin — the hook is the only point the engine re-evaluates. Mid-bin interruption would need a different mechanism. diff --git a/shift/bin/shift b/shift/bin/shift index 2c5f557..7b055d2 100755 --- a/shift/bin/shift +++ b/shift/bin/shift @@ -76,11 +76,72 @@ function cmdStart(args) { console.log('Now open Claude Code in this repo and say: "begin the shift".'); } -function cmdStatus() { - const state = loadState(path.join(process.cwd(), '.shift')); - const c = s => state.bins.filter(b => b.status === s).length; - console.log(`run ${state.runId} · branch ${state.branch} · iter ${state.iterations}`); - console.log(`bins: ${c('done')} done · ${c('blocked')} blocked · ${c('pending')} pending`); +function cmdStatus(args) { + const dir = path.join(process.cwd(), '.shift'); + const { buildModel, renderLine } = require('../lib/watch-model.cjs'); + const model = buildModel({ dir, now: Date.now() }); + + // `shift status --line` → a one-line summary for a status bar (module 1 / ccstatusline + // custom-command widget pipes the session payload in; this just prints shift's line). + if (args && args.includes('--line')) { + if (model.exists && !model.finalized) process.stdout.write(renderLine(model, { color: !args.includes('--no-color') }) + '\n'); + return; + } + if (!model.exists) { console.log('No active shift run here. Start one with `shift start`.'); return; } + const cs = model.counts; + console.log(`run ${model.runId} · branch ${model.branch} · iter ${model.iterations}${model.paused ? ' · PAUSED' : ''}`); + console.log(`bins: ${cs.done} done · ${cs.blocked} blocked · ${cs.skipped} skipped · ${cs.pending} pending (${model.elapsedMin}m)`); +} + +// v3: live dashboard + keyboard control. Reads .shift/ on an interval and writes +// control signals (PAUSE/SKIP/STOP) that the engine honors. 
Output-only surfaces +// (a status bar) can't take input, so this is the interactive control surface. +function cmdWatch() { + const dir = path.join(process.cwd(), '.shift'); + const { buildModel, renderFrame } = require('../lib/watch-model.cjs'); + const { setPause, isPaused, requestSkip, requestStop } = require('../lib/control.cjs'); + const out = process.stdout; + const interactive = !!(process.stdin.isTTY && out.isTTY); + + let model; + const draw = () => { + model = buildModel({ dir, now: Date.now() }); + const frame = renderFrame(model, { width: out.columns || 80, color: true }); + if (interactive) out.write('\x1b[H\x1b[2J' + frame); // home + clear, then frame + else out.write(frame); + }; + + if (!interactive) { draw(); return; } // piped / non-TTY: print one frame and exit + + let timer = null; + const cleanup = () => { + if (timer) clearInterval(timer); + try { process.stdin.setRawMode(false); } catch { /* ignore */ } + process.stdin.pause(); + out.write('\x1b[?25h'); // show cursor + }; + + out.write('\x1b[?25l'); // hide cursor + process.stdin.setRawMode(true); + process.stdin.resume(); + process.stdin.setEncoding('utf8'); + process.stdin.on('data', (key) => { + if (key === 'x' || key === '\x1b' || key === '\x03') { // x / Esc / Ctrl-C + cleanup(); out.write('\n[shift] watcher closed — the run keeps going.\n'); process.exit(0); + } else if (key === 'p') { + setPause(dir, !isPaused(dir)); draw(); + } else if (key === 'k') { + const cur = (model.bins || []).find(b => b.current); + if (cur) requestSkip(dir, cur.id); + draw(); + } else if (key === 'q') { + requestStop(dir); draw(); + } + }); + process.on('SIGINT', () => { cleanup(); process.exit(0); }); + + draw(); + timer = setInterval(draw, 800); } function cmdStop() { @@ -103,6 +164,7 @@ async function cmdRun() { const mode = config.permissionMode || 'acceptEdits'; const { runLoop } = require('../lib/run-loop.cjs'); const { readUsageCache } = require('../lib/usage.cjs'); + const { isPaused } = 
require('../lib/control.cjs'); // A headless `-p` run cannot answer permission prompts. Only bypassPermissions/dontAsk // auto-approve tool calls like Bash(git commit) — anything else stalls or denies on the @@ -126,6 +188,7 @@ async function cmdRun() { readUsage: () => readUsageCache(dir), log: (m) => console.log(`[shift] ${m}`), finalized: () => fs.existsSync(path.join(dir, 'summary.md')), + isPaused: () => isPaused(dir), sleepUntil: (ms) => new Promise(r => setTimeout(r, Math.max(0, ms - Date.now()))), spawn: () => { const args = ['-p', '--permission-mode', mode]; @@ -154,7 +217,13 @@ async function cmdRun() { const [, , sub, ...rest] = process.argv; if (sub === 'start') cmdStart(rest); -else if (sub === 'status') cmdStatus(); +else if (sub === 'status') cmdStatus(rest); +else if (sub === 'watch') cmdWatch(); else if (sub === 'stop') cmdStop(); else if (sub === 'run') cmdRun().catch(e => { console.error(e); process.exit(1); }); -else { console.log('usage: shift [--dry-run]'); process.exit(1); } +else { + console.log('usage: shift [--dry-run]'); + console.log(' watch live dashboard + control: [p]ause [k]skip [q]stop [x]exit'); + console.log(' status --line one-line summary for a status bar'); + process.exit(1); +} diff --git a/shift/hooks/shift-stop.cjs b/shift/hooks/shift-stop.cjs index 16358a4..af99ed1 100755 --- a/shift/hooks/shift-stop.cjs +++ b/shift/hooks/shift-stop.cjs @@ -7,6 +7,7 @@ const { loadState, saveState, mergeDiscovered, setBinStatus } = require('../lib/ const { decide } = require('../lib/decision.cjs'); const { runVerify } = require('../lib/verify.cjs'); const { writeUsageCache } = require('../lib/usage.cjs'); +const { readSkip, clearSkip } = require('../lib/control.cjs'); function readStdin() { try { return fs.readFileSync(0, 'utf8'); } catch { return ''; } } @@ -34,6 +35,7 @@ function tail(s, n) { function writeSummary(dir, state, reason, now) { const done = state.bins.filter(b => b.status === 'done').length; const blocked = 
state.bins.filter(b => b.status === 'blocked'); + const skipped = state.bins.filter(b => b.status === 'skipped').length; const pending = state.bins.filter(b => b.status === 'pending').length; const mins = Math.round((now - Date.parse(state.startedAt)) / 60000); const items = [ @@ -45,7 +47,7 @@ function writeSummary(dir, state, reason, now) { `Ended: ${reason}`, `Duration: ${mins} min · Iterations: ${state.iterations}`, `Branch: ${state.branch}`, - `Bins: ${done} done · ${blocked.length} blocked · ${pending} pending`, '', + `Bins: ${done} done · ${blocked.length} blocked · ${skipped} skipped · ${pending} pending`, '', '## Needs you', ...(items.length ? items : ['- (nothing flagged)']) ]; @@ -78,10 +80,15 @@ function main() { const maxAttempts = (config.verify && config.verify.maxAttempts) || 2; let retryFeedback = null; - // Attribute the just-finished work to the current bin (blocked / verify gate / done). + // Attribute the just-finished work to the current bin (skipped / blocked / verify gate / done). if (prevBinId) { + const skipId = readSkip(dir); const blocked = readBlocked(dir).find(x => x.id === prevBinId); - if (blocked) { + if (skipId === prevBinId) { + // User hit [k] in `shift watch`: drop this bin and move on (work, if any, stays on the branch). + state = setBinStatus(state, prevBinId, { status: 'skipped', note: 'skipped by user' }); + clearSkip(dir); + } else if (blocked) { state = setBinStatus(state, prevBinId, { status: 'blocked', note: blocked.note }); } else if (verifyCmd) { const v = runVerify(verifyCmd, cwd); diff --git a/shift/lib/control.cjs b/shift/lib/control.cjs new file mode 100644 index 0000000..37d7d45 --- /dev/null +++ b/shift/lib/control.cjs @@ -0,0 +1,41 @@ +'use strict'; +const fs = require('node:fs'); +const path = require('node:path'); + +// File-based control channel between `shift watch` (writer) and the engine +// (reader: the Stop hook + the headless runner). 
Files live in .shift/: +// STOP — kill switch (already honored by the hook); finalize after current bin. +// PAUSE — the headless runner idles while this exists; cleared to resume. +// SKIP — contains a bin id; the hook marks that bin 'skipped' and moves on. +// Everything is best-effort and absence-means-off, so a missing dir never throws. + +function p(dir, name) { return path.join(dir, name); } +function exists(file) { try { return fs.existsSync(file); } catch { return false; } } +function touch(dir, name) { + try { fs.mkdirSync(dir, { recursive: true }); fs.writeFileSync(p(dir, name), ''); } catch { /* best-effort */ } +} +function remove(dir, name) { try { fs.unlinkSync(p(dir, name)); } catch { /* already gone */ } } + +function requestStop(dir) { touch(dir, 'STOP'); } +function isStopRequested(dir) { return exists(p(dir, 'STOP')); } + +function setPause(dir, on) { if (on) touch(dir, 'PAUSE'); else remove(dir, 'PAUSE'); } +function isPaused(dir) { return exists(p(dir, 'PAUSE')); } + +function requestSkip(dir, binId) { + try { fs.mkdirSync(dir, { recursive: true }); fs.writeFileSync(p(dir, 'SKIP'), String(binId || '')); } + catch { /* best-effort */ } +} +function readSkip(dir) { + try { + const v = fs.readFileSync(p(dir, 'SKIP'), 'utf8').trim(); + return v || null; + } catch { return null; } +} +function clearSkip(dir) { remove(dir, 'SKIP'); } + +module.exports = { + requestStop, isStopRequested, + setPause, isPaused, + requestSkip, readSkip, clearSkip +}; diff --git a/shift/lib/run-loop.cjs b/shift/lib/run-loop.cjs index 8703b92..9e14be2 100644 --- a/shift/lib/run-loop.cjs +++ b/shift/lib/run-loop.cjs @@ -3,6 +3,7 @@ const { evaluateBounds } = require('./bounds.cjs'); const { classifyOutcome } = require('./outcome.cjs'); const RESET_BUFFER_MS = 60_000; +const PAUSE_POLL_MS = 5_000; // The headless outer loop (v2). All side effects are injected so the loop is // fully testable without a real `claude` or real sleeping. 
@@ -32,6 +33,14 @@ async function runLoop({ config, effects }) { if (lastOutcome === 'completed') return { reason: 'run finalized by the engine', spawns }; if (lastOutcome === 'error') return { reason: 'run errored — stopping (see output)', spawns }; + // Paused via `shift watch` ([p]): idle without spawning until resumed. Still bounded + // by maxHours/usage (re-checked each poll), so a forgotten pause can't run forever. + if (effects.isPaused && effects.isPaused()) { + effects.log('paused — waiting (resume with [p] in `shift watch`)'); + await effects.sleepUntil(now + PAUSE_POLL_MS); + continue; + } + if (lastOutcome === 'rate_limited') { if (!bounds.autoResumeOnReset) return { reason: 'rate limited; auto-resume disabled', spawns }; const resetAt = usage && typeof usage.sessionResetAt === 'number' ? usage.sessionResetAt * 1000 : null; diff --git a/shift/lib/watch-model.cjs b/shift/lib/watch-model.cjs new file mode 100644 index 0000000..5eadf6e --- /dev/null +++ b/shift/lib/watch-model.cjs @@ -0,0 +1,141 @@ +'use strict'; +const fs = require('node:fs'); +const path = require('node:path'); +const { loadState } = require('./state.cjs'); +const { isPaused, isStopRequested } = require('./control.cjs'); + +// --- model ----------------------------------------------------------------- + +function readLog(dir) { + let raw; + try { raw = fs.readFileSync(path.join(dir, 'log.md'), 'utf8'); } catch { return { recent: [], needsYou: [] }; } + const lines = raw.split('\n'); + const recent = []; + const needsYou = []; + for (const line of lines) { + // hook writes: "## — work (iter N)" + const m = line.match(/^##\s*(\S+)\s*—\s*(.+)$/); + if (m) { + const time = (m[1].match(/T(\d{2}:\d{2})/) || [])[1] || m[1]; + recent.push(`${time} ${m[2]}`); + } + const n = line.match(/^Needs you:\s*(.+)$/); + if (n) needsYou.push(n[1].trim()); + } + return { recent: recent.slice(-6), needsYou }; +} + +// buildModel({ dir, now }) — read .shift/ into a plain view model. Pure of rendering. 
+function buildModel({ dir, now }) { + let state; + try { state = loadState(dir); } catch { return { exists: false }; } + + const bins = (state.bins || []).map(b => ({ + id: b.id, status: b.status, commit: b.commit || null, note: b.note || null, + current: b.id === state.currentBinId && b.status === 'pending' + })); + const count = s => bins.filter(b => b.status === s).length; + const counts = { + done: count('done'), blocked: count('blocked'), skipped: count('skipped'), + pending: count('pending'), total: bins.length + }; + + const { recent, needsYou: logged } = readLog(dir); + const needsYou = [ + ...bins.filter(b => b.status === 'blocked').map(b => `${b.id}: ${b.note || 'blocked'}`), + ...logged + ]; + + const startedMs = Date.parse(state.startedAt); + const elapsedMin = Number.isFinite(startedMs) ? Math.max(0, Math.round((now - startedMs) / 60000)) : 0; + + return { + exists: true, + runId: state.runId, branch: state.branch, iterations: state.iterations || 0, + elapsedMin, paused: isPaused(dir), stopping: isStopRequested(dir), + finalized: fs.existsSync(path.join(dir, 'summary.md')), + bins, counts, recent, needsYou + }; +} + +// --- render ---------------------------------------------------------------- + +const ANSI = { + reset: '\x1b[0m', bold: '\x1b[1m', dim: '\x1b[2m', + green: '\x1b[32m', yellow: '\x1b[33m', red: '\x1b[31m', cyan: '\x1b[36m', gray: '\x1b[90m' +}; +function paint(color, code, s) { return color ? code + s + ANSI.reset : s; } + +const GLYPH = { done: '✓', blocked: '✗', skipped: '⤫', pending: '·' }; +function binGlyph(b) { return b.current ? 
'▶' : (GLYPH[b.status] || '·'); } +function binColor(b) { + if (b.current) return ANSI.cyan; + return { done: ANSI.green, blocked: ANSI.red, skipped: ANSI.gray, pending: ANSI.dim }[b.status] || ''; +} + +function bar(done, total, width) { + if (total <= 0) return ''; + const filled = Math.round((done / total) * width); + return '█'.repeat(filled) + '░'.repeat(Math.max(0, width - filled)); +} + +function pad(s, n) { s = String(s); return s.length >= n ? s.slice(0, n) : s + ' '.repeat(n - s.length); } + +// renderFrame(model, { width, color }) -> string. Pure. +function renderFrame(model, opts = {}) { + const width = opts.width || 80; + const color = opts.color !== false; + const c = (code, s) => paint(color, code, s); + + if (!model || !model.exists) { + return c(ANSI.dim, 'No active shift run in this directory. Start one with `shift start`.') + '\n'; + } + + const L = []; + const status = model.finalized + ? c(ANSI.green, '● finalized') + : model.stopping ? c(ANSI.red, '■ stopping after current bin') + : model.paused ? c(ANSI.yellow, '⏸ PAUSED') : c(ANSI.green, '▶ running'); + L.push(`${c(ANSI.bold, 'shift')} ${c(ANSI.dim, '·')} ${c(ANSI.cyan, model.branch)} ${c(ANSI.dim, '·')} iter ${model.iterations} ${status}`); + L.push(c(ANSI.dim, '─'.repeat(Math.min(width, 64)))); + + const { done, total } = { done: model.counts.done, total: model.counts.total }; + L.push(`${c(ANSI.green, bar(done, total, 24))} ${c(ANSI.bold, `${done}/${total}`)} bins ${c(ANSI.dim, '·')} ${model.elapsedMin}m elapsed`); + L.push(''); + + for (const b of model.bins) { + const g = c(binColor(b), binGlyph(b)); + const id = c(b.current ? ANSI.cyan : (b.status === 'pending' ? 
ANSI.dim : ANSI.reset), pad(b.id, 28)); + let tail = b.status; + if (b.current) tail = 'working ← current'; + else if (b.commit) tail = `done (${b.commit.slice(0, 7)})`; + else if (b.note) tail = `${b.status} — ${b.note}`; + L.push(` ${g} ${id} ${c(ANSI.dim, tail)}`); + } + L.push(''); + + if (model.recent.length) { + L.push(c(ANSI.dim, 'recent:')); + for (const r of model.recent.slice(-4)) L.push(c(ANSI.gray, ` ${r}`)); + L.push(''); + } + + const needs = model.needsYou.length; + const needsLabel = needs ? c(ANSI.yellow, `Needs you: ${needs}`) : c(ANSI.dim, 'Needs you: 0'); + const hints = `${c(ANSI.bold, '[p]')}ause ${c(ANSI.bold, '[k]')}skip current ${c(ANSI.bold, '[q]')}stop ${c(ANSI.bold, '[x]')}exit watcher`; + L.push(`${needsLabel} ${c(ANSI.dim, '·')} ${hints}`); + + return L.join('\n') + '\n'; +} + +// One-line summary for a status bar (module 1 / ccstatusline custom-command). +function renderLine(model, opts = {}) { + const color = opts.color !== false; + const c = (code, s) => paint(color, code, s); + if (!model || !model.exists) return ''; + const flag = model.finalized ? '●' : model.paused ? '⏸' : '⚙'; + const needs = model.needsYou.length ? 
` ${c(ANSI.yellow, '⚑' + model.needsYou.length)}` : ''; + return `${flag} shift ${c(ANSI.bold, model.counts.done + '/' + model.counts.total)} ${c(ANSI.dim, model.elapsedMin + 'm')}${needs}`; +} + +module.exports = { buildModel, renderFrame, renderLine }; diff --git a/shift/test/control.test.cjs b/shift/test/control.test.cjs new file mode 100644 index 0000000..dfb17db --- /dev/null +++ b/shift/test/control.test.cjs @@ -0,0 +1,56 @@ +'use strict'; +const { test } = require('node:test'); +const assert = require('node:assert'); +const fs = require('node:fs'); +const os = require('node:os'); +const path = require('node:path'); +const { + requestStop, isStopRequested, + setPause, isPaused, + requestSkip, readSkip, clearSkip +} = require('../lib/control.cjs'); + +function tmp() { return fs.mkdtempSync(path.join(os.tmpdir(), 'shift-ctl-')); } + +test('stop: absent by default, present after request', () => { + const d = tmp(); + assert.equal(isStopRequested(d), false); + requestStop(d); + assert.equal(isStopRequested(d), true); + // STOP is the existing kill switch file name (engine already honors it) + assert.ok(fs.existsSync(path.join(d, 'STOP'))); +}); + +test('pause: toggles on and off', () => { + const d = tmp(); + assert.equal(isPaused(d), false); + setPause(d, true); + assert.equal(isPaused(d), true); + setPause(d, false); + assert.equal(isPaused(d), false); + setPause(d, false); // idempotent off + assert.equal(isPaused(d), false); +}); + +test('skip: records a bin id, reads it back, clears it', () => { + const d = tmp(); + assert.equal(readSkip(d), null); + requestSkip(d, 'queue/03-build.md'); + assert.equal(readSkip(d), 'queue/03-build.md'); + clearSkip(d); + assert.equal(readSkip(d), null); +}); + +test('skip: reading a malformed/empty file yields null (no throw)', () => { + const d = tmp(); + fs.mkdirSync(d, { recursive: true }); + fs.writeFileSync(path.join(d, 'SKIP'), ' '); + assert.equal(readSkip(d), null); +}); + +test('all readers are safe on a missing 
dir', () => { + const d = path.join(os.tmpdir(), 'shift-ctl-missing-' + process.pid); + assert.equal(isStopRequested(d), false); + assert.equal(isPaused(d), false); + assert.equal(readSkip(d), null); +}); diff --git a/shift/test/hook.test.cjs b/shift/test/hook.test.cjs index ad2c603..17f9726 100644 --- a/shift/test/hook.test.cjs +++ b/shift/test/hook.test.cjs @@ -74,6 +74,18 @@ test('logged "Needs you:" lines surface in the summary', () => { assert.match(fs.readFileSync(path.join(dir, 'summary.md'), 'utf8'), /push the release tag/); }); +test('SKIP control marks the current bin skipped and advances to the next', () => { + const { cwd, dir } = setupRun(); + runHook(cwd, { stop_hook_active: false }); // start bin 1 (current = queue/01.md) + fs.writeFileSync(path.join(dir, 'SKIP'), 'queue/01.md'); + const r = runHook(cwd, { stop_hook_active: true }); // skip bin 1, block bin 2 + assert.equal(r.decision, 'block'); + assert.match(r.reason, /bin two/); + const s = JSON.parse(fs.readFileSync(path.join(dir, 'state.json'), 'utf8')); + assert.equal(s.bins.find(b => b.id === 'queue/01.md').status, 'skipped'); + assert.ok(!fs.existsSync(path.join(dir, 'SKIP')), 'SKIP is consumed'); +}); + test('kill switch ends the run immediately', () => { const { cwd, dir } = setupRun(); fs.writeFileSync(path.join(dir, 'STOP'), ''); diff --git a/shift/test/run-loop.test.cjs b/shift/test/run-loop.test.cjs index e06cae2..4e32e61 100644 --- a/shift/test/run-loop.test.cjs +++ b/shift/test/run-loop.test.cjs @@ -114,6 +114,19 @@ test('incomplete spawn WITHOUT progress stops with a hook-wiring diagnostic (no assert.equal(calls.spawns, 1, 'must not spin'); }); +test('pause idles the runner (no spawn) until unpaused, then proceeds', async () => { + const { effects, calls, config } = makeEffects({ + spawns: [{ result: { status: 0 }, finalize: true }], + usage: null + }); + let checks = 0; + effects.isPaused = () => checks++ < 2; // paused for the first two loop iterations + const r = await runLoop({ 
config, effects }); + assert.match(r.reason, /finalized/); + assert.ok(calls.sleepUntil.length >= 2, 'idled while paused'); + assert.equal(calls.spawns, 1, 'no spawn while paused; one after resume'); +}); + test('rate-limited with a stale/past reset stops instead of busy-spinning', async () => { // Reset time is already in the past (stale cache). sleepUntil(past) would return // instantly and re-spawn forever (bounded only by maxResumes) — guard must stop. diff --git a/shift/test/watch-model.test.cjs b/shift/test/watch-model.test.cjs new file mode 100644 index 0000000..dc93b44 --- /dev/null +++ b/shift/test/watch-model.test.cjs @@ -0,0 +1,84 @@ +'use strict'; +const { test } = require('node:test'); +const assert = require('node:assert'); +const fs = require('node:fs'); +const os = require('node:os'); +const path = require('node:path'); +const { buildModel, renderFrame } = require('../lib/watch-model.cjs'); + +function fixture({ paused = false, currentBinId = 'queue/03-build.md' } = {}) { + const cwd = fs.mkdtempSync(path.join(os.tmpdir(), 'shift-watch-')); + const dir = path.join(cwd, '.shift'); + fs.mkdirSync(dir, { recursive: true }); + const startedAt = new Date(Date.now() - 12 * 60_000).toISOString(); // 12 min ago + fs.writeFileSync(path.join(dir, 'state.json'), JSON.stringify({ + runId: '2026-06-16T00-00-00', startedAt, iterations: 7, branch: 'shift/smoke', + currentBinId, + bins: [ + { id: 'queue/01-hello.md', status: 'done', commit: 'a1b2c3d' }, + { id: 'queue/02-notes.md', status: 'done', commit: 'd4e5f6a' }, + { id: 'queue/03-build.md', status: 'pending' }, + { id: 'queue/04-test.md', status: 'pending' }, + { id: 'queue/05-ship.md', status: 'blocked', note: 'needs API key' } + ] + })); + fs.writeFileSync(path.join(dir, 'log.md'), + '# shift log\n\n## 2026-06-16T00:05:00Z — work queue/03-build.md (iter 7)\nNeeds you: confirm the deploy target\n'); + if (paused) fs.writeFileSync(path.join(dir, 'PAUSE'), ''); + return dir; +} + +test('buildModel reads run 
state and computes counts + elapsed', () => { + const m = buildModel({ dir: fixture(), now: Date.now() }); + assert.equal(m.exists, true); + assert.equal(m.branch, 'shift/smoke'); + assert.equal(m.iterations, 7); + assert.equal(m.counts.done, 2); + assert.equal(m.counts.blocked, 1); + assert.equal(m.counts.pending, 2); + assert.equal(m.counts.total, 5); + assert.ok(m.elapsedMin >= 11 && m.elapsedMin <= 13); +}); + +test('buildModel marks the current bin and surfaces Needs you', () => { + const m = buildModel({ dir: fixture(), now: Date.now() }); + const current = m.bins.find(b => b.current); + assert.equal(current.id, 'queue/03-build.md'); + assert.ok(m.needsYou.some(n => /API key/.test(n))); // blocked note + assert.ok(m.needsYou.some(n => /deploy target/.test(n))); // logged "Needs you:" line +}); + +test('buildModel reflects pause state', () => { + assert.equal(buildModel({ dir: fixture({ paused: true }), now: Date.now() }).paused, true); + assert.equal(buildModel({ dir: fixture({ paused: false }), now: Date.now() }).paused, false); +}); + +test('buildModel returns exists:false when no run is present', () => { + const cwd = fs.mkdtempSync(path.join(os.tmpdir(), 'shift-watch-none-')); + const m = buildModel({ dir: path.join(cwd, '.shift'), now: Date.now() }); + assert.equal(m.exists, false); +}); + +test('renderFrame (no color) shows progress, the current bin, and control hints', () => { + const out = renderFrame(buildModel({ dir: fixture(), now: Date.now() }), { width: 80, color: false }); + assert.match(out, /2\/5/); // progress count + assert.match(out, /shift\/smoke/); // branch + assert.match(out, /queue\/05-ship\.md/); // a bin row + assert.match(out, /needs API key/); // blocker surfaced + assert.match(out, /\[q\].*stop/i); // control hint + assert.match(out, /\[k\]/); // skip hint + assert.match(out, /\[p\]/); // pause hint +}); + +test('renderFrame shows a PAUSED banner when paused', () => { + const paused = renderFrame(buildModel({ dir: fixture({ 
paused: true }), now: Date.now() }), { color: false }); + assert.match(paused, /PAUSED/); + const running = renderFrame(buildModel({ dir: fixture({ paused: false }), now: Date.now() }), { color: false }); + assert.doesNotMatch(running, /PAUSED/); +}); + +test('renderFrame on no active run is a friendly message, not a crash', () => { + const cwd = fs.mkdtempSync(path.join(os.tmpdir(), 'shift-watch-none2-')); + const out = renderFrame(buildModel({ dir: path.join(cwd, '.shift'), now: Date.now() }), { color: false }); + assert.match(out, /no active.*run/i); +}); From 15970c6af2ed8566a9087ba4ac823937eb65a9bc Mon Sep 17 00:00:00 2001 From: Allen Manzano-Wight <6640236+AllenBW@users.noreply.github.com> Date: Tue, 16 Jun 2026 09:38:59 -0400 Subject: [PATCH 07/12] shift watch: address adversarial-review findings (verdict was SHIP) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two P2s + cheap P3s from the verification pass (no P0/P1; nothing crashed or corrupted state): - Terminal hygiene (P2): restore cursor + raw mode on SIGTERM/SIGHUP/exit, not just SIGINT/keys; wrap draw() in try/catch; idempotent cleanup; drop Esc as an exit key (a split arrow escape sends a lone \x1b). - Stale SKIP (P2): consume-on-read in the hook — a skip that misses its target is discarded, never left to fire on a later bin. - STOP honored while paused (P3): pause+stop no longer parks until the time box. - Progress bar fills by resolved bins (done+blocked+skipped), so a finalized run shows a full bar instead of ~40% under '● finalized'. - Atomic state.json write (temp+rename) so a redraw never reads a half-written file. - Ellipsis on truncated bin ids. - examples/watch-demo.cjs: zero-cost demo of the dashboard + control flow. Tests: 79 shift (+2: stop-while-paused, stale-skip-discarded), all green. Residual P3s documented: [k] no-op feedback when no current bin; narrow-terminal line wrapping. 
--- shift/bin/shift | 25 +++++++++++++------ shift/examples/watch-demo.cjs | 47 +++++++++++++++++++++++++++++++++++ shift/hooks/shift-stop.cjs | 2 +- shift/lib/run-loop.cjs | 2 ++ shift/lib/state.cjs | 6 ++++- shift/lib/watch-model.cjs | 12 ++++++--- shift/test/hook.test.cjs | 10 ++++++++ shift/test/run-loop.test.cjs | 13 ++++++++++ 8 files changed, 104 insertions(+), 13 deletions(-) create mode 100644 shift/examples/watch-demo.cjs diff --git a/shift/bin/shift b/shift/bin/shift index 7b055d2..e7d98d3 100755 --- a/shift/bin/shift +++ b/shift/bin/shift @@ -105,16 +105,21 @@ function cmdWatch() { let model; const draw = () => { - model = buildModel({ dir, now: Date.now() }); - const frame = renderFrame(model, { width: out.columns || 80, color: true }); - if (interactive) out.write('\x1b[H\x1b[2J' + frame); // home + clear, then frame - else out.write(frame); + try { // a transient read/write error must never wedge the terminal — retry next tick + model = buildModel({ dir, now: Date.now() }); + const frame = renderFrame(model, { width: out.columns || 80, color: true }); + if (interactive) out.write('\x1b[H\x1b[2J' + frame); // home + clear, then frame + else out.write(frame); + } catch { /* keep the watcher alive */ } }; if (!interactive) { draw(); return; } // piped / non-TTY: print one frame and exit let timer = null; - const cleanup = () => { + let closed = false; + const cleanup = () => { // idempotent; ALWAYS restores the terminal + if (closed) return; + closed = true; if (timer) clearInterval(timer); try { process.stdin.setRawMode(false); } catch { /* ignore */ } process.stdin.pause(); @@ -126,7 +131,7 @@ function cmdWatch() { process.stdin.resume(); process.stdin.setEncoding('utf8'); process.stdin.on('data', (key) => { - if (key === 'x' || key === '\x1b' || key === '\x03') { // x / Esc / Ctrl-C + if (key === 'x' || key === '\x03') { // x / Ctrl-C (Esc omitted: a split arrow escape sends a lone \x1b) cleanup(); out.write('\n[shift] watcher closed — the run keeps 
going.\n'); process.exit(0); } else if (key === 'p') { setPause(dir, !isPaused(dir)); draw(); @@ -138,7 +143,10 @@ function cmdWatch() { requestStop(dir); draw(); } }); - process.on('SIGINT', () => { cleanup(); process.exit(0); }); + // Restore on every exit path, not just the keys: a closed terminal (SIGHUP), + // kill (SIGTERM), or Ctrl-C must not leave the next shell with a hidden cursor + raw mode. + for (const sig of ['SIGINT', 'SIGTERM', 'SIGHUP']) process.on(sig, () => { cleanup(); process.exit(0); }); + process.on('exit', cleanup); draw(); timer = setInterval(draw, 800); @@ -164,7 +172,7 @@ async function cmdRun() { const mode = config.permissionMode || 'acceptEdits'; const { runLoop } = require('../lib/run-loop.cjs'); const { readUsageCache } = require('../lib/usage.cjs'); - const { isPaused } = require('../lib/control.cjs'); + const { isPaused, isStopRequested } = require('../lib/control.cjs'); // A headless `-p` run cannot answer permission prompts. Only bypassPermissions/dontAsk // auto-approve tool calls like Bash(git commit) — anything else stalls or denies on the @@ -189,6 +197,7 @@ async function cmdRun() { log: (m) => console.log(`[shift] ${m}`), finalized: () => fs.existsSync(path.join(dir, 'summary.md')), isPaused: () => isPaused(dir), + isStopRequested: () => isStopRequested(dir), sleepUntil: (ms) => new Promise(r => setTimeout(r, Math.max(0, ms - Date.now()))), spawn: () => { const args = ['-p', '--permission-mode', mode]; diff --git a/shift/examples/watch-demo.cjs b/shift/examples/watch-demo.cjs new file mode 100644 index 0000000..2af254e --- /dev/null +++ b/shift/examples/watch-demo.cjs @@ -0,0 +1,47 @@ +#!/usr/bin/env node +'use strict'; +// Zero-cost demo of `shift watch`: spins up a throwaway run, drives the real Stop +// hook through it, and prints the live dashboard at each step — including a [k] skip +// and a [q] stop — so you can see the visibility + control surface without spawning +// a real `claude`. 
Run: node shift/examples/watch-demo.cjs +const fs = require('node:fs'); +const os = require('node:os'); +const path = require('node:path'); +const cp = require('node:child_process'); + +const SHIFT = path.resolve(__dirname, '..'); +const { buildModel, renderFrame } = require(path.join(SHIFT, 'lib', 'watch-model.cjs')); +const { requestSkip, requestStop } = require(path.join(SHIFT, 'lib', 'control.cjs')); +const HOOK = path.join(SHIFT, 'hooks', 'shift-stop.cjs'); + +const cwd = fs.mkdtempSync(path.join(os.tmpdir(), 'shift-watch-demo-')); +const dir = path.join(cwd, '.shift'); +fs.mkdirSync(path.join(cwd, 'queue'), { recursive: true }); +fs.mkdirSync(dir, { recursive: true }); +for (const [n, t] of [['01-build.md', 'build the thing'], ['02-flaky.md', 'flaky task'], ['03-docs.md', 'write docs']]) { + fs.writeFileSync(path.join(cwd, 'queue', n), t); +} +fs.writeFileSync(path.join(dir, 'config.json'), JSON.stringify({ + sources: [{ path: 'queue', kind: 'briefs' }], + bounds: { maxHours: 24, maxIterations: 10 }, definitionOfDone: 'done', git: {} +})); +fs.writeFileSync(path.join(dir, 'state.json'), JSON.stringify({ + runId: 'demo', startedAt: new Date(Date.now() - 5 * 60000).toISOString(), + iterations: 0, branch: 'shift/demo', currentBinId: null, bins: [] +})); +fs.writeFileSync(path.join(dir, 'log.md'), '# log\n'); + +const fire = (active) => cp.execFileSync('node', [HOOK], { cwd, input: JSON.stringify({ stop_hook_active: active, cwd }), encoding: 'utf8' }); +const show = (label) => { + process.stdout.write(`\n\x1b[1m=== ${label} ===\x1b[0m\n`); + process.stdout.write(renderFrame(buildModel({ dir, now: Date.now() }), { width: 78, color: true })); +}; + +fire(false); show('1) run started — bin 01 working'); +fire(true); show('2) bin 01 done -> bin 02 working'); +requestSkip(dir, 'queue/02-flaky.md'); // you press [k] now, while bin 02 is the current bin +fire(true); show('3) you pressed [k] on bin 02 -> SKIPPED, bin 03 working'); +requestStop(dir); show('4) you pressed 
[q] -> stopping banner'); +fire(true); show('5) bin 03 done, STOP honored -> finalized'); +process.stdout.write('\n--- .shift/summary.md ---\n' + fs.readFileSync(path.join(dir, 'summary.md'), 'utf8')); +process.stdout.write(`\n(throwaway repo: ${cwd})\n`); diff --git a/shift/hooks/shift-stop.cjs b/shift/hooks/shift-stop.cjs index af99ed1..1d04399 100755 --- a/shift/hooks/shift-stop.cjs +++ b/shift/hooks/shift-stop.cjs @@ -83,11 +83,11 @@ function main() { // Attribute the just-finished work to the current bin (skipped / blocked / verify gate / done). if (prevBinId) { const skipId = readSkip(dir); + if (skipId) clearSkip(dir); // consume on read: a skip that misses its target is discarded, never left to fire on a later bin const blocked = readBlocked(dir).find(x => x.id === prevBinId); if (skipId === prevBinId) { // User hit [k] in `shift watch`: drop this bin and move on (work, if any, stays on the branch). state = setBinStatus(state, prevBinId, { status: 'skipped', note: 'skipped by user' }); - clearSkip(dir); } else if (blocked) { state = setBinStatus(state, prevBinId, { status: 'blocked', note: blocked.note }); } else if (verifyCmd) { diff --git a/shift/lib/run-loop.cjs b/shift/lib/run-loop.cjs index 9e14be2..4da90cd 100644 --- a/shift/lib/run-loop.cjs +++ b/shift/lib/run-loop.cjs @@ -36,6 +36,8 @@ async function runLoop({ config, effects }) { // Paused via `shift watch` ([p]): idle without spawning until resumed. Still bounded // by maxHours/usage (re-checked each poll), so a forgotten pause can't run forever. if (effects.isPaused && effects.isPaused()) { + // [q] stops even while paused — otherwise pause+stop would park until the time box. 
+ if (effects.isStopRequested && effects.isStopRequested()) return { reason: 'stopped while paused', spawns }; effects.log('paused — waiting (resume with [p] in `shift watch`)'); await effects.sleepUntil(now + PAUSE_POLL_MS); continue; diff --git a/shift/lib/state.cjs b/shift/lib/state.cjs index 9d10a99..1ab2e3a 100644 --- a/shift/lib/state.cjs +++ b/shift/lib/state.cjs @@ -11,7 +11,11 @@ function saveState(dir, state) { // Persist lean: the bin `text` is re-read from disk on each discovery pass, so // keep it out of state.json (avoids bloating state with full brief/plan bodies). const lean = { ...state, bins: state.bins.map(({ text, ...b }) => b) }; - fs.writeFileSync(statePath(dir), JSON.stringify(lean, null, 2)); + // Write-then-rename so a concurrent reader (e.g. `shift watch`) never parses a + // half-written file; renameSync is atomic within the same directory. + const tmp = statePath(dir) + '.tmp'; + fs.writeFileSync(tmp, JSON.stringify(lean, null, 2)); + fs.renameSync(tmp, statePath(dir)); } function initState({ runId, startedAt, branch }) { diff --git a/shift/lib/watch-model.cjs b/shift/lib/watch-model.cjs index 5eadf6e..f9cdbdd 100644 --- a/shift/lib/watch-model.cjs +++ b/shift/lib/watch-model.cjs @@ -79,7 +79,11 @@ function bar(done, total, width) { return '█'.repeat(filled) + '░'.repeat(Math.max(0, width - filled)); } -function pad(s, n) { s = String(s); return s.length >= n ? s.slice(0, n) : s + ' '.repeat(n - s.length); } +function pad(s, n) { + s = String(s); + if (s.length > n) return s.slice(0, n - 1) + '…'; // truncate long bin ids with an ellipsis + return s + ' '.repeat(n - s.length); +} // renderFrame(model, { width, color }) -> string. Pure. 
function renderFrame(model, opts = {}) { @@ -99,8 +103,10 @@ function renderFrame(model, opts = {}) { L.push(`${c(ANSI.bold, 'shift')} ${c(ANSI.dim, '·')} ${c(ANSI.cyan, model.branch)} ${c(ANSI.dim, '·')} iter ${model.iterations} ${status}`); L.push(c(ANSI.dim, '─'.repeat(Math.min(width, 64)))); - const { done, total } = { done: model.counts.done, total: model.counts.total }; - L.push(`${c(ANSI.green, bar(done, total, 24))} ${c(ANSI.bold, `${done}/${total}`)} bins ${c(ANSI.dim, '·')} ${model.elapsedMin}m elapsed`); + const { done, blocked, skipped, total } = model.counts; + const resolved = done + blocked + skipped; // bar fills as the queue is dealt with (reaches full at finalize) + const extra = (blocked + skipped) ? c(ANSI.dim, ` (${blocked + skipped} blocked/skipped)`) : ''; + L.push(`${c(ANSI.green, bar(resolved, total, 24))} ${c(ANSI.bold, `${done}/${total}`)} done${extra} ${c(ANSI.dim, '·')} ${model.elapsedMin}m elapsed`); L.push(''); for (const b of model.bins) { diff --git a/shift/test/hook.test.cjs b/shift/test/hook.test.cjs index 17f9726..f319f49 100644 --- a/shift/test/hook.test.cjs +++ b/shift/test/hook.test.cjs @@ -86,6 +86,16 @@ test('SKIP control marks the current bin skipped and advances to the next', () = assert.ok(!fs.existsSync(path.join(dir, 'SKIP')), 'SKIP is consumed'); }); +test('a SKIP naming a non-current bin is consumed and discarded, not applied to a later bin', () => { + const { cwd, dir } = setupRun(); + runHook(cwd, { stop_hook_active: false }); // start bin 1 + fs.writeFileSync(path.join(dir, 'SKIP'), 'queue/99-nope.md'); // stale / wrong id + runHook(cwd, { stop_hook_active: true }); // bin 1 -> done (skip ignored) + const s = JSON.parse(fs.readFileSync(path.join(dir, 'state.json'), 'utf8')); + assert.equal(s.bins.find(b => b.id === 'queue/01.md').status, 'done'); + assert.ok(!fs.existsSync(path.join(dir, 'SKIP')), 'stale SKIP is consumed, never left to fire on a later bin'); +}); + test('kill switch ends the run immediately', () => 
{ const { cwd, dir } = setupRun(); fs.writeFileSync(path.join(dir, 'STOP'), ''); diff --git a/shift/test/run-loop.test.cjs b/shift/test/run-loop.test.cjs index 4e32e61..90ab4da 100644 --- a/shift/test/run-loop.test.cjs +++ b/shift/test/run-loop.test.cjs @@ -127,6 +127,19 @@ test('pause idles the runner (no spawn) until unpaused, then proceeds', async () assert.equal(calls.spawns, 1, 'no spawn while paused; one after resume'); }); +test('stop requested while paused ends the run (does not park until the time box)', async () => { + const { effects, calls, config } = makeEffects({ + spawns: [{ result: { status: 0 }, finalize: true }], + usage: null + }); + effects.isPaused = () => true; // stays paused + effects.isStopRequested = () => true; // ...but the user also hit [q] + const r = await runLoop({ config, effects }); + assert.match(r.reason, /stop/i); + assert.equal(calls.spawns, 0); + assert.equal(calls.sleepUntil.length, 0, 'must not idle when a stop is pending'); +}); + test('rate-limited with a stale/past reset stops instead of busy-spinning', async () => { // Reset time is already in the past (stale cache). sleepUntil(past) would return // instantly and re-spawn forever (bounded only by maxResumes) — guard must stop. From 7f44160d3cbf8053c2233c5c263b7eeafb586152 Mon Sep 17 00:00:00 2001 From: Allen Manzano-Wight <6640236+AllenBW@users.noreply.github.com> Date: Tue, 16 Jun 2026 10:57:04 -0400 Subject: [PATCH 08/12] shift watch: per-bin + run tokens/runtime, drill-down, and a work record MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Tokens (output, the honest 'work' figure — not cache-inflated total) + runtime in the dashboard header, status --line, and per-bin columns. Summed from the session transcript (transcript_path from the hook payload; usage in message.usage). - Up/down select a bin, Enter opens a detail view (status, runtime, token breakdown, commit, brief), esc back. 
- Work record: every finalized run appended to .shift/history.jsonl; 'shift history' shows per-run rows + a totals footer; 'shift history <runId>' drills into one run. - New pure modules: transcript.cjs (window-sum usage), timeline.cjs (append-only bin boundaries), history.cjs (ledger append/read/aggregate). Hook attributes per-bin runtime+tokens and writes the history record on finalize. - Brief now tells the agent .shift/ is append-only bookkeeping (never edit state.json). Known limitation (SPEC §13): per-bin attribution is best-effort in fully-headless runs — an autonomous agent rewrites/deletes .shift/ mid-run and Claude Code sandboxes hook writes to the project dir, so the boundary record can't be put out of reach. Run-level tokens/runtime + the history record (the hook's final write) are authoritative. 96 shift tests, all green. --- shift/README.md | 20 ++++- shift/SPEC.md | 6 ++ shift/bin/shift | 53 +++++++++-- shift/examples/watch-demo.cjs | 38 +++++--- shift/hooks/shift-stop.cjs | 102 ++++++++++++++++++--- shift/lib/brief.cjs | 3 +- shift/lib/history.cjs | 39 ++++++++ shift/lib/timeline.cjs | 43 +++++++++ shift/lib/transcript.cjs | 41 +++++++++ shift/lib/watch-model.cjs | 153 +++++++++++++++++++++++++------- shift/test/brief.test.cjs | 6 ++ shift/test/history.test.cjs | 50 +++++++++++ shift/test/hook.test.cjs | 41 +++++++++ shift/test/timeline.test.cjs | 45 ++++++++++ shift/test/transcript.test.cjs | 44 +++++++++ shift/test/watch-model.test.cjs | 117 +++++++++++++++--------- 16 files changed, 691 insertions(+), 110 deletions(-) create mode 100644 shift/lib/history.cjs create mode 100644 shift/lib/timeline.cjs create mode 100644 shift/lib/transcript.cjs create mode 100644 shift/test/history.test.cjs create mode 100644 shift/test/timeline.test.cjs create mode 100644 shift/test/transcript.test.cjs diff --git a/shift/README.md b/shift/README.md index b9e42fd..4f1ebd6 100644 --- a/shift/README.md +++ b/shift/README.md @@ -70,12 +70,14 @@ An unattended run is the
*least* transparent mode there is — so `shift` gives cd your-repo && shift watch ``` -A dashboard redraws on an interval: a progress bar (`done/total`), every bin with its status (`✓` done · `▶` current · `·` pending · `⤫` skipped · `✗` blocked), elapsed time, the decision-log tail, and the "Needs you" count. Because a run is otherwise a black box, this is where you *see* it working. +A dashboard redraws on an interval: a progress bar, every bin with its status (`✓` done · `▶` current · `·` pending · `⤫` skipped · `✗` blocked) plus its **runtime and output tokens**, elapsed time, the run's live output-token total (`↑…out`), and the "Needs you" count. Because a run is otherwise a black box, this is where you *see* it working. -It's also the **control surface** — a status bar can show state but can't take input, so `watch` captures keys and writes signals the engine honors at the next stop: +It's also the **control + drill-down surface** — a status bar can show state but can't take input, so `watch` captures keys: | key | action | |---|---| +| `↑` / `↓` | move the selection between bins | +| `⏎` | open a bin's detail view (status, runtime, token breakdown in/out/cache, commit, brief); `esc` back | | `p` | pause / resume (the headless runner idles until you resume; still bounded by the time box) | | `k` | skip the current bin (marks it `skipped`, moves on — any work stays on the branch) | | `q` | stop the run (finalizes after the current bin — same as `shift stop`) | @@ -83,9 +85,19 @@ It's also the **control surface** — a status bar can show state but can't take Control is file-based under `.shift/` (`PAUSE` / `SKIP` / `STOP`), so it works whether the run is interactive or headless, and from any terminal in the repo. +> **Tokens are the *output* count** — the honest "work produced" figure, read from the session transcript. A warm run's `input`/cache tokens balloon with re-sent context, so the headline deliberately isn't `total` (that's in the detail view). 
Run-level tokens + runtime are authoritative; **per-bin** token/runtime columns are best-effort and may show `—` in a fully-headless run (an autonomous agent can rewrite `.shift/` mid-run) — see [SPEC §13](./SPEC.md). + +### The work record — `shift history` + +Every finalized run is appended to `.shift/history.jsonl`. `shift history` prints the ledger — one row per run (when, branch, runtime, output tokens, bin tally) and a **totals** footer across all runs; `shift history <run>` drills into a single run's bins. + ### In your status bar (module 1) -For an at-a-glance signal in the [Code Status Bar](../code-status-bar), `shift status --line` prints a one-liner (`⚙ shift 2/5 · 18m · ⚑1`) — empty when no run is active. Wire it into a ccstatusline `custom-command` widget to surface shift "in the place you're already looking." +For an at-a-glance signal in the [Code Status Bar](../code-status-bar), `shift status --line` prints a one-liner (`⚙ shift 2/5 · 18m · ↑412k ⚑1`) — empty when no run is active. Wire it into a ccstatusline `custom-command` widget to surface shift "in the place you're already looking." + +### See it without a run + +`node shift/examples/watch-demo.cjs` drives the real engine through a scripted run (with a synthetic transcript) and prints the dashboard at each step — tokens, a `[k]` skip, a `[q]` stop, the detail view, and the history ledger — at zero cost. ## Configure (`.shift/config.json`) @@ -133,4 +145,4 @@ Pick the narrowest mode that lets the work actually proceed. cd shift && npm test # node --test, zero dependencies ``` -Pure logic lives in `lib/` (discovery, state, bounds, brief, decision, verify, usage, outcome, run-loop, control, watch-model) and is unit-tested — including `renderFrame`, so the dashboard is testable without a TTY; `hooks/shift-stop.cjs` (the keep-going engine) and the `shift run` loop are integration-tested by driving them with injected effects / crafted hook input.
The `bin/shift watch` TUI is a thin shell over the tested `watch-model` + `control` modules. +Pure logic lives in `lib/` (discovery, state, bounds, brief, decision, verify, usage, outcome, run-loop, control, watch-model, transcript, timeline, history) and is unit-tested — including `renderFrame`/`renderDetail`/`renderHistory`, so the dashboard is testable without a TTY; `hooks/shift-stop.cjs` (the keep-going engine) and the `shift run` loop are integration-tested by driving them with injected effects / crafted hook input. The `bin/shift watch` TUI is a thin shell over the tested `watch-model` + `control` modules. diff --git a/shift/SPEC.md b/shift/SPEC.md index 527c190..df7f731 100644 --- a/shift/SPEC.md +++ b/shift/SPEC.md @@ -289,3 +289,9 @@ A real bounded `shift run` smoke (2 commit-a-file bins, `bypassPermissions`) **e The candor gap in v2 was that a headless run is opaque *while* it runs (good paper trail after, black box during). `shift watch` closes it: a zero-dependency live TUI that reads `.shift/` on an interval and renders a dashboard (progress bar, per-bin status, current bin, elapsed, decision-log tail, "Needs you"), plus **two-way control**. Since an output-only surface (a status bar) can't take input, control is a separate file-based channel under `.shift/` that the engine honors: `STOP` (existing kill switch / `q`), `PAUSE` (`p` — the runner idles, still bounded by the time box), `SKIP` (`k` — the hook marks the current bin `skipped` and advances). New status value: `skipped`. New modules: `lib/control.cjs` (signal channel) and `lib/watch-model.cjs` (`buildModel` + a **pure** `renderFrame`/`renderLine`, so the dashboard and the status-bar one-liner are unit-tested without a TTY). `bin/shift` gains `watch` and `status --line` (a one-liner for the module-1 status bar — ties the two modules together). **Tests:** 77 in `shift`, all green. 
*Known limitation:* `pause` and `skip` apply at the next stop-hook boundary (between bins), not mid-bin — the hook is the only point the engine re-evaluates. Mid-bin interruption would need a different mechanism. + +### Tokens, runtime + the work record (2026-06-16) + +Per the candor goal of making consumption legible: the dashboard header and `status --line` show **output tokens** (the honest "work produced" figure — not inflated by context resends / cache reads, which dominate `total`), summed from the session **transcript** (`transcript_path` from the hook payload; tokens live in `message.usage`). Each bin gets a runtime + token column; `↑/↓` selects a bin and `⏎` opens a **detail view** (status, runtime, token breakdown in/out/cache, commit, brief). Every finalized run is appended to an append-only **work record** at `.shift/history.jsonl`; `shift history` prints per-run rows + a totals footer (all runs, total time, total output tokens), and `shift history <run>` drills into one run's bins. New modules: `lib/transcript.cjs` (sum `usage` over a `[start, end)` window — pure), `lib/timeline.cjs` (append-only bin boundaries), `lib/history.cjs` (ledger append/read/aggregate). **Tests:** 96 in `shift`, all green. + +*Known limitation — per-bin attribution is best-effort in fully-headless autonomous runs.* Investigation (2026-06-16) established three constraints that, together, make reliable *per-bin* token/runtime attribution impossible while a `claude -p` agent runs unattended: (1) an autonomous agent **rewrites/deletes files under `.shift/`** mid-run (observed: it rewrote `state.json` + `log.md` and deleted `config.json`/`timeline.jsonl`), clobbering the hook's per-bin stamps; (2) Claude Code **sandboxes hook file-writes to the project directory**, so the boundary record can't be relocated out-of-repo where the agent can't reach it; (3) the transcript carries no per-bin marker to reconstruct boundaries from.
What **is** reliable and authoritative regardless: **run-level** output tokens + runtime, and the **work-record history** row (written as the hook's *final* action on finalize, after the agent's last turn, so it's never clobbered). Per-bin columns populate in interactive runs / when the agent leaves `.shift/` alone / in `shift/examples/watch-demo.cjs`, and show `—` otherwise. The brief now instructs the agent to treat `.shift/` as append-only bookkeeping; tightening that — or an engine-owned state store the agent can't reach — is the path to making per-bin robust. diff --git a/shift/bin/shift b/shift/bin/shift index e7d98d3..784695e 100755 --- a/shift/bin/shift +++ b/shift/bin/shift @@ -62,6 +62,7 @@ function cmdStart(args) { fs.mkdirSync(dir, { recursive: true }); if (fs.existsSync(path.join(dir, 'STOP'))) fs.unlinkSync(path.join(dir, 'STOP')); + require('../lib/timeline.cjs').clearTimeline(dir); // fresh run → fresh boundary record fs.writeFileSync(cfgFile, JSON.stringify(config, null, 2)); let state = initState({ runId: isoStamp(now), startedAt: now.toISOString(), branch }); state = mergeDiscovered(state, discovered); @@ -98,16 +99,23 @@ function cmdStatus(args) { // (a status bar) can't take input, so this is the interactive control surface. 
function cmdWatch() { const dir = path.join(process.cwd(), '.shift'); - const { buildModel, renderFrame } = require('../lib/watch-model.cjs'); + const { buildModel, renderFrame, renderDetail } = require('../lib/watch-model.cjs'); const { setPause, isPaused, requestSkip, requestStop } = require('../lib/control.cjs'); const out = process.stdout; const interactive = !!(process.stdin.isTTY && out.isTTY); let model; + let selected = -1; // -1 = no selection yet; set to the current bin on first draw + let mode = 'list'; // 'list' | 'detail' const draw = () => { try { // a transient read/write error must never wedge the terminal — retry next tick model = buildModel({ dir, now: Date.now() }); - const frame = renderFrame(model, { width: out.columns || 80, color: true }); + const n = (model.bins || []).length; + if (selected < 0 && n) selected = Math.max(0, model.bins.findIndex(b => b.current)); + if (selected >= n) selected = n - 1; // bins can change between draws — clamp + const frame = (mode === 'detail' && selected >= 0) + ? renderDetail(model, selected, { width: out.columns || 80, color: true }) + : renderFrame(model, { width: out.columns || 80, color: true, selectedIndex: selected }); if (interactive) out.write('\x1b[H\x1b[2J' + frame); // home + clear, then frame else out.write(frame); } catch { /* keep the watcher alive */ } @@ -131,8 +139,17 @@ function cmdWatch() { process.stdin.resume(); process.stdin.setEncoding('utf8'); process.stdin.on('data', (key) => { - if (key === 'x' || key === '\x03') { // x / Ctrl-C (Esc omitted: a split arrow escape sends a lone \x1b) + const n = (model && model.bins) ? model.bins.length : 0; + if (key === 'x' || key === '\x03') { // x / Ctrl-C cleanup(); out.write('\n[shift] watcher closed — the run keeps going.\n'); process.exit(0); + } else if (key === '\x1b[A') { // ↑ select up + if (n) selected = (selected <= 0 ? 
n : selected) - 1; draw(); + } else if (key === '\x1b[B') { // ↓ select down + if (n) selected = (selected + 1) % n; draw(); + } else if (key === '\r' || key === '\n') { // ⏎ open detail + if (selected >= 0) mode = 'detail'; draw(); + } else if (key === '\x1b') { // esc back to list (lone Esc, not an arrow sequence) + mode = 'list'; draw(); } else if (key === 'p') { setPause(dir, !isPaused(dir)); draw(); } else if (key === 'k') { @@ -159,6 +176,28 @@ function cmdStop() { console.log('shift will stop cleanly after the current bin.'); } +// The work record: every finalized run (.shift/history.jsonl). `shift history` prints the +// ledger + totals; `shift history ` drills into one run's bins. +function cmdHistory(args) { + const dir = path.join(process.cwd(), '.shift'); + const { readHistory, aggregate } = require('../lib/history.cjs'); + const { renderHistory, fmtDur, fmtTok } = require('../lib/watch-model.cjs'); + const records = readHistory(dir); + const target = (args || []).find(a => !a.startsWith('-')); + if (target) { + const r = records.filter(x => x.runId === target || (x.branch || '').endsWith(target)).pop(); + if (!r) { console.log(`No recorded run matching "${target}".`); return; } + const g = s => (s === 'done' ? '✓' : s === 'skipped' ? '⤫' : s === 'blocked' ? '✗' : '·'); + console.log(`run ${r.runId} · ${r.branch} · ${r.endReason}`); + console.log(` ${fmtDur(r.durationMs)} · ${fmtTok(r.tokens && r.tokens.output)} output · ${r.iterations} iters · ${r.bins.done}✓ ${r.bins.skipped}⤫ ${r.bins.blocked}✗`); + for (const b of (r.perBin || [])) { + console.log(` ${g(b.status)} ${b.id} ${fmtDur(b.durationMs)} ${fmtTok(b.tokensOutput)} out ${b.commit || ''}`); + } + return; + } + process.stdout.write(renderHistory(records, aggregate(records), { color: !!process.stdout.isTTY })); +} + // v2: headless outer loop — keeps spawning claude until the engine finalizes, // a bound trips, or (on a rate-limit wall) it waits for the window to reopen. 
async function cmdRun() { @@ -228,11 +267,13 @@ const [, , sub, ...rest] = process.argv; if (sub === 'start') cmdStart(rest); else if (sub === 'status') cmdStatus(rest); else if (sub === 'watch') cmdWatch(); +else if (sub === 'history') cmdHistory(rest); else if (sub === 'stop') cmdStop(); else if (sub === 'run') cmdRun().catch(e => { console.error(e); process.exit(1); }); else { - console.log('usage: shift [--dry-run]'); - console.log(' watch live dashboard + control: [p]ause [k]skip [q]stop [x]exit'); - console.log(' status --line one-line summary for a status bar'); + console.log('usage: shift [--dry-run]'); + console.log(' watch live dashboard + control: ↑/↓ select · ⏎ details · [p]ause [k]skip [q]stop [x]exit'); + console.log(' history [run] the work record: per-run runtime/tokens + totals (or one run\'s detail)'); + console.log(' status --line one-line summary for a status bar'); process.exit(1); } diff --git a/shift/examples/watch-demo.cjs b/shift/examples/watch-demo.cjs index 2af254e..11502cb 100644 --- a/shift/examples/watch-demo.cjs +++ b/shift/examples/watch-demo.cjs @@ -1,16 +1,18 @@ #!/usr/bin/env node 'use strict'; -// Zero-cost demo of `shift watch`: spins up a throwaway run, drives the real Stop -// hook through it, and prints the live dashboard at each step — including a [k] skip -// and a [q] stop — so you can see the visibility + control surface without spawning -// a real `claude`. Run: node shift/examples/watch-demo.cjs +// Zero-cost demo of `shift watch`: spins up a throwaway run, drives the real Stop hook +// through it with a synthetic transcript, and prints the live dashboard at each step — +// runtime + token columns, a [k] skip, a [q] stop, and the work-record history — so you +// can see the whole visibility + control surface without spawning a real `claude`. 
+// node shift/examples/watch-demo.cjs const fs = require('node:fs'); const os = require('node:os'); const path = require('node:path'); const cp = require('node:child_process'); const SHIFT = path.resolve(__dirname, '..'); -const { buildModel, renderFrame } = require(path.join(SHIFT, 'lib', 'watch-model.cjs')); +const { buildModel, renderFrame, renderDetail, renderHistory } = require(path.join(SHIFT, 'lib', 'watch-model.cjs')); +const { readHistory, aggregate } = require(path.join(SHIFT, 'lib', 'history.cjs')); const { requestSkip, requestStop } = require(path.join(SHIFT, 'lib', 'control.cjs')); const HOOK = path.join(SHIFT, 'hooks', 'shift-stop.cjs'); @@ -30,18 +32,28 @@ fs.writeFileSync(path.join(dir, 'state.json'), JSON.stringify({ iterations: 0, branch: 'shift/demo', currentBinId: null, bins: [] })); fs.writeFileSync(path.join(dir, 'log.md'), '# log\n'); +const T = path.join(dir, 'transcript.jsonl'); +fs.writeFileSync(T, ''); -const fire = (active) => cp.execFileSync('node', [HOOK], { cwd, input: JSON.stringify({ stop_hook_active: active, cwd }), encoding: 'utf8' }); +const fire = (active) => cp.execFileSync('node', [HOOK], { cwd, input: JSON.stringify({ stop_hook_active: active, cwd, transcript_path: T }), encoding: 'utf8' }); +const work = (out) => { // simulate the agent producing `out` output tokens on the current bin + fs.appendFileSync(T, JSON.stringify({ type: 'assistant', timestamp: new Date().toISOString(), message: { usage: { output_tokens: out, input_tokens: out * 6, cache_read_input_tokens: out * 40 } } }) + '\n'); +}; const show = (label) => { process.stdout.write(`\n\x1b[1m=== ${label} ===\x1b[0m\n`); - process.stdout.write(renderFrame(buildModel({ dir, now: Date.now() }), { width: 78, color: true })); + process.stdout.write(renderFrame(buildModel({ dir, now: Date.now() }), { width: 78, color: true, selectedIndex: 0 })); }; -fire(false); show('1) run started — bin 01 working'); -fire(true); show('2) bin 01 done -> bin 02 working'); 
-requestSkip(dir, 'queue/02-flaky.md'); // you press [k] now, while bin 02 is the current bin -fire(true); show('3) you pressed [k] on bin 02 -> SKIPPED, bin 03 working'); -requestStop(dir); show('4) you pressed [q] -> stopping banner'); +fire(false); work(8400); show('1) bin 01 working — tokens climbing live'); +fire(true); work(21300); show('2) bin 01 done (runtime + tokens) -> bin 02 working'); +requestSkip(dir, 'queue/02-flaky.md'); // you press [k] while bin 02 is current +fire(true); work(5100); show('3) you pressed [k] -> bin 02 SKIPPED, bin 03 working'); +requestStop(dir); show('4) you pressed [q] -> stopping after current bin'); fire(true); show('5) bin 03 done, STOP honored -> finalized'); -process.stdout.write('\n--- .shift/summary.md ---\n' + fs.readFileSync(path.join(dir, 'summary.md'), 'utf8')); + +process.stdout.write('\n\x1b[1m=== ⏎ details on bin 01 (drill-down) ===\x1b[0m\n'); +process.stdout.write(renderDetail(buildModel({ dir, now: Date.now() }), 0, { width: 78, color: true })); + +process.stdout.write('\n\x1b[1m=== shift history (work record across runs) ===\x1b[0m\n'); +process.stdout.write(renderHistory(readHistory(dir), aggregate(readHistory(dir)), { color: true })); process.stdout.write(`\n(throwaway repo: ${cwd})\n`); diff --git a/shift/hooks/shift-stop.cjs b/shift/hooks/shift-stop.cjs index 1d04399..f935db5 100755 --- a/shift/hooks/shift-stop.cjs +++ b/shift/hooks/shift-stop.cjs @@ -8,6 +8,9 @@ const { decide } = require('../lib/decision.cjs'); const { runVerify } = require('../lib/verify.cjs'); const { writeUsageCache } = require('../lib/usage.cjs'); const { readSkip, clearSkip } = require('../lib/control.cjs'); +const { sumTokens } = require('../lib/transcript.cjs'); +const { appendRecord } = require('../lib/history.cjs'); +const { appendEvent, readTimeline, binWindows } = require('../lib/timeline.cjs'); function readStdin() { try { return fs.readFileSync(0, 'utf8'); } catch { return ''; } } @@ -32,7 +35,14 @@ function tail(s, n) { 
return s.length > n ? s.slice(s.length - n) : s; } -function writeSummary(dir, state, reason, now) { +function fmtTokens(n) { + if (!n) return '0'; + if (n >= 1e6) return (n / 1e6).toFixed(2) + 'M'; + if (n >= 1e3) return Math.round(n / 1e3) + 'k'; + return String(n); +} + +function writeSummary(dir, state, reason, now, runTok) { const done = state.bins.filter(b => b.status === 'done').length; const blocked = state.bins.filter(b => b.status === 'blocked'); const skipped = state.bins.filter(b => b.status === 'skipped').length; @@ -47,13 +57,42 @@ function writeSummary(dir, state, reason, now) { `Ended: ${reason}`, `Duration: ${mins} min · Iterations: ${state.iterations}`, `Branch: ${state.branch}`, - `Bins: ${done} done · ${blocked.length} blocked · ${skipped} skipped · ${pending} pending`, '', - '## Needs you', - ...(items.length ? items : ['- (nothing flagged)']) + `Bins: ${done} done · ${blocked.length} blocked · ${skipped} skipped · ${pending} pending` ]; + if (runTok) lines.push(`Tokens: ${fmtTokens(runTok.output)} output · ${fmtTokens(runTok.total)} total`); + lines.push('', '## Needs you', ...(items.length ? items : ['- (nothing flagged)'])); fs.writeFileSync(path.join(dir, 'summary.md'), lines.join('\n') + '\n'); } +// Append this run to the work record (.shift/history.jsonl). One row per finalized run. +// Per-bin metrics come from the timeline (boundaries) + transcript (tokens) so they +// survive even if the agent rewrote state.json mid-run. 
+function appendRunRecord(dir, state, reason, now, runTok, transcriptPath) { + const tally = s => state.bins.filter(b => b.status === s).length; + const windows = binWindows(readTimeline(dir)); + const nowIso = new Date(now).toISOString(); + appendRecord(dir, { + runId: state.runId, branch: state.branch, + startedAt: state.startedAt, endedAt: nowIso, + durationMs: Math.max(0, now - Date.parse(state.startedAt)), + iterations: state.iterations, endReason: reason, + bins: { total: state.bins.length, done: tally('done'), skipped: tally('skipped'), blocked: tally('blocked') }, + tokens: { output: runTok ? runTok.output : 0, total: runTok ? runTok.total : 0 }, + perBin: state.bins.map(b => { + const w = windows[b.id] || {}; + const durationMs = (w.startedAt && w.finishedAt) + ? Math.max(0, Date.parse(w.finishedAt) - Date.parse(w.startedAt)) + : (b.durationMs || null); + let tokensOutput = (b.tokens && b.tokens.output) || null; + if (tokensOutput == null && transcriptPath && w.startedAt) { + const t = sumTokens(transcriptPath, w.startedAt, w.finishedAt || nowIso); + if (t.messages > 0) tokensOutput = t.output; + } + return { id: b.id, status: b.status, durationMs, tokensOutput, commit: b.commit || null }; + }) + }); +} + function main() { let input = {}; try { input = JSON.parse(readStdin() || '{}'); } catch { input = {}; } @@ -66,7 +105,9 @@ function main() { const config = JSON.parse(fs.readFileSync(path.join(dir, 'config.json'), 'utf8')); const now = Date.now(); + const nowIso = new Date(now).toISOString(); const killSwitch = fs.existsSync(path.join(dir, 'STOP')); + const payloadTranscript = (input && typeof input.transcript_path === 'string') ? input.transcript_path : null; // Capture rate limits from the hook payload: enforce the usage cap and cache // reset times for the headless runner. Absent on non-Pro/Max or pre-first-response. @@ -74,39 +115,63 @@ function main() { // Re-discover (fresh text + new files) and carry over status/attempts. 
let state = mergeDiscovered(loadState(dir), discoverBins(config.sources, cwd)); + const transcriptPath = payloadTranscript || state.transcriptPath || null; const prevBinId = state.currentBinId; const verifyCmd = config.verify && config.verify.command; const maxAttempts = (config.verify && config.verify.maxAttempts) || 2; let retryFeedback = null; + // When a bin finishes, attribute its runtime + tokens. The window [start, now) comes + // from the append-only timeline (agent-proof) — NOT state.json, which an autonomous + // agent may rewrite mid-run — and tokens are summed from the transcript (also outside + // the repo). `fm` is merged into whichever terminal status the bin lands on, but the + // durable copy is the timeline + the history record, not these (clobberable) fields. + const prevStart = prevBinId ? (binWindows(readTimeline(dir))[prevBinId] || {}).startedAt : null; + let fm = {}; + if (prevBinId) { + const tok = (transcriptPath && prevStart) ? sumTokens(transcriptPath, prevStart, nowIso) : null; + fm = { + finishedAt: nowIso, + durationMs: prevStart ? Math.max(0, now - Date.parse(prevStart)) : undefined, + tokens: tok ? { output: tok.output, input: tok.input, cacheRead: tok.cacheRead, total: tok.total } : undefined + }; + } + // Attribute the just-finished work to the current bin (skipped / blocked / verify gate / done). + let binFinished = false; if (prevBinId) { const skipId = readSkip(dir); if (skipId) clearSkip(dir); // consume on read: a skip that misses its target is discarded, never left to fire on a later bin const blocked = readBlocked(dir).find(x => x.id === prevBinId); if (skipId === prevBinId) { // User hit [k] in `shift watch`: drop this bin and move on (work, if any, stays on the branch). 
- state = setBinStatus(state, prevBinId, { status: 'skipped', note: 'skipped by user' }); + state = setBinStatus(state, prevBinId, { status: 'skipped', note: 'skipped by user', ...fm }); + binFinished = true; } else if (blocked) { - state = setBinStatus(state, prevBinId, { status: 'blocked', note: blocked.note }); + state = setBinStatus(state, prevBinId, { status: 'blocked', note: blocked.note, ...fm }); + binFinished = true; } else if (verifyCmd) { const v = runVerify(verifyCmd, cwd); if (v.ok) { - state = setBinStatus(state, prevBinId, { status: 'done', finishedAt: new Date(now).toISOString() }); + state = setBinStatus(state, prevBinId, { status: 'done', ...fm }); + binFinished = true; } else { const bin = state.bins.find(b => b.id === prevBinId) || {}; const attempts = (bin.attempts || 0) + 1; if (attempts < maxAttempts) { - state = setBinStatus(state, prevBinId, { attempts }); // stays pending → re-blocked below + state = setBinStatus(state, prevBinId, { attempts }); // stays pending → re-blocked below (not finished yet) retryFeedback = `Your previous attempt failed verification (\`${verifyCmd}\`). Fix it and make it pass. 
Output (tail):\n${tail(v.output, 2000)}`; } else { - state = setBinStatus(state, prevBinId, { status: 'blocked', attempts, note: `failed verification after ${attempts} attempts` }); + state = setBinStatus(state, prevBinId, { status: 'blocked', attempts, note: `failed verification after ${attempts} attempts`, ...fm }); + binFinished = true; } } } else { - state = setBinStatus(state, prevBinId, { status: 'done', finishedAt: new Date(now).toISOString() }); + state = setBinStatus(state, prevBinId, { status: 'done', ...fm }); + binFinished = true; } + if (binFinished) appendEvent(dir, { t: nowIso, event: 'finish', id: prevBinId }); } const result = decide({ @@ -114,19 +179,32 @@ function main() { stopHookActive: !!input.stop_hook_active, killSwitch }); + if (transcriptPath) state.transcriptPath = transcriptPath; // so `shift watch` can live-parse tokens + if (result.action === 'block') { let reason = result.reason; if (retryFeedback && result.nextBinId === prevBinId) reason += `\n\n${retryFeedback}`; state.iterations += 1; state.currentBinId = result.nextBinId; + // Record the bin's start the first time it becomes current (a new bin, not a verify + // retry of the same one). The timeline is the durable copy; state.bins.startedAt is a + // best-effort convenience that the agent may clobber. 
+ if (result.nextBinId !== prevBinId) appendEvent(dir, { t: nowIso, event: 'start', id: result.nextBinId }); + const nb = state.bins.find(b => b.id === result.nextBinId); + if (nb && !nb.startedAt) state = setBinStatus(state, result.nextBinId, { startedAt: nowIso }); saveState(dir, state); fs.appendFileSync(path.join(dir, 'log.md'), - `\n## ${new Date(now).toISOString()} — work ${result.nextBinId} (iter ${state.iterations})\n`); + `\n## ${nowIso} — work ${result.nextBinId} (iter ${state.iterations})\n`); process.stdout.write(JSON.stringify({ decision: 'block', reason })); } else { + // First finalize only (summary.md absent) appends the work record — guards against a + // stray extra Stop firing after the run already finalized. + const alreadyFinalized = fs.existsSync(path.join(dir, 'summary.md')); + const runTok = transcriptPath ? sumTokens(transcriptPath, state.startedAt, nowIso) : null; state.currentBinId = null; saveState(dir, state); - writeSummary(dir, state, result.reason, now); + if (!alreadyFinalized) appendRunRecord(dir, state, result.reason, now, runTok, transcriptPath); + writeSummary(dir, state, result.reason, now, runTok); process.stdout.write('{}'); } } diff --git a/shift/lib/brief.cjs b/shift/lib/brief.cjs index 7a68aa2..c54d19e 100644 --- a/shift/lib/brief.cjs +++ b/shift/lib/brief.cjs @@ -12,9 +12,10 @@ function renderBrief(bin, config) { : ''; return [ 'You are running unattended under `shift`. Complete the brief below end-to-end using your best judgment.', - 'Do NOT ask questions — if you would normally ask, decide and record the decision in .shift/log.md.', + 'Do NOT ask questions — if you would normally ask, decide and APPEND the decision as a line to .shift/log.md.', `Definition of done: ${dod}`, 'When finished, commit your work on the current branch.', + '`.shift/` is shift\'s own run bookkeeping. The ONLY writes you may make under it are APPENDING a line to .shift/log.md or .shift/blocked.jsonl. 
Never edit, overwrite, or "tidy" .shift/state.json, .shift/config.json, .shift/summary.md, and never rewrite .shift/log.md — shift maintains those itself (run progress, per-bin runtime + tokens), and changing them corrupts the run record.', 'Flag anything that needs the human (a deferred decision, an action you could not take) by appending a line to .shift/log.md as: "Needs you: " — these surface in the run summary.', 'If a true blocker stops you from finishing this bin, append one line to .shift/blocked.jsonl: {"id":"","note":""} then stop.', guard, diff --git a/shift/lib/history.cjs b/shift/lib/history.cjs new file mode 100644 index 0000000..c440e14 --- /dev/null +++ b/shift/lib/history.cjs @@ -0,0 +1,39 @@ +'use strict'; +const fs = require('node:fs'); +const path = require('node:path'); + +// The shift work record: an append-only ledger of finalized runs at .shift/history.jsonl. +// `shift start` rewrites state.json but never touches this, so it accumulates across runs. +// One JSON line per run (totals + per-bin breakdown). Read for `shift history` + aggregates. + +function historyPath(dir) { return path.join(dir, 'history.jsonl'); } + +function appendRecord(dir, record) { + try { + fs.mkdirSync(dir, { recursive: true }); + fs.appendFileSync(historyPath(dir), JSON.stringify(record) + '\n'); + } catch { /* best-effort: never let a logging failure break the run */ } +} + +function readHistory(dir) { + let raw; + try { raw = fs.readFileSync(historyPath(dir), 'utf8'); } catch { return []; } + return raw.split('\n').filter(Boolean) + .map(l => { try { return JSON.parse(l); } catch { return null; } }) + .filter(Boolean); +} + +// aggregate(records) -> totals across the ledger. 
+function aggregate(records) { + const a = { runs: 0, durationMs: 0, outputTokens: 0, bins: { total: 0, done: 0, skipped: 0, blocked: 0 } }; + for (const r of records) { + a.runs += 1; + a.durationMs += (r.durationMs || 0); + a.outputTokens += ((r.tokens && r.tokens.output) || 0); + const b = r.bins || {}; + for (const k of ['total', 'done', 'skipped', 'blocked']) a.bins[k] += (b[k] || 0); + } + return a; +} + +module.exports = { historyPath, appendRecord, readHistory, aggregate }; diff --git a/shift/lib/timeline.cjs b/shift/lib/timeline.cjs new file mode 100644 index 0000000..90b12b8 --- /dev/null +++ b/shift/lib/timeline.cjs @@ -0,0 +1,43 @@ +'use strict'; +const fs = require('node:fs'); +const path = require('node:path'); + +// An append-only record of bin boundaries (one event per line in .shift/timeline.jsonl) +// — the source of per-bin runtime + token windows, paired with the transcript for tokens. +// +// Best-effort, by design: in a fully-headless autonomous run the agent may rewrite or +// delete files under .shift/ (observed), so per-bin metrics can be lost — the run-level +// totals + the work-record history (the hook's final write) remain authoritative +// regardless. Writing this out-of-repo isn't an option: Claude Code sandboxes hook +// file-writes to the project directory. See SPEC §13. 
+ +function timelinePath(dir) { return path.join(dir, 'timeline.jsonl'); } + +function appendEvent(dir, ev) { // ev: { t: iso, event: 'start'|'finish', id } + try { fs.mkdirSync(dir, { recursive: true }); fs.appendFileSync(timelinePath(dir), JSON.stringify(ev) + '\n'); } + catch { /* best-effort */ } +} + +function readTimeline(dir) { + let raw; + try { raw = fs.readFileSync(timelinePath(dir), 'utf8'); } catch { return []; } + return raw.split('\n').filter(Boolean) + .map(l => { try { return JSON.parse(l); } catch { return null; } }) + .filter(Boolean); +} + +function clearTimeline(dir) { try { fs.unlinkSync(timelinePath(dir)); } catch { /* none */ } } + +// binWindows(events) -> { id: { startedAt, finishedAt } } — first start, last finish. +function binWindows(events) { + const w = {}; + for (const e of events) { + if (!e || !e.id) continue; + if (!w[e.id]) w[e.id] = { startedAt: null, finishedAt: null }; + if (e.event === 'start' && !w[e.id].startedAt) w[e.id].startedAt = e.t; + if (e.event === 'finish') w[e.id].finishedAt = e.t; + } + return w; +} + +module.exports = { timelinePath, appendEvent, readTimeline, clearTimeline, binWindows }; diff --git a/shift/lib/transcript.cjs b/shift/lib/transcript.cjs new file mode 100644 index 0000000..07e5cdb --- /dev/null +++ b/shift/lib/transcript.cjs @@ -0,0 +1,41 @@ +'use strict'; +const fs = require('node:fs'); + +// Token accounting from a Claude Code transcript JSONL. Each assistant message line +// carries message.usage { input_tokens, output_tokens, cache_read_input_tokens, +// cache_creation_input_tokens } and a top-level ISO `timestamp` — so we can attribute +// tokens to a bin by summing the usage of messages within that bin's [start, end) window. + +// sumUsage(lines, fromMs, toMs) — pure. fromMs/toMs are epoch ms or null (open bound). 
+function sumUsage(lines, fromMs, toMs) { + const acc = { output: 0, input: 0, cacheRead: 0, cacheCreate: 0, total: 0, messages: 0 }; + for (const line of lines) { + let o; + try { o = JSON.parse(line); } catch { continue; } + if (!o || o.type !== 'assistant' || !o.message || !o.message.usage) continue; + const t = Date.parse(o.timestamp); + if (!Number.isFinite(t)) continue; + if (fromMs != null && t < fromMs) continue; + if (toMs != null && t >= toMs) continue; + const u = o.message.usage; + const out = u.output_tokens || 0; + const inp = u.input_tokens || 0; + const cr = u.cache_read_input_tokens || 0; + const cc = u.cache_creation_input_tokens || 0; + acc.output += out; acc.input += inp; acc.cacheRead += cr; acc.cacheCreate += cc; + acc.total += out + inp + cr + cc; acc.messages += 1; + } + return acc; +} + +function readLines(file) { + try { return fs.readFileSync(file, 'utf8').split('\n').filter(Boolean); } + catch { return []; } +} + +// Convenience over a file path within an ISO window (either bound optional). +function sumTokens(file, fromIso, toIso) { + return sumUsage(readLines(file), fromIso ? Date.parse(fromIso) : null, toIso ? 
Date.parse(toIso) : null); +} + +module.exports = { sumUsage, sumTokens, readLines }; diff --git a/shift/lib/watch-model.cjs b/shift/lib/watch-model.cjs index f9cdbdd..717117a 100644 --- a/shift/lib/watch-model.cjs +++ b/shift/lib/watch-model.cjs @@ -3,6 +3,8 @@ const fs = require('node:fs'); const path = require('node:path'); const { loadState } = require('./state.cjs'); const { isPaused, isStopRequested } = require('./control.cjs'); +const { sumUsage, readLines } = require('./transcript.cjs'); +const { readTimeline, binWindows } = require('./timeline.cjs'); // --- model ----------------------------------------------------------------- @@ -13,8 +15,7 @@ function readLog(dir) { const recent = []; const needsYou = []; for (const line of lines) { - // hook writes: "## — work (iter N)" - const m = line.match(/^##\s*(\S+)\s*—\s*(.+)$/); + const m = line.match(/^##\s*(\S+)\s*—\s*(.+)$/); // "## — work (iter N)" if (m) { const time = (m[1].match(/T(\d{2}:\d{2})/) || [])[1] || m[1]; recent.push(`${time} ${m[2]}`); @@ -25,15 +26,40 @@ function readLog(dir) { return { recent: recent.slice(-6), needsYou }; } +function readBrief(cwd, binId) { + try { return fs.readFileSync(path.join(cwd, binId), 'utf8'); } catch { return ''; } +} + // buildModel({ dir, now }) — read .shift/ into a plain view model. Pure of rendering. function buildModel({ dir, now }) { let state; try { state = loadState(dir); } catch { return { exists: false }; } - const bins = (state.bins || []).map(b => ({ - id: b.id, status: b.status, commit: b.commit || null, note: b.note || null, - current: b.id === state.currentBinId && b.status === 'pending' - })); + // Per-bin runtime + tokens are derived from the timeline (agent-proof boundaries) and + // the transcript (parsed once), so they survive a state.json the agent rewrote. We fall + // back to any stamps the hook left on state.bins when no timeline/transcript is present. + const windows = binWindows(readTimeline(dir)); + const lines = state.transcriptPath ? 
readLines(state.transcriptPath) : []; + const startMs = b => (windows[b.id] && windows[b.id].startedAt) ? Date.parse(windows[b.id].startedAt) : null; + const finMs = (b, current) => { + const w = windows[b.id] || {}; + if (w.finishedAt) return Date.parse(w.finishedAt); + return current ? now : null; // current bin: open window up to now (live) + }; + + const bins = (state.bins || []).map(b => { + const current = b.id === state.currentBinId && b.status === 'pending'; + const s = startMs(b), f = finMs(b, current); + let durationMs = (s != null && f != null) ? Math.max(0, f - s) + : (typeof b.durationMs === 'number' ? b.durationMs : null); + let tokens = b.tokens || null; + let tokensOutput = (tokens && typeof tokens.output === 'number') ? tokens.output : null; + if (tokensOutput == null && lines.length && s != null) { + const t = sumUsage(lines, s, f != null ? f : null); + if (t.messages > 0) { tokens = { output: t.output, input: t.input, cacheRead: t.cacheRead, total: t.total }; tokensOutput = t.output; } + } + return { id: b.id, status: b.status, commit: b.commit || null, note: b.note || null, current, durationMs, tokensOutput, tokens }; + }); const count = s => bins.filter(b => b.status === s).length; const counts = { done: count('done'), blocked: count('blocked'), skipped: count('skipped'), @@ -49,10 +75,20 @@ function buildModel({ dir, now }) { const startedMs = Date.parse(state.startedAt); const elapsedMin = Number.isFinite(startedMs) ? Math.max(0, Math.round((now - startedMs) / 60000)) : 0; + // Run output tokens: the transcript over [run start, now) (climbs live during a run); + // fall back to the sum of per-bin tokens when no transcript is known. 
+ let outputTokens = bins.reduce((s, b) => s + (b.tokensOutput || 0), 0); + if (lines.length && Number.isFinite(startedMs)) { + const t = sumUsage(lines, startedMs, now); + if (t.messages > 0) outputTokens = t.output; + } + return { exists: true, + cwd: path.dirname(dir), runId: state.runId, branch: state.branch, iterations: state.iterations || 0, - elapsedMin, paused: isPaused(dir), stopping: isStopRequested(dir), + elapsedMin, outputTokens, + paused: isPaused(dir), stopping: isStopRequested(dir), finalized: fs.existsSync(path.join(dir, 'summary.md')), bins, counts, recent, needsYou }; @@ -61,7 +97,7 @@ function buildModel({ dir, now }) { // --- render ---------------------------------------------------------------- const ANSI = { - reset: '\x1b[0m', bold: '\x1b[1m', dim: '\x1b[2m', + reset: '\x1b[0m', bold: '\x1b[1m', dim: '\x1b[2m', inverse: '\x1b[7m', green: '\x1b[32m', yellow: '\x1b[33m', red: '\x1b[31m', cyan: '\x1b[36m', gray: '\x1b[90m' }; function paint(color, code, s) { return color ? code + s + ANSI.reset : s; } @@ -78,17 +114,26 @@ function bar(done, total, width) { const filled = Math.round((done / total) * width); return '█'.repeat(filled) + '░'.repeat(Math.max(0, width - filled)); } - -function pad(s, n) { - s = String(s); - if (s.length > n) return s.slice(0, n - 1) + '…'; // truncate long bin ids with an ellipsis - return s + ' '.repeat(n - s.length); +function pad(s, n) { s = String(s); return s.length > n ? s.slice(0, n - 1) + '…' : s + ' '.repeat(n - s.length); } +function lpad(s, n) { s = String(s); return s.length >= n ? 
s : ' '.repeat(n - s.length) + s; } +function fmtDur(ms) { + if (ms == null) return '—'; + const s = Math.round(ms / 1000); + if (s < 60) return s + 's'; + return Math.floor(s / 60) + 'm' + String(s % 60).padStart(2, '0') + 's'; +} +function fmtTok(n) { + if (n == null) return '—'; + if (n >= 1e6) return (n / 1e6).toFixed(1) + 'M'; + if (n >= 1e3) return Math.round(n / 1e3) + 'k'; + return String(n); } -// renderFrame(model, { width, color }) -> string. Pure. +// renderFrame(model, { width, color, selectedIndex }) -> string. Pure. function renderFrame(model, opts = {}) { const width = opts.width || 80; const color = opts.color !== false; + const sel = typeof opts.selectedIndex === 'number' ? opts.selectedIndex : -1; const c = (code, s) => paint(color, code, s); if (!model || !model.exists) { @@ -100,40 +145,82 @@ function renderFrame(model, opts = {}) { ? c(ANSI.green, '● finalized') : model.stopping ? c(ANSI.red, '■ stopping after current bin') : model.paused ? c(ANSI.yellow, '⏸ PAUSED') : c(ANSI.green, '▶ running'); - L.push(`${c(ANSI.bold, 'shift')} ${c(ANSI.dim, '·')} ${c(ANSI.cyan, model.branch)} ${c(ANSI.dim, '·')} iter ${model.iterations} ${status}`); + L.push(`${c(ANSI.bold, 'shift')} ${c(ANSI.dim, '·')} ${c(ANSI.cyan, model.branch)} ${c(ANSI.dim, '·')} iter ${model.iterations} ${status} ${c(ANSI.dim, '·')} ${model.elapsedMin}m ${c(ANSI.dim, '·')} ${c(ANSI.bold, '↑' + fmtTok(model.outputTokens))} out`); L.push(c(ANSI.dim, '─'.repeat(Math.min(width, 64)))); const { done, blocked, skipped, total } = model.counts; - const resolved = done + blocked + skipped; // bar fills as the queue is dealt with (reaches full at finalize) + const resolved = done + blocked + skipped; // bar reaches full at finalize const extra = (blocked + skipped) ? 
c(ANSI.dim, ` (${blocked + skipped} blocked/skipped)`) : ''; - L.push(`${c(ANSI.green, bar(resolved, total, 24))} ${c(ANSI.bold, `${done}/${total}`)} done${extra} ${c(ANSI.dim, '·')} ${model.elapsedMin}m elapsed`); + L.push(`${c(ANSI.green, bar(resolved, total, 24))} ${c(ANSI.bold, `${done}/${total}`)} done${extra}`); L.push(''); - for (const b of model.bins) { + model.bins.forEach((b, i) => { + const cursor = i === sel ? c(ANSI.cyan, '▸') : ' '; const g = c(binColor(b), binGlyph(b)); - const id = c(b.current ? ANSI.cyan : (b.status === 'pending' ? ANSI.dim : ANSI.reset), pad(b.id, 28)); + const id = c(b.current ? ANSI.cyan : (b.status === 'pending' ? ANSI.dim : ANSI.reset), pad(b.id, 24)); + const dur = c(ANSI.dim, lpad(fmtDur(b.durationMs), 6)); + const tok = c(ANSI.dim, lpad(b.tokensOutput == null ? '—' : fmtTok(b.tokensOutput), 6)); let tail = b.status; - if (b.current) tail = 'working ← current'; - else if (b.commit) tail = `done (${b.commit.slice(0, 7)})`; - else if (b.note) tail = `${b.status} — ${b.note}`; - L.push(` ${g} ${id} ${c(ANSI.dim, tail)}`); - } + if (b.current) tail = 'working ← current'; + else if (b.commit) tail = `(${b.commit.slice(0, 7)})`; + else if (b.note) tail = `— ${b.note}`; + else tail = ''; + L.push(`${cursor}${g} ${id} ${dur} ${tok} ${c(ANSI.dim, tail)}`); + }); L.push(''); - if (model.recent.length) { - L.push(c(ANSI.dim, 'recent:')); - for (const r of model.recent.slice(-4)) L.push(c(ANSI.gray, ` ${r}`)); - L.push(''); - } - const needs = model.needsYou.length; const needsLabel = needs ? c(ANSI.yellow, `Needs you: ${needs}`) : c(ANSI.dim, 'Needs you: 0'); - const hints = `${c(ANSI.bold, '[p]')}ause ${c(ANSI.bold, '[k]')}skip current ${c(ANSI.bold, '[q]')}stop ${c(ANSI.bold, '[x]')}exit watcher`; + const nav = sel >= 0 ? 
`${c(ANSI.bold, '↑/↓')} select ${c(ANSI.bold, '⏎')} details ` : ''; + const hints = `${nav}${c(ANSI.bold, '[p]')}ause ${c(ANSI.bold, '[k]')}skip ${c(ANSI.bold, '[q]')}stop ${c(ANSI.bold, '[x]')}exit`; L.push(`${needsLabel} ${c(ANSI.dim, '·')} ${hints}`); return L.join('\n') + '\n'; } +// renderDetail(model, index, { width, color }) -> string. Drill-down for one bin. +function renderDetail(model, index, opts = {}) { + const color = opts.color !== false; + const width = opts.width || 80; + const c = (code, s) => paint(color, code, s); + if (!model || !model.exists || !model.bins[index]) return renderFrame(model, opts); + const b = model.bins[index]; + const t = b.tokens || {}; + const L = []; + L.push(`${c(ANSI.bold, b.id)} ${c(ANSI.dim, '·')} ${c(binColor(b), b.current ? 'working (current)' : b.status)} ${c(ANSI.dim, '[esc] back [k] skip [q] stop')}`); + L.push(c(ANSI.dim, '─'.repeat(Math.min(width, 64)))); + L.push(`${c(ANSI.dim, 'status ')} ${b.current ? 'working (current)' : b.status}${b.note ? ' — ' + b.note : ''}`); + L.push(`${c(ANSI.dim, 'runtime ')} ${fmtDur(b.durationMs)}`); + L.push(`${c(ANSI.dim, 'tokens ')} ${c(ANSI.bold, fmtTok(b.tokensOutput) + ' out')} ${c(ANSI.dim, '·')} ${fmtTok(t.input)} in ${c(ANSI.dim, '·')} ${fmtTok(t.cacheRead)} cache-read ${c(ANSI.dim, '·')} ${fmtTok(t.total)} total`); + L.push(`${c(ANSI.dim, 'commit ')} ${b.commit || '—'}`); + L.push(''); + L.push(c(ANSI.dim, 'brief')); + const brief = readBrief(model.cwd, b.id).trimEnd(); + const briefLines = brief ? brief.split('\n') : ['(brief unavailable)']; + for (const line of briefLines.slice(0, 14)) L.push(' ' + c(ANSI.gray, line.slice(0, width - 2))); + return L.join('\n') + '\n'; +} + +// renderHistory(records, agg, { color }) -> string. The work record ledger. +function renderHistory(records, agg, opts = {}) { + const color = opts.color !== false; + const c = (code, s) => paint(color, code, s); + if (!records || !records.length) return c(ANSI.dim, 'No shift runs recorded yet. 
They appear here once a run finalizes.') + '\n'; + const L = []; + L.push(`${c(ANSI.bold, 'shift work record')} ${c(ANSI.dim, `· ${agg.runs} run${agg.runs === 1 ? '' : 's'}`)}`); + L.push(c(ANSI.dim, '─'.repeat(64))); + L.push(c(ANSI.dim, ` ${pad('when', 17)}${pad('branch', 20)}${lpad('time', 7)} ${lpad('out', 7)} bins`)); + for (const r of records.slice(-25)) { + const when = (r.endedAt || r.startedAt || '').slice(0, 16).replace('T', ' '); + const b = r.bins || {}; + const tally = `${c(ANSI.green, (b.done || 0) + '✓')} ${c(ANSI.gray, (b.skipped || 0) + '⤫')} ${c(ANSI.red, (b.blocked || 0) + '✗')}`; + L.push(` ${pad(when, 17)}${c(ANSI.cyan, pad(r.branch || '', 20))}${lpad(fmtDur(r.durationMs), 7)} ${lpad(fmtTok(r.tokens && r.tokens.output), 7)} ${tally}`); + } + L.push(c(ANSI.dim, '─'.repeat(64))); + L.push(`${c(ANSI.bold, 'totals')} ${agg.runs} runs ${c(ANSI.dim, '·')} ${fmtDur(agg.durationMs)} ${c(ANSI.dim, '·')} ${c(ANSI.bold, fmtTok(agg.outputTokens) + ' out')} ${c(ANSI.dim, '·')} ${agg.bins.done}✓ ${agg.bins.skipped}⤫ ${agg.bins.blocked}✗`); + return L.join('\n') + '\n'; +} + // One-line summary for a status bar (module 1 / ccstatusline custom-command). function renderLine(model, opts = {}) { const color = opts.color !== false; @@ -141,7 +228,7 @@ function renderLine(model, opts = {}) { if (!model || !model.exists) return ''; const flag = model.finalized ? '●' : model.paused ? '⏸' : '⚙'; const needs = model.needsYou.length ? 
` ${c(ANSI.yellow, '⚑' + model.needsYou.length)}` : ''; - return `${flag} shift ${c(ANSI.bold, model.counts.done + '/' + model.counts.total)} ${c(ANSI.dim, model.elapsedMin + 'm')}${needs}`; + return `${flag} shift ${c(ANSI.bold, model.counts.done + '/' + model.counts.total)} ${c(ANSI.dim, model.elapsedMin + 'm')} ${c(ANSI.dim, '↑' + fmtTok(model.outputTokens))}${needs}`; } -module.exports = { buildModel, renderFrame, renderLine }; +module.exports = { buildModel, renderFrame, renderDetail, renderHistory, renderLine, fmtDur, fmtTok }; diff --git a/shift/test/brief.test.cjs b/shift/test/brief.test.cjs index 2212e4e..4f18435 100644 --- a/shift/test/brief.test.cjs +++ b/shift/test/brief.test.cjs @@ -29,3 +29,9 @@ test('always explains decision logging, the Needs-you convention, and blocker fl assert.match(out, /Needs you:/); assert.match(out, /blocked\.jsonl/); }); + +test('guards .shift/ bookkeeping: append-only, never edit state.json (so the hook owns per-bin stats)', () => { + const out = renderBrief(bin, { git: {} }); + assert.match(out, /Never edit.*state\.json/i); + assert.match(out, /append/i); +}); diff --git a/shift/test/history.test.cjs b/shift/test/history.test.cjs new file mode 100644 index 0000000..1e859b3 --- /dev/null +++ b/shift/test/history.test.cjs @@ -0,0 +1,50 @@ +'use strict'; +const { test } = require('node:test'); +const assert = require('node:assert'); +const fs = require('node:fs'); +const os = require('node:os'); +const path = require('node:path'); +const { appendRecord, readHistory, aggregate } = require('../lib/history.cjs'); + +function tmp() { return fs.mkdtempSync(path.join(os.tmpdir(), 'shift-hist-')); } +const rec = (runId, over = {}) => Object.assign({ + runId, branch: 'shift/x', startedAt: '2026-06-16T00:00:00Z', endedAt: '2026-06-16T00:30:00Z', + durationMs: 30 * 60000, iterations: 4, endReason: 'queue empty', + bins: { total: 3, done: 2, skipped: 1, blocked: 0 }, tokens: { output: 1000, total: 50000 } +}, over); + +test('append then 
read round-trips records in order', () => { + const d = tmp(); + appendRecord(d, rec('r1')); + appendRecord(d, rec('r2')); + const h = readHistory(d); + assert.equal(h.length, 2); + assert.deepEqual(h.map(r => r.runId), ['r1', 'r2']); + assert.ok(fs.existsSync(path.join(d, 'history.jsonl'))); +}); + +test('readHistory tolerates a malformed line', () => { + const d = tmp(); + appendRecord(d, rec('r1')); + fs.appendFileSync(path.join(d, 'history.jsonl'), 'not json\n'); + appendRecord(d, rec('r2')); + assert.deepEqual(readHistory(d).map(r => r.runId), ['r1', 'r2']); +}); + +test('readHistory on a fresh dir is empty (no throw)', () => { + assert.deepEqual(readHistory(tmp()), []); +}); + +test('aggregate totals runs, duration, output tokens, and bins', () => { + const recs = [ + rec('r1', { durationMs: 10 * 60000, tokens: { output: 1000, total: 1 }, bins: { total: 2, done: 2, skipped: 0, blocked: 0 } }), + rec('r2', { durationMs: 20 * 60000, tokens: { output: 3000, total: 1 }, bins: { total: 5, done: 3, skipped: 1, blocked: 1 } }) + ]; + const a = aggregate(recs); + assert.equal(a.runs, 2); + assert.equal(a.durationMs, 30 * 60000); + assert.equal(a.outputTokens, 4000); + assert.equal(a.bins.done, 5); + assert.equal(a.bins.skipped, 1); + assert.equal(a.bins.blocked, 1); +}); diff --git a/shift/test/hook.test.cjs b/shift/test/hook.test.cjs index f319f49..cf0def7 100644 --- a/shift/test/hook.test.cjs +++ b/shift/test/hook.test.cjs @@ -144,6 +144,47 @@ test('verify gate (failing) re-blocks the same bin with feedback, then blocks af assert.equal(s.bins.find(b => b.id === 'queue/01.md').status, 'blocked'); }); +// ---- watch: per-bin tokens/runtime + work-record history ---- + +test('records per-bin tokens + runtime from the transcript and appends a history record', () => { + const { cwd, dir } = setupRun(); + const tpath = path.join(dir, 'transcript.jsonl'); + const asst = (ts, output) => JSON.stringify({ + type: 'assistant', timestamp: ts, + message: { role: 'assistant', 
usage: { output_tokens: output, input_tokens: 10, cache_read_input_tokens: 0, cache_creation_input_tokens: 0 } } + }); + + runHook(cwd, { stop_hook_active: false, transcript_path: tpath }); // start bin 1 + // Use bin 1's recorded startedAt as the message timestamp so it lands in [start, now). + const started = JSON.parse(fs.readFileSync(path.join(dir, 'state.json'), 'utf8')) + .bins.find(b => b.id === 'queue/01.md').startedAt; + assert.ok(started, 'bin 1 got a startedAt when it became current'); + fs.writeFileSync(tpath, asst(started, 500) + '\n'); + + runHook(cwd, { stop_hook_active: true, transcript_path: tpath }); // finish bin 1, start bin 2 + const b1 = JSON.parse(fs.readFileSync(path.join(dir, 'state.json'), 'utf8')).bins.find(b => b.id === 'queue/01.md'); + assert.equal(b1.status, 'done'); + assert.equal(b1.tokens.output, 500, 'bin 1 output tokens attributed from the transcript window'); + assert.equal(typeof b1.durationMs, 'number'); + + runHook(cwd, { stop_hook_active: true, transcript_path: tpath }); // finish bin 2, drain -> finalize + const hist = fs.readFileSync(path.join(dir, 'history.jsonl'), 'utf8').trim().split('\n').map(JSON.parse); + assert.equal(hist.length, 1, 'one history record appended on finalize'); + assert.equal(hist[0].bins.done, 2); + assert.ok(hist[0].tokens.output >= 500, 'run output tokens recorded'); + assert.equal(hist[0].perBin.length, 2); +}); + +test('history is append-only across runs and not duplicated by a stray extra stop', () => { + const { cwd, dir } = setupRun(); + runHook(cwd, { stop_hook_active: false }); + runHook(cwd, { stop_hook_active: true }); + runHook(cwd, { stop_hook_active: true }); // drain -> finalize (appends record 1) + runHook(cwd, { stop_hook_active: true }); // stray extra stop -> summary already exists -> no 2nd append + const hist = fs.readFileSync(path.join(dir, 'history.jsonl'), 'utf8').trim().split('\n').filter(Boolean); + assert.equal(hist.length, 1, 'no duplicate history record from a repeated 
finalize'); +}); + // ---- v2: usage cap + cache ---- test('usage cap from the hook payload ends the run and caches usage', () => { diff --git a/shift/test/timeline.test.cjs b/shift/test/timeline.test.cjs new file mode 100644 index 0000000..9b6d59c --- /dev/null +++ b/shift/test/timeline.test.cjs @@ -0,0 +1,45 @@ +'use strict'; +const { test } = require('node:test'); +const assert = require('node:assert'); +const fs = require('node:fs'); +const os = require('node:os'); +const path = require('node:path'); +const { appendEvent, readTimeline, clearTimeline, timelinePath, binWindows } = require('../lib/timeline.cjs'); + +function dir() { return fs.mkdtempSync(path.join(os.tmpdir(), 'shift-tl-')); } + +test('timeline file lives at .shift/timeline.jsonl', () => { + const d = dir(); + assert.equal(timelinePath(d), path.join(d, 'timeline.jsonl')); +}); + +test('append + read round-trips events; clear removes them', () => { + const d = dir(); + appendEvent(d, { t: '2026-06-16T00:00:00Z', event: 'start', id: 'a' }); + appendEvent(d, { t: '2026-06-16T00:01:00Z', event: 'finish', id: 'a' }); + assert.equal(readTimeline(d).length, 2); + clearTimeline(d); + assert.deepEqual(readTimeline(d), []); +}); + +test('readTimeline on a fresh dir is empty and tolerates malformed lines', () => { + const d = dir(); + assert.deepEqual(readTimeline(d), []); + appendEvent(d, { t: 't', event: 'start', id: 'a' }); + fs.appendFileSync(timelinePath(d), 'garbage\n'); + assert.equal(readTimeline(d).length, 1); +}); + +test('binWindows takes first start and last finish per bin', () => { + const events = [ + { t: 't1', event: 'start', id: 'a' }, + { t: 't1b', event: 'start', id: 'a' }, + { t: 't2', event: 'finish', id: 'a' }, + { t: 't3', event: 'start', id: 'b' } + ]; + const w = binWindows(events); + assert.equal(w.a.startedAt, 't1'); + assert.equal(w.a.finishedAt, 't2'); + assert.equal(w.b.startedAt, 't3'); + assert.equal(w.b.finishedAt, null); +}); diff --git a/shift/test/transcript.test.cjs 
b/shift/test/transcript.test.cjs new file mode 100644 index 0000000..903485c --- /dev/null +++ b/shift/test/transcript.test.cjs @@ -0,0 +1,44 @@ +'use strict'; +const { test } = require('node:test'); +const assert = require('node:assert'); +const { sumUsage } = require('../lib/transcript.cjs'); + +// Build a transcript line like Claude Code writes (assistant message with usage). +function asst(tsIso, usage) { + return JSON.stringify({ type: 'assistant', timestamp: tsIso, message: { role: 'assistant', usage } }); +} +const U = (output, input = 0, cacheRead = 0, cacheCreate = 0) => + ({ output_tokens: output, input_tokens: input, cache_read_input_tokens: cacheRead, cache_creation_input_tokens: cacheCreate }); + +const lines = [ + JSON.stringify({ type: 'user', timestamp: '2026-06-16T00:00:00Z', message: {} }), // ignored (not assistant) + asst('2026-06-16T00:01:00Z', U(100, 2000, 5000, 300)), // in window A + asst('2026-06-16T00:02:00Z', U(50, 1000, 6000, 0)), // in window A + asst('2026-06-16T00:10:00Z', U(999, 1, 1, 1)), // window B + '{ not json', // malformed → skipped + JSON.stringify({ type: 'assistant', timestamp: '2026-06-16T00:12:00Z', message: {} }) // assistant w/o usage → skipped +]; + +test('sums output/input/cache for assistant messages, ignores non-assistant + malformed', () => { + const all = sumUsage(lines, null, null); + assert.equal(all.output, 100 + 50 + 999); + assert.equal(all.input, 2000 + 1000 + 1); + assert.equal(all.cacheRead, 5000 + 6000 + 1); + assert.equal(all.messages, 3); + assert.equal(all.total, all.output + all.input + all.cacheRead + all.cacheCreate); +}); + +test('windows by [from, to): includes from, excludes to', () => { + const from = Date.parse('2026-06-16T00:00:30Z'); + const to = Date.parse('2026-06-16T00:09:00Z'); + const win = sumUsage(lines, from, to); + assert.equal(win.output, 150); // only the two window-A messages + assert.equal(win.messages, 2); +}); + +test('empty / no-match window yields zeros, never throws', () => 
{ + const z = sumUsage([], 0, 1); + assert.equal(z.output, 0); + assert.equal(z.total, 0); + assert.equal(z.messages, 0); +}); diff --git a/shift/test/watch-model.test.cjs b/shift/test/watch-model.test.cjs index dc93b44..4feb318 100644 --- a/shift/test/watch-model.test.cjs +++ b/shift/test/watch-model.test.cjs @@ -4,22 +4,25 @@ const assert = require('node:assert'); const fs = require('node:fs'); const os = require('node:os'); const path = require('node:path'); -const { buildModel, renderFrame } = require('../lib/watch-model.cjs'); +const { buildModel, renderFrame, renderDetail, renderHistory } = require('../lib/watch-model.cjs'); +const { aggregate } = require('../lib/history.cjs'); function fixture({ paused = false, currentBinId = 'queue/03-build.md' } = {}) { const cwd = fs.mkdtempSync(path.join(os.tmpdir(), 'shift-watch-')); const dir = path.join(cwd, '.shift'); fs.mkdirSync(dir, { recursive: true }); - const startedAt = new Date(Date.now() - 12 * 60_000).toISOString(); // 12 min ago + fs.mkdirSync(path.join(cwd, 'queue'), { recursive: true }); + fs.writeFileSync(path.join(cwd, 'queue', '03-build.md'), '# Build the thing\n\nCompile and commit.\n'); + const startedAt = new Date(Date.now() - 12 * 60_000).toISOString(); fs.writeFileSync(path.join(dir, 'state.json'), JSON.stringify({ runId: '2026-06-16T00-00-00', startedAt, iterations: 7, branch: 'shift/smoke', currentBinId, bins: [ - { id: 'queue/01-hello.md', status: 'done', commit: 'a1b2c3d' }, - { id: 'queue/02-notes.md', status: 'done', commit: 'd4e5f6a' }, - { id: 'queue/03-build.md', status: 'pending' }, + { id: 'queue/01-hello.md', status: 'done', commit: 'a1b2c3d', durationMs: 68000, tokens: { output: 84000, input: 1000, cacheRead: 50000, total: 135000 } }, + { id: 'queue/02-notes.md', status: 'done', commit: 'd4e5f6a', durationMs: 161000, tokens: { output: 213000, input: 2000, cacheRead: 90000, total: 305000 } }, + { id: 'queue/03-build.md', status: 'pending', startedAt }, { id: 'queue/04-test.md', 
status: 'pending' }, - { id: 'queue/05-ship.md', status: 'blocked', note: 'needs API key' } + { id: 'queue/05-ship.md', status: 'blocked', note: 'needs API key', durationMs: 52000, tokens: { output: 31000, input: 500, cacheRead: 0, total: 31500 } } ] })); fs.writeFileSync(path.join(dir, 'log.md'), @@ -28,57 +31,89 @@ function fixture({ paused = false, currentBinId = 'queue/03-build.md' } = {}) { return dir; } -test('buildModel reads run state and computes counts + elapsed', () => { +test('buildModel reads per-bin runtime + tokens and a run output-token total', () => { const m = buildModel({ dir: fixture(), now: Date.now() }); - assert.equal(m.exists, true); - assert.equal(m.branch, 'shift/smoke'); - assert.equal(m.iterations, 7); assert.equal(m.counts.done, 2); assert.equal(m.counts.blocked, 1); - assert.equal(m.counts.pending, 2); - assert.equal(m.counts.total, 5); - assert.ok(m.elapsedMin >= 11 && m.elapsedMin <= 13); + const b1 = m.bins.find(b => b.id === 'queue/01-hello.md'); + assert.equal(b1.durationMs, 68000); + assert.equal(b1.tokensOutput, 84000); + // no transcriptPath in fixture -> run output tokens = sum of recorded per-bin output + assert.equal(m.outputTokens, 84000 + 213000 + 31000); }); test('buildModel marks the current bin and surfaces Needs you', () => { const m = buildModel({ dir: fixture(), now: Date.now() }); - const current = m.bins.find(b => b.current); - assert.equal(current.id, 'queue/03-build.md'); - assert.ok(m.needsYou.some(n => /API key/.test(n))); // blocked note - assert.ok(m.needsYou.some(n => /deploy target/.test(n))); // logged "Needs you:" line + assert.equal(m.bins.find(b => b.current).id, 'queue/03-build.md'); + assert.ok(m.needsYou.some(n => /API key/.test(n))); + assert.ok(m.needsYou.some(n => /deploy target/.test(n))); }); -test('buildModel reflects pause state', () => { +test('buildModel reflects pause + exists:false when no run', () => { assert.equal(buildModel({ dir: fixture({ paused: true }), now: Date.now() }).paused, 
true); - assert.equal(buildModel({ dir: fixture({ paused: false }), now: Date.now() }).paused, false); + const none = fs.mkdtempSync(path.join(os.tmpdir(), 'shift-watch-none-')); + assert.equal(buildModel({ dir: path.join(none, '.shift'), now: Date.now() }).exists, false); }); -test('buildModel returns exists:false when no run is present', () => { - const cwd = fs.mkdtempSync(path.join(os.tmpdir(), 'shift-watch-none-')); - const m = buildModel({ dir: path.join(cwd, '.shift'), now: Date.now() }); - assert.equal(m.exists, false); +test('renderFrame shows progress, the token header, runtime/token columns, and nav hints', () => { + const m = buildModel({ dir: fixture(), now: Date.now() }); + const out = renderFrame(m, { width: 80, color: false, selectedIndex: 2 }); + assert.match(out, /2\/5/); // progress + assert.match(out, /shift\/smoke/); // branch + assert.match(out, /↑\d+k out/); // run output-token header + assert.match(out, /queue\/05-ship\.md/); // bin row + assert.match(out, /needs API key/); // blocker note + assert.match(out, /1m08s/); // bin 1 runtime column + assert.match(out, /84k/); // bin 1 token column + assert.match(out, /▸/); // selection cursor (selectedIndex) + assert.match(out, /select/); // nav hint shown when selecting + assert.match(out, /\[q\].*stop/i); +}); + +test('renderFrame PAUSED banner toggles', () => { + assert.match(renderFrame(buildModel({ dir: fixture({ paused: true }), now: Date.now() }), { color: false }), /PAUSED/); + assert.doesNotMatch(renderFrame(buildModel({ dir: fixture({ paused: false }), now: Date.now() }), { color: false }), /PAUSED/); +}); + +test('renderDetail shows the bin brief + token breakdown', () => { + const m = buildModel({ dir: fixture(), now: Date.now() }); + const idx = m.bins.findIndex(b => b.id === 'queue/01-hello.md'); + const out = renderDetail(m, idx, { color: false }); + assert.match(out, /queue\/01-hello\.md/); + assert.match(out, /84k out/); // token breakdown + assert.match(out, /cache-read/); + 
assert.match(out, /1m08s/); // runtime + assert.match(out, /a1b2c3d/); // commit +}); + +test('renderDetail reads the brief file for the current bin', () => { + const m = buildModel({ dir: fixture(), now: Date.now() }); + const idx = m.bins.findIndex(b => b.id === 'queue/03-build.md'); + const out = renderDetail(m, idx, { color: false }); + assert.match(out, /brief/); + assert.match(out, /Build the thing/); // read from queue/03-build.md }); -test('renderFrame (no color) shows progress, the current bin, and control hints', () => { - const out = renderFrame(buildModel({ dir: fixture(), now: Date.now() }), { width: 80, color: false }); - assert.match(out, /2\/5/); // progress count - assert.match(out, /shift\/smoke/); // branch - assert.match(out, /queue\/05-ship\.md/); // a bin row - assert.match(out, /needs API key/); // blocker surfaced - assert.match(out, /\[q\].*stop/i); // control hint - assert.match(out, /\[k\]/); // skip hint - assert.match(out, /\[p\]/); // pause hint +test('renderFrame on no active run is a friendly message', () => { + const none = fs.mkdtempSync(path.join(os.tmpdir(), 'shift-watch-none2-')); + assert.match(renderFrame(buildModel({ dir: path.join(none, '.shift'), now: Date.now() }), { color: false }), /no active.*run/i); }); -test('renderFrame shows a PAUSED banner when paused', () => { - const paused = renderFrame(buildModel({ dir: fixture({ paused: true }), now: Date.now() }), { color: false }); - assert.match(paused, /PAUSED/); - const running = renderFrame(buildModel({ dir: fixture({ paused: false }), now: Date.now() }), { color: false }); - assert.doesNotMatch(running, /PAUSED/); +test('renderHistory shows per-run rows and a totals footer', () => { + const records = [ + { runId: 'r1', branch: 'shift/a', endedAt: '2026-06-16T01:00:00Z', durationMs: 600000, iterations: 3, tokens: { output: 120000, total: 1 }, bins: { total: 2, done: 2, skipped: 0, blocked: 0 } }, + { runId: 'r2', branch: 'shift/b', endedAt: '2026-06-16T02:00:00Z', 
durationMs: 1200000, iterations: 5, tokens: { output: 340000, total: 1 }, bins: { total: 5, done: 3, skipped: 1, blocked: 1 } } + ]; + const out = renderHistory(records, aggregate(records), { color: false }); + assert.match(out, /work record/); + assert.match(out, /shift\/a/); + assert.match(out, /shift\/b/); + assert.match(out, /totals/); + assert.match(out, /2 runs/); + assert.match(out, /460k out/); // 120k + 340k aggregate output + assert.match(out, /5✓/); // aggregate done }); -test('renderFrame on no active run is a friendly message, not a crash', () => { - const cwd = fs.mkdtempSync(path.join(os.tmpdir(), 'shift-watch-none2-')); - const out = renderFrame(buildModel({ dir: path.join(cwd, '.shift'), now: Date.now() }), { color: false }); - assert.match(out, /no active.*run/i); +test('renderHistory with no records is a friendly message', () => { + assert.match(renderHistory([], aggregate([]), { color: false }), /No shift runs recorded/i); }); From 5044dae6093f5cc8e252f128979dd4551b14ec29 Mon Sep 17 00:00:00 2001 From: Allen Manzano-Wight <6640236+AllenBW@users.noreply.github.com> Date: Tue, 16 Jun 2026 11:58:47 -0400 Subject: [PATCH 09/12] shift: move engine state out of the repo so the agent can't usurp it (real per-bin fix) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Root cause (verified): a headless autonomous agent rewrites .shift/state.json to mark bins done itself — bypassing the keep-going engine so the hook never drives the queue or records per-bin boundaries. A probe hook disproved a sandbox: a Stop hook can write anywhere (~/.local/state, /tmp, env-provided). So the fix is an engine-owned store OUTSIDE the repo, where the agent (which only works in the repo) can't reach it. - lib/store.cjs: engineDir(cwd) = $XDG_STATE_HOME/shift/ (realpath so /tmp == /private/tmp; full-path hash so siblings don't collide; SHIFT_STATE_DIR override). mkdir -p. 
- state.json, usage.json, history.jsonl, timeline now live in engineDir; the hook + bin/shift + watch-model read/write there. config.json stays user-editable in .shift/ and is snapshotted into engineDir so a deletion can't break a run; summary/log/control stay in .shift/. A stray agent-written .shift/state.json is simply ignored. - Always emit a timeline 'start' per bin (binWindows dedupes) so every bin has a window. Validated: a real bypassPermissions run now records per-bin runtime+tokens for every bin (35s/7k, 13s/2k) + a full history row. 99 shift tests green. --- shift/README.md | 10 ++-- shift/SPEC.md | 6 ++- shift/bin/shift | 25 ++++++---- shift/hooks/shift-stop.cjs | 41 +++++++++------- shift/lib/store.cjs | 40 ++++++++++++++++ shift/lib/timeline.cjs | 25 ++++------ shift/lib/watch-model.cjs | 10 ++-- shift/test/cli.test.cjs | 17 ++++--- shift/test/hook.test.cjs | 83 +++++++++++++++++++-------------- shift/test/timeline.test.cjs | 62 ++++++++++++++++-------- shift/test/watch-model.test.cjs | 5 +- 11 files changed, 212 insertions(+), 112 deletions(-) create mode 100644 shift/lib/store.cjs diff --git a/shift/README.md b/shift/README.md index 4f1ebd6..83f01f4 100644 --- a/shift/README.md +++ b/shift/README.md @@ -85,11 +85,15 @@ It's also the **control + drill-down surface** — a status bar can show state b Control is file-based under `.shift/` (`PAUSE` / `SKIP` / `STOP`), so it works whether the run is interactive or headless, and from any terminal in the repo. -> **Tokens are the *output* count** — the honest "work produced" figure, read from the session transcript. A warm run's `input`/cache tokens balloon with re-sent context, so the headline deliberately isn't `total` (that's in the detail view). Run-level tokens + runtime are authoritative; **per-bin** token/runtime columns are best-effort and may show `—` in a fully-headless run (an autonomous agent can rewrite `.shift/` mid-run) — see [SPEC §13](./SPEC.md). 
+> **Tokens are the *output* count** — the honest "work produced" figure, read from the session transcript. A warm run's `input`/cache tokens balloon with re-sent context, so the headline deliberately isn't `total` (that's in the detail view). Both run-level and per-bin tokens/runtime are reliable, including in fully-headless runs: the engine's state lives **outside the repo** (see below), so an autonomous agent can't corrupt it. + +### Where state lives (and why) + +Shift keeps the engine's authoritative state — run state, timeline, usage, and the work-record history — **outside the repo**, under `$XDG_STATE_HOME/shift/<repo-hash>/` (or `~/.local/state/shift/…`). The reason is candor-meets-reality: an autonomous agent will rewrite or delete files it finds in the repo (it was caught marking bins done in `.shift/state.json` itself), so the engine puts its state where the agent — which only works inside the repo — can't reach it. `.shift/` in your repo holds only what you and the agent legitimately touch: `config.json` (you edit it), `summary.md` (you read it), `log.md`/`blocked.jsonl` (the agent appends), and the control signals. Override the location with `SHIFT_STATE_DIR`. ### The work record — `shift history` -Every finalized run is appended to `.shift/history.jsonl`. `shift history` prints the ledger — one row per run (when, branch, runtime, output tokens, bin tally) and a **totals** footer across all runs; `shift history <run>` drills into a single run's bins. +Every finalized run is appended to an append-only ledger in the engine state dir. `shift history` prints it — one row per run (when, branch, runtime, output tokens, bin tally) and a **totals** footer across all runs; `shift history <run>` drills into a single run's bins. ### In your status bar (module 1) @@ -145,4 +149,4 @@ Pick the narrowest mode that lets the work actually proceed. 
cd shift && npm test # node --test, zero dependencies ``` -Pure logic lives in `lib/` (discovery, state, bounds, brief, decision, verify, usage, outcome, run-loop, control, watch-model, transcript, timeline, history) and is unit-tested — including `renderFrame`/`renderDetail`/`renderHistory`, so the dashboard is testable without a TTY; `hooks/shift-stop.cjs` (the keep-going engine) and the `shift run` loop are integration-tested by driving them with injected effects / crafted hook input. The `bin/shift watch` TUI is a thin shell over the tested `watch-model` + `control` modules. +Pure logic lives in `lib/` (discovery, state, bounds, brief, decision, verify, usage, outcome, run-loop, control, watch-model, transcript, timeline, history, store) and is unit-tested — including `renderFrame`/`renderDetail`/`renderHistory`, so the dashboard is testable without a TTY; `hooks/shift-stop.cjs` (the keep-going engine) and the `shift run` loop are integration-tested by driving them with injected effects / crafted hook input. The `bin/shift watch` TUI is a thin shell over the tested `watch-model` + `control` modules. diff --git a/shift/SPEC.md b/shift/SPEC.md index df7f731..132ec97 100644 --- a/shift/SPEC.md +++ b/shift/SPEC.md @@ -292,6 +292,8 @@ The candor gap in v2 was that a headless run is opaque *while* it runs (good pap ### Tokens, runtime + the work record (2026-06-16) -Per the candor goal of making consumption legible: the dashboard header and `status --line` show **output tokens** (the honest "work produced" figure — not inflated by context resends / cache reads, which dominate `total`), summed from the session **transcript** (`transcript_path` from the hook payload; tokens live in `message.usage`). Each bin gets a runtime + token column; `↑/↓` selects a bin and `⏎` opens a **detail view** (status, runtime, token breakdown in/out/cache, commit, brief). 
Every finalized run is appended to an append-only **work record** at `.shift/history.jsonl`; `shift history` prints per-run rows + a totals footer (all runs, total time, total output tokens), and `shift history <run>` drills into one run's bins. New modules: `lib/transcript.cjs` (sum `usage` over a `[start, end)` window — pure), `lib/timeline.cjs` (append-only bin boundaries), `lib/history.cjs` (ledger append/read/aggregate). **Tests:** 96 in `shift`, all green. +Per the candor goal of making consumption legible: the dashboard header and `status --line` show **output tokens** (the honest "work produced" figure — not inflated by context resends / cache reads, which dominate `total`), summed from the session **transcript** (`transcript_path` from the hook payload; tokens live in `message.usage`). Each bin gets a runtime + token column; `↑/↓` selects a bin and `⏎` opens a **detail view** (status, runtime, token breakdown in/out/cache, commit, brief). Every finalized run is appended to an append-only **work record**; `shift history` prints per-run rows + a totals footer (all runs, total time, total output tokens), and `shift history <run>` drills into one run's bins. New modules: `lib/transcript.cjs` (sum `usage` over a `[start, end)` window — pure), `lib/timeline.cjs` (append-only bin boundaries), `lib/history.cjs` (ledger append/read/aggregate). 
-*Known limitation — per-bin attribution is best-effort in fully-headless autonomous runs.* Investigation (2026-06-16) established three constraints that, together, make reliable *per-bin* token/runtime attribution impossible while a `claude -p` agent runs unattended: (1) an autonomous agent **rewrites/deletes files under `.shift/`** mid-run (observed: it rewrote `state.json` + `log.md` and deleted `config.json`/`timeline.jsonl`), clobbering the hook's per-bin stamps; (2) Claude Code **sandboxes hook file-writes to the project directory**, so the boundary record can't be relocated out-of-repo where the agent can't reach it; (3) the transcript carries no per-bin marker to reconstruct boundaries from. What **is** reliable and authoritative regardless: **run-level** output tokens + runtime, and the **work-record history** row (written as the hook's *final* action on finalize, after the agent's last turn, so it's never clobbered). Per-bin columns populate in interactive runs / when the agent leaves `.shift/` alone / in `shift/examples/watch-demo.cjs`, and show `—` otherwise. The brief now instructs the agent to treat `.shift/` as append-only bookkeeping; tightening that — or an engine-owned state store the agent can't reach — is the path to making per-bin robust. +### Engine state moved out of the repo — per-bin attribution made robust (2026-06-16) + +The first cut of per-bin attribution was unreliable headless, and the investigation found the true cause: an autonomous agent **rewrites `.shift/state.json` to mark bins done itself** (and rewrites `log.md`, deletes `config.json`/`timeline.jsonl`) — usurping the keep-going engine so the hook never drives the queue and records no boundaries. A probe hook (one real `claude -p` run) then **disproved a sandbox**: a Stop hook can write anywhere, including `~/.local/state` and `/tmp`. 
So the fix is an **engine-owned store outside the working repo**, in `lib/store.cjs`: `engineDir(cwd)` = `$XDG_STATE_HOME/shift/<repo-hash>` (canonicalized so `/tmp` and `/private/tmp` agree; full-path hash so siblings don't collide; `SHIFT_STATE_DIR` overrides). `state.json`, `usage.json`, `history.jsonl`, and the timeline now live there — the hook owns them and the agent (which only operates inside the repo) can't see or touch them. `.shift/` keeps only what the user/agent legitimately use: `config.json` (user-edited, also snapshotted into the engine dir so a deletion can't break a run), `summary.md` (user-read), `log.md`/`blocked.jsonl` (agent-appended), and `STOP`/`PAUSE`/`SKIP` (control). The engine is also robust if the agent *does* still write a stray `.shift/state.json` — that file is simply ignored. **Validated:** a real fully-headless `bypassPermissions` run now records per-bin runtime + tokens for every bin (e.g. `35s · 7k`, `13s · 2k`) and a complete history row. **Tests:** 99 in `shift`, all green. 
diff --git a/shift/bin/shift b/shift/bin/shift index 784695e..ad4bb85 100755 --- a/shift/bin/shift +++ b/shift/bin/shift @@ -5,6 +5,7 @@ const path = require('node:path'); const cp = require('node:child_process'); const { discoverBins } = require('../lib/discovery.cjs'); const { initState, saveState, loadState, mergeDiscovered } = require('../lib/state.cjs'); +const { engineDir } = require('../lib/store.cjs'); function isoStamp(d) { return d.toISOString().replace(/[:.]/g, '-').slice(0, 19); } function dateStr(d) { return d.toISOString().slice(0, 10); } @@ -61,12 +62,16 @@ function cmdStart(args) { } fs.mkdirSync(dir, { recursive: true }); + const edir = engineDir(cwd); // engine state lives out of the repo so the agent can't usurp it if (fs.existsSync(path.join(dir, 'STOP'))) fs.unlinkSync(path.join(dir, 'STOP')); - require('../lib/timeline.cjs').clearTimeline(dir); // fresh run → fresh boundary record - fs.writeFileSync(cfgFile, JSON.stringify(config, null, 2)); + try { fs.unlinkSync(path.join(dir, 'summary.md')); } catch { /* none */ } + require('../lib/timeline.cjs').clearTimeline(cwd); // fresh run → fresh boundary record + try { fs.unlinkSync(path.join(edir, 'usage.json')); } catch { /* none */ } // stale usage from a prior run + fs.writeFileSync(cfgFile, JSON.stringify(config, null, 2)); // .shift/config.json (user-editable) + fs.writeFileSync(path.join(edir, 'config.json'), JSON.stringify(config, null, 2)); // snapshot the agent can't delete let state = initState({ runId: isoStamp(now), startedAt: now.toISOString(), branch }); state = mergeDiscovered(state, discovered); - saveState(dir, state); + saveState(edir, state); fs.writeFileSync(path.join(dir, 'log.md'), `# shift log — ${state.runId}\n`); if (!ensureBranch(cwd, branch)) { @@ -179,10 +184,10 @@ function cmdStop() { // The work record: every finalized run (.shift/history.jsonl). `shift history` prints the // ledger + totals; `shift history ` drills into one run's bins. 
function cmdHistory(args) { - const dir = path.join(process.cwd(), '.shift'); + const edir = engineDir(process.cwd()); const { readHistory, aggregate } = require('../lib/history.cjs'); const { renderHistory, fmtDur, fmtTok } = require('../lib/watch-model.cjs'); - const records = readHistory(dir); + const records = readHistory(edir); const target = (args || []).find(a => !a.startsWith('-')); if (target) { const r = records.filter(x => x.runId === target || (x.branch || '').endsWith(target)).pop(); @@ -203,11 +208,13 @@ function cmdHistory(args) { async function cmdRun() { const cwd = process.cwd(); const dir = path.join(cwd, '.shift'); - if (!fs.existsSync(path.join(dir, 'state.json'))) { + const edir = engineDir(cwd); + if (!fs.existsSync(path.join(edir, 'state.json'))) { console.log('No active run. Run `shift start` first.'); process.exit(1); } - const config = JSON.parse(fs.readFileSync(path.join(dir, 'config.json'), 'utf8')); + const cfgFile = fs.existsSync(path.join(edir, 'config.json')) ? 
path.join(edir, 'config.json') : path.join(dir, 'config.json'); + const config = JSON.parse(fs.readFileSync(cfgFile, 'utf8')); const mode = config.permissionMode || 'acceptEdits'; const { runLoop } = require('../lib/run-loop.cjs'); const { readUsageCache } = require('../lib/usage.cjs'); @@ -231,8 +238,8 @@ async function cmdRun() { let first = true; const effects = { now: () => Date.now(), - loadState: () => loadState(dir), - readUsage: () => readUsageCache(dir), + loadState: () => loadState(edir), + readUsage: () => readUsageCache(edir), log: (m) => console.log(`[shift] ${m}`), finalized: () => fs.existsSync(path.join(dir, 'summary.md')), isPaused: () => isPaused(dir), diff --git a/shift/hooks/shift-stop.cjs b/shift/hooks/shift-stop.cjs index f935db5..03ad77e 100755 --- a/shift/hooks/shift-stop.cjs +++ b/shift/hooks/shift-stop.cjs @@ -11,6 +11,7 @@ const { readSkip, clearSkip } = require('../lib/control.cjs'); const { sumTokens } = require('../lib/transcript.cjs'); const { appendRecord } = require('../lib/history.cjs'); const { appendEvent, readTimeline, binWindows } = require('../lib/timeline.cjs'); +const { engineDir } = require('../lib/store.cjs'); function readStdin() { try { return fs.readFileSync(0, 'utf8'); } catch { return ''; } } @@ -67,11 +68,11 @@ function writeSummary(dir, state, reason, now, runTok) { // Append this run to the work record (.shift/history.jsonl). One row per finalized run. // Per-bin metrics come from the timeline (boundaries) + transcript (tokens) so they // survive even if the agent rewrote state.json mid-run. 
-function appendRunRecord(dir, state, reason, now, runTok, transcriptPath) { +function appendRunRecord(edir, cwd, state, reason, now, runTok, transcriptPath) { const tally = s => state.bins.filter(b => b.status === s).length; - const windows = binWindows(readTimeline(dir)); + const windows = binWindows(readTimeline(cwd)); const nowIso = new Date(now).toISOString(); - appendRecord(dir, { + appendRecord(edir, { runId: state.runId, branch: state.branch, startedAt: state.startedAt, endedAt: nowIso, durationMs: Math.max(0, now - Date.parse(state.startedAt)), @@ -100,10 +101,14 @@ function main() { // Resolve the repo from the hook payload's cwd (the hook's process cwd is not // guaranteed to be the project root); fall back to process.cwd(). const cwd = (input && typeof input.cwd === 'string' && input.cwd) ? input.cwd : process.cwd(); - const dir = path.join(cwd, '.shift'); - if (!fs.existsSync(path.join(dir, 'state.json'))) { process.stdout.write('{}'); return; } - - const config = JSON.parse(fs.readFileSync(path.join(dir, 'config.json'), 'utf8')); + const dir = path.join(cwd, '.shift'); // user/agent-facing: config, summary, log, control + const edir = engineDir(cwd); // engine-owned, out of the agent's reach: state, usage, history, timeline + if (!fs.existsSync(path.join(edir, 'state.json'))) { process.stdout.write('{}'); return; } + + // config is snapshotted into the engine dir at `shift start`; prefer that (the agent + // can't delete it) and fall back to the repo copy. + const cfgFile = fs.existsSync(path.join(edir, 'config.json')) ? path.join(edir, 'config.json') : path.join(dir, 'config.json'); + const config = JSON.parse(fs.readFileSync(cfgFile, 'utf8')); const now = Date.now(); const nowIso = new Date(now).toISOString(); const killSwitch = fs.existsSync(path.join(dir, 'STOP')); @@ -111,10 +116,10 @@ function main() { // Capture rate limits from the hook payload: enforce the usage cap and cache // reset times for the headless runner. 
Absent on non-Pro/Max or pre-first-response. - const usagePercent = writeUsageCache(dir, input.rate_limits, Math.floor(now / 1000)); + const usagePercent = writeUsageCache(edir, input.rate_limits, Math.floor(now / 1000)); // Re-discover (fresh text + new files) and carry over status/attempts. - let state = mergeDiscovered(loadState(dir), discoverBins(config.sources, cwd)); + let state = mergeDiscovered(loadState(edir), discoverBins(config.sources, cwd)); const transcriptPath = payloadTranscript || state.transcriptPath || null; const prevBinId = state.currentBinId; @@ -127,7 +132,7 @@ function main() { // agent may rewrite mid-run — and tokens are summed from the transcript (also outside // the repo). `fm` is merged into whichever terminal status the bin lands on, but the // durable copy is the timeline + the history record, not these (clobberable) fields. - const prevStart = prevBinId ? (binWindows(readTimeline(dir))[prevBinId] || {}).startedAt : null; + const prevStart = prevBinId ? (binWindows(readTimeline(cwd))[prevBinId] || {}).startedAt : null; let fm = {}; if (prevBinId) { const tok = (transcriptPath && prevStart) ? sumTokens(transcriptPath, prevStart, nowIso) : null; @@ -171,7 +176,7 @@ function main() { state = setBinStatus(state, prevBinId, { status: 'done', ...fm }); binFinished = true; } - if (binFinished) appendEvent(dir, { t: nowIso, event: 'finish', id: prevBinId }); + if (binFinished) appendEvent(cwd, { t: nowIso, event: 'finish', id: prevBinId }); } const result = decide({ @@ -186,13 +191,13 @@ function main() { if (retryFeedback && result.nextBinId === prevBinId) reason += `\n\n${retryFeedback}`; state.iterations += 1; state.currentBinId = result.nextBinId; - // Record the bin's start the first time it becomes current (a new bin, not a verify - // retry of the same one). The timeline is the durable copy; state.bins.startedAt is a - // best-effort convenience that the agent may clobber. 
- if (result.nextBinId !== prevBinId) appendEvent(dir, { t: nowIso, event: 'start', id: result.nextBinId }); + // Record the bin's start. binWindows keeps the FIRST start per bin, so re-emitting on + // a verify retry (or after the agent clobbers state.json so prevBinId looks unchanged) + // is harmless — and unconditionally appending guarantees every bin has a start event. + appendEvent(cwd, { t: nowIso, event: 'start', id: result.nextBinId }); const nb = state.bins.find(b => b.id === result.nextBinId); if (nb && !nb.startedAt) state = setBinStatus(state, result.nextBinId, { startedAt: nowIso }); - saveState(dir, state); + saveState(edir, state); fs.appendFileSync(path.join(dir, 'log.md'), `\n## ${nowIso} — work ${result.nextBinId} (iter ${state.iterations})\n`); process.stdout.write(JSON.stringify({ decision: 'block', reason })); @@ -202,8 +207,8 @@ function main() { const alreadyFinalized = fs.existsSync(path.join(dir, 'summary.md')); const runTok = transcriptPath ? sumTokens(transcriptPath, state.startedAt, nowIso) : null; state.currentBinId = null; - saveState(dir, state); - if (!alreadyFinalized) appendRunRecord(dir, state, result.reason, now, runTok, transcriptPath); + saveState(edir, state); + if (!alreadyFinalized) appendRunRecord(edir, cwd, state, result.reason, now, runTok, transcriptPath); writeSummary(dir, state, result.reason, now, runTok); process.stdout.write('{}'); } diff --git a/shift/lib/store.cjs b/shift/lib/store.cjs new file mode 100644 index 0000000..3087964 --- /dev/null +++ b/shift/lib/store.cjs @@ -0,0 +1,40 @@ +'use strict'; +const fs = require('node:fs'); +const os = require('node:os'); +const path = require('node:path'); +const crypto = require('node:crypto'); + +// Where shift keeps the engine's AUTHORITATIVE mutable state — state.json, timeline, +// usage cache, work-record history. 
It lives OUTSIDE the working repo, keyed by the +// repo's canonical path, because an autonomous agent rewrites/deletes files it finds +// under .shift/ (observed: it marked bins done in state.json itself, usurping the engine +// and erasing per-bin boundaries). A Stop hook is NOT sandboxed (verified it can write +// ~/.local/state), so the hook owns this dir while the agent — which only operates inside +// the repo — can't reach it. +// +// .shift/ in the repo keeps only what the user or agent legitimately touches: config.json +// (user-edited), summary.md (user-read), log.md / blocked.jsonl (agent-appended), and the +// control signals (STOP/PAUSE/SKIP, written by `shift watch`). +// +// Two rules keep the hook (writer) and watch/history (readers) on the same path: +// 1. realpathSync — macOS /tmp is a symlink to /private/tmp; the hook payload cwd is +// already canonical, so readers must canonicalize too. +// 2. hash the FULL canonical path (a prefix slice collides for sibling temp dirs). +// SHIFT_STATE_DIR overrides the base (tests; also a valid explicit override). + +function base() { + return process.env.SHIFT_STATE_DIR + || path.join(process.env.XDG_STATE_HOME || path.join(os.homedir(), '.local', 'state'), 'shift'); +} +function canonical(cwd) { + try { return fs.realpathSync(path.resolve(cwd)); } catch { return path.resolve(cwd); } +} + +// engineDir(cwd) -> the out-of-repo state directory for the repo rooted at cwd. 
+function engineDir(cwd) { + const dir = path.join(base(), crypto.createHash('sha256').update(canonical(cwd)).digest('hex').slice(0, 16)); + try { fs.mkdirSync(dir, { recursive: true }); } catch { /* best-effort */ } + return dir; +} + +module.exports = { engineDir }; diff --git a/shift/lib/timeline.cjs b/shift/lib/timeline.cjs index 90b12b8..e8c5f4a 100644 --- a/shift/lib/timeline.cjs +++ b/shift/lib/timeline.cjs @@ -1,32 +1,27 @@ 'use strict'; const fs = require('node:fs'); const path = require('node:path'); +const { engineDir } = require('./store.cjs'); -// An append-only record of bin boundaries (one event per line in .shift/timeline.jsonl) -// — the source of per-bin runtime + token windows, paired with the transcript for tokens. -// -// Best-effort, by design: in a fully-headless autonomous run the agent may rewrite or -// delete files under .shift/ (observed), so per-bin metrics can be lost — the run-level -// totals + the work-record history (the hook's final write) remain authoritative -// regardless. Writing this out-of-repo isn't an option: Claude Code sandboxes hook -// file-writes to the project directory. See SPEC §13. +// Append-only record of bin boundaries (one event per line) — the source of per-bin +// runtime + token windows, paired with the transcript for tokens. Lives in the engine's +// out-of-repo state dir (see store.cjs) so the agent can't delete or rewrite it. 
-function timelinePath(dir) { return path.join(dir, 'timeline.jsonl'); } +function timelinePath(cwd) { return path.join(engineDir(cwd), 'timeline.jsonl'); } -function appendEvent(dir, ev) { // ev: { t: iso, event: 'start'|'finish', id } - try { fs.mkdirSync(dir, { recursive: true }); fs.appendFileSync(timelinePath(dir), JSON.stringify(ev) + '\n'); } - catch { /* best-effort */ } +function appendEvent(cwd, ev) { // ev: { t: iso, event: 'start'|'finish', id } + try { fs.appendFileSync(timelinePath(cwd), JSON.stringify(ev) + '\n'); } catch { /* best-effort */ } } -function readTimeline(dir) { +function readTimeline(cwd) { let raw; - try { raw = fs.readFileSync(timelinePath(dir), 'utf8'); } catch { return []; } + try { raw = fs.readFileSync(timelinePath(cwd), 'utf8'); } catch { return []; } return raw.split('\n').filter(Boolean) .map(l => { try { return JSON.parse(l); } catch { return null; } }) .filter(Boolean); } -function clearTimeline(dir) { try { fs.unlinkSync(timelinePath(dir)); } catch { /* none */ } } +function clearTimeline(cwd) { try { fs.unlinkSync(timelinePath(cwd)); } catch { /* none */ } } // binWindows(events) -> { id: { startedAt, finishedAt } } — first start, last finish. function binWindows(events) { diff --git a/shift/lib/watch-model.cjs b/shift/lib/watch-model.cjs index 717117a..aee1a30 100644 --- a/shift/lib/watch-model.cjs +++ b/shift/lib/watch-model.cjs @@ -5,6 +5,7 @@ const { loadState } = require('./state.cjs'); const { isPaused, isStopRequested } = require('./control.cjs'); const { sumUsage, readLines } = require('./transcript.cjs'); const { readTimeline, binWindows } = require('./timeline.cjs'); +const { engineDir } = require('./store.cjs'); // --- model ----------------------------------------------------------------- @@ -30,15 +31,18 @@ function readBrief(cwd, binId) { try { return fs.readFileSync(path.join(cwd, binId), 'utf8'); } catch { return ''; } } -// buildModel({ dir, now }) — read .shift/ into a plain view model. Pure of rendering. 
+// buildModel({ dir, now }) — read the run into a plain view model. `dir` is the repo's +// .shift/ (log, control, summary); the engine state (state.json) lives out-of-repo. function buildModel({ dir, now }) { + const cwd = path.dirname(dir); + const edir = engineDir(cwd); let state; - try { state = loadState(dir); } catch { return { exists: false }; } + try { state = loadState(edir); } catch { return { exists: false }; } // Per-bin runtime + tokens are derived from the timeline (agent-proof boundaries) and // the transcript (parsed once), so they survive a state.json the agent rewrote. We fall // back to any stamps the hook left on state.bins when no timeline/transcript is present. - const windows = binWindows(readTimeline(dir)); + const windows = binWindows(readTimeline(path.dirname(dir))); // timeline keyed by repo cwd, not .shift const lines = state.transcriptPath ? readLines(state.transcriptPath) : []; const startMs = b => (windows[b.id] && windows[b.id].startedAt) ? Date.parse(windows[b.id].startedAt) : null; const finMs = (b, current) => { diff --git a/shift/test/cli.test.cjs b/shift/test/cli.test.cjs index 62dbad1..d1a10d5 100644 --- a/shift/test/cli.test.cjs +++ b/shift/test/cli.test.cjs @@ -7,6 +7,10 @@ const path = require('node:path'); const cp = require('node:child_process'); const CLI = path.resolve(__dirname, '..', 'bin', 'shift'); +// Engine state lives out of the repo; pin its base to a tmp dir for the test process + CLI. 
+const STATE_BASE = fs.mkdtempSync(path.join(os.tmpdir(), 'shift-cli-base-')); +process.env.SHIFT_STATE_DIR = STATE_BASE; +const { engineDir } = require('../lib/store.cjs'); function repoWithQueue() { const cwd = fs.mkdtempSync(path.join(os.tmpdir(), 'shift-cli-')); @@ -20,24 +24,25 @@ function repoWithQueue() { } function run(cwd, args) { - return cp.execFileSync('node', [CLI, ...args], { cwd, encoding: 'utf8' }); + return cp.execFileSync('node', [CLI, ...args], { cwd, encoding: 'utf8', env: { ...process.env, SHIFT_STATE_DIR: STATE_BASE } }); } test('--dry-run lists the queue and writes nothing', () => { const cwd = repoWithQueue(); const out = run(cwd, ['start', '--dry-run']); assert.match(out, /queue\/01\.md/); - assert.ok(!fs.existsSync(path.join(cwd, '.shift', 'state.json'))); + assert.ok(!fs.existsSync(path.join(engineDir(cwd), 'state.json'))); }); -test('start writes config + state and creates the run branch', () => { +test('start writes config (repo) + state (engine dir) and creates the run branch', () => { const cwd = repoWithQueue(); run(cwd, ['start']); - assert.ok(fs.existsSync(path.join(cwd, '.shift', 'state.json'))); - assert.ok(fs.existsSync(path.join(cwd, '.shift', 'config.json'))); + assert.ok(fs.existsSync(path.join(cwd, '.shift', 'config.json')), 'config stays in the repo (user-editable)'); + assert.ok(fs.existsSync(path.join(engineDir(cwd), 'state.json')), 'engine state lives out of the repo'); + assert.ok(!fs.existsSync(path.join(cwd, '.shift', 'state.json')), 'no state.json in the repo for the agent to clobber'); const branch = cp.execSync('git branch --show-current', { cwd, encoding: 'utf8' }).trim(); assert.match(branch, /^shift\//); - const state = JSON.parse(fs.readFileSync(path.join(cwd, '.shift', 'state.json'), 'utf8')); + const state = JSON.parse(fs.readFileSync(path.join(engineDir(cwd), 'state.json'), 'utf8')); assert.equal(state.bins.length, 1); }); diff --git a/shift/test/hook.test.cjs b/shift/test/hook.test.cjs index 
cf0def7..c6c7dee 100644 --- a/shift/test/hook.test.cjs +++ b/shift/test/hook.test.cjs @@ -7,39 +7,50 @@ const path = require('node:path'); const cp = require('node:child_process'); const HOOK = path.resolve(__dirname, '..', 'hooks', 'shift-stop.cjs'); +// Engine state lives out of the repo; point its base at a tmp dir so tests never touch +// ~/.local/state, and so the test process's engineDir() matches the spawned hook's. +const STATE_BASE = fs.mkdtempSync(path.join(os.tmpdir(), 'shift-statebase-')); +process.env.SHIFT_STATE_DIR = STATE_BASE; +const { engineDir } = require('../lib/store.cjs'); function setupRun(configOverride) { const cwd = fs.mkdtempSync(path.join(os.tmpdir(), 'shift-hook-')); fs.mkdirSync(path.join(cwd, 'queue'), { recursive: true }); fs.writeFileSync(path.join(cwd, 'queue', '01.md'), 'bin one'); fs.writeFileSync(path.join(cwd, 'queue', '02.md'), 'bin two'); - const dir = path.join(cwd, '.shift'); + const dir = path.join(cwd, '.shift'); // repo-side: log, summary, control + const edir = engineDir(cwd); // out-of-repo: state, config snapshot, history, usage fs.mkdirSync(dir, { recursive: true }); - fs.writeFileSync(path.join(dir, 'config.json'), JSON.stringify(Object.assign({ + const config = JSON.stringify(Object.assign({ sources: [{ path: 'queue', kind: 'briefs' }], bounds: { maxHours: 24, maxIterations: 10 }, definitionOfDone: 'done', git: {} - }, configOverride || {}))); - fs.writeFileSync(path.join(dir, 'state.json'), JSON.stringify({ + }, configOverride || {})); + fs.writeFileSync(path.join(edir, 'config.json'), config); + fs.writeFileSync(path.join(edir, 'state.json'), JSON.stringify({ runId: 'r', startedAt: new Date().toISOString(), iterations: 0, branch: 'shift/x', currentBinId: null, bins: [] })); fs.writeFileSync(path.join(dir, 'log.md'), '# log\n'); - return { cwd, dir }; + return { cwd, dir, edir }; } function runHook(cwd, input) { - const out = cp.execFileSync('node', [HOOK], { cwd, input: JSON.stringify(input), encoding: 'utf8' }); 
+ const out = cp.execFileSync('node', [HOOK], { + cwd, input: JSON.stringify(input), encoding: 'utf8', + env: { ...process.env, SHIFT_STATE_DIR: STATE_BASE } + }); return JSON.parse(out || '{}'); } +const readState = edir => JSON.parse(fs.readFileSync(path.join(edir, 'state.json'), 'utf8')); -test('no-ops (allows stop) when no .shift/state.json exists', () => { +test('no-ops (allows stop) when there is no active run', () => { const cwd = fs.mkdtempSync(path.join(os.tmpdir(), 'shift-none-')); assert.deepEqual(runHook(cwd, { stop_hook_active: false }), {}); }); test('first stop blocks bin 1; second marks it done + blocks bin 2; third drains -> allow + summary', () => { - const { cwd, dir } = setupRun(); + const { cwd, dir, edir } = setupRun(); const r1 = runHook(cwd, { stop_hook_active: false }); assert.equal(r1.decision, 'block'); assert.match(r1.reason, /bin one/); @@ -47,8 +58,7 @@ test('first stop blocks bin 1; second marks it done + blocks bin 2; third drains const r2 = runHook(cwd, { stop_hook_active: true }); assert.equal(r2.decision, 'block'); assert.match(r2.reason, /bin two/); - const s2 = JSON.parse(fs.readFileSync(path.join(dir, 'state.json'), 'utf8')); - assert.equal(s2.bins.find(b => b.id === 'queue/01.md').status, 'done'); + assert.equal(readState(edir).bins.find(b => b.id === 'queue/01.md').status, 'done'); const r3 = runHook(cwd, { stop_hook_active: true }); assert.deepEqual(r3, {}); @@ -75,24 +85,22 @@ test('logged "Needs you:" lines surface in the summary', () => { }); test('SKIP control marks the current bin skipped and advances to the next', () => { - const { cwd, dir } = setupRun(); + const { cwd, dir, edir } = setupRun(); runHook(cwd, { stop_hook_active: false }); // start bin 1 (current = queue/01.md) fs.writeFileSync(path.join(dir, 'SKIP'), 'queue/01.md'); const r = runHook(cwd, { stop_hook_active: true }); // skip bin 1, block bin 2 assert.equal(r.decision, 'block'); assert.match(r.reason, /bin two/); - const s = 
JSON.parse(fs.readFileSync(path.join(dir, 'state.json'), 'utf8')); - assert.equal(s.bins.find(b => b.id === 'queue/01.md').status, 'skipped'); + assert.equal(readState(edir).bins.find(b => b.id === 'queue/01.md').status, 'skipped'); assert.ok(!fs.existsSync(path.join(dir, 'SKIP')), 'SKIP is consumed'); }); test('a SKIP naming a non-current bin is consumed and discarded, not applied to a later bin', () => { - const { cwd, dir } = setupRun(); + const { cwd, dir, edir } = setupRun(); runHook(cwd, { stop_hook_active: false }); // start bin 1 fs.writeFileSync(path.join(dir, 'SKIP'), 'queue/99-nope.md'); // stale / wrong id runHook(cwd, { stop_hook_active: true }); // bin 1 -> done (skip ignored) - const s = JSON.parse(fs.readFileSync(path.join(dir, 'state.json'), 'utf8')); - assert.equal(s.bins.find(b => b.id === 'queue/01.md').status, 'done'); + assert.equal(readState(edir).bins.find(b => b.id === 'queue/01.md').status, 'done'); assert.ok(!fs.existsSync(path.join(dir, 'SKIP')), 'stale SKIP is consumed, never left to fire on a later bin'); }); @@ -103,51 +111,58 @@ test('kill switch ends the run immediately', () => { assert.match(fs.readFileSync(path.join(dir, 'summary.md'), 'utf8'), /kill switch/); }); -test('resolves .shift from the hook payload cwd, not the process cwd', () => { +test('resolves the repo from the hook payload cwd, not the process cwd', () => { const { cwd } = setupRun(); const neutral = fs.mkdtempSync(path.join(os.tmpdir(), 'shift-neutral-')); const out = cp.execFileSync('node', [HOOK], { cwd: neutral, input: JSON.stringify({ stop_hook_active: false, cwd }), - encoding: 'utf8' + encoding: 'utf8', + env: { ...process.env, SHIFT_STATE_DIR: STATE_BASE } }); const r = JSON.parse(out || '{}'); assert.equal(r.decision, 'block'); assert.match(r.reason, /bin one/); }); +test('engine state lives OUTSIDE the repo (agent cannot reach it)', () => { + const { cwd, edir } = setupRun(); + runHook(cwd, { stop_hook_active: false }); + 
assert.ok(fs.existsSync(path.join(edir, 'state.json')), 'state.json is in the engine dir'); + assert.ok(!fs.existsSync(path.join(cwd, '.shift', 'state.json')), 'state.json is NOT in the repo .shift/'); + assert.ok(!edir.startsWith(cwd), 'the engine dir is outside the working repo'); +}); + // ---- v3: verify gate ---- test('verify gate (passing) marks bins done and drains', () => { - const { cwd, dir } = setupRun({ verify: { command: 'true', maxAttempts: 2 } }); + const { cwd, edir } = setupRun({ verify: { command: 'true', maxAttempts: 2 } }); runHook(cwd, { stop_hook_active: false }); // start bin 1 runHook(cwd, { stop_hook_active: true }); // verify passes -> bin1 done, start bin2 - const s = JSON.parse(fs.readFileSync(path.join(dir, 'state.json'), 'utf8')); - assert.equal(s.bins.find(b => b.id === 'queue/01.md').status, 'done'); + assert.equal(readState(edir).bins.find(b => b.id === 'queue/01.md').status, 'done'); }); test('verify gate (failing) re-blocks the same bin with feedback, then blocks after maxAttempts', () => { - const { cwd, dir } = setupRun({ verify: { command: 'false', maxAttempts: 2 } }); + const { cwd, edir } = setupRun({ verify: { command: 'false', maxAttempts: 2 } }); runHook(cwd, { stop_hook_active: false }); // start bin 1 const r1 = runHook(cwd, { stop_hook_active: true }); // verify fails, attempt 1 < 2 -> retry SAME bin assert.equal(r1.decision, 'block'); assert.match(r1.reason, /failed verification/); assert.match(r1.reason, /bin one/); - let s = JSON.parse(fs.readFileSync(path.join(dir, 'state.json'), 'utf8')); + let s = readState(edir); assert.equal(s.bins.find(b => b.id === 'queue/01.md').status, 'pending'); assert.equal(s.bins.find(b => b.id === 'queue/01.md').attempts, 1); const r2 = runHook(cwd, { stop_hook_active: true }); // verify fails again, attempt 2 == max -> blocked, move on assert.equal(r2.decision, 'block'); assert.match(r2.reason, /bin two/); - s = JSON.parse(fs.readFileSync(path.join(dir, 'state.json'), 'utf8')); - 
assert.equal(s.bins.find(b => b.id === 'queue/01.md').status, 'blocked'); + assert.equal(readState(edir).bins.find(b => b.id === 'queue/01.md').status, 'blocked'); }); // ---- watch: per-bin tokens/runtime + work-record history ---- test('records per-bin tokens + runtime from the transcript and appends a history record', () => { - const { cwd, dir } = setupRun(); + const { cwd, dir, edir } = setupRun(); const tpath = path.join(dir, 'transcript.jsonl'); const asst = (ts, output) => JSON.stringify({ type: 'assistant', timestamp: ts, @@ -155,20 +170,18 @@ test('records per-bin tokens + runtime from the transcript and appends a history }); runHook(cwd, { stop_hook_active: false, transcript_path: tpath }); // start bin 1 - // Use bin 1's recorded startedAt as the message timestamp so it lands in [start, now). - const started = JSON.parse(fs.readFileSync(path.join(dir, 'state.json'), 'utf8')) - .bins.find(b => b.id === 'queue/01.md').startedAt; + const started = readState(edir).bins.find(b => b.id === 'queue/01.md').startedAt; assert.ok(started, 'bin 1 got a startedAt when it became current'); fs.writeFileSync(tpath, asst(started, 500) + '\n'); runHook(cwd, { stop_hook_active: true, transcript_path: tpath }); // finish bin 1, start bin 2 - const b1 = JSON.parse(fs.readFileSync(path.join(dir, 'state.json'), 'utf8')).bins.find(b => b.id === 'queue/01.md'); + const b1 = readState(edir).bins.find(b => b.id === 'queue/01.md'); assert.equal(b1.status, 'done'); assert.equal(b1.tokens.output, 500, 'bin 1 output tokens attributed from the transcript window'); assert.equal(typeof b1.durationMs, 'number'); runHook(cwd, { stop_hook_active: true, transcript_path: tpath }); // finish bin 2, drain -> finalize - const hist = fs.readFileSync(path.join(dir, 'history.jsonl'), 'utf8').trim().split('\n').map(JSON.parse); + const hist = fs.readFileSync(path.join(edir, 'history.jsonl'), 'utf8').trim().split('\n').map(JSON.parse); assert.equal(hist.length, 1, 'one history record appended on 
finalize'); assert.equal(hist[0].bins.done, 2); assert.ok(hist[0].tokens.output >= 500, 'run output tokens recorded'); @@ -176,19 +189,19 @@ test('records per-bin tokens + runtime from the transcript and appends a history }); test('history is append-only across runs and not duplicated by a stray extra stop', () => { - const { cwd, dir } = setupRun(); + const { cwd, edir } = setupRun(); runHook(cwd, { stop_hook_active: false }); runHook(cwd, { stop_hook_active: true }); runHook(cwd, { stop_hook_active: true }); // drain -> finalize (appends record 1) runHook(cwd, { stop_hook_active: true }); // stray extra stop -> summary already exists -> no 2nd append - const hist = fs.readFileSync(path.join(dir, 'history.jsonl'), 'utf8').trim().split('\n').filter(Boolean); + const hist = fs.readFileSync(path.join(edir, 'history.jsonl'), 'utf8').trim().split('\n').filter(Boolean); assert.equal(hist.length, 1, 'no duplicate history record from a repeated finalize'); }); // ---- v2: usage cap + cache ---- test('usage cap from the hook payload ends the run and caches usage', () => { - const { cwd, dir } = setupRun({ bounds: { maxHours: 24, maxIterations: 10, usageCapPercent: 90 } }); + const { cwd, dir, edir } = setupRun({ bounds: { maxHours: 24, maxIterations: 10, usageCapPercent: 90 } }); const reset = Math.floor(Date.now() / 1000) + 3600; const r = runHook(cwd, { stop_hook_active: false, @@ -199,7 +212,7 @@ test('usage cap from the hook payload ends the run and caches usage', () => { }); assert.deepEqual(r, {}); assert.match(fs.readFileSync(path.join(dir, 'summary.md'), 'utf8'), /usage cap/); - const usage = JSON.parse(fs.readFileSync(path.join(dir, 'usage.json'), 'utf8')); + const usage = JSON.parse(fs.readFileSync(path.join(edir, 'usage.json'), 'utf8')); assert.equal(usage.weeklyPercent, 95); assert.equal(usage.sessionResetAt, reset); }); diff --git a/shift/test/timeline.test.cjs b/shift/test/timeline.test.cjs index 9b6d59c..cb15b66 100644 --- a/shift/test/timeline.test.cjs +++ 
b/shift/test/timeline.test.cjs @@ -4,40 +4,62 @@ const assert = require('node:assert'); const fs = require('node:fs'); const os = require('node:os'); const path = require('node:path'); + +// Keep the out-of-repo timeline in a tmp base so tests never touch ~/.local/state. +process.env.SHIFT_STATE_DIR = fs.mkdtempSync(path.join(os.tmpdir(), 'shift-statebase-')); const { appendEvent, readTimeline, clearTimeline, timelinePath, binWindows } = require('../lib/timeline.cjs'); +const { engineDir } = require('../lib/store.cjs'); + +function repo() { return fs.mkdtempSync(path.join(os.tmpdir(), 'shift-repo-')); } -function dir() { return fs.mkdtempSync(path.join(os.tmpdir(), 'shift-tl-')); } +test('timeline lives OUTSIDE the repo (agent-proof), under the state base', () => { + const c = repo(); + const f = timelinePath(c); + assert.ok(f.startsWith(process.env.SHIFT_STATE_DIR), 'under the state base'); + assert.ok(!f.startsWith(path.resolve(c)), 'NOT inside the working repo'); +}); + +test('append + read round-trips; clear removes', () => { + const c = repo(); + appendEvent(c, { t: '2026-06-16T00:00:00Z', event: 'start', id: 'a' }); + appendEvent(c, { t: '2026-06-16T00:01:00Z', event: 'finish', id: 'a' }); + assert.equal(readTimeline(c).length, 2); + clearTimeline(c); + assert.deepEqual(readTimeline(c), []); +}); -test('timeline file lives at .shift/timeline.jsonl', () => { - const d = dir(); - assert.equal(timelinePath(d), path.join(d, 'timeline.jsonl')); +test('distinct repos get distinct timelines (no key collision)', () => { + const a = repo(), b = repo(); + appendEvent(a, { t: 't', event: 'start', id: 'x' }); + assert.equal(readTimeline(a).length, 1); + assert.equal(readTimeline(b).length, 0); }); -test('append + read round-trips events; clear removes them', () => { - const d = dir(); - appendEvent(d, { t: '2026-06-16T00:00:00Z', event: 'start', id: 'a' }); - appendEvent(d, { t: '2026-06-16T00:01:00Z', event: 'finish', id: 'a' }); - assert.equal(readTimeline(d).length, 
2); - clearTimeline(d); - assert.deepEqual(readTimeline(d), []); +test('the key is canonical: /tmp and /private/tmp resolve to the same store (macOS symlink)', () => { + // realpath collapses the symlink, so a reader using either form agrees with the hook. + const real = fs.realpathSync(repo()); + if (real.startsWith('/private/')) { + const aliased = real.replace(/^\/private/, ''); + assert.equal(engineDir(aliased), engineDir(real), '/tmp alias must map to the same store as /private/tmp'); + } else { + assert.ok(true); // not on a /private symlink platform; nothing to assert + } }); -test('readTimeline on a fresh dir is empty and tolerates malformed lines', () => { - const d = dir(); - assert.deepEqual(readTimeline(d), []); - appendEvent(d, { t: 't', event: 'start', id: 'a' }); - fs.appendFileSync(timelinePath(d), 'garbage\n'); - assert.equal(readTimeline(d).length, 1); +test('readTimeline tolerates malformed lines', () => { + const c = repo(); + appendEvent(c, { t: 't', event: 'start', id: 'a' }); + fs.appendFileSync(timelinePath(c), 'garbage\n'); + assert.equal(readTimeline(c).length, 1); }); test('binWindows takes first start and last finish per bin', () => { - const events = [ + const w = binWindows([ { t: 't1', event: 'start', id: 'a' }, { t: 't1b', event: 'start', id: 'a' }, { t: 't2', event: 'finish', id: 'a' }, { t: 't3', event: 'start', id: 'b' } - ]; - const w = binWindows(events); + ]); assert.equal(w.a.startedAt, 't1'); assert.equal(w.a.finishedAt, 't2'); assert.equal(w.b.startedAt, 't3'); diff --git a/shift/test/watch-model.test.cjs b/shift/test/watch-model.test.cjs index 4feb318..7409e7f 100644 --- a/shift/test/watch-model.test.cjs +++ b/shift/test/watch-model.test.cjs @@ -4,8 +4,11 @@ const assert = require('node:assert'); const fs = require('node:fs'); const os = require('node:os'); const path = require('node:path'); +// Out-of-repo timeline base → tmp (fixtures have no timeline → per-bin falls back to state.bins). 
+process.env.SHIFT_STATE_DIR = fs.mkdtempSync(path.join(os.tmpdir(), 'shift-wmbase-')); const { buildModel, renderFrame, renderDetail, renderHistory } = require('../lib/watch-model.cjs'); const { aggregate } = require('../lib/history.cjs'); +const { engineDir } = require('../lib/store.cjs'); function fixture({ paused = false, currentBinId = 'queue/03-build.md' } = {}) { const cwd = fs.mkdtempSync(path.join(os.tmpdir(), 'shift-watch-')); @@ -14,7 +17,7 @@ function fixture({ paused = false, currentBinId = 'queue/03-build.md' } = {}) { fs.mkdirSync(path.join(cwd, 'queue'), { recursive: true }); fs.writeFileSync(path.join(cwd, 'queue', '03-build.md'), '# Build the thing\n\nCompile and commit.\n'); const startedAt = new Date(Date.now() - 12 * 60_000).toISOString(); - fs.writeFileSync(path.join(dir, 'state.json'), JSON.stringify({ + fs.writeFileSync(path.join(engineDir(cwd), 'state.json'), JSON.stringify({ runId: '2026-06-16T00-00-00', startedAt, iterations: 7, branch: 'shift/smoke', currentBinId, bins: [ From 6b58697bb897d2e7ba2b9469a1317f9a155ab603 Mon Sep 17 00:00:00 2001 From: Allen Manzano-Wight <6640236+AllenBW@users.noreply.github.com> Date: Tue, 16 Jun 2026 12:08:09 -0400 Subject: [PATCH 10/12] shift: fixes from adversarial verification of the state-relocation refactor MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Verification of 5044dae surfaced four issues (core path-agreement was already correct): - P1 cross-run leak: cmdStart cleared STOP/summary/usage/timeline but NOT the other repo-side control/blocker signals — a stale PAUSE made `shift start && shift run` a multi-hour no-op, a stale SKIP silently skipped a bin, a stale blocked.jsonl re-blocked one. Now scrubs PAUSE/SKIP/blocked.jsonl/summary.md too. + regression test (cli.test). - P1 broken demo: examples/watch-demo.cjs still seeded/read repo-side state after the relocation → 6 empty frames. 
Migrated to engineDir (seed state + snapshot config there, readHistory(edir)); now renders real per-bin data + history. - P3 stale prompt: brief.cjs no longer tells the agent not to touch .shift/state.json (gone from the repo); guards the real repo-side surface + notes engine state is external. Test updated. - P3 stale comments: history.cjs / shift-stop.cjs / bin/shift now say history lives in the engine dir, not .shift/. 100 shift tests green; demo verified end-to-end. --- shift/bin/shift | 9 +++++++-- shift/examples/watch-demo.cjs | 14 ++++++++++---- shift/hooks/shift-stop.cjs | 2 +- shift/lib/brief.cjs | 2 +- shift/lib/history.cjs | 7 ++++--- shift/test/brief.test.cjs | 7 +++++-- shift/test/cli.test.cjs | 16 ++++++++++++++++ 7 files changed, 44 insertions(+), 13 deletions(-) diff --git a/shift/bin/shift b/shift/bin/shift index ad4bb85..e49c601 100755 --- a/shift/bin/shift +++ b/shift/bin/shift @@ -64,7 +64,12 @@ function cmdStart(args) { fs.mkdirSync(dir, { recursive: true }); const edir = engineDir(cwd); // engine state lives out of the repo so the agent can't usurp it if (fs.existsSync(path.join(dir, 'STOP'))) fs.unlinkSync(path.join(dir, 'STOP')); - try { fs.unlinkSync(path.join(dir, 'summary.md')); } catch { /* none */ } + // Scrub every stale control/blocker signal so a prior run can't corrupt this one: + // PAUSE → run idle-polls forever; SKIP → a bin is silently skipped; blocked.jsonl → + // a bin is re-blocked with last run's note. All live repo-side, beside STOP. 
+ for (const f of ['PAUSE', 'SKIP', 'blocked.jsonl', 'summary.md']) { + try { fs.unlinkSync(path.join(dir, f)); } catch { /* none */ } + } require('../lib/timeline.cjs').clearTimeline(cwd); // fresh run → fresh boundary record try { fs.unlinkSync(path.join(edir, 'usage.json')); } catch { /* none */ } // stale usage from a prior run fs.writeFileSync(cfgFile, JSON.stringify(config, null, 2)); // .shift/config.json (user-editable) @@ -181,7 +186,7 @@ function cmdStop() { console.log('shift will stop cleanly after the current bin.'); } -// The work record: every finalized run (.shift/history.jsonl). `shift history` prints the +// The work record: every finalized run (/history.jsonl, out-of-repo). `shift history` prints the // ledger + totals; `shift history ` drills into one run's bins. function cmdHistory(args) { const edir = engineDir(process.cwd()); diff --git a/shift/examples/watch-demo.cjs b/shift/examples/watch-demo.cjs index 11502cb..2535a30 100644 --- a/shift/examples/watch-demo.cjs +++ b/shift/examples/watch-demo.cjs @@ -14,20 +14,26 @@ const SHIFT = path.resolve(__dirname, '..'); const { buildModel, renderFrame, renderDetail, renderHistory } = require(path.join(SHIFT, 'lib', 'watch-model.cjs')); const { readHistory, aggregate } = require(path.join(SHIFT, 'lib', 'history.cjs')); const { requestSkip, requestStop } = require(path.join(SHIFT, 'lib', 'control.cjs')); +const { engineDir } = require(path.join(SHIFT, 'lib', 'store.cjs')); const HOOK = path.join(SHIFT, 'hooks', 'shift-stop.cjs'); const cwd = fs.mkdtempSync(path.join(os.tmpdir(), 'shift-watch-demo-')); const dir = path.join(cwd, '.shift'); +// Authoritative engine state (state.json, history, timeline) lives OUT of the repo, +// keyed by the canonical cwd — same dir the hook writes and watch/history read. 
+const edir = engineDir(cwd); fs.mkdirSync(path.join(cwd, 'queue'), { recursive: true }); fs.mkdirSync(dir, { recursive: true }); for (const [n, t] of [['01-build.md', 'build the thing'], ['02-flaky.md', 'flaky task'], ['03-docs.md', 'write docs']]) { fs.writeFileSync(path.join(cwd, 'queue', n), t); } -fs.writeFileSync(path.join(dir, 'config.json'), JSON.stringify({ +const config = JSON.stringify({ sources: [{ path: 'queue', kind: 'briefs' }], bounds: { maxHours: 24, maxIterations: 10 }, definitionOfDone: 'done', git: {} -})); -fs.writeFileSync(path.join(dir, 'state.json'), JSON.stringify({ +}); +fs.writeFileSync(path.join(dir, 'config.json'), config); // user-editable copy in the repo +fs.writeFileSync(path.join(edir, 'config.json'), config); // engine snapshot (what cmdStart does) +fs.writeFileSync(path.join(edir, 'state.json'), JSON.stringify({ // engine-owned, out of the agent's reach runId: 'demo', startedAt: new Date(Date.now() - 5 * 60000).toISOString(), iterations: 0, branch: 'shift/demo', currentBinId: null, bins: [] })); @@ -55,5 +61,5 @@ process.stdout.write('\n\x1b[1m=== ⏎ details on bin 01 (drill-down) ===\x1b[0m process.stdout.write(renderDetail(buildModel({ dir, now: Date.now() }), 0, { width: 78, color: true })); process.stdout.write('\n\x1b[1m=== shift history (work record across runs) ===\x1b[0m\n'); -process.stdout.write(renderHistory(readHistory(dir), aggregate(readHistory(dir)), { color: true })); +process.stdout.write(renderHistory(readHistory(edir), aggregate(readHistory(edir)), { color: true })); process.stdout.write(`\n(throwaway repo: ${cwd})\n`); diff --git a/shift/hooks/shift-stop.cjs b/shift/hooks/shift-stop.cjs index 03ad77e..143be03 100755 --- a/shift/hooks/shift-stop.cjs +++ b/shift/hooks/shift-stop.cjs @@ -65,7 +65,7 @@ function writeSummary(dir, state, reason, now, runTok) { fs.writeFileSync(path.join(dir, 'summary.md'), lines.join('\n') + '\n'); } -// Append this run to the work record (.shift/history.jsonl). 
One row per finalized run. +// Append this run to the work record (/history.jsonl, out-of-repo). One row per finalized run. // Per-bin metrics come from the timeline (boundaries) + transcript (tokens) so they // survive even if the agent rewrote state.json mid-run. function appendRunRecord(edir, cwd, state, reason, now, runTok, transcriptPath) { diff --git a/shift/lib/brief.cjs b/shift/lib/brief.cjs index c54d19e..02584c3 100644 --- a/shift/lib/brief.cjs +++ b/shift/lib/brief.cjs @@ -15,7 +15,7 @@ function renderBrief(bin, config) { 'Do NOT ask questions — if you would normally ask, decide and APPEND the decision as a line to .shift/log.md.', `Definition of done: ${dod}`, 'When finished, commit your work on the current branch.', - '`.shift/` is shift\'s own run bookkeeping. The ONLY writes you may make under it are APPENDING a line to .shift/log.md or .shift/blocked.jsonl. Never edit, overwrite, or "tidy" .shift/state.json, .shift/config.json, .shift/summary.md, and never rewrite .shift/log.md — shift maintains those itself (run progress, per-bin runtime + tokens), and changing them corrupts the run record.', + '`.shift/` is shift\'s own run bookkeeping. The ONLY writes you may make under it are APPENDING a line to .shift/log.md or .shift/blocked.jsonl. Never edit, overwrite, or "tidy" .shift/config.json or .shift/summary.md, and never rewrite .shift/log.md — shift maintains those itself (run progress, per-bin runtime + tokens), and changing them corrupts the run record. 
(Authoritative engine state — run progress, usage, timeline, history — lives outside the repo and is maintained by shift; you do not need to touch it.)', 'Flag anything that needs the human (a deferred decision, an action you could not take) by appending a line to .shift/log.md as: "Needs you: " — these surface in the run summary.', 'If a true blocker stops you from finishing this bin, append one line to .shift/blocked.jsonl: {"id":"","note":""} then stop.', guard, diff --git a/shift/lib/history.cjs b/shift/lib/history.cjs index c440e14..d69f994 100644 --- a/shift/lib/history.cjs +++ b/shift/lib/history.cjs @@ -2,9 +2,10 @@ const fs = require('node:fs'); const path = require('node:path'); -// The shift work record: an append-only ledger of finalized runs at .shift/history.jsonl. -// `shift start` rewrites state.json but never touches this, so it accumulates across runs. -// One JSON line per run (totals + per-bin breakdown). Read for `shift history` + aggregates. +// The shift work record: an append-only ledger of finalized runs at /history.jsonl +// (out-of-repo, alongside state.json/usage.json/timeline.jsonl — see store.cjs). `shift start` +// resets the engine state but never touches this, so it accumulates across runs. All callers +// pass the engineDir. One JSON line per run (totals + per-bin breakdown). Read for `shift history`. 
function historyPath(dir) { return path.join(dir, 'history.jsonl'); } diff --git a/shift/test/brief.test.cjs b/shift/test/brief.test.cjs index 4f18435..d86f6e4 100644 --- a/shift/test/brief.test.cjs +++ b/shift/test/brief.test.cjs @@ -30,8 +30,11 @@ test('always explains decision logging, the Needs-you convention, and blocker fl assert.match(out, /blocked\.jsonl/); }); -test('guards .shift/ bookkeeping: append-only, never edit state.json (so the hook owns per-bin stats)', () => { +test('guards .shift/ bookkeeping: append-only, never edit config.json (so the hook owns the run record)', () => { const out = renderBrief(bin, { git: {} }); - assert.match(out, /Never edit.*state\.json/i); + // state.json now lives out-of-repo (engineDir, store.cjs) and is beyond the agent's reach, + // so the prompt no longer names it; it guards the repo-side surface the agent CAN touch. + assert.match(out, /Never edit.*config\.json/i); + assert.doesNotMatch(out, /state\.json/); assert.match(out, /append/i); }); diff --git a/shift/test/cli.test.cjs b/shift/test/cli.test.cjs index d1a10d5..35fe54f 100644 --- a/shift/test/cli.test.cjs +++ b/shift/test/cli.test.cjs @@ -52,3 +52,19 @@ test('stop creates the kill switch', () => { run(cwd, ['stop']); assert.ok(fs.existsSync(path.join(cwd, '.shift', 'STOP'))); }); + +test('a second `shift start` scrubs stale control/blocker signals from the prior run', () => { + const cwd = repoWithQueue(); + run(cwd, ['start']); + const dir = path.join(cwd, '.shift'); + // Simulate residue from a prior run: a stale skip, pause, blocker, kill switch, summary. 
+ fs.writeFileSync(path.join(dir, 'STOP'), ''); + fs.writeFileSync(path.join(dir, 'PAUSE'), ''); + fs.writeFileSync(path.join(dir, 'SKIP'), 'queue/01.md'); + fs.writeFileSync(path.join(dir, 'blocked.jsonl'), JSON.stringify({ id: 'queue/01.md', note: 'stale' }) + '\n'); + fs.writeFileSync(path.join(dir, 'summary.md'), '# stale\n'); + run(cwd, ['start']); + for (const f of ['STOP', 'PAUSE', 'SKIP', 'blocked.jsonl', 'summary.md']) { + assert.ok(!fs.existsSync(path.join(dir, f)), `${f} must not survive a fresh start (would corrupt the new run)`); + } +}); From 7451d7991714bb2424c156d00a50c93ec52d1477 Mon Sep 17 00:00:00 2001 From: Allen Manzano-Wight <6640236+AllenBW@users.noreply.github.com> Date: Tue, 16 Jun 2026 14:29:03 -0400 Subject: [PATCH 11/12] ci: run both modules' tests on push + PR (GitHub Actions) --- .github/workflows/test.yml | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) create mode 100644 .github/workflows/test.yml diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml new file mode 100644 index 0000000..92dfa12 --- /dev/null +++ b/.github/workflows/test.yml @@ -0,0 +1,23 @@ +name: tests + +on: + push: + branches: [main] + pull_request: + +jobs: + test: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-node@v4 + with: + node-version: '20' + # Both modules are zero-dependency (Node built-in test runner), so there's + # nothing to install — just run each module's test script. 
+ - name: shift tests + run: npm test + working-directory: shift + - name: code-status-bar tests + run: npm test + working-directory: code-status-bar From 2521ff6104c02be68823b91309f594d1d433a795 Mon Sep 17 00:00:00 2001 From: Allen Manzano-Wight <6640236+AllenBW@users.noreply.github.com> Date: Tue, 16 Jun 2026 16:49:29 -0400 Subject: [PATCH 12/12] shift: fill test-coverage gaps before merge (+18 tests, 118 total) Adversarial coverage audit found the CLI surface had zero integration coverage and several agent-proof contracts were untested. Added: - CLI (cli.test): status (plain/PAUSED/no-run), status --line (the finalize-suppression gate + color), history drill-down + branch-suffix + no-match, unknown-subcommand usage/exit, config shallow-merge, history-preserved-across-restart. - Agent-proof contracts (hook.test): a planted repo-side .shift/state.json is ignored; config falls back to the repo copy when the engine snapshot is gone; per-bin tokens recover from the transcript window when state.bins was clobbered. - watch-model: transcript-derived per-bin/run tokens, the current-bin open window (live runtime/tokens), finalized read from .shift/summary.md while state is out-of-repo. - store.test (new): engineDir key = sha256(realpath) basename, idempotent, sibling-collision- resistant, SHIFT_STATE_DIR/XDG base precedence. - brief: per-git-flag forbid-guard combinations. - Extracted moveSelection/clampSelection from cmdWatch into watch-model (pure, unit-tested). 118 shift tests, all green. 
--- shift/bin/shift | 8 +-- shift/lib/watch-model.cjs | 16 +++++- shift/test/brief.test.cjs | 10 ++++ shift/test/cli.test.cjs | 89 +++++++++++++++++++++++++++++++++ shift/test/hook.test.cjs | 45 +++++++++++++++++ shift/test/store.test.cjs | 53 ++++++++++++++++++++ shift/test/watch-model.test.cjs | 73 ++++++++++++++++++++++++++- 7 files changed, 288 insertions(+), 6 deletions(-) create mode 100644 shift/test/store.test.cjs diff --git a/shift/bin/shift b/shift/bin/shift index e49c601..40d8b83 100755 --- a/shift/bin/shift +++ b/shift/bin/shift @@ -109,7 +109,7 @@ function cmdStatus(args) { // (a status bar) can't take input, so this is the interactive control surface. function cmdWatch() { const dir = path.join(process.cwd(), '.shift'); - const { buildModel, renderFrame, renderDetail } = require('../lib/watch-model.cjs'); + const { buildModel, renderFrame, renderDetail, moveSelection, clampSelection } = require('../lib/watch-model.cjs'); const { setPause, isPaused, requestSkip, requestStop } = require('../lib/control.cjs'); const out = process.stdout; const interactive = !!(process.stdin.isTTY && out.isTTY); @@ -122,7 +122,7 @@ function cmdWatch() { model = buildModel({ dir, now: Date.now() }); const n = (model.bins || []).length; if (selected < 0 && n) selected = Math.max(0, model.bins.findIndex(b => b.current)); - if (selected >= n) selected = n - 1; // bins can change between draws — clamp + else selected = clampSelection(selected, n); // bins can change between draws — keep it valid const frame = (mode === 'detail' && selected >= 0) ? renderDetail(model, selected, { width: out.columns || 80, color: true }) : renderFrame(model, { width: out.columns || 80, color: true, selectedIndex: selected }); @@ -153,9 +153,9 @@ function cmdWatch() { if (key === 'x' || key === '\x03') { // x / Ctrl-C cleanup(); out.write('\n[shift] watcher closed — the run keeps going.\n'); process.exit(0); } else if (key === '\x1b[A') { // ↑ select up - if (n) selected = (selected <= 0 ? 
n : selected) - 1; draw(); + selected = moveSelection(selected, n, 'up'); draw(); } else if (key === '\x1b[B') { // ↓ select down - if (n) selected = (selected + 1) % n; draw(); + selected = moveSelection(selected, n, 'down'); draw(); } else if (key === '\r' || key === '\n') { // ⏎ open detail if (selected >= 0) mode = 'detail'; draw(); } else if (key === '\x1b') { // esc back to list (lone Esc, not an arrow sequence) diff --git a/shift/lib/watch-model.cjs b/shift/lib/watch-model.cjs index aee1a30..a6b3fae 100644 --- a/shift/lib/watch-model.cjs +++ b/shift/lib/watch-model.cjs @@ -235,4 +235,18 @@ function renderLine(model, opts = {}) { return `${flag} shift ${c(ANSI.bold, model.counts.done + '/' + model.counts.total)} ${c(ANSI.dim, model.elapsedMin + 'm')} ${c(ANSI.dim, '↑' + fmtTok(model.outputTokens))}${needs}`; } -module.exports = { buildModel, renderFrame, renderDetail, renderHistory, renderLine, fmtDur, fmtTok }; +// Pure selection arithmetic for the `shift watch` TUI (n = bin count). Extracted so the +// off-by-one-prone wrap/clamp cases are unit-testable without a TTY. +function moveSelection(sel, n, dir) { + if (n <= 0) return -1; + if (dir === 'up') return (sel <= 0 ? n : sel) - 1; // wrap to the last bin + if (dir === 'down') return (sel + 1) % n; // wrap to the first + return sel; +} +function clampSelection(sel, n) { // keep a selection valid when the bin list grows/shrinks + if (n <= 0) return -1; + if (sel < 0) return 0; + return sel >= n ? 
n - 1 : sel; +} + +module.exports = { buildModel, renderFrame, renderDetail, renderHistory, renderLine, fmtDur, fmtTok, moveSelection, clampSelection }; diff --git a/shift/test/brief.test.cjs b/shift/test/brief.test.cjs index d86f6e4..47ef892 100644 --- a/shift/test/brief.test.cjs +++ b/shift/test/brief.test.cjs @@ -30,6 +30,16 @@ test('always explains decision logging, the Needs-you convention, and blocker fl assert.match(out, /blocked\.jsonl/); }); +test('the forbid-guard reflects each git flag combination independently', () => { + const pushOnly = renderBrief(bin, { git: { allowPush: false, allowOutwardActions: true } }); + assert.match(pushOnly, /Do NOT push to any remote/); + assert.doesNotMatch(pushOnly, /publish, send to external/); + + const outwardOnly = renderBrief(bin, { git: { allowPush: true, allowOutwardActions: false } }); + assert.match(outwardOnly, /Do NOT publish, send to external/); + assert.doesNotMatch(outwardOnly, /push to any remote/); +}); + test('guards .shift/ bookkeeping: append-only, never edit config.json (so the hook owns the run record)', () => { const out = renderBrief(bin, { git: {} }); // state.json now lives out-of-repo (engineDir, store.cjs) and is beyond the agent's reach, diff --git a/shift/test/cli.test.cjs b/shift/test/cli.test.cjs index 35fe54f..04acb79 100644 --- a/shift/test/cli.test.cjs +++ b/shift/test/cli.test.cjs @@ -68,3 +68,92 @@ test('a second `shift start` scrubs stale control/blocker signals from the prior assert.ok(!fs.existsSync(path.join(dir, f)), `${f} must not survive a fresh start (would corrupt the new run)`); } }); + +const { appendRecord } = require('../lib/history.cjs'); + +function runSafe(cwd, args) { // capture output + exit code even on non-zero exit + try { return { out: run(cwd, args), code: 0 }; } + catch (e) { return { out: (e.stdout || '') + (e.stderr || ''), code: e.status }; } +} + +test('status (plain) shows the run + bins lines, a PAUSED suffix, and a no-run message', () => { + const cwd = 
repoWithQueue(); + run(cwd, ['start']); + const out = run(cwd, ['status']); + assert.match(out, /run .* · branch shift\/.* · iter 0/); + assert.match(out, /bins: .*done.*blocked.*skipped.*pending.*\(\dm\)/); + fs.writeFileSync(path.join(cwd, '.shift', 'PAUSE'), ''); + assert.match(run(cwd, ['status']), /· PAUSED/); + const fresh = fs.mkdtempSync(path.join(os.tmpdir(), 'shift-norun-')); + assert.match(run(fresh, ['status']), /No active shift run here/); +}); + +test('status --line prints a line while running and suppresses it once finalized', () => { + const cwd = repoWithQueue(); + run(cwd, ['start']); + const line = run(cwd, ['status', '--line']); + assert.match(line, /⚙ shift/); + assert.match(line, /\x1b\[/, 'default is colored'); + const plain = run(cwd, ['status', '--line', '--no-color']); + assert.match(plain, /⚙ shift \d+\/\d+/); + assert.doesNotMatch(plain, /\x1b\[/, '--no-color strips ANSI'); + fs.writeFileSync(path.join(cwd, '.shift', 'summary.md'), '# done\n'); // finalize + assert.equal(run(cwd, ['status', '--line']).trim(), '', 'status-bar line vanishes once finalized'); +}); + +test('history drills into one run; a branch suffix resolves; unknown -> message', () => { + const cwd = repoWithQueue(); + run(cwd, ['start']); + const edir = engineDir(cwd); + const rec = (runId, branch, perBin) => ({ + runId, branch, startedAt: '2026-06-16T00:00:00Z', endedAt: '2026-06-16T00:10:00Z', + durationMs: 600000, iterations: 2, endReason: 'queue empty', + bins: { total: 2, done: 1, skipped: 1, blocked: 0 }, tokens: { output: 1000, total: 5000 }, perBin + }); + appendRecord(edir, rec('R1', 'shift/alpha', [ + { id: 'queue/01.md', status: 'done', durationMs: 60000, tokensOutput: 500, commit: 'abc1234def' }, + { id: 'queue/02.md', status: 'skipped', durationMs: null, tokensOutput: null, commit: null } + ])); + appendRecord(edir, rec('R2', 'shift/beta', [ + { id: 'queue/01.md', status: 'blocked', durationMs: 1000, tokensOutput: 9, commit: null } + ])); + + const r1 = 
run(cwd, ['history', 'R1']); + assert.match(r1, /run R1 · shift\/alpha/); + assert.match(r1, /✓ queue\/01\.md/); // done glyph + assert.match(r1, /⤫ queue\/02\.md/); // skipped glyph + assert.match(r1, /abc1234/); // commit short sha + assert.match(run(cwd, ['history', 'beta']), /✗ queue\/01\.md/); // branch-suffix → R2's blocked bin + assert.match(run(cwd, ['history', 'does-not-exist']), /No recorded run matching/); +}); + +test('unknown subcommand prints usage and exits non-zero', () => { + const r = runSafe(repoWithQueue(), ['bogus']); + assert.equal(r.code, 1); + assert.match(r.out, /usage: shift /); +}); + +test('start shallow-merges a partial .shift/config.json over the defaults', () => { + const cwd = repoWithQueue(); + fs.mkdirSync(path.join(cwd, '.shift'), { recursive: true }); + fs.writeFileSync(path.join(cwd, '.shift', 'config.json'), + JSON.stringify({ definitionOfDone: 'custom DoD', git: { branch: 'shift/custom' } })); + run(cwd, ['start']); + const repoCfg = JSON.parse(fs.readFileSync(path.join(cwd, '.shift', 'config.json'), 'utf8')); + const snapCfg = JSON.parse(fs.readFileSync(path.join(engineDir(cwd), 'config.json'), 'utf8')); + assert.equal(repoCfg.definitionOfDone, 'custom DoD'); // user override wins + assert.equal(repoCfg.permissionMode, 'acceptEdits'); // unspecified default survives + assert.equal(repoCfg.git.branch, 'shift/custom'); // shallow merge: user git object replaces default git + assert.deepEqual(repoCfg, snapCfg); // repo copy + engine snapshot are identical +}); + +test('a second `shift start` preserves the work record while resetting run state', () => { + const cwd = repoWithQueue(); + run(cwd, ['start']); + const edir = engineDir(cwd); + fs.appendFileSync(path.join(edir, 'history.jsonl'), JSON.stringify({ runId: 'PRIOR', bins: {} }) + '\n'); + fs.writeFileSync(path.join(edir, 'usage.json'), '{"weeklyPercent":50}'); + run(cwd, ['start']); + assert.match(fs.readFileSync(path.join(edir, 'history.jsonl'), 'utf8'), /PRIOR/, 'history 
is append-only across runs'); + assert.ok(!fs.existsSync(path.join(edir, 'usage.json')), 'stale usage is cleared on a fresh start'); +}); diff --git a/shift/test/hook.test.cjs b/shift/test/hook.test.cjs index c6c7dee..5b613e6 100644 --- a/shift/test/hook.test.cjs +++ b/shift/test/hook.test.cjs @@ -198,6 +198,51 @@ test('history is append-only across runs and not duplicated by a stray extra sto assert.equal(hist.length, 1, 'no duplicate history record from a repeated finalize'); }); +test('a planted repo-side .shift/state.json is ignored — the engine drives from out-of-repo state', () => { + const { cwd, dir } = setupRun(); + // a confused/hostile agent writes a repo-side state.json claiming everything is done + fs.writeFileSync(path.join(dir, 'state.json'), JSON.stringify({ + runId: 'r', startedAt: new Date().toISOString(), iterations: 9, branch: 'shift/x', + currentBinId: null, bins: [{ id: 'queue/01.md', status: 'done' }, { id: 'queue/02.md', status: 'done' }] + })); + const r = runHook(cwd, { stop_hook_active: false }); + assert.equal(r.decision, 'block'); // still blocks bin 1 from the real (engine-dir) state + assert.match(r.reason, /bin one/); +}); + +test('config falls back to the repo .shift/config.json when the engine snapshot is absent', () => { + const { cwd, dir, edir } = setupRun(); + fs.unlinkSync(path.join(edir, 'config.json')); // no engine snapshot → must fall back to repo copy + fs.writeFileSync(path.join(dir, 'config.json'), JSON.stringify({ + sources: [{ path: 'queue', kind: 'briefs' }], bounds: { maxHours: 24, maxIterations: 10 }, + definitionOfDone: 'done', git: {} + })); + const r = runHook(cwd, { stop_hook_active: false }); + assert.equal(r.decision, 'block'); + assert.match(r.reason, /bin one/); +}); + +test('history per-bin tokens fall back to the transcript window when state.bins was clobbered', () => { + const { cwd, dir, edir } = setupRun(); + const tpath = path.join(dir, 'transcript.jsonl'); + const asst = (ts, out) => JSON.stringify({ 
type: 'assistant', timestamp: ts, message: { usage: { output_tokens: out, input_tokens: 1 } } }); + + runHook(cwd, { stop_hook_active: false, transcript_path: tpath }); // start bin 1 + const started = readState(edir).bins.find(b => b.id === 'queue/01.md').startedAt; + fs.writeFileSync(tpath, asst(started, 700) + '\n'); + runHook(cwd, { stop_hook_active: true, transcript_path: tpath }); // finish bin 1 (tokens=700), start bin 2 + + // simulate the agent clobbering state: strip every bin's recorded tokens + const s = readState(edir); + s.bins = s.bins.map(({ tokens, ...rest }) => rest); + fs.writeFileSync(path.join(edir, 'state.json'), JSON.stringify(s)); + + runHook(cwd, { stop_hook_active: true, transcript_path: tpath }); // finish bin 2, drain -> finalize + const hist = fs.readFileSync(path.join(edir, 'history.jsonl'), 'utf8').trim().split('\n').map(JSON.parse); + const b1 = hist[0].perBin.find(p => p.id === 'queue/01.md'); + assert.equal(b1.tokensOutput, 700, 'recovered from the timeline window + transcript, not from state.bins'); +}); + // ---- v2: usage cap + cache ---- test('usage cap from the hook payload ends the run and caches usage', () => { diff --git a/shift/test/store.test.cjs b/shift/test/store.test.cjs new file mode 100644 index 0000000..557fe4c --- /dev/null +++ b/shift/test/store.test.cjs @@ -0,0 +1,53 @@ +'use strict'; +const { test } = require('node:test'); +const assert = require('node:assert'); +const fs = require('node:fs'); +const os = require('node:os'); +const path = require('node:path'); +const crypto = require('node:crypto'); + +const BASE = fs.mkdtempSync(path.join(os.tmpdir(), 'shift-store-')); +process.env.SHIFT_STATE_DIR = BASE; +const { engineDir } = require('../lib/store.cjs'); + +function repo() { return fs.mkdtempSync(path.join(os.tmpdir(), 'shift-storerepo-')); } + +test('engineDir basename is the 16-hex sha256 of the realpath, under the state base', () => { + const c = repo(); + const d = engineDir(c); + 
assert.equal(path.dirname(d), BASE); + const expected = crypto.createHash('sha256').update(fs.realpathSync(c)).digest('hex').slice(0, 16); + assert.equal(path.basename(d), expected); + assert.match(path.basename(d), /^[0-9a-f]{16}$/); +}); + +test('engineDir is idempotent and lives outside the repo', () => { + const c = repo(); + assert.equal(engineDir(c), engineDir(c)); + assert.ok(!engineDir(c).startsWith(path.resolve(c)), 'not inside the working repo'); +}); + +test('sibling repos sharing a basename get distinct engine dirs (full-path hash, no prefix collision)', () => { + const parentA = fs.mkdtempSync(path.join(os.tmpdir(), 'shift-collide-aaaa-')); + const parentB = fs.mkdtempSync(path.join(os.tmpdir(), 'shift-collide-bbbb-')); + fs.mkdirSync(path.join(parentA, 'repo')); fs.mkdirSync(path.join(parentB, 'repo')); + assert.notEqual(engineDir(path.join(parentA, 'repo')), engineDir(path.join(parentB, 'repo'))); +}); + +test('base resolution: SHIFT_STATE_DIR wins; else XDG_STATE_HOME/shift; (homedir/.local/state/shift is the documented default)', () => { + const c = repo(); + // SHIFT_STATE_DIR (set above) takes precedence + assert.ok(engineDir(c).startsWith(BASE)); + // when SHIFT_STATE_DIR is unset, XDG_STATE_HOME is used + const savedShift = process.env.SHIFT_STATE_DIR; + const savedXdg = process.env.XDG_STATE_HOME; + const xdg = fs.mkdtempSync(path.join(os.tmpdir(), 'shift-xdg-')); + try { + delete process.env.SHIFT_STATE_DIR; + process.env.XDG_STATE_HOME = xdg; + assert.ok(engineDir(c).startsWith(path.join(xdg, 'shift') + path.sep), 'XDG_STATE_HOME/shift base'); + } finally { + process.env.SHIFT_STATE_DIR = savedShift; + if (savedXdg === undefined) delete process.env.XDG_STATE_HOME; else process.env.XDG_STATE_HOME = savedXdg; + } +}); diff --git a/shift/test/watch-model.test.cjs b/shift/test/watch-model.test.cjs index 7409e7f..2344a64 100644 --- a/shift/test/watch-model.test.cjs +++ b/shift/test/watch-model.test.cjs @@ -6,9 +6,20 @@ const os = 
require('node:os'); const path = require('node:path'); // Out-of-repo timeline base → tmp (fixtures have no timeline → per-bin falls back to state.bins). process.env.SHIFT_STATE_DIR = fs.mkdtempSync(path.join(os.tmpdir(), 'shift-wmbase-')); -const { buildModel, renderFrame, renderDetail, renderHistory } = require('../lib/watch-model.cjs'); +const { buildModel, renderFrame, renderDetail, renderHistory, renderLine, moveSelection, clampSelection } = require('../lib/watch-model.cjs'); const { aggregate } = require('../lib/history.cjs'); const { engineDir } = require('../lib/store.cjs'); +const { appendEvent } = require('../lib/timeline.cjs'); + +// A bare run: state in the engine dir, .shift/ for log; caller adds timeline/transcript. +function bareRun(state) { + const cwd = fs.mkdtempSync(path.join(os.tmpdir(), 'shift-wm2-')); + const dir = path.join(cwd, '.shift'); + fs.mkdirSync(dir, { recursive: true }); + fs.writeFileSync(path.join(engineDir(cwd), 'state.json'), JSON.stringify(state)); + fs.writeFileSync(path.join(dir, 'log.md'), '# log\n'); + return { cwd, dir }; +} function fixture({ paused = false, currentBinId = 'queue/03-build.md' } = {}) { const cwd = fs.mkdtempSync(path.join(os.tmpdir(), 'shift-watch-')); @@ -120,3 +131,63 @@ test('renderHistory shows per-run rows and a totals footer', () => { test('renderHistory with no records is a friendly message', () => { assert.match(renderHistory([], aggregate([]), { color: false }), /No shift runs recorded/i); }); + +test('buildModel derives per-bin + run tokens from the transcript when state has none', () => { + const { cwd, dir } = bareRun({ + runId: 'r', startedAt: '2026-06-15T23:59:00.000Z', iterations: 1, branch: 'shift/x', + currentBinId: null, transcriptPath: path.join(/* set below */ os.tmpdir(), 'x'), + bins: [{ id: 'queue/01.md', status: 'done' }] // no tokens / no durationMs in state + }); + const tpath = path.join(dir, 'transcript.jsonl'); + fs.writeFileSync(tpath, JSON.stringify({ type: 'assistant', 
timestamp: '2026-06-16T00:01:00.000Z', message: { usage: { output_tokens: 4200, input_tokens: 10, cache_read_input_tokens: 5 } } }) + '\n'); + // point state at the real transcript + lay down the timeline boundaries (keyed by cwd) + const sp = path.join(engineDir(cwd), 'state.json'); + const s = JSON.parse(fs.readFileSync(sp, 'utf8')); s.transcriptPath = tpath; fs.writeFileSync(sp, JSON.stringify(s)); + appendEvent(cwd, { t: '2026-06-16T00:00:00.000Z', event: 'start', id: 'queue/01.md' }); + appendEvent(cwd, { t: '2026-06-16T00:02:00.000Z', event: 'finish', id: 'queue/01.md' }); + + const m = buildModel({ dir, now: Date.parse('2026-06-16T00:05:00.000Z') }); + const b = m.bins.find(x => x.id === 'queue/01.md'); + assert.equal(b.tokensOutput, 4200, 'per-bin tokens from the transcript window [start, finish)'); + assert.equal(b.durationMs, 120000, 'runtime from the timeline window (2m)'); + assert.equal(m.outputTokens, 4200, 'run output tokens from the transcript over [run start, now)'); +}); + +test('buildModel gives the CURRENT bin an open window (start..now) for live runtime + tokens', () => { + const { cwd, dir } = bareRun({ + runId: 'r', startedAt: '2026-06-16T00:00:00.000Z', iterations: 1, branch: 'shift/x', + currentBinId: 'queue/01.md', bins: [{ id: 'queue/01.md', status: 'pending' }] + }); + const tpath = path.join(dir, 'transcript.jsonl'); + fs.writeFileSync(tpath, JSON.stringify({ type: 'assistant', timestamp: '2026-06-16T00:03:00.000Z', message: { usage: { output_tokens: 900, input_tokens: 1 } } }) + '\n'); + const sp = path.join(engineDir(cwd), 'state.json'); + const s = JSON.parse(fs.readFileSync(sp, 'utf8')); s.transcriptPath = tpath; fs.writeFileSync(sp, JSON.stringify(s)); + appendEvent(cwd, { t: '2026-06-16T00:00:00.000Z', event: 'start', id: 'queue/01.md' }); // started, not finished + + const m = buildModel({ dir, now: Date.parse('2026-06-16T00:05:00.000Z') }); + const b = m.bins.find(x => x.id === 'queue/01.md'); + assert.equal(b.current, true); + 
assert.equal(b.durationMs, 300000, 'open window start..now = 5m'); + assert.equal(b.tokensOutput, 900, 'tokens summed up to now (open window)'); +}); + +test('buildModel reads finalized from .shift/summary.md while state lives out-of-repo', () => { + const dir = fixture(); + fs.writeFileSync(path.join(dir, 'summary.md'), '# done\n'); + const m = buildModel({ dir, now: Date.now() }); + assert.equal(m.finalized, true); + assert.match(renderFrame(m, { color: false }), /finalized/); + assert.ok(renderLine(m, { color: false }).startsWith('●')); +}); + +test('moveSelection wraps; clampSelection keeps a selection valid as the list changes', () => { + assert.equal(moveSelection(0, 5, 'up'), 4); // wrap to last + assert.equal(moveSelection(4, 5, 'down'), 0); // wrap to first + assert.equal(moveSelection(2, 5, 'up'), 1); + assert.equal(moveSelection(2, 5, 'down'), 3); + assert.equal(moveSelection(0, 0, 'down'), -1); // no bins + assert.equal(clampSelection(4, 3), 2); // list shrank + assert.equal(clampSelection(-1, 3), 0); + assert.equal(clampSelection(1, 3), 1); + assert.equal(clampSelection(2, 0), -1); // empty +});