From 1ba0e0c6cdec35b3d29a88c454d9e5a8e7f7c374 Mon Sep 17 00:00:00 2001 From: zknpr Date: Fri, 22 May 2026 21:45:26 +0200 Subject: [PATCH 01/18] docs: clarify Agent harness's agentId suffix is a resume token, not a running signal MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Every Agent(codex:codex-rescue) return is suffixed by the Claude Code harness with `agentId: (use SendMessage with to: '' to continue this agent)`. This is a resume token for the same agent thread, not a "still running" or "in background" status indicator. Callers (including Claude itself) have repeatedly misread it as the latter and then incorrectly told users that Codex is still running in the background, when the work has actually completed. Adds one explicit caller-side rule in two places: - plugins/codex/skills/codex-result-handling/SKILL.md — the internal guidance the caller consults when presenting Codex output. - plugins/codex/commands/rescue.md Operating rules — the most common user-facing entry point (`/codex:rescue`). No code changes; both files are caller-side documentation. Co-Authored-By: Claude Opus 4.7 (1M context) --- plugins/codex/commands/rescue.md | 1 + plugins/codex/skills/codex-result-handling/SKILL.md | 1 + 2 files changed, 2 insertions(+) diff --git a/plugins/codex/commands/rescue.md b/plugins/codex/commands/rescue.md index 56de9555..765d79e0 100644 --- a/plugins/codex/commands/rescue.md +++ b/plugins/codex/commands/rescue.md @@ -41,6 +41,7 @@ Operating rules: - The subagent is a thin forwarder only. It should use one `Bash` call to invoke `node "${CLAUDE_PLUGIN_ROOT}/scripts/codex-companion.mjs" task ...` and return that command's stdout as-is. - Return the Codex companion stdout verbatim to the user. - Do not paraphrase, summarize, rewrite, or add commentary before or after it. +- The Claude Code harness appends an `agentId: (use SendMessage with to: '' to continue this agent)` line to every `Agent(codex:codex-rescue)` return. This is a resume token for continuing the same agent thread, NOT a "still running" or "in background" status signal. By the time you see it, the subagent has finished. Do not paraphrase it as "Codex is running in background" or "task dispatched async" — the stdout above the suffix is the actual completed output, which you return verbatim per the rule above. - Do not ask the subagent to inspect files, monitor progress, poll `/codex:status`, fetch `/codex:result`, call `/codex:cancel`, summarize output, or do follow-up work of its own. - Leave `--effort` unset unless the user explicitly asks for a specific reasoning effort. - Leave the model unset unless the user explicitly asks for one. If they ask for `spark`, map it to `gpt-5.3-codex-spark`. diff --git a/plugins/codex/skills/codex-result-handling/SKILL.md b/plugins/codex/skills/codex-result-handling/SKILL.md index e1896548..a8f5eba7 100644 --- a/plugins/codex/skills/codex-result-handling/SKILL.md +++ b/plugins/codex/skills/codex-result-handling/SKILL.md @@ -19,3 +19,4 @@ When the helper returns Codex output: - CRITICAL: After presenting review findings, STOP. Do not make any code changes. Do not fix any issues. You MUST explicitly ask the user which issues, if any, they want fixed before touching a single file. Auto-applying fixes from a review is strictly forbidden, even if the fix is obvious. - If the helper reports malformed output or a failed Codex run, include the most actionable stderr lines and stop there instead of guessing. - If the helper reports that setup or authentication is required, direct the user to `/codex:setup` and do not improvise alternate auth flows. +- The Claude Code harness appends an `agentId: (use SendMessage with to: '' to continue this agent)` line to every `Agent(codex:codex-rescue)` return. This is a resume token for continuing the same agent thread, NOT a "still running" or "in background" status signal. By the time you see it, the agent has finished and the work is done. Do not paraphrase it as "Codex is running in background" or "task dispatched async" — read the stdout above the suffix for the real completion state, and verify with `git status --short` if the agent had `--write`. From d651c502a3f7e761dae9e5ca768cdabba4ed94da Mon Sep 17 00:00:00 2001 From: zknpr Date: Mon, 25 May 2026 00:06:51 +0200 Subject: [PATCH 02/18] fix: give codex-rescue a deterministic completion signal so Claude stops waiting on finished runs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A synchronous codex-rescue return carried no machine-checkable completion signal — only raw Codex text plus the harness resume-token suffix — so the main thread inferred status from prose and concluded "still running". For long runs the Claude Code Bash tool auto-backgrounds the companion call past its ~600s cap and returns "Command running in background... you will be notified", which the subagent faithfully relays; the detached run then completes but is orphaned. This is model-independent (reproduced on Opus), so it is not fixable by subagent instructions or a model bump. Fix at the structural layer: - Companion stamps a deterministic sentinel: [[codex-task status=complete]] (foreground) / [[codex-task status=dispatched id=]] (background); the background dispatch message no longer falsely promises a notification. - New PostToolUse hook (codex-rescue-completion-hook.mjs) injects an authoritative line into the main thread on every codex-rescue return: complete -> done/verify-on-disk; Bash auto-background -> still finishing detached, don't wait; dispatched -> poll /codex:status. - Reliable --background routing: explicit --background is honored, long open-ended work defaults to task --background, foreground only for short bounded runs. - Docs (rescue.md, codex-cli-runtime, codex-result-handling) tell the main thread to trust the sentinel/hook over prose. Tests: 92/92, incl. new task-status-token + hook coverage and updated command/ runtime assertions. Validated a real --background lifecycle end to end. Co-Authored-By: Claude Opus 4.7 (1M context) --- plugins/codex/agents/codex-rescue.md | 7 +- plugins/codex/commands/rescue.md | 11 +- plugins/codex/hooks/hooks.json | 14 +- plugins/codex/scripts/codex-companion.mjs | 8 +- .../scripts/codex-rescue-completion-hook.mjs | 155 +++++++++++++++++ plugins/codex/scripts/lib/render.mjs | 10 +- .../codex/scripts/lib/task-status-token.mjs | 45 +++++ .../codex/skills/codex-cli-runtime/SKILL.md | 5 +- .../skills/codex-result-handling/SKILL.md | 2 + tests/codex-rescue-hook.test.mjs | 157 ++++++++++++++++++ tests/commands.test.mjs | 37 ++++- tests/runtime.test.mjs | 34 +++- 12 files changed, 461 insertions(+), 24 deletions(-) create mode 100644 plugins/codex/scripts/codex-rescue-completion-hook.mjs create mode 100644 plugins/codex/scripts/lib/task-status-token.mjs create mode 100644 tests/codex-rescue-hook.test.mjs diff --git a/plugins/codex/agents/codex-rescue.md b/plugins/codex/agents/codex-rescue.md index 7009ec86..a856e66b 100644 --- a/plugins/codex/agents/codex-rescue.md +++ b/plugins/codex/agents/codex-rescue.md @@ -20,8 +20,11 @@ Selection guidance: Forwarding rules: - Use exactly one `Bash` call to invoke `node "${CLAUDE_PLUGIN_ROOT}/scripts/codex-companion.mjs" task ...`. -- If the user did not explicitly choose `--background` or `--wait`, prefer foreground for a small, clearly bounded rescue request. -- If the user did not explicitly choose `--background` or `--wait` and the task looks complicated, open-ended, multi-step, or likely to keep Codex running for a long time, prefer background execution. +- Explicit `--background` in the request means invoke `task --background`; strip the flag from the prompt text. +- Explicit `--wait` in the request means invoke foreground `task`; strip the flag from the prompt text. +- No explicit choice and the task is short and clearly bounded (quick fix, single-file edit, focused diagnosis) means foreground. +- No explicit choice and the task is long, open-ended, multi-step, or likely to exceed the foreground cap means `task --background`. Prefer background for substantial work. +- Foreground task runs are capped by the Bash tool at roughly 600 seconds; past that, they can be auto-backgrounded and orphaned. - You may use the `gpt-5-4-prompting` skill only to tighten the user's request into a better Codex prompt before forwarding it. - Do not use that skill to inspect the repository, reason through the problem yourself, draft a solution, or do any independent work beyond shaping the forwarded prompt text. - Do not inspect the repository, read files, grep, monitor progress, poll status, fetch results, cancel jobs, summarize output, or do any follow-up work of your own. diff --git a/plugins/codex/commands/rescue.md b/plugins/codex/commands/rescue.md index 765d79e0..23f0822f 100644 --- a/plugins/codex/commands/rescue.md +++ b/plugins/codex/commands/rescue.md @@ -13,10 +13,12 @@ $ARGUMENTS Execution mode: -- If the request includes `--background`, run the `codex:codex-rescue` subagent in the background. -- If the request includes `--wait`, run the `codex:codex-rescue` subagent in the foreground. -- If neither flag is present, default to foreground. -- `--background` and `--wait` are execution flags for Claude Code. Do not forward them to `task`, and do not treat them as part of the natural-language task text. +- If the request includes `--background`, pass `--background` to the `codex:codex-rescue` subagent so it invokes companion `task --background`. +- If the request includes `--wait`, pass `--wait` to the `codex:codex-rescue` subagent so it invokes foreground `task`. +- If neither flag is present, foreground stays for short, bounded rescues that return the result directly. +- For long, substantial, or open-ended rescues, pass `--background`; the companion returns `[[codex-task status=dispatched id=]]`, then poll `/codex:status ` and fetch `/codex:result ` when done. +- A foreground rescue is capped by the Bash tool at roughly 600 seconds and can be auto-backgrounded past that cap, which orphans the detached Codex result. +- `--background` and `--wait` are execution flags for Claude Code and the rescue subagent. Preserve the execution choice, and strip them only from the natural-language task text before Codex sees the prompt. - `--model` and `--effort` are runtime-selection flags. Preserve them for the forwarded `task` call, but do not treat them as part of the natural-language task text. - If the request includes `--resume`, do not ask whether to continue. The user already chose. - If the request includes `--fresh`, do not ask whether to continue. The user already chose. @@ -41,6 +43,7 @@ Operating rules: - The subagent is a thin forwarder only. It should use one `Bash` call to invoke `node "${CLAUDE_PLUGIN_ROOT}/scripts/codex-companion.mjs" task ...` and return that command's stdout as-is. - Return the Codex companion stdout verbatim to the user. - Do not paraphrase, summarize, rewrite, or add commentary before or after it. +- The companion stdout carries a deterministic sentinel on a standalone line: `[[codex-task status=complete]]` means a foreground task has completed and exited; `[[codex-task status=dispatched id=]]` means a companion background job was queued. Trust this sentinel and the PostToolUse hook over any prose. A dispatched sentinel is not an automatic-notification promise; tell the user only to poll `/codex:status ` if you are not returning stdout verbatim. - The Claude Code harness appends an `agentId: (use SendMessage with to: '' to continue this agent)` line to every `Agent(codex:codex-rescue)` return. This is a resume token for continuing the same agent thread, NOT a "still running" or "in background" status signal. By the time you see it, the subagent has finished. Do not paraphrase it as "Codex is running in background" or "task dispatched async" — the stdout above the suffix is the actual completed output, which you return verbatim per the rule above. - Do not ask the subagent to inspect files, monitor progress, poll `/codex:status`, fetch `/codex:result`, call `/codex:cancel`, summarize output, or do follow-up work of its own. - Leave `--effort` unset unless the user explicitly asks for a specific reasoning effort. diff --git a/plugins/codex/hooks/hooks.json b/plugins/codex/hooks/hooks.json index 19e33b81..46c9412d 100644 --- a/plugins/codex/hooks/hooks.json +++ b/plugins/codex/hooks/hooks.json @@ -1,5 +1,5 @@ { - "description": "Optional stop-time review gate for Codex Companion.", + "description": "Codex Companion lifecycle hooks, stop-time review gate, and codex-rescue completion context.", "hooks": { "SessionStart": [ { @@ -33,6 +33,18 @@ } ] } + ], + "PostToolUse": [ + { + "matcher": "Agent", + "hooks": [ + { + "type": "command", + "command": "node \"${CLAUDE_PLUGIN_ROOT}/scripts/codex-rescue-completion-hook.mjs\"", + "timeout": 5 + } + ] + } ] } } diff --git a/plugins/codex/scripts/codex-companion.mjs b/plugins/codex/scripts/codex-companion.mjs index 35222fd5..47d27274 100644 --- a/plugins/codex/scripts/codex-companion.mjs +++ b/plugins/codex/scripts/codex-companion.mjs @@ -61,6 +61,7 @@ import { renderStatusReport, renderTaskResult } from "./lib/render.mjs"; +import { buildTaskDispatchedStatusToken } from "./lib/task-status-token.mjs"; const ROOT_DIR = path.resolve(fileURLToPath(new URL("..", import.meta.url))); const REVIEW_SCHEMA = path.join(ROOT_DIR, "schemas", "review-output.schema.json"); @@ -551,7 +552,12 @@ function buildTaskRunMetadata({ prompt, resumeLast = false }) { } function renderQueuedTaskLaunch(payload) { - return `${payload.title} started in the background as ${payload.jobId}. Check /codex:status ${payload.jobId} for progress.\n`; + const statusToken = buildTaskDispatchedStatusToken(payload.jobId); + return [ + statusToken, + `${payload.title} dispatched as background job ${payload.jobId}.`, + `No automatic notification will arrive; poll /codex:status ${payload.jobId}.` + ].join("\n") + "\n"; } function getJobKindLabel(kind, jobClass) { diff --git a/plugins/codex/scripts/codex-rescue-completion-hook.mjs b/plugins/codex/scripts/codex-rescue-completion-hook.mjs new file mode 100644 index 00000000..a3840c82 --- /dev/null +++ b/plugins/codex/scripts/codex-rescue-completion-hook.mjs @@ -0,0 +1,155 @@ +#!/usr/bin/env node + +import fs from "node:fs"; +import process from "node:process"; + +import { parseTaskStatusToken } from "./lib/task-status-token.mjs"; + +const COMPLETE_CONTEXT = + "codex-rescue has COMPLETED and exited. The text above is the final result. Do NOT wait for a notification or poll status. If it ran with --write, verify changed files with git status."; +const BASH_AUTO_BACKGROUND_MARKER = "Command running in background with ID:"; +const BASH_AUTO_BACKGROUND_OUTPUT_PREFIX = "Output is being written to:"; + +function buildDispatchedContext(jobId) { + return `codex-rescue dispatched background job ${jobId}. No automatic notification will arrive; poll /codex:status ${jobId}.`; +} + +function extractBashAutoBackgroundOutputPath(text) { + const prefixIndex = text.indexOf(BASH_AUTO_BACKGROUND_OUTPUT_PREFIX); + if (prefixIndex === -1) { + return null; + } + + const afterPrefix = text.slice(prefixIndex + BASH_AUTO_BACKGROUND_OUTPUT_PREFIX.length).trim(); + const endMarkers = [". You will be notified", "\n", "\r"]; + let endIndex = afterPrefix.length; + for (const marker of endMarkers) { + const markerIndex = afterPrefix.indexOf(marker); + if (markerIndex !== -1 && markerIndex < endIndex) { + endIndex = markerIndex; + } + } + + const outputPath = afterPrefix.slice(0, endIndex).trim(); + return outputPath || null; +} + +function buildBashAutoBackgroundContext(outputPath) { + const followUp = outputPath + ? `re-check \`git status\` (if it ran with --write) and/or read the streamed output at ${outputPath}` + : "re-check `git status` (if it ran with --write)"; + return `codex-rescue's Codex run exceeded the foreground time cap and was auto-backgrounded by the Bash tool; it is STILL RUNNING detached. No completion notification will arrive. Do not wait passively — ${followUp} until the run lands, then act on the result.`; +} + +function readHookInput() { + const raw = fs.readFileSync(0, "utf8").trim(); + if (!raw) { + return {}; + } + return JSON.parse(raw); +} + +function emitAdditionalContext(additionalContext) { + process.stdout.write( + `${JSON.stringify({ + hookSpecificOutput: { + hookEventName: "PostToolUse", + additionalContext + } + })}\n` + ); +} + +function isCodexRescueAgentToolUse(input) { + if (input?.hook_event_name !== "PostToolUse" || input?.tool_name !== "Agent") { + return false; + } + + const subagentType = String( + input?.tool_input?.subagent_type ?? input?.tool_input?.subagentType ?? input?.tool_input?.agent_type ?? "" + ).toLowerCase(); + return subagentType.includes("codex-rescue"); +} + +function isSynchronousAgentReturn(toolResponse) { + const status = String(toolResponse?.status ?? "").toLowerCase(); + return status !== "async_launched" && status !== "running"; +} + +function collectText(value) { + if (value == null) { + return ""; + } + if (typeof value === "string") { + return value; + } + if (Array.isArray(value)) { + return value.map((entry) => collectText(entry)).filter(Boolean).join("\n"); + } + if (typeof value !== "object") { + return ""; + } + + const textParts = []; + if (typeof value.text === "string") { + textParts.push(value.text); + } + if (typeof value.content === "string") { + textParts.push(value.content); + } else if (Array.isArray(value.content)) { + textParts.push(collectText(value.content)); + } + if (typeof value.result === "string") { + textParts.push(value.result); + } + if (typeof value.stdout === "string") { + textParts.push(value.stdout); + } + return textParts.filter(Boolean).join("\n"); +} + +function buildCompletionContext(input) { + const toolResponse = input?.tool_response; + if (!isSynchronousAgentReturn(toolResponse)) { + return null; + } + + const responseText = collectText(toolResponse); + const token = parseTaskStatusToken(responseText); + if (token?.status === "dispatched" && token.id) { + return buildDispatchedContext(token.id); + } + if (token?.status === "complete") { + return COMPLETE_CONTEXT; + } + + // Bash auto-backgrounds commands that exceed the 600s foreground cap. That + // synchronous Agent return means the Bash wrapper detached, not that Codex + // completed, so report the still-running state before tokenless completion. + if (responseText.includes(BASH_AUTO_BACKGROUND_MARKER)) { + return buildBashAutoBackgroundContext(extractBashAutoBackgroundOutputPath(responseText)); + } + + // Token absence still means completion here because PostToolUse fires only + // after a synchronous Agent tool call has returned to the parent thread. + return COMPLETE_CONTEXT; +} + +function main() { + const input = readHookInput(); + if (!isCodexRescueAgentToolUse(input)) { + return; + } + + const context = buildCompletionContext(input); + if (context) { + emitAdditionalContext(context); + } +} + +try { + main(); +} catch (error) { + process.stderr.write(`${error instanceof Error ? error.message : String(error)}\n`); + process.exitCode = 1; +} diff --git a/plugins/codex/scripts/lib/render.mjs b/plugins/codex/scripts/lib/render.mjs index 2ec18523..f58459e9 100644 --- a/plugins/codex/scripts/lib/render.mjs +++ b/plugins/codex/scripts/lib/render.mjs @@ -1,3 +1,8 @@ +import { + appendTaskStatusToken, + buildTaskCompleteStatusToken +} from "./task-status-token.mjs"; + function severityRank(severity) { switch (severity) { case "critical": @@ -314,12 +319,13 @@ export function renderNativeReviewResult(result, meta) { export function renderTaskResult(parsedResult, meta) { const rawOutput = typeof parsedResult?.rawOutput === "string" ? parsedResult.rawOutput : ""; + const completeToken = buildTaskCompleteStatusToken(); if (rawOutput) { - return rawOutput.endsWith("\n") ? rawOutput : `${rawOutput}\n`; + return appendTaskStatusToken(rawOutput, completeToken); } const message = String(parsedResult?.failureMessage ?? "").trim() || "Codex did not return a final message."; - return `${message}\n`; + return appendTaskStatusToken(message, completeToken); } export function renderStatusReport(report) { diff --git a/plugins/codex/scripts/lib/task-status-token.mjs b/plugins/codex/scripts/lib/task-status-token.mjs new file mode 100644 index 00000000..d97d275e --- /dev/null +++ b/plugins/codex/scripts/lib/task-status-token.mjs @@ -0,0 +1,45 @@ +const DISPATCHED_JOB_ID_PATTERN = /^[A-Za-z0-9._:-]+$/; +const DISPATCHED_TOKEN_PATTERN = /^\[\[codex-task status=dispatched id=([A-Za-z0-9._:-]+)\]\]$/; + +export const TASK_COMPLETE_STATUS_TOKEN = "[[codex-task status=complete]]"; + +// The companion and PostToolUse hook share this standalone-line sentinel scheme. +// A line is machine-readable only when it exactly matches one of these forms: +// [[codex-task status=complete]] +// [[codex-task status=dispatched id=]] +// The fixed prefix, status key, and closing brackets make the marker greppable +// and keep normal Codex prose from being treated as authoritative state. +export function buildTaskCompleteStatusToken() { + return TASK_COMPLETE_STATUS_TOKEN; +} + +export function buildTaskDispatchedStatusToken(jobId) { + const normalizedJobId = String(jobId ?? "").trim(); + if (!DISPATCHED_JOB_ID_PATTERN.test(normalizedJobId)) { + throw new Error(`Invalid Codex task job id for status token: ${normalizedJobId || "(empty)"}`); + } + return `[[codex-task status=dispatched id=${normalizedJobId}]]`; +} + +export function appendTaskStatusToken(output, token) { + const text = String(output ?? ""); + const normalizedOutput = text === "" || text.endsWith("\n") ? text : `${text}\n`; + return `${normalizedOutput}${token}\n`; +} + +export function parseTaskStatusToken(output) { + let parsed = null; + for (const line of String(output ?? "").split(/\r?\n/)) { + const trimmedLine = line.trim(); + if (trimmedLine === TASK_COMPLETE_STATUS_TOKEN) { + parsed = { status: "complete", id: null }; + continue; + } + + const dispatchedMatch = trimmedLine.match(DISPATCHED_TOKEN_PATTERN); + if (dispatchedMatch) { + parsed = { status: "dispatched", id: dispatchedMatch[1] }; + } + } + return parsed; +} diff --git a/plugins/codex/skills/codex-cli-runtime/SKILL.md b/plugins/codex/skills/codex-cli-runtime/SKILL.md index 0e91bfb5..7e571435 100644 --- a/plugins/codex/skills/codex-cli-runtime/SKILL.md +++ b/plugins/codex/skills/codex-cli-runtime/SKILL.md @@ -25,7 +25,10 @@ Execution rules: Command selection: - Use exactly one `task` invocation per rescue handoff. -- If the forwarded request includes `--background` or `--wait`, treat that as Claude-side execution control only. Strip it before calling `task`, and do not treat it as part of the natural-language task text. +- If the forwarded request includes `--background`, pass that execution mode through by invoking `task --background`; strip the flag token only from the natural-language prompt text. +- If the forwarded request includes `--wait`, invoke foreground `task`; do not pass `--wait` to `task`, and strip the flag token only from the natural-language prompt text. +- Do not drop an explicit execution mode while stripping tokens from prompt text. +- Foreground task runs are capped by the Bash tool at roughly 600 seconds; past that, they can be auto-backgrounded and orphaned, so long work belongs in `task --background`. - If the forwarded request includes `--model`, normalize `spark` to `gpt-5.3-codex-spark` and pass it through to `task`. - If the forwarded request includes `--effort`, pass it through to `task`. - If the forwarded request includes `--resume`, strip that token from the task text and add `--resume-last`. diff --git a/plugins/codex/skills/codex-result-handling/SKILL.md b/plugins/codex/skills/codex-result-handling/SKILL.md index a8f5eba7..117cfb33 100644 --- a/plugins/codex/skills/codex-result-handling/SKILL.md +++ b/plugins/codex/skills/codex-result-handling/SKILL.md @@ -16,6 +16,8 @@ When the helper returns Codex output: - If Codex made edits, say so explicitly and list the touched files when the helper provides them. - For `codex:codex-rescue`, do not turn a failed or incomplete Codex run into a Claude-side implementation attempt. Report the failure and stop. - For `codex:codex-rescue`, if Codex was never successfully invoked, do not generate a substitute answer at all. +- For `codex:codex-rescue`, treat a standalone `[[codex-task status=complete]]` sentinel, or the matching PostToolUse hook context, as authoritative proof that the synchronous subagent has finished and exited. Do not wait for a notification or poll status after that signal. +- For `codex:codex-rescue`, treat a standalone `[[codex-task status=dispatched id=]]` sentinel as the only background-dispatch signal. No automatic notification will arrive; poll `/codex:status `. - CRITICAL: After presenting review findings, STOP. Do not make any code changes. Do not fix any issues. You MUST explicitly ask the user which issues, if any, they want fixed before touching a single file. Auto-applying fixes from a review is strictly forbidden, even if the fix is obvious. - If the helper reports malformed output or a failed Codex run, include the most actionable stderr lines and stop there instead of guessing. - If the helper reports that setup or authentication is required, direct the user to `/codex:setup` and do not improvise alternate auth flows. diff --git a/tests/codex-rescue-hook.test.mjs b/tests/codex-rescue-hook.test.mjs new file mode 100644 index 00000000..a58c9196 --- /dev/null +++ b/tests/codex-rescue-hook.test.mjs @@ -0,0 +1,157 @@ +import path from "node:path"; +import test from "node:test"; +import assert from "node:assert/strict"; +import { fileURLToPath } from "node:url"; + +import { run } from "./helpers.mjs"; + +const ROOT = path.resolve(path.dirname(fileURLToPath(import.meta.url)), ".."); +const HOOK = path.join(ROOT, "plugins", "codex", "scripts", "codex-rescue-completion-hook.mjs"); + +function runHook(input) { + return run("node", [HOOK], { + cwd: ROOT, + input: JSON.stringify(input) + }); +} + +function parseHookOutput(result) { + assert.equal(result.status, 0, result.stderr); + assert.ok(result.stdout.trim(), "expected hook to emit JSON"); + return JSON.parse(result.stdout); +} + +test("codex-rescue hook injects a complete line when the completion token is present", () => { + const result = runHook({ + hook_event_name: "PostToolUse", + tool_name: "Agent", + tool_input: { + subagent_type: "codex:codex-rescue" + }, + tool_response: { + status: "completed", + agentId: "agent-complete", + content: [ + { + type: "text", + text: "Handled the requested task.\n[[codex-task status=complete]]\n" + } + ] + } + }); + + const payload = parseHookOutput(result); + assert.deepEqual(payload, { + hookSpecificOutput: { + hookEventName: "PostToolUse", + additionalContext: + "codex-rescue has COMPLETED and exited. The text above is the final result. Do NOT wait for a notification or poll status. If it ran with --write, verify changed files with git status." + } + }); +}); + +test("codex-rescue hook defaults synchronous tokenless returns to complete", () => { + const result = runHook({ + hook_event_name: "PostToolUse", + tool_name: "Agent", + tool_input: { + subagent_type: "codex:codex-rescue" + }, + tool_response: { + status: "completed", + agentId: "agent-tokenless", + content: [ + { + type: "text", + text: "Handled the requested task.\n" + } + ] + } + }); + + const payload = parseHookOutput(result); + assert.equal( + payload.hookSpecificOutput.additionalContext, + "codex-rescue has COMPLETED and exited. The text above is the final result. Do NOT wait for a notification or poll status. If it ran with --write, verify changed files with git status." + ); +}); + +test("codex-rescue hook reports Bash auto-background returns as still running detached", () => { + const result = runHook({ + hook_event_name: "PostToolUse", + tool_name: "Agent", + tool_input: { + subagent_type: "codex:codex-rescue" + }, + tool_response: { + status: "completed", + agentId: "agent-auto-backgrounded", + content: [ + { + type: "text", + text: + "Command running in background with ID: bash-123. Output is being written to: /tmp/codex-rescue-output.log. You will be notified when it completes. To check interim output, use Read on that file path." + } + ] + } + }); + + const payload = parseHookOutput(result); + assert.equal( + payload.hookSpecificOutput.additionalContext, + "codex-rescue's Codex run exceeded the foreground time cap and was auto-backgrounded by the Bash tool; it is STILL RUNNING detached. No completion notification will arrive. Do not wait passively — re-check `git status` (if it ran with --write) and/or read the streamed output at /tmp/codex-rescue-output.log until the run lands, then act on the result." + ); + assert.doesNotMatch(payload.hookSpecificOutput.additionalContext, /COMPLETED and exited/); +}); + +test("codex-rescue hook injects a dispatched line when the dispatched token is present", () => { + const result = runHook({ + hook_event_name: "PostToolUse", + tool_name: "Agent", + tool_input: { + subagent_type: "codex:codex-rescue" + }, + tool_response: { + status: "completed", + agentId: "agent-dispatched", + content: [ + { + type: "text", + text: "[[codex-task status=dispatched id=task-abc123]]\nCodex Task dispatched as background job task-abc123.\n" + } + ] + } + }); + + const payload = parseHookOutput(result); + assert.deepEqual(payload, { + hookSpecificOutput: { + hookEventName: "PostToolUse", + additionalContext: + "codex-rescue dispatched background job task-abc123. No automatic notification will arrive; poll /codex:status task-abc123." + } + }); +}); + +test("codex-rescue hook ignores other subagent types", () => { + const result = runHook({ + hook_event_name: "PostToolUse", + tool_name: "Agent", + tool_input: { + subagent_type: "superpowers:code-reviewer" + }, + tool_response: { + status: "completed", + agentId: "agent-reviewer", + content: [ + { + type: "text", + text: "[[codex-task status=complete]]\n" + } + ] + } + }); + + assert.equal(result.status, 0, result.stderr); + assert.equal(result.stdout, ""); +}); diff --git a/tests/commands.test.mjs b/tests/commands.test.mjs index 3724ffa4..c56fd887 100644 --- a/tests/commands.test.mjs +++ b/tests/commands.test.mjs @@ -108,9 +108,15 @@ test("rescue command absorbs continue semantics", () => { assert.match(rescue, /AskUserQuestion/); assert.match(rescue, /Continue current Codex thread/); assert.match(rescue, /Start a new Codex thread/); - assert.match(rescue, /run the `codex:codex-rescue` subagent in the background/i); - assert.match(rescue, /default to foreground/i); - assert.match(rescue, /Do not forward them to `task`/i); + assert.match(rescue, /pass `--background` to the `codex:codex-rescue` subagent/i); + assert.match(rescue, /For long, substantial, or open-ended rescues, pass `--background`/i); + assert.match(rescue, /`\/codex:status `/); + assert.match(rescue, /`\/codex:result `/); + assert.match(rescue, /Foreground stays for short, bounded rescues/i); + assert.match(rescue, /foreground rescue is capped by the Bash tool/i); + assert.match(rescue, /auto-backgrounded past that cap/i); + assert.match(rescue, /Preserve the execution choice/i); + assert.match(rescue, /strip them only from the natural-language task text/i); assert.match(rescue, /`--model` and `--effort` are runtime-selection flags/i); assert.match(rescue, /Leave `--effort` unset unless the user explicitly asks for a specific reasoning effort/i); assert.match(rescue, /If they ask for `spark`, map it to `gpt-5\.3-codex-spark`/i); @@ -121,13 +127,19 @@ test("rescue command absorbs continue semantics", () => { assert.match(rescue, /thin forwarder only/i); assert.match(rescue, /Return the Codex companion stdout verbatim to the user/i); assert.match(rescue, /Do not paraphrase, summarize, rewrite, or add commentary before or after it/i); + assert.match(rescue, /\[\[codex-task status=complete\]\]/); + assert.match(rescue, /\[\[codex-task status=dispatched id=\]\]/); + assert.match(rescue, /Trust this sentinel and the PostToolUse hook over any prose/i); assert.match(rescue, /return that command's stdout as-is/i); assert.match(rescue, /Leave `--resume` and `--fresh` in the forwarded request/i); assert.match(agent, /--resume/); assert.match(agent, /--fresh/); assert.match(agent, /thin forwarding wrapper/i); - assert.match(agent, /prefer foreground for a small, clearly bounded rescue request/i); - assert.match(agent, /If the user did not explicitly choose `--background` or `--wait` and the task looks complicated, open-ended, multi-step, or likely to keep Codex running for a long time, prefer background execution/i); + assert.match(agent, /Explicit `--background` in the request means invoke `task --background`/i); + assert.match(agent, /Explicit `--wait` in the request means invoke foreground `task`/i); + assert.match(agent, /No explicit choice and the task is short and clearly bounded .* means foreground/i); + assert.match(agent, /No explicit choice and the task is long, open-ended, multi-step, or likely to exceed the foreground cap means `task --background`/i); + assert.match(agent, /Prefer background for substantial work/i); assert.match(agent, /Use exactly one `Bash` call/i); assert.match(agent, /Do not inspect the repository, read files, grep, monitor progress, poll status, fetch results, cancel jobs, summarize output, or do any follow-up work of your own/i); assert.match(agent, /Do not call `review`, `adversarial-review`, `status`, `result`, or `cancel`/i); @@ -147,8 +159,14 @@ test("rescue command absorbs continue semantics", () => { assert.match(runtimeSkill, /Leave `--effort` unset unless the user explicitly requests a specific effort/i); assert.match(runtimeSkill, /Leave model unset by default/i); assert.match(runtimeSkill, /Map `spark` to `--model gpt-5\.3-codex-spark`/i); - assert.match(runtimeSkill, /If the forwarded request includes `--background` or `--wait`, treat that as Claude-side execution control only/i); - assert.match(runtimeSkill, /Strip it before calling `task`/i); + assert.match(runtimeSkill, /If the forwarded request includes `--background`, pass that execution mode through by invoking `task --background`/i); + assert.match(runtimeSkill, /strip the flag token only from the natural-language prompt text/i); + assert.match(runtimeSkill, /If the forwarded request includes `--wait`, invoke foreground `task`/i); + assert.match(runtimeSkill, /do not pass `--wait` to `task`/i); + assert.match(runtimeSkill, /Do not drop an explicit execution mode while stripping tokens from prompt text/i); + assert.match(runtimeSkill, /Foreground task runs are capped by the Bash tool/i); + assert.match(runtimeSkill, /auto-backgrounded and orphaned/i); + assert.match(runtimeSkill, /long work belongs in `task --background`/i); assert.match(runtimeSkill, /`--effort`: accepted values are `none`, `minimal`, `low`, `medium`, `high`, `xhigh`/i); assert.match(runtimeSkill, /Do not inspect the repository, read files, grep, monitor progress, poll status, fetch results, cancel jobs, summarize output, or do any follow-up work of your own/i); assert.match(runtimeSkill, /If the Bash call fails or Codex cannot be invoked, return nothing/i); @@ -179,6 +197,8 @@ test("result and cancel commands are exposed as deterministic runtime entrypoint assert.match(cancel, /codex-companion\.mjs" cancel "\$ARGUMENTS"/); assert.match(resultHandling, /do not turn a failed or incomplete Codex run into a Claude-side implementation attempt/i); assert.match(resultHandling, /if Codex was never successfully invoked, do not generate a substitute answer at all/i); + assert.match(resultHandling, /\[\[codex-task status=complete\]\]/); + assert.match(resultHandling, /No automatic notification will arrive; poll `\/codex:status `/); }); test("internal docs use task terminology for rescue runs", () => { @@ -200,6 +220,9 @@ test("hooks keep session-end cleanup and stop gating enabled", () => { const source = read("hooks/hooks.json"); assert.match(source, /SessionStart/); assert.match(source, /SessionEnd/); + assert.match(source, /PostToolUse/); + assert.match(source, /matcher":\s*"Agent"/); + assert.match(source, /codex-rescue-completion-hook\.mjs/); assert.match(source, /stop-review-gate-hook\.mjs/); assert.match(source, /session-lifecycle-hook\.mjs/); }); diff --git a/tests/runtime.test.mjs b/tests/runtime.test.mjs index 90408372..154cef8e 100644 --- a/tests/runtime.test.mjs +++ b/tests/runtime.test.mjs @@ -366,7 +366,7 @@ test("task --resume-last resumes the latest persisted task thread", () => { }); assert.equal(result.status, 0, result.stderr); - assert.equal(result.stdout, "Resumed the prior run.\nFollow-up prompt accepted.\n"); + assert.equal(result.stdout, "Resumed the prior run.\nFollow-up prompt accepted.\n[[codex-task status=complete]]\n"); }); test("task-resume-candidate returns the latest rescue thread from the current session", () => { @@ -577,7 +577,7 @@ test("write task output focuses on the Codex result without generic follow-up hi }); assert.equal(result.status, 0, result.stderr); - assert.equal(result.stdout, "Handled the requested task.\nTask prompt accepted.\n"); + assert.equal(result.stdout, "Handled the requested task.\nTask prompt accepted.\n[[codex-task status=complete]]\n"); }); test("task --resume acts like --resume-last without leaking the flag into the prompt", () => { @@ -715,7 +715,7 @@ test("task waits for the main thread to complete before returning the final resu }); assert.equal(result.status, 0, result.stderr); - assert.equal(result.stdout, "Handled the requested task.\nTask prompt accepted.\n"); + assert.equal(result.stdout, "Handled the requested task.\nTask prompt accepted.\n[[codex-task status=complete]]\n"); }); test("task ignores later subagent messages when choosing the final returned output", () => { @@ -733,7 +733,7 @@ test("task ignores later subagent messages when choosing the final returned outp }); assert.equal(result.status, 0, result.stderr); - assert.equal(result.stdout, "Handled the requested task.\nTask prompt accepted.\n"); + assert.equal(result.stdout, "Handled the requested task.\nTask prompt accepted.\n[[codex-task status=complete]]\n"); }); test("task can finish after subagent work even if the parent turn/completed event is missing", () => { @@ -751,7 +751,7 @@ test("task can finish after subagent work even if the parent turn/completed even }); assert.equal(result.status, 0, result.stderr); - assert.equal(result.stdout, "Handled the requested task.\nTask prompt accepted.\n"); + assert.equal(result.stdout, "Handled the requested task.\nTask prompt accepted.\n[[codex-task status=complete]]\n"); }); test("task using the shared broker still completes when Codex spawns subagents", () => { @@ -781,7 +781,7 @@ test("task using the shared broker still completes when Codex spawns subagents", }); assert.equal(result.status, 0, result.stderr); - assert.equal(result.stdout, "Handled the requested task.\nTask prompt accepted.\n"); + assert.equal(result.stdout, "Handled the requested task.\nTask prompt accepted.\n[[codex-task status=complete]]\n"); }); test("task --background enqueues a detached worker and exposes per-job status", async () => { @@ -833,6 +833,28 @@ test("task --background enqueues a detached worker and exposes per-job status", assert.match(resultPayload.storedJob.rendered, /Handled the requested task/); }); +test("task --background emits a dispatched token without promising notification", () => { + const repo = makeTempDir(); + const binDir = makeTempDir(); + installFakeCodex(binDir, "slow-task"); + initGitRepo(repo); + fs.writeFileSync(path.join(repo, "README.md"), "hello\n"); + run("git", ["add", "README.md"], { cwd: repo }); + run("git", ["commit", "-m", "init"], { cwd: repo }); + + const launched = run("node", [SCRIPT, "task", "--background", "investigate the failing test"], { + cwd: repo, + env: buildEnv(binDir) + }); + + assert.equal(launched.status, 0, launched.stderr); + assert.match(launched.stdout, /^\[\[codex-task status=dispatched id=task-[^\]\s]+\]\]\n/); + const jobId = launched.stdout.match(/id=(task-[^\]\s]+)/)?.[1]; + assert.ok(jobId, "expected dispatched token to include a task job id"); + assert.match(launched.stdout, new RegExp(`No automatic notification will arrive; poll /codex:status ${jobId}\\.`)); + assert.doesNotMatch(launched.stdout, /will notify|will be notified|you will be notified/i); +}); + test("review rejects focus text because it is native-review only", () => { const repo = makeTempDir(); const binDir = makeTempDir(); From e1b8ae340753291ea4d8477a3c3c9e56ed67de4f Mon Sep 17 00:00:00 2001 From: zknpr Date: Mon, 25 May 2026 00:40:35 +0200 Subject: [PATCH 03/18] fix(codex-rescue-hook): only claim success with positive evidence MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Addresses the PR #346 Codex review (P1): buildCompletionContext fell back to the "COMPLETED ... final result" line for any synchronous return without a sentinel — including failed/errored/empty returns — falsely signaling success and suppressing retry/escalation. Now success is asserted only when the [[codex-task status=complete]] sentinel is present. A failure status (failed/error/timeout/...) or an empty return (codex-rescue returns nothing when Codex can't be invoked) gets a failure-aware "exited without a success signal — may have failed, verify/retry" line; any other tokenless return gets a neutral "exited, verify on disk" line. Every branch still says "do not wait" so a failed run doesn't reintroduce the original waiting bug. Tests: 94/94 (added failed-status and empty-return cases; tokenless default is now neutral). Co-Authored-By: Claude Opus 4.7 (1M context) --- .../scripts/codex-rescue-completion-hook.mjs | 36 ++++++++++-- tests/codex-rescue-hook.test.mjs | 57 ++++++++++++++++++- 2 files changed, 86 insertions(+), 7 deletions(-) diff --git a/plugins/codex/scripts/codex-rescue-completion-hook.mjs b/plugins/codex/scripts/codex-rescue-completion-hook.mjs index a3840c82..bdd8819d 100644 --- a/plugins/codex/scripts/codex-rescue-completion-hook.mjs +++ b/plugins/codex/scripts/codex-rescue-completion-hook.mjs @@ -7,6 +7,14 @@ import { parseTaskStatusToken } from "./lib/task-status-token.mjs"; const COMPLETE_CONTEXT = "codex-rescue has COMPLETED and exited. The text above is the final result. Do NOT wait for a notification or poll status. If it ran with --write, verify changed files with git status."; +const NEUTRAL_EXIT_CONTEXT = + "codex-rescue has exited (synchronous return — it is not running). Treat the text above as its result and verify on disk (`git status` if it ran with --write); do not wait for a notification or poll status."; +const FAILURE_CONTEXT = + "codex-rescue exited WITHOUT a success signal — it is not running, so do not wait for a notification, but the run may have failed or produced no result. Review the output above and `git status`, then re-run or escalate instead of treating it as done."; +// Known non-success statuses. Without the complete sentinel, one of these (or +// an empty body) means there is no success signal, so we must not claim the +// run succeeded (PR #346 review P1). +const FAILURE_STATUSES = new Set(["failed", "fail", "error", "errored", "cancelled", "canceled", "timed_out", "timeout", "aborted"]); const BASH_AUTO_BACKGROUND_MARKER = "Command running in background with ID:"; const BASH_AUTO_BACKGROUND_OUTPUT_PREFIX = "Output is being written to:"; @@ -76,6 +84,16 @@ function isSynchronousAgentReturn(toolResponse) { return status !== "async_launched" && status !== "running"; } +function isFailedOrEmptyReturn(toolResponse, responseText) { + // codex-rescue returns nothing when Codex can't be invoked; an empty body or + // a known failure status means there is no success signal to report. + if (String(responseText ?? "").trim() === "") { + return true; + } + const status = String(toolResponse?.status ?? "").toLowerCase(); + return FAILURE_STATUSES.has(status); +} + function collectText(value) { if (value == null) { return ""; @@ -119,20 +137,28 @@ function buildCompletionContext(input) { if (token?.status === "dispatched" && token.id) { return buildDispatchedContext(token.id); } + // The complete sentinel is the only positive proof of a successful run; the + // companion stamps it solely on real completion, so it is the lone path that + // asserts success (PR #346 review P1). if (token?.status === "complete") { return COMPLETE_CONTEXT; } - // Bash auto-backgrounds commands that exceed the 600s foreground cap. That + // Bash auto-backgrounds commands that exceed the ~600s foreground cap. That // synchronous Agent return means the Bash wrapper detached, not that Codex - // completed, so report the still-running state before tokenless completion. + // completed, so report the still-running state before any completion claim. if (responseText.includes(BASH_AUTO_BACKGROUND_MARKER)) { return buildBashAutoBackgroundContext(extractBashAutoBackgroundOutputPath(responseText)); } - // Token absence still means completion here because PostToolUse fires only - // after a synchronous Agent tool call has returned to the parent thread. - return COMPLETE_CONTEXT; + // No success evidence: never claim success here. A failure status or an empty + // return gets a failure-aware line; everything else gets a neutral "exited, + // verify on disk" line. Every branch still says "do not wait" because a + // synchronous Agent return means the agent has exited. + if (isFailedOrEmptyReturn(toolResponse, responseText)) { + return FAILURE_CONTEXT; + } + return NEUTRAL_EXIT_CONTEXT; } function main() { diff --git a/tests/codex-rescue-hook.test.mjs b/tests/codex-rescue-hook.test.mjs index a58c9196..da5db280 100644 --- a/tests/codex-rescue-hook.test.mjs +++ b/tests/codex-rescue-hook.test.mjs @@ -50,7 +50,7 @@ test("codex-rescue hook injects a complete line when the completion token is pre }); }); -test("codex-rescue hook defaults synchronous tokenless returns to complete", () => { +test("codex-rescue hook reports synchronous tokenless returns neutrally", () => { const result = runHook({ hook_event_name: "PostToolUse", tool_name: "Agent", @@ -72,7 +72,60 @@ test("codex-rescue hook defaults synchronous tokenless returns to complete", () const payload = parseHookOutput(result); assert.equal( payload.hookSpecificOutput.additionalContext, - "codex-rescue has COMPLETED and exited. The text above is the final result. Do NOT wait for a notification or poll status. If it ran with --write, verify changed files with git status." + "codex-rescue has exited (synchronous return — it is not running). Treat the text above as its result and verify on disk (`git status` if it ran with --write); do not wait for a notification or poll status." + ); + assert.doesNotMatch(payload.hookSpecificOutput.additionalContext, /final result/); +}); + +test("codex-rescue hook reports failed tokenless returns without a success signal", () => { + const result = runHook({ + hook_event_name: "PostToolUse", + tool_name: "Agent", + tool_input: { + subagent_type: "codex:codex-rescue" + }, + tool_response: { + status: "failed", + agentId: "agent-failed", + content: [ + { + type: "text", + text: "Codex could not be invoked.\n" + } + ] + } + }); + + const payload = parseHookOutput(result); + assert.equal( + payload.hookSpecificOutput.additionalContext, + "codex-rescue exited WITHOUT a success signal — it is not running, so do not wait for a notification, but the run may have failed or produced no result. Review the output above and `git status`, then re-run or escalate instead of treating it as done." + ); +}); + +test("codex-rescue hook reports empty tokenless returns without a success signal", () => { + const result = runHook({ + hook_event_name: "PostToolUse", + tool_name: "Agent", + tool_input: { + subagent_type: "codex:codex-rescue" + }, + tool_response: { + status: "completed", + agentId: "agent-empty", + content: [ + { + type: "text", + text: "" + } + ] + } + }); + + const payload = parseHookOutput(result); + assert.equal( + payload.hookSpecificOutput.additionalContext, + "codex-rescue exited WITHOUT a success signal — it is not running, so do not wait for a notification, but the run may have failed or produced no result. Review the output above and `git status`, then re-run or escalate instead of treating it as done." ); }); From c4722b5215d9d8abbff53126df79cdcf98b006b2 Mon Sep 17 00:00:00 2001 From: zknpr Date: Mon, 25 May 2026 01:13:56 +0200 Subject: [PATCH 04/18] fix(codex-rescue): run foreground tasks in a survivable detached worker A foreground `task` ran the Codex turn in-process, so when the Bash tool auto-backgrounded a long foreground call and the owning subagent was reaped, the run was killed mid-task and the work was lost with no resumable record. Now the foreground path enqueues the same detached (`detached` + `unref`) worker the `--background` path uses and waits for it inline, printing the worker's stored result. If the inline wait is killed, the detached worker survives and the result is recoverable via /codex:status. Completed-run output is byte-identical (the worker stamps the [[codex-task status=complete]] sentinel). The inline wait degrades to a "still running" status after the existing wait cap instead of blocking forever. enqueueBackgroundTask now writes the job record before spawning to close a worker-startup race. Tests: 95/95 (new test proves foreground goes through the detached worker via the queued-job record + log line, with identical stdout). Co-Authored-By: Claude Opus 4.7 (1M context) --- plugins/codex/scripts/codex-companion.mjs | 108 +++++++++++++++++++--- tests/runtime.test.mjs | 31 +++++++ 2 files changed, 124 insertions(+), 15 deletions(-) diff --git a/plugins/codex/scripts/codex-companion.mjs b/plugins/codex/scripts/codex-companion.mjs index 47d27274..71e55836 100644 --- a/plugins/codex/scripts/codex-companion.mjs +++ b/plugins/codex/scripts/codex-companion.mjs @@ -67,6 +67,7 @@ const ROOT_DIR = path.resolve(fileURLToPath(new URL("..", import.meta.url))); const REVIEW_SCHEMA = path.join(ROOT_DIR, "schemas", "review-output.schema.json"); const DEFAULT_STATUS_WAIT_TIMEOUT_MS = 240000; const DEFAULT_STATUS_POLL_INTERVAL_MS = 2000; +const FOREGROUND_TASK_POLL_INTERVAL_MS = 100; const VALID_REASONING_EFFORTS = new Set(["none", "minimal", "low", "medium", "high", "xhigh"]); const MODEL_ALIASES = new Map([["spark", "gpt-5.3-codex-spark"]]); const STOP_REVIEW_TASK_MARKER = "Run a stop-gate review of the previous Claude turn."; @@ -661,18 +662,21 @@ function enqueueBackgroundTask(cwd, job, request) { const { logFile } = createTrackedProgress(job); appendLogLine(logFile, "Queued for background execution."); - const child = spawnDetachedTaskWorker(cwd, job.id); + // The queued record is written before spawning so the detached worker can always + // load its request, and foreground callers can wait on the job immediately. const queuedRecord = { ...job, status: "queued", phase: "queued", - pid: child.pid ?? null, + pid: null, logFile, request }; writeJobFile(job.workspaceRoot, job.id, queuedRecord); upsertJob(job.workspaceRoot, queuedRecord); + spawnDetachedTaskWorker(cwd, job.id); + return { payload: { jobId: job.id, @@ -685,6 +689,77 @@ function enqueueBackgroundTask(cwd, job, request) { }; } +function ensureTrailingNewline(value) { + const output = String(value ?? ""); + return output.endsWith("\n") ? output : `${output}\n`; +} + +function renderStoredTaskWorkerResult(job, storedJob) { + if (typeof storedJob?.rendered === "string" && storedJob.rendered) { + return ensureTrailingNewline(storedJob.rendered); + } + return renderStoredJobResult(job, storedJob); +} + +function resolveStoredTaskExitStatus(job, storedJob) { + if (job.status === "completed") { + return 0; + } + + const storedStatus = Number(storedJob?.result?.status); + if (Number.isInteger(storedStatus) && storedStatus !== 0) { + return storedStatus; + } + + return 1; +} + +function buildStoredTaskWorkerPayload(job, storedJob) { + const hasStoredResult = + storedJob?.result && typeof storedJob.result === "object" && !Array.isArray(storedJob.result); + const storedResult = hasStoredResult ? storedJob.result : {}; + return { + ...storedResult, + job, + storedJob + }; +} + +async function runForegroundTaskWorker(cwd, job, request, options = {}) { + enqueueBackgroundTask(cwd, job, request); + + // Foreground tasks run in the same detached worker as background tasks, then wait inline for + // the stored result so Bash auto-backgrounding and subagent teardown do not kill the Codex turn. + const snapshot = await waitForSingleJobSnapshot(cwd, job.id, { + pollIntervalMs: FOREGROUND_TASK_POLL_INTERVAL_MS + }); + if (snapshot.waitTimedOut) { + outputCommandResult(snapshot, renderJobStatusReport(snapshot.job), options.json); + process.exitCode = 1; + return; + } + + const storedJob = readStoredJob(snapshot.workspaceRoot, snapshot.job.id); + const payload = buildStoredTaskWorkerPayload(snapshot.job, storedJob); + const exitStatus = resolveStoredTaskExitStatus(snapshot.job, storedJob); + const rendered = renderStoredTaskWorkerResult(snapshot.job, storedJob); + + if ( + snapshot.job.status === "failed" && + !storedJob?.rendered && + storedJob?.errorMessage && + !options.json + ) { + process.stderr.write(`${storedJob.errorMessage}\n`); + } else { + outputCommandResult(payload, rendered, options.json); + } + + if (exitStatus !== 0) { + process.exitCode = exitStatus; + } +} + async function handleReviewCommand(argv, config) { const { options, positionals } = parseCommandInput(argv, { valueOptions: ["base", "scope", "model", "cwd"], @@ -738,7 +813,7 @@ async function handleReview(argv) { async function handleTask(argv) { const { options, positionals } = parseCommandInput(argv, { valueOptions: ["model", "effort", "cwd", "prompt-file"], - booleanOptions: ["json", "write", "resume-last", "resume", "fresh", "background"], + booleanOptions: ["json", "write", "resume-last", "resume", "fresh", "background", "wait"], aliasMap: { m: "model" } @@ -780,20 +855,23 @@ async function handleTask(argv) { return; } + ensureCodexAvailable(cwd); + requireTaskRequest(prompt, resumeLast); + const job = buildTaskJob(workspaceRoot, taskMetadata, write); - await runForegroundCommand( + const request = buildTaskRequest({ + cwd, + model, + effort, + prompt, + write, + resumeLast, + jobId: job.id + }); + await runForegroundTaskWorker( + cwd, job, - (progress) => - executeTaskRun({ - cwd, - model, - effort, - prompt, - write, - resumeLast, - jobId: job.id, - onProgress: progress - }), + request, { json: options.json } ); } diff --git a/tests/runtime.test.mjs b/tests/runtime.test.mjs index 154cef8e..76d10dc5 100644 --- a/tests/runtime.test.mjs +++ b/tests/runtime.test.mjs @@ -580,6 +580,37 @@ test("write task output focuses on the Codex result without generic follow-up hi assert.equal(result.stdout, "Handled the requested task.\nTask prompt accepted.\n[[codex-task status=complete]]\n"); }); +test("foreground task enqueues a detached worker and prints the stored result", () => { + const repo = makeTempDir(); + const binDir = makeTempDir(); + installFakeCodex(binDir); + initGitRepo(repo); + fs.writeFileSync(path.join(repo, "README.md"), "hello\n"); + run("git", ["add", "README.md"], { cwd: repo }); + run("git", ["commit", "-m", "init"], { cwd: repo }); + + const result = run("node", [SCRIPT, "task", "--write", "fix the failing test"], { + cwd: repo, + env: buildEnv(binDir) + }); + + assert.equal(result.status, 0, result.stderr); + assert.equal(result.stdout, "Handled the requested task.\nTask prompt accepted.\n[[codex-task status=complete]]\n"); + + const stateDir = resolveStateDir(repo); + const state = JSON.parse(fs.readFileSync(path.join(stateDir, "state.json"), "utf8")); + const job = state.jobs.find((candidate) => candidate.jobClass === "task"); + assert.ok(job, "expected foreground task to create a tracked task job"); + assert.equal(job.status, "completed"); + + const storedJob = JSON.parse(fs.readFileSync(path.join(stateDir, "jobs", `${job.id}.json`), "utf8")); + const log = fs.readFileSync(storedJob.logFile, "utf8"); + assert.equal(storedJob.request.jobId, job.id); + assert.equal(storedJob.request.prompt, "fix the failing test"); + assert.match(log, /Queued for background execution\./); + assert.equal(result.stdout, storedJob.rendered); +}); + test("task --resume acts like --resume-last without leaking the flag into the prompt", () => { const repo = makeTempDir(); const binDir = makeTempDir(); From 65d8320953527b37638e240e563088020cad9e7d Mon Sep 17 00:00:00 2001 From: zknpr Date: Mon, 25 May 2026 01:38:03 +0200 Subject: [PATCH 05/18] =?UTF-8?q?fix(codex-rescue):=20address=20PR=20#346?= =?UTF-8?q?=20review=20=E2=80=94=20sentinel=20gating,=20foreground=20wait,?= =?UTF-8?q?=20worker=20pid?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Three regressions from e1b8ae3/c4722b5: - renderTaskResult stamped [[codex-task status=complete]] on failure/empty fallback output, so a failed run carried the success sentinel and the completion hook read it as success. The sentinel is now appended only on a successful exit (status 0); failure/empty output renders without it. - runForegroundTaskWorker inherited the 240s status-wait default, so foreground xhigh runs (routinely >4min) timed out instead of returning the result. The inline wait is now unbounded; the detached worker provides survival. - enqueueBackgroundTask left the queued record pid: null and never persisted the spawned worker pid, breaking /codex:cancel. The worker pid is now written after spawn, guarded against clobbering a finished job. Tests: 97/97 (added failure-no-sentinel + worker-pid coverage). Co-Authored-By: Claude Opus 4.7 (1M context) --- plugins/codex/scripts/codex-companion.mjs | 17 +++++++++++++- plugins/codex/scripts/lib/render.mjs | 6 +++-- tests/render.test.mjs | 28 ++++++++++++++++++++++- tests/runtime.test.mjs | 20 ++++++++++++++++ 4 files changed, 67 insertions(+), 4 deletions(-) diff --git a/plugins/codex/scripts/codex-companion.mjs b/plugins/codex/scripts/codex-companion.mjs index 71e55836..a08de8ef 100644 --- a/plugins/codex/scripts/codex-companion.mjs +++ b/plugins/codex/scripts/codex-companion.mjs @@ -499,6 +499,7 @@ async function executeTaskRun(request) { { rawOutput, failureMessage, + status: result.status, reasoningSummary: result.reasoningSummary }, { @@ -675,7 +676,19 @@ function enqueueBackgroundTask(cwd, job, request) { writeJobFile(job.workspaceRoot, job.id, queuedRecord); upsertJob(job.workspaceRoot, queuedRecord); - spawnDetachedTaskWorker(cwd, job.id); + const child = spawnDetachedTaskWorker(cwd, job.id); + // PR #346 review: keep the pre-spawn record, then persist the worker pid for cancellation. + const spawnedRecord = readStoredJob(job.workspaceRoot, job.id) ?? queuedRecord; + if (isActiveJobStatus(spawnedRecord.status)) { + writeJobFile(job.workspaceRoot, job.id, { + ...spawnedRecord, + pid: child.pid ?? null + }); + upsertJob(job.workspaceRoot, { + id: job.id, + pid: child.pid ?? null + }); + } return { payload: { @@ -731,6 +744,8 @@ async function runForegroundTaskWorker(cwd, job, request, options = {}) { // Foreground tasks run in the same detached worker as background tasks, then wait inline for // the stored result so Bash auto-backgrounding and subagent teardown do not kill the Codex turn. const snapshot = await waitForSingleJobSnapshot(cwd, job.id, { + // PR #346 review: foreground xhigh waits must survive beyond the 240s status default. + timeoutMs: Infinity, pollIntervalMs: FOREGROUND_TASK_POLL_INTERVAL_MS }); if (snapshot.waitTimedOut) { diff --git a/plugins/codex/scripts/lib/render.mjs b/plugins/codex/scripts/lib/render.mjs index f58459e9..7bd62cee 100644 --- a/plugins/codex/scripts/lib/render.mjs +++ b/plugins/codex/scripts/lib/render.mjs @@ -320,12 +320,14 @@ export function renderNativeReviewResult(result, meta) { export function renderTaskResult(parsedResult, meta) { const rawOutput = typeof parsedResult?.rawOutput === "string" ? parsedResult.rawOutput : ""; const completeToken = buildTaskCompleteStatusToken(); + // PR #346 review: only successful Codex task exits may stamp the success sentinel. + const succeeded = parsedResult?.status === 0 || parsedResult?.exitStatus === 0 || meta?.success === true; if (rawOutput) { - return appendTaskStatusToken(rawOutput, completeToken); + return succeeded ? appendTaskStatusToken(rawOutput, completeToken) : rawOutput.endsWith("\n") ? rawOutput : `${rawOutput}\n`; } const message = String(parsedResult?.failureMessage ?? "").trim() || "Codex did not return a final message."; - return appendTaskStatusToken(message, completeToken); + return succeeded ? appendTaskStatusToken(message, completeToken) : `${message}\n`; } export function renderStatusReport(report) { diff --git a/tests/render.test.mjs b/tests/render.test.mjs index ab68038e..745c2fe7 100644 --- a/tests/render.test.mjs +++ b/tests/render.test.mjs @@ -1,7 +1,9 @@ import test from "node:test"; import assert from "node:assert/strict"; -import { renderReviewResult, renderStoredJobResult } from "../plugins/codex/scripts/lib/render.mjs"; +import { renderReviewResult, renderStoredJobResult, renderTaskResult } from "../plugins/codex/scripts/lib/render.mjs"; + +const TASK_COMPLETE_STATUS_TOKEN = "[[codex-task status=complete]]"; test("renderReviewResult degrades gracefully when JSON is missing required review fields", () => { const output = renderReviewResult( @@ -57,3 +59,27 @@ test("renderStoredJobResult prefers rendered output for structured review jobs", assert.match(output, /Codex session ID: thr_123/); assert.match(output, /Resume in Codex: codex resume thr_123/); }); + +test("renderTaskResult only stamps the complete sentinel for successful task runs", () => { + const successful = renderTaskResult( + { + rawOutput: "Handled the requested task.", + status: 0 + }, + {} + ); + + assert.equal(successful, `Handled the requested task.\n${TASK_COMPLETE_STATUS_TOKEN}\n`); + + const failedEmpty = renderTaskResult( + { + rawOutput: "", + failureMessage: "Codex failed before producing a final message.", + status: 1 + }, + {} + ); + + assert.equal(failedEmpty, "Codex failed before producing a final message.\n"); + assert.doesNotMatch(failedEmpty, /\[\[codex-task status=complete\]\]/); +}); diff --git a/tests/runtime.test.mjs b/tests/runtime.test.mjs index 76d10dc5..c88b0c2b 100644 --- a/tests/runtime.test.mjs +++ b/tests/runtime.test.mjs @@ -611,6 +611,15 @@ test("foreground task enqueues a detached worker and prints the stored result", assert.equal(result.stdout, storedJob.rendered); }); +test("foreground task worker wait is not bounded by the default status timeout", () => { + const source = fs.readFileSync(SCRIPT, "utf8"); + const match = source.match(/async function runForegroundTaskWorker[\s\S]*?\n}\n\nasync function handleTask/); + + assert.ok(match, "expected to locate runForegroundTaskWorker"); + assert.match(match[0], /timeoutMs:\s*Infinity/); + assert.match(match[0], /pollIntervalMs:\s*FOREGROUND_TASK_POLL_INTERVAL_MS/); +}); + test("task --resume acts like --resume-last without leaking the flag into the prompt", () => { const repo = makeTempDir(); const binDir = makeTempDir(); @@ -834,6 +843,17 @@ test("task --background enqueues a detached worker and exposes per-job status", assert.equal(launchPayload.status, "queued"); assert.match(launchPayload.jobId, /^task-/); + const stateDir = resolveStateDir(repo); + const launchedState = JSON.parse(fs.readFileSync(path.join(stateDir, "state.json"), "utf8")); + const launchedJob = launchedState.jobs.find((candidate) => candidate.id === launchPayload.jobId); + assert.ok(Number.isInteger(launchedJob?.pid) && launchedJob.pid > 0, "expected launched job to record worker pid"); + + const launchedStoredJob = JSON.parse(fs.readFileSync(path.join(stateDir, "jobs", `${launchPayload.jobId}.json`), "utf8")); + assert.ok( + Number.isInteger(launchedStoredJob.pid) && launchedStoredJob.pid > 0, + "expected stored launched job to record worker pid" + ); + const waitedStatus = run( "node", [SCRIPT, "status", launchPayload.jobId, "--wait", "--timeout-ms", "15000", "--json"], From 2071e212aaf295f294e63b491f425b92dae37b38 Mon Sep 17 00:00:00 2001 From: zknpr Date: Mon, 25 May 2026 10:51:28 +0200 Subject: [PATCH 06/18] feat(codex-rescue): reliable completion notification for background jobs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A companion --background job has no push channel to the main thread, and the subagent often paraphrases the [[...dispatched...]] sentinel away — so the hook could even report a still-running job as "done" (found in real testing). - The PostToolUse hook now uses companion STATE as ground truth: if the workspace has an active (queued/running) task job after a codex-rescue return, it injects a watcher instruction — arm `status --wait` via the Bash tool with run_in_background=true, which exits and re-invokes Claude on terminal status. Robust against paraphrased/dropped dispatch tokens and the Bash auto-background case. - The companion --background dispatch message and the rescue/result-handling docs now carry the same watcher command. Gives background codex-rescue a reliable completion notification — the backgrounded `status --wait` watcher isn't subject to the 600s foreground cap. Tests: 98/98 (new state-seeded watcher test incl. a paraphrased-dispatch case). Co-Authored-By: Claude Opus 4.7 (1M context) --- plugins/codex/commands/rescue.md | 4 +- plugins/codex/scripts/codex-companion.mjs | 3 +- .../scripts/codex-rescue-completion-hook.mjs | 32 +++++++ .../skills/codex-result-handling/SKILL.md | 2 +- tests/codex-rescue-hook.test.mjs | 95 ++++++++++++++++++- tests/commands.test.mjs | 9 +- tests/runtime.test.mjs | 6 ++ 7 files changed, 142 insertions(+), 9 deletions(-) diff --git a/plugins/codex/commands/rescue.md b/plugins/codex/commands/rescue.md index 23f0822f..dc6468ae 100644 --- a/plugins/codex/commands/rescue.md +++ b/plugins/codex/commands/rescue.md @@ -16,7 +16,7 @@ Execution mode: - If the request includes `--background`, pass `--background` to the `codex:codex-rescue` subagent so it invokes companion `task --background`. - If the request includes `--wait`, pass `--wait` to the `codex:codex-rescue` subagent so it invokes foreground `task`. - If neither flag is present, foreground stays for short, bounded rescues that return the result directly. -- For long, substantial, or open-ended rescues, pass `--background`; the companion returns `[[codex-task status=dispatched id=]]`, then poll `/codex:status ` and fetch `/codex:result ` when done. +- For long, substantial, or open-ended rescues, pass `--background`; the companion returns `[[codex-task status=dispatched id=]]`. A dispatched background job has no automatic push notification, so arm a watcher by running `node "${CLAUDE_PLUGIN_ROOT}/scripts/codex-companion.mjs" status --wait --timeout-ms 1800000` via the Bash tool with `run_in_background=true`; it exits and re-invokes Claude when the job reaches a terminal status. If it returns while the job is still running, re-arm the same command. Fetch `/codex:result ` only after the watcher reports a terminal status. - A foreground rescue is capped by the Bash tool at roughly 600 seconds and can be auto-backgrounded past that cap, which orphans the detached Codex result. - `--background` and `--wait` are execution flags for Claude Code and the rescue subagent. Preserve the execution choice, and strip them only from the natural-language task text before Codex sees the prompt. - `--model` and `--effort` are runtime-selection flags. Preserve them for the forwarded `task` call, but do not treat them as part of the natural-language task text. @@ -43,7 +43,7 @@ Operating rules: - The subagent is a thin forwarder only. It should use one `Bash` call to invoke `node "${CLAUDE_PLUGIN_ROOT}/scripts/codex-companion.mjs" task ...` and return that command's stdout as-is. - Return the Codex companion stdout verbatim to the user. - Do not paraphrase, summarize, rewrite, or add commentary before or after it. -- The companion stdout carries a deterministic sentinel on a standalone line: `[[codex-task status=complete]]` means a foreground task has completed and exited; `[[codex-task status=dispatched id=]]` means a companion background job was queued. Trust this sentinel and the PostToolUse hook over any prose. A dispatched sentinel is not an automatic-notification promise; tell the user only to poll `/codex:status ` if you are not returning stdout verbatim. +- The companion stdout carries a deterministic sentinel on a standalone line: `[[codex-task status=complete]]` means a foreground task has completed and exited; `[[codex-task status=dispatched id=]]` means a companion background job was queued. Trust this sentinel, companion state, and the PostToolUse hook over any prose. A dispatched sentinel is not an automatic-notification promise; never treat a dispatched/background job as done until `codex-companion.mjs status --wait` returns a terminal status from a Bash watcher launched with `run_in_background=true`. - The Claude Code harness appends an `agentId: (use SendMessage with to: '' to continue this agent)` line to every `Agent(codex:codex-rescue)` return. This is a resume token for continuing the same agent thread, NOT a "still running" or "in background" status signal. By the time you see it, the subagent has finished. Do not paraphrase it as "Codex is running in background" or "task dispatched async" — the stdout above the suffix is the actual completed output, which you return verbatim per the rule above. - Do not ask the subagent to inspect files, monitor progress, poll `/codex:status`, fetch `/codex:result`, call `/codex:cancel`, summarize output, or do follow-up work of its own. - Leave `--effort` unset unless the user explicitly asks for a specific reasoning effort. diff --git a/plugins/codex/scripts/codex-companion.mjs b/plugins/codex/scripts/codex-companion.mjs index a08de8ef..a0544bed 100644 --- a/plugins/codex/scripts/codex-companion.mjs +++ b/plugins/codex/scripts/codex-companion.mjs @@ -558,7 +558,8 @@ function renderQueuedTaskLaunch(payload) { return [ statusToken, `${payload.title} dispatched as background job ${payload.jobId}.`, - `No automatic notification will arrive; poll /codex:status ${payload.jobId}.` + `No automatic notification will arrive; poll /codex:status ${payload.jobId}.`, + `To be notified on completion, run with the Bash tool (run_in_background): node "\${CLAUDE_PLUGIN_ROOT}/scripts/codex-companion.mjs" status ${payload.jobId} --wait --timeout-ms 1800000 (re-arm it if it returns still-running).` ].join("\n") + "\n"; } diff --git a/plugins/codex/scripts/codex-rescue-completion-hook.mjs b/plugins/codex/scripts/codex-rescue-completion-hook.mjs index bdd8819d..48539bae 100644 --- a/plugins/codex/scripts/codex-rescue-completion-hook.mjs +++ b/plugins/codex/scripts/codex-rescue-completion-hook.mjs @@ -1,10 +1,13 @@ #!/usr/bin/env node import fs from "node:fs"; +import { fileURLToPath } from "node:url"; import process from "node:process"; +import { loadState } from "./lib/state.mjs"; import { parseTaskStatusToken } from "./lib/task-status-token.mjs"; +const COMPANION_SCRIPT_PATH = fileURLToPath(new URL("./codex-companion.mjs", import.meta.url)); const COMPLETE_CONTEXT = "codex-rescue has COMPLETED and exited. The text above is the final result. Do NOT wait for a notification or poll status. If it ran with --write, verify changed files with git status."; const NEUTRAL_EXIT_CONTEXT = @@ -15,9 +18,14 @@ const FAILURE_CONTEXT = // an empty body) means there is no success signal, so we must not claim the // run succeeded (PR #346 review P1). const FAILURE_STATUSES = new Set(["failed", "fail", "error", "errored", "cancelled", "canceled", "timed_out", "timeout", "aborted"]); +const ACTIVE_TASK_STATUSES = new Set(["queued", "running"]); const BASH_AUTO_BACKGROUND_MARKER = "Command running in background with ID:"; const BASH_AUTO_BACKGROUND_OUTPUT_PREFIX = "Output is being written to:"; +function buildWatcherContext(jobId) { + return `codex-rescue background job ${jobId} is RUNNING — there is no automatic push notification. To be notified, arm a watcher: run this via the Bash tool with run_in_background=true: node ${COMPANION_SCRIPT_PATH} status ${jobId} --wait --timeout-ms 1800000 — it blocks until the job is terminal, then exits and re-invokes you. If it returns and the job is still running, re-arm the same command. Do NOT treat the job as done until the watcher reports a terminal status.`; +} + function buildDispatchedContext(jobId) { return `codex-rescue dispatched background job ${jobId}. No automatic notification will arrive; poll /codex:status ${jobId}.`; } @@ -84,6 +92,23 @@ function isSynchronousAgentReturn(toolResponse) { return status !== "async_launched" && status !== "running"; } +function getHookCwd(input) { + const cwd = input?.cwd; + return typeof cwd === "string" && cwd.trim() ? cwd : process.cwd(); +} + +function compareJobsNewestFirst(left, right) { + return String(right.updatedAt ?? right.createdAt ?? "").localeCompare(String(left.updatedAt ?? left.createdAt ?? "")); +} + +function findNewestActiveTaskJob(cwd) { + return ( + [...loadState(cwd).jobs] + .filter((job) => job?.jobClass === "task" && ACTIVE_TASK_STATUSES.has(String(job.status ?? ""))) + .sort(compareJobsNewestFirst)[0] ?? null + ); +} + function isFailedOrEmptyReturn(toolResponse, responseText) { // codex-rescue returns nothing when Codex can't be invoked; an empty body or // a known failure status means there is no success signal to report. @@ -132,6 +157,13 @@ function buildCompletionContext(input) { return null; } + // The subagent may paraphrase or drop the dispatched sentinel, so companion + // state is the reliable signal for a queued/running rescue task after return. + const activeTaskJob = findNewestActiveTaskJob(getHookCwd(input)); + if (activeTaskJob) { + return buildWatcherContext(activeTaskJob.id); + } + const responseText = collectText(toolResponse); const token = parseTaskStatusToken(responseText); if (token?.status === "dispatched" && token.id) { diff --git a/plugins/codex/skills/codex-result-handling/SKILL.md b/plugins/codex/skills/codex-result-handling/SKILL.md index 117cfb33..d29f0e47 100644 --- a/plugins/codex/skills/codex-result-handling/SKILL.md +++ b/plugins/codex/skills/codex-result-handling/SKILL.md @@ -17,7 +17,7 @@ When the helper returns Codex output: - For `codex:codex-rescue`, do not turn a failed or incomplete Codex run into a Claude-side implementation attempt. Report the failure and stop. - For `codex:codex-rescue`, if Codex was never successfully invoked, do not generate a substitute answer at all. - For `codex:codex-rescue`, treat a standalone `[[codex-task status=complete]]` sentinel, or the matching PostToolUse hook context, as authoritative proof that the synchronous subagent has finished and exited. Do not wait for a notification or poll status after that signal. -- For `codex:codex-rescue`, treat a standalone `[[codex-task status=dispatched id=]]` sentinel as the only background-dispatch signal. No automatic notification will arrive; poll `/codex:status `. +- For `codex:codex-rescue`, treat a standalone `[[codex-task status=dispatched id=]]` sentinel as a background-dispatch signal, not a completion signal. No automatic push notification will arrive; arm `node "${CLAUDE_PLUGIN_ROOT}/scripts/codex-companion.mjs" status --wait --timeout-ms 1800000` via the Bash tool with `run_in_background=true` to be re-invoked on terminal status. If it returns while the job is still running, re-arm the same command. Never treat a dispatched/background job as done until that watcher returns a terminal status. - CRITICAL: After presenting review findings, STOP. Do not make any code changes. Do not fix any issues. You MUST explicitly ask the user which issues, if any, they want fixed before touching a single file. Auto-applying fixes from a review is strictly forbidden, even if the fix is obvious. - If the helper reports malformed output or a failed Codex run, include the most actionable stderr lines and stop there instead of guessing. - If the helper reports that setup or authentication is required, direct the user to `/codex:setup` and do not improvise alternate auth flows. diff --git a/tests/codex-rescue-hook.test.mjs b/tests/codex-rescue-hook.test.mjs index da5db280..5c15de48 100644 --- a/tests/codex-rescue-hook.test.mjs +++ b/tests/codex-rescue-hook.test.mjs @@ -3,14 +3,46 @@ import test from "node:test"; import assert from "node:assert/strict"; import { fileURLToPath } from "node:url"; -import { run } from "./helpers.mjs"; +import { makeTempDir, run } from "./helpers.mjs"; +import { saveState } from "../plugins/codex/scripts/lib/state.mjs"; const ROOT = path.resolve(path.dirname(fileURLToPath(import.meta.url)), ".."); const HOOK = path.join(ROOT, "plugins", "codex", "scripts", "codex-rescue-completion-hook.mjs"); +const COMPANION = path.join(ROOT, "plugins", "codex", "scripts", "codex-companion.mjs"); -function runHook(input) { +function withPluginData(pluginDataDir, callback) { + const previousPluginDataDir = process.env.CLAUDE_PLUGIN_DATA; + process.env.CLAUDE_PLUGIN_DATA = pluginDataDir; + try { + return callback(); + } finally { + if (previousPluginDataDir == null) { + delete process.env.CLAUDE_PLUGIN_DATA; + } else { + process.env.CLAUDE_PLUGIN_DATA = previousPluginDataDir; + } + } +} + +function seedHookState(cwd, pluginDataDir, jobs) { + withPluginData(pluginDataDir, () => + saveState(cwd, { + version: 1, + config: { stopReviewGate: false }, + jobs + }) + ); +} + +function runHook(input, options = {}) { + const pluginDataDir = options.pluginDataDir ?? makeTempDir(); return run("node", [HOOK], { cwd: ROOT, + env: { + ...process.env, + ...(options.env ?? {}), + CLAUDE_PLUGIN_DATA: pluginDataDir + }, input: JSON.stringify(input) }); } @@ -21,6 +53,65 @@ function parseHookOutput(result) { return JSON.parse(result.stdout); } +test("codex-rescue hook emits a watcher when companion state has an active task job", () => { + const workspace = makeTempDir(); + const pluginDataDir = makeTempDir(); + seedHookState(workspace, pluginDataDir, [ + { + id: "task-queued-old", + status: "queued", + title: "Codex Task", + jobClass: "task", + updatedAt: "2026-05-25T10:00:00.000Z" + }, + { + id: "review-running-newer", + status: "running", + title: "Codex Review", + jobClass: "review", + updatedAt: "2026-05-25T10:10:00.000Z" + }, + { + id: "task-running-new", + status: "running", + title: "Codex Task", + jobClass: "task", + updatedAt: "2026-05-25T10:05:00.000Z" + } + ]); + + const result = runHook( + { + hook_event_name: "PostToolUse", + tool_name: "Agent", + cwd: workspace, + tool_input: { + subagent_type: "codex:codex-rescue" + }, + tool_response: { + status: "completed", + agentId: "agent-paraphrased-dispatch", + content: [ + { + type: "text", + text: "Codex is running in background (ID task-running-new)." + } + ] + } + }, + { pluginDataDir } + ); + + const payload = parseHookOutput(result); + assert.deepEqual(payload, { + hookSpecificOutput: { + hookEventName: "PostToolUse", + additionalContext: + `codex-rescue background job task-running-new is RUNNING — there is no automatic push notification. To be notified, arm a watcher: run this via the Bash tool with run_in_background=true: node ${COMPANION} status task-running-new --wait --timeout-ms 1800000 — it blocks until the job is terminal, then exits and re-invokes you. If it returns and the job is still running, re-arm the same command. Do NOT treat the job as done until the watcher reports a terminal status.` + } + }); +}); + test("codex-rescue hook injects a complete line when the completion token is present", () => { const result = runHook({ hook_event_name: "PostToolUse", diff --git a/tests/commands.test.mjs b/tests/commands.test.mjs index c56fd887..431ddc1f 100644 --- a/tests/commands.test.mjs +++ b/tests/commands.test.mjs @@ -110,7 +110,8 @@ test("rescue command absorbs continue semantics", () => { assert.match(rescue, /Start a new Codex thread/); assert.match(rescue, /pass `--background` to the `codex:codex-rescue` subagent/i); assert.match(rescue, /For long, substantial, or open-ended rescues, pass `--background`/i); - assert.match(rescue, /`\/codex:status `/); + assert.match(rescue, /status --wait --timeout-ms 1800000/); + assert.match(rescue, /run_in_background=true/); assert.match(rescue, /`\/codex:result `/); assert.match(rescue, /Foreground stays for short, bounded rescues/i); assert.match(rescue, /foreground rescue is capped by the Bash tool/i); @@ -129,7 +130,7 @@ test("rescue command absorbs continue semantics", () => { assert.match(rescue, /Do not paraphrase, summarize, rewrite, or add commentary before or after it/i); assert.match(rescue, /\[\[codex-task status=complete\]\]/); assert.match(rescue, /\[\[codex-task status=dispatched id=\]\]/); - assert.match(rescue, /Trust this sentinel and the PostToolUse hook over any prose/i); + assert.match(rescue, /Trust this sentinel, companion state, and the PostToolUse hook over any prose/i); assert.match(rescue, /return that command's stdout as-is/i); assert.match(rescue, /Leave `--resume` and `--fresh` in the forwarded request/i); assert.match(agent, /--resume/); @@ -198,7 +199,9 @@ test("result and cancel commands are exposed as deterministic runtime entrypoint assert.match(resultHandling, /do not turn a failed or incomplete Codex run into a Claude-side implementation attempt/i); assert.match(resultHandling, /if Codex was never successfully invoked, do not generate a substitute answer at all/i); assert.match(resultHandling, /\[\[codex-task status=complete\]\]/); - assert.match(resultHandling, /No automatic notification will arrive; poll `\/codex:status `/); + assert.match(resultHandling, /No automatic push notification will arrive/i); + assert.match(resultHandling, /status --wait --timeout-ms 1800000/); + assert.match(resultHandling, /run_in_background=true/); }); test("internal docs use task terminology for rescue runs", () => { diff --git a/tests/runtime.test.mjs b/tests/runtime.test.mjs index c88b0c2b..93d72ecd 100644 --- a/tests/runtime.test.mjs +++ b/tests/runtime.test.mjs @@ -903,6 +903,12 @@ test("task --background emits a dispatched token without promising notification" const jobId = launched.stdout.match(/id=(task-[^\]\s]+)/)?.[1]; assert.ok(jobId, "expected dispatched token to include a task job id"); assert.match(launched.stdout, new RegExp(`No automatic notification will arrive; poll /codex:status ${jobId}\\.`)); + assert.match( + launched.stdout, + new RegExp( + `To be notified on completion, run with the Bash tool \\(run_in_background\\): node "\\$\\{CLAUDE_PLUGIN_ROOT\\}/scripts/codex-companion\\.mjs" status ${jobId} --wait --timeout-ms 1800000 \\(re-arm it if it returns still-running\\)\\.` + ) + ); assert.doesNotMatch(launched.stdout, /will notify|will be notified|you will be notified/i); }); From 11db7d5bb5adb64c10300e7c91dbc8530fdab5b5 Mon Sep 17 00:00:00 2001 From: zknpr Date: Mon, 25 May 2026 11:21:58 +0200 Subject: [PATCH 07/18] =?UTF-8?q?fix(codex-rescue-hook):=20address=20PR=20?= =?UTF-8?q?#346=20review=20=E2=80=94=20token=20order,=20session=20scope,?= =?UTF-8?q?=20quoting?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Check the completion/dispatch token BEFORE inferring active-task state, so a return that actually completed isn't masked by an unrelated active job. - Scope findNewestActiveTaskJob to the hook's session_id (job.sessionId match), with an unscoped fallback when no session_id is present — avoids pointing the watcher at another Claude session's job. - Quote the companion path in the watcher command (handles paths with spaces). Tests: 100/100 (added complete-token-beats-active-job, session-scoping, and quoted-path cases). Co-Authored-By: Claude Opus 4.7 (1M context) --- .../scripts/codex-rescue-completion-hook.mjs | 40 +++--- tests/codex-rescue-hook.test.mjs | 130 +++++++++++++++++- 2 files changed, 152 insertions(+), 18 deletions(-) diff --git a/plugins/codex/scripts/codex-rescue-completion-hook.mjs b/plugins/codex/scripts/codex-rescue-completion-hook.mjs index 48539bae..accf6329 100644 --- a/plugins/codex/scripts/codex-rescue-completion-hook.mjs +++ b/plugins/codex/scripts/codex-rescue-completion-hook.mjs @@ -23,7 +23,7 @@ const BASH_AUTO_BACKGROUND_MARKER = "Command running in background with ID:"; const BASH_AUTO_BACKGROUND_OUTPUT_PREFIX = "Output is being written to:"; function buildWatcherContext(jobId) { - return `codex-rescue background job ${jobId} is RUNNING — there is no automatic push notification. To be notified, arm a watcher: run this via the Bash tool with run_in_background=true: node ${COMPANION_SCRIPT_PATH} status ${jobId} --wait --timeout-ms 1800000 — it blocks until the job is terminal, then exits and re-invokes you. If it returns and the job is still running, re-arm the same command. Do NOT treat the job as done until the watcher reports a terminal status.`; + return `codex-rescue background job ${jobId} is RUNNING — there is no automatic push notification. To be notified, arm a watcher: run this via the Bash tool with run_in_background=true: node "${COMPANION_SCRIPT_PATH}" status ${jobId} --wait --timeout-ms 1800000 — it blocks until the job is terminal, then exits and re-invokes you. If it returns and the job is still running, re-arm the same command. Do NOT treat the job as done until the watcher reports a terminal status.`; } function buildDispatchedContext(jobId) { @@ -101,12 +101,18 @@ function compareJobsNewestFirst(left, right) { return String(right.updatedAt ?? right.createdAt ?? "").localeCompare(String(left.updatedAt ?? left.createdAt ?? "")); } -function findNewestActiveTaskJob(cwd) { - return ( - [...loadState(cwd).jobs] - .filter((job) => job?.jobClass === "task" && ACTIVE_TASK_STATUSES.has(String(job.status ?? ""))) - .sort(compareJobsNewestFirst)[0] ?? null +function findNewestActiveTaskJob(cwd, sessionId) { + const activeTaskJobs = [...loadState(cwd).jobs].filter( + (job) => job?.jobClass === "task" && ACTIVE_TASK_STATUSES.has(String(job.status ?? "")) ); + const normalizedSessionId = typeof sessionId === "string" && sessionId.trim() ? sessionId : null; + // Companion task jobs are session-owned; when Claude gives us the current + // session id, only infer active state from jobs created by this same session. + // Older hook inputs had no session_id, so those preserve the unscoped fallback. + const candidateJobs = normalizedSessionId + ? activeTaskJobs.filter((job) => job?.sessionId === normalizedSessionId) + : activeTaskJobs; + return candidateJobs.sort(compareJobsNewestFirst)[0] ?? null; } function isFailedOrEmptyReturn(toolResponse, responseText) { @@ -157,24 +163,26 @@ function buildCompletionContext(input) { return null; } - // The subagent may paraphrase or drop the dispatched sentinel, so companion - // state is the reliable signal for a queued/running rescue task after return. - const activeTaskJob = findNewestActiveTaskJob(getHookCwd(input)); - if (activeTaskJob) { - return buildWatcherContext(activeTaskJob.id); - } - const responseText = collectText(toolResponse); const token = parseTaskStatusToken(responseText); - if (token?.status === "dispatched" && token.id) { - return buildDispatchedContext(token.id); - } // The complete sentinel is the only positive proof of a successful run; the // companion stamps it solely on real completion, so it is the lone path that // asserts success (PR #346 review P1). if (token?.status === "complete") { return COMPLETE_CONTEXT; } + // Status tokens are emitted by this codex-rescue return, so they are more + // authoritative than any task state that may belong to another concurrent run. + if (token?.status === "dispatched" && token.id) { + return buildDispatchedContext(token.id); + } + + // The subagent may paraphrase or drop the dispatched sentinel, so companion + // state is the fallback signal for a queued/running rescue task after return. + const activeTaskJob = findNewestActiveTaskJob(getHookCwd(input), input?.session_id); + if (activeTaskJob) { + return buildWatcherContext(activeTaskJob.id); + } // Bash auto-backgrounds commands that exceed the ~600s foreground cap. That // synchronous Agent return means the Bash wrapper detached, not that Codex diff --git a/tests/codex-rescue-hook.test.mjs b/tests/codex-rescue-hook.test.mjs index 5c15de48..93617598 100644 --- a/tests/codex-rescue-hook.test.mjs +++ b/tests/codex-rescue-hook.test.mjs @@ -53,6 +53,10 @@ function parseHookOutput(result) { return JSON.parse(result.stdout); } +function expectedWatcherContext(jobId) { + return `codex-rescue background job ${jobId} is RUNNING — there is no automatic push notification. To be notified, arm a watcher: run this via the Bash tool with run_in_background=true: node "${COMPANION}" status ${jobId} --wait --timeout-ms 1800000 — it blocks until the job is terminal, then exits and re-invokes you. If it returns and the job is still running, re-arm the same command. Do NOT treat the job as done until the watcher reports a terminal status.`; +} + test("codex-rescue hook emits a watcher when companion state has an active task job", () => { const workspace = makeTempDir(); const pluginDataDir = makeTempDir(); @@ -62,6 +66,7 @@ test("codex-rescue hook emits a watcher when companion state has an active task status: "queued", title: "Codex Task", jobClass: "task", + sessionId: "session-main", updatedAt: "2026-05-25T10:00:00.000Z" }, { @@ -76,6 +81,7 @@ test("codex-rescue hook emits a watcher when companion state has an active task status: "running", title: "Codex Task", jobClass: "task", + sessionId: "session-main", updatedAt: "2026-05-25T10:05:00.000Z" } ]); @@ -84,6 +90,7 @@ test("codex-rescue hook emits a watcher when companion state has an active task { hook_event_name: "PostToolUse", tool_name: "Agent", + session_id: "session-main", cwd: workspace, tool_input: { subagent_type: "codex:codex-rescue" @@ -106,10 +113,10 @@ test("codex-rescue hook emits a watcher when companion state has an active task assert.deepEqual(payload, { hookSpecificOutput: { hookEventName: "PostToolUse", - additionalContext: - `codex-rescue background job task-running-new is RUNNING — there is no automatic push notification. To be notified, arm a watcher: run this via the Bash tool with run_in_background=true: node ${COMPANION} status task-running-new --wait --timeout-ms 1800000 — it blocks until the job is terminal, then exits and re-invokes you. If it returns and the job is still running, re-arm the same command. Do NOT treat the job as done until the watcher reports a terminal status.` + additionalContext: expectedWatcherContext("task-running-new") } }); + assert.ok(payload.hookSpecificOutput.additionalContext.includes(`node "${COMPANION}" status task-running-new`)); }); test("codex-rescue hook injects a complete line when the completion token is present", () => { @@ -141,6 +148,125 @@ test("codex-rescue hook injects a complete line when the completion token is pre }); }); +test("codex-rescue hook treats the completion token as authoritative over active task state", () => { + const workspace = makeTempDir(); + const pluginDataDir = makeTempDir(); + seedHookState(workspace, pluginDataDir, [ + { + id: "task-unrelated-running", + status: "running", + title: "Codex Task", + jobClass: "task", + sessionId: "other-session", + updatedAt: "2026-05-25T10:20:00.000Z" + } + ]); + + const result = runHook( + { + hook_event_name: "PostToolUse", + tool_name: "Agent", + cwd: workspace, + tool_input: { + subagent_type: "codex:codex-rescue" + }, + tool_response: { + status: "completed", + agentId: "agent-complete-with-unrelated-active-job", + content: [ + { + type: "text", + text: "Handled the requested task.\n[[codex-task status=complete]]\n" + } + ] + } + }, + { pluginDataDir } + ); + + const payload = parseHookOutput(result); + assert.equal( + payload.hookSpecificOutput.additionalContext, + "codex-rescue has COMPLETED and exited. The text above is the final result. Do NOT wait for a notification or poll status. If it ran with --write, verify changed files with git status." + ); + assert.doesNotMatch(payload.hookSpecificOutput.additionalContext, /background job task-unrelated-running/); +}); + +test("codex-rescue hook scopes active task state to the current session when present", () => { + const workspace = makeTempDir(); + const pluginDataDir = makeTempDir(); + seedHookState(workspace, pluginDataDir, [ + { + id: "task-current-session", + status: "running", + title: "Codex Task", + jobClass: "task", + sessionId: "session-current", + updatedAt: "2026-05-25T10:00:00.000Z" + }, + { + id: "task-other-session-newer", + status: "running", + title: "Codex Task", + jobClass: "task", + sessionId: "session-other", + updatedAt: "2026-05-25T10:30:00.000Z" + } + ]); + + const scopedResult = runHook( + { + hook_event_name: "PostToolUse", + tool_name: "Agent", + session_id: "session-current", + cwd: workspace, + tool_input: { + subagent_type: "codex:codex-rescue" + }, + tool_response: { + status: "completed", + agentId: "agent-tokenless-current-session", + content: [ + { + type: "text", + text: "Codex is still running." + } + ] + } + }, + { pluginDataDir } + ); + + const scopedPayload = parseHookOutput(scopedResult); + assert.equal(scopedPayload.hookSpecificOutput.additionalContext, expectedWatcherContext("task-current-session")); + assert.doesNotMatch(scopedPayload.hookSpecificOutput.additionalContext, /task-other-session-newer/); + + const unscopedResult = runHook( + { + hook_event_name: "PostToolUse", + tool_name: "Agent", + cwd: workspace, + tool_input: { + subagent_type: "codex:codex-rescue" + }, + tool_response: { + status: "completed", + agentId: "agent-tokenless-without-session", + content: [ + { + type: "text", + text: "Codex is still running." + } + ] + } + }, + { pluginDataDir } + ); + + const unscopedPayload = parseHookOutput(unscopedResult); + assert.equal(unscopedPayload.hookSpecificOutput.additionalContext, expectedWatcherContext("task-other-session-newer")); +}); + test("codex-rescue hook reports synchronous tokenless returns neutrally", () => { const result = runHook({ hook_event_name: "PostToolUse", From 153bd63aec3321b41747cb39cc3670b8c24bd439 Mon Sep 17 00:00:00 2001 From: zknpr Date: Mon, 25 May 2026 11:42:06 +0200 Subject: [PATCH 08/18] fix(codex-rescue): watcher-consistent dispatched context + reject --background/--wait conflict MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Hook: the dispatched-token branch now emits the full watcher instruction (buildWatcherContext) instead of a bare "poll /codex:status", matching the state-inference fallback; removed the now-unused buildDispatchedContext. - Companion: handleTask now rejects conflicting --background + --wait with an error, mirroring the --resume/--fresh guard (PR #346 review P2 — previously --background silently won, ignoring the explicit foreground --wait). Tests: 101/101. Co-Authored-By: Claude Opus 4.7 (1M context) --- plugins/codex/scripts/codex-companion.mjs | 3 +++ .../scripts/codex-rescue-completion-hook.mjs | 6 +----- tests/codex-rescue-hook.test.mjs | 3 +-- tests/runtime.test.mjs | 21 +++++++++++++++++++ 4 files changed, 26 insertions(+), 7 deletions(-) diff --git a/plugins/codex/scripts/codex-companion.mjs b/plugins/codex/scripts/codex-companion.mjs index a0544bed..b22a3588 100644 --- a/plugins/codex/scripts/codex-companion.mjs +++ b/plugins/codex/scripts/codex-companion.mjs @@ -846,6 +846,9 @@ async function handleTask(argv) { if (resumeLast && fresh) { throw new Error("Choose either --resume/--resume-last or --fresh."); } + if (options.background && options.wait) { + throw new Error("Choose either --background or --wait, not both."); + } const write = Boolean(options.write); const taskMetadata = buildTaskRunMetadata({ prompt, diff --git a/plugins/codex/scripts/codex-rescue-completion-hook.mjs b/plugins/codex/scripts/codex-rescue-completion-hook.mjs index accf6329..9974bdaf 100644 --- a/plugins/codex/scripts/codex-rescue-completion-hook.mjs +++ b/plugins/codex/scripts/codex-rescue-completion-hook.mjs @@ -26,10 +26,6 @@ function buildWatcherContext(jobId) { return `codex-rescue background job ${jobId} is RUNNING — there is no automatic push notification. To be notified, arm a watcher: run this via the Bash tool with run_in_background=true: node "${COMPANION_SCRIPT_PATH}" status ${jobId} --wait --timeout-ms 1800000 — it blocks until the job is terminal, then exits and re-invokes you. If it returns and the job is still running, re-arm the same command. Do NOT treat the job as done until the watcher reports a terminal status.`; } -function buildDispatchedContext(jobId) { - return `codex-rescue dispatched background job ${jobId}. No automatic notification will arrive; poll /codex:status ${jobId}.`; -} - function extractBashAutoBackgroundOutputPath(text) { const prefixIndex = text.indexOf(BASH_AUTO_BACKGROUND_OUTPUT_PREFIX); if (prefixIndex === -1) { @@ -174,7 +170,7 @@ function buildCompletionContext(input) { // Status tokens are emitted by this codex-rescue return, so they are more // authoritative than any task state that may belong to another concurrent run. if (token?.status === "dispatched" && token.id) { - return buildDispatchedContext(token.id); + return buildWatcherContext(token.id); } // The subagent may paraphrase or drop the dispatched sentinel, so companion diff --git a/tests/codex-rescue-hook.test.mjs b/tests/codex-rescue-hook.test.mjs index 93617598..a21737de 100644 --- a/tests/codex-rescue-hook.test.mjs +++ b/tests/codex-rescue-hook.test.mjs @@ -397,8 +397,7 @@ test("codex-rescue hook injects a dispatched line when the dispatched token is p assert.deepEqual(payload, { hookSpecificOutput: { hookEventName: "PostToolUse", - additionalContext: - "codex-rescue dispatched background job task-abc123. No automatic notification will arrive; poll /codex:status task-abc123." + additionalContext: expectedWatcherContext("task-abc123") } }); }); diff --git a/tests/runtime.test.mjs b/tests/runtime.test.mjs index 93d72ecd..e8d59bd4 100644 --- a/tests/runtime.test.mjs +++ b/tests/runtime.test.mjs @@ -912,6 +912,27 @@ test("task --background emits a dispatched token without promising notification" assert.doesNotMatch(launched.stdout, /will notify|will be notified|you will be notified/i); }); +test("task rejects conflicting background and wait flags before dispatching a job", () => { + const repo = makeTempDir(); + const binDir = makeTempDir(); + installFakeCodex(binDir, "slow-task"); + initGitRepo(repo); + fs.writeFileSync(path.join(repo, "README.md"), "hello\n"); + run("git", ["add", "README.md"], { cwd: repo }); + run("git", ["commit", "-m", "init"], { cwd: repo }); + + const result = run("node", [SCRIPT, "task", "--background", "--wait", "investigate the failing test"], { + cwd: repo, + env: buildEnv(binDir) + }); + + const stateFile = path.join(resolveStateDir(repo), "state.json"); + const stateJobs = fs.existsSync(stateFile) ? JSON.parse(fs.readFileSync(stateFile, "utf8")).jobs : []; + assert.deepEqual(stateJobs, []); + assert.notEqual(result.status, 0); + assert.match(result.stderr, /Choose either --background or --wait, not both\./); +}); + test("review rejects focus text because it is native-review only", () => { const repo = makeTempDir(); const binDir = makeTempDir(); From 186e10350424d9a464ae41ad0b8f2388ff68bcbc Mon Sep 17 00:00:00 2001 From: zknpr Date: Mon, 25 May 2026 12:33:35 +0200 Subject: [PATCH 09/18] fix(codex-rescue): harden background machinery (PR #346 review) - Hook: gate the global active-job fallback on invocation-specific background evidence (BACKGROUND_INTENT_PATTERN) and check the Bash auto-background marker before it, so a sentinel-less foreground failure isn't misrouted to an unrelated concurrent task's job. (P1 + P2) - Companion: enqueueBackgroundTask re-reads the record before spawning and skips the worker if /codex:cancel already marked it terminal in the write->spawn window. (P1) - Companion: the foreground Infinity wait now bails with an actionable error when the detached worker pid exits while the job is still active (pid-liveness, so long runs still wait), plus a brief missing-job retry for the worker-writes-record race. (P2) Tests: 104/104. Co-Authored-By: Claude Opus 4.7 (1M context) --- plugins/codex/scripts/codex-companion.mjs | 147 ++++++++++++++- .../scripts/codex-rescue-completion-hook.mjs | 21 ++- tests/codex-rescue-hook.test.mjs | 106 ++++++++--- tests/runtime.test.mjs | 178 +++++++++++++++++- 4 files changed, 418 insertions(+), 34 deletions(-) diff --git a/plugins/codex/scripts/codex-companion.mjs b/plugins/codex/scripts/codex-companion.mjs index b22a3588..8c946fb4 100644 --- a/plugins/codex/scripts/codex-companion.mjs +++ b/plugins/codex/scripts/codex-companion.mjs @@ -68,6 +68,7 @@ const REVIEW_SCHEMA = path.join(ROOT_DIR, "schemas", "review-output.schema.json" const DEFAULT_STATUS_WAIT_TIMEOUT_MS = 240000; const DEFAULT_STATUS_POLL_INTERVAL_MS = 2000; const FOREGROUND_TASK_POLL_INTERVAL_MS = 100; +const FOREGROUND_TASK_MISSING_JOB_RETRY_MS = 5000; const VALID_REASONING_EFFORTS = new Set(["none", "minimal", "low", "medium", "high", "xhigh"]); const MODEL_ALIASES = new Map([["spark", "gpt-5.3-codex-spark"]]); const STOP_REVIEW_TASK_MARKER = "Run a stop-gate review of the previous Claude turn."; @@ -159,6 +160,38 @@ function sleep(ms) { return new Promise((resolve) => setTimeout(resolve, ms)); } +function normalizeWorkerPid(pid) { + const normalized = Number(pid); + return Number.isInteger(normalized) && normalized > 0 ? normalized : null; +} + +function isProcessAlive(pid) { + // A pid that answers signal 0 still has a live process table entry. EPERM also + // means the process exists but this user cannot signal it. + const normalizedPid = normalizeWorkerPid(pid); + if (!normalizedPid) { + return false; + } + + try { + process.kill(normalizedPid, 0); + return true; + } catch (error) { + return error?.code === "EPERM"; + } +} + +function hasExitedActiveWorker(job) { + // Active jobs with a recorded worker pid should keep that worker alive until + // the job reaches a terminal state; a missing process means launch failed. + const workerPid = normalizeWorkerPid(job?.pid); + return Boolean(workerPid && isActiveJobStatus(job?.status) && !isProcessAlive(workerPid)); +} + +function isMissingJobError(error) { + return String(error?.message ?? "").startsWith("No job found for "); +} + function shorten(text, limit = 96) { const normalized = String(text ?? "").trim().replace(/\s+/g, " "); if (!normalized) { @@ -317,17 +350,49 @@ function findLatestResumableTaskJob(jobs) { async function waitForSingleJobSnapshot(cwd, reference, options = {}) { const timeoutMs = Math.max(0, Number(options.timeoutMs) || DEFAULT_STATUS_WAIT_TIMEOUT_MS); const pollIntervalMs = Math.max(100, Number(options.pollIntervalMs) || DEFAULT_STATUS_POLL_INTERVAL_MS); + const retryMissingJobMs = Math.max(0, Number(options.retryMissingJobMs) || FOREGROUND_TASK_MISSING_JOB_RETRY_MS); const deadline = Date.now() + timeoutMs; - let snapshot = buildSingleJobSnapshot(cwd, reference); + const missingJobDeadline = Date.now() + retryMissingJobMs; + + const readSnapshot = () => { + try { + return buildSingleJobSnapshot(cwd, reference); + } catch (error) { + if (options.retryMissingJob && isMissingJobError(error) && Date.now() < missingJobDeadline) { + return null; + } + throw error; + } + }; + + let snapshot = readSnapshot(); + + while ((!snapshot || isActiveJobStatus(snapshot.job.status)) && Date.now() < deadline) { + if (snapshot && options.failWhenWorkerExits && hasExitedActiveWorker(snapshot.job)) { + return { + ...snapshot, + waitTimedOut: false, + workerExited: true, + timeoutMs + }; + } + + // Foreground task workers write state concurrently with the foreground + // waiter. If a transient read sees no parseable job, retry briefly instead + // of failing before the queued record becomes readable. + const activeDeadline = snapshot ? deadline : Math.min(deadline, missingJobDeadline); + await sleep(Math.min(pollIntervalMs, Math.max(0, activeDeadline - Date.now()))); + snapshot = readSnapshot(); + } - while (isActiveJobStatus(snapshot.job.status) && Date.now() < deadline) { - await sleep(Math.min(pollIntervalMs, Math.max(0, deadline - Date.now()))); + if (!snapshot) { snapshot = buildSingleJobSnapshot(cwd, reference); } return { ...snapshot, waitTimedOut: isActiveJobStatus(snapshot.job.status), + workerExited: Boolean(options.failWhenWorkerExits && hasExitedActiveWorker(snapshot.job)), timeoutMs }; } @@ -677,9 +742,27 @@ function enqueueBackgroundTask(cwd, job, request) { writeJobFile(job.workspaceRoot, job.id, queuedRecord); upsertJob(job.workspaceRoot, queuedRecord); + // PR #346 review: /codex:cancel can land after the durable queued record is + // written but before worker launch; re-read the job and do not spawn a worker + // for a record that is already terminal. + const latestQueuedRecord = readStoredJob(job.workspaceRoot, job.id) ?? queuedRecord; + if (!isActiveJobStatus(latestQueuedRecord.status)) { + appendLogLine(logFile, `Skipped background worker launch because job is ${latestQueuedRecord.status}.`); + return { + payload: { + jobId: job.id, + status: latestQueuedRecord.status ?? "cancelled", + title: job.title, + summary: job.summary, + logFile + }, + logFile + }; + } + const child = spawnDetachedTaskWorker(cwd, job.id); // PR #346 review: keep the pre-spawn record, then persist the worker pid for cancellation. - const spawnedRecord = readStoredJob(job.workspaceRoot, job.id) ?? queuedRecord; + const spawnedRecord = readStoredJob(job.workspaceRoot, job.id) ?? latestQueuedRecord; if (isActiveJobStatus(spawnedRecord.status)) { writeJobFile(job.workspaceRoot, job.id, { ...spawnedRecord, @@ -703,6 +786,45 @@ function enqueueBackgroundTask(cwd, job, request) { }; } +function buildWorkerExitedError(jobId) { + return `background worker exited before completing; check /codex:status ${jobId}`; +} + +function failActiveWorkerJob(workspaceRoot, job, errorMessage) { + // Re-read before marking failed so a concurrent cancellation or completion is + // not overwritten by the foreground waiter. + const latestStoredJob = readStoredJob(workspaceRoot, job.id) ?? {}; + const latestJob = { + ...job, + ...latestStoredJob + }; + if (!isActiveJobStatus(latestJob.status)) { + return latestJob; + } + + const completedAt = nowIso(); + const failedJob = { + ...latestJob, + status: "failed", + phase: "failed", + pid: null, + completedAt, + errorMessage + }; + + writeJobFile(workspaceRoot, job.id, failedJob); + upsertJob(workspaceRoot, { + id: job.id, + status: "failed", + phase: "failed", + pid: null, + completedAt, + errorMessage + }); + appendLogLine(failedJob.logFile, errorMessage); + return failedJob; +} + function ensureTrailingNewline(value) { const output = String(value ?? ""); return output.endsWith("\n") ? output : `${output}\n`; @@ -747,8 +869,23 @@ async function runForegroundTaskWorker(cwd, job, request, options = {}) { const snapshot = await waitForSingleJobSnapshot(cwd, job.id, { // PR #346 review: foreground xhigh waits must survive beyond the 240s status default. timeoutMs: Infinity, - pollIntervalMs: FOREGROUND_TASK_POLL_INTERVAL_MS + pollIntervalMs: FOREGROUND_TASK_POLL_INTERVAL_MS, + retryMissingJob: true, + // PR #346 review: an unbounded wait must still fail fast when its detached + // worker pid has exited while the job remains queued/running. + failWhenWorkerExits: true }); + if (snapshot.workerExited) { + const errorMessage = buildWorkerExitedError(snapshot.job.id); + const failedJob = failActiveWorkerJob(snapshot.workspaceRoot, snapshot.job, errorMessage); + if (options.json) { + outputCommandResult({ job: failedJob, errorMessage }, `${errorMessage}\n`, true); + } else { + process.stderr.write(`${errorMessage}\n`); + } + process.exitCode = 1; + return; + } if (snapshot.waitTimedOut) { outputCommandResult(snapshot, renderJobStatusReport(snapshot.job), options.json); process.exitCode = 1; diff --git a/plugins/codex/scripts/codex-rescue-completion-hook.mjs b/plugins/codex/scripts/codex-rescue-completion-hook.mjs index 9974bdaf..2c7eaddd 100644 --- a/plugins/codex/scripts/codex-rescue-completion-hook.mjs +++ b/plugins/codex/scripts/codex-rescue-completion-hook.mjs @@ -21,6 +21,10 @@ const FAILURE_STATUSES = new Set(["failed", "fail", "error", "errored", "cancell const ACTIVE_TASK_STATUSES = new Set(["queued", "running"]); const BASH_AUTO_BACKGROUND_MARKER = "Command running in background with ID:"; const BASH_AUTO_BACKGROUND_OUTPUT_PREFIX = "Output is being written to:"; +// PR #346 review: state fallback is only safe when this synchronous return +// contains invocation-specific evidence that the task was actually backgrounded. +const BACKGROUND_INTENT_PATTERN = + /running in (?:the\s+)?background|backgrounded|dispatched|you'?ll be notified|you will be notified|will notify|will be notified/i; function buildWatcherContext(jobId) { return `codex-rescue background job ${jobId} is RUNNING — there is no automatic push notification. To be notified, arm a watcher: run this via the Bash tool with run_in_background=true: node "${COMPANION_SCRIPT_PATH}" status ${jobId} --wait --timeout-ms 1800000 — it blocks until the job is terminal, then exits and re-invokes you. If it returns and the job is still running, re-arm the same command. Do NOT treat the job as done until the watcher reports a terminal status.`; @@ -173,13 +177,6 @@ function buildCompletionContext(input) { return buildWatcherContext(token.id); } - // The subagent may paraphrase or drop the dispatched sentinel, so companion - // state is the fallback signal for a queued/running rescue task after return. - const activeTaskJob = findNewestActiveTaskJob(getHookCwd(input), input?.session_id); - if (activeTaskJob) { - return buildWatcherContext(activeTaskJob.id); - } - // Bash auto-backgrounds commands that exceed the ~600s foreground cap. That // synchronous Agent return means the Bash wrapper detached, not that Codex // completed, so report the still-running state before any completion claim. @@ -187,6 +184,16 @@ function buildCompletionContext(input) { return buildBashAutoBackgroundContext(extractBashAutoBackgroundOutputPath(responseText)); } + // PR #346 review: only consult global companion state after this return says + // the task was backgrounded; otherwise an unrelated active task can hide a + // sentinel-less failure from the foreground invocation that just returned. + if (BACKGROUND_INTENT_PATTERN.test(responseText)) { + const activeTaskJob = findNewestActiveTaskJob(getHookCwd(input), input?.session_id); + if (activeTaskJob) { + return buildWatcherContext(activeTaskJob.id); + } + } + // No success evidence: never claim success here. A failure status or an empty // return gets a failure-aware line; everything else gets a neutral "exited, // verify on disk" line. Every branch still says "do not wait" because a diff --git a/tests/codex-rescue-hook.test.mjs b/tests/codex-rescue-hook.test.mjs index a21737de..3193f8bf 100644 --- a/tests/codex-rescue-hook.test.mjs +++ b/tests/codex-rescue-hook.test.mjs @@ -57,7 +57,7 @@ function expectedWatcherContext(jobId) { return `codex-rescue background job ${jobId} is RUNNING — there is no automatic push notification. To be notified, arm a watcher: run this via the Bash tool with run_in_background=true: node "${COMPANION}" status ${jobId} --wait --timeout-ms 1800000 — it blocks until the job is terminal, then exits and re-invokes you. If it returns and the job is still running, re-arm the same command. Do NOT treat the job as done until the watcher reports a terminal status.`; } -test("codex-rescue hook emits a watcher when companion state has an active task job", () => { +test("codex-rescue hook emits a watcher when this return has background phrasing and active task state", () => { const workspace = makeTempDir(); const pluginDataDir = makeTempDir(); seedHookState(workspace, pluginDataDir, [ @@ -119,6 +119,51 @@ test("codex-rescue hook emits a watcher when companion state has an active task assert.ok(payload.hookSpecificOutput.additionalContext.includes(`node "${COMPANION}" status task-running-new`)); }); +test("codex-rescue hook does not infer active state without background evidence in this return", () => { + const workspace = makeTempDir(); + const pluginDataDir = makeTempDir(); + seedHookState(workspace, pluginDataDir, [ + { + id: "task-unrelated-running", + status: "running", + title: "Codex Task", + jobClass: "task", + sessionId: "session-main", + updatedAt: "2026-05-25T10:05:00.000Z" + } + ]); + + const result = runHook( + { + hook_event_name: "PostToolUse", + tool_name: "Agent", + session_id: "session-main", + cwd: workspace, + tool_input: { + subagent_type: "codex:codex-rescue" + }, + tool_response: { + status: "failed", + agentId: "agent-sentinel-less-failure", + content: [ + { + type: "text", + text: "Codex could not be invoked.\n" + } + ] + } + }, + { pluginDataDir } + ); + + const payload = parseHookOutput(result); + assert.equal( + payload.hookSpecificOutput.additionalContext, + "codex-rescue exited WITHOUT a success signal — it is not running, so do not wait for a notification, but the run may have failed or produced no result. Review the output above and `git status`, then re-run or escalate instead of treating it as done." + ); + assert.doesNotMatch(payload.hookSpecificOutput.additionalContext, /background job task-unrelated-running/); +}); + test("codex-rescue hook injects a complete line when the completion token is present", () => { const result = runHook({ hook_event_name: "PostToolUse", @@ -192,7 +237,7 @@ test("codex-rescue hook treats the completion token as authoritative over active assert.doesNotMatch(payload.hookSpecificOutput.additionalContext, /background job task-unrelated-running/); }); -test("codex-rescue hook scopes active task state to the current session when present", () => { +test("codex-rescue hook scopes background-evidenced active task state to the current session", () => { const workspace = makeTempDir(); const pluginDataDir = makeTempDir(); seedHookState(workspace, pluginDataDir, [ @@ -229,7 +274,7 @@ test("codex-rescue hook scopes active task state to the current session when pre content: [ { type: "text", - text: "Codex is still running." + text: "Codex is still running in the background." } ] } @@ -255,7 +300,7 @@ test("codex-rescue hook scopes active task state to the current session when pre content: [ { type: "text", - text: "Codex is still running." + text: "Codex is still running in the background." } ] } @@ -347,24 +392,42 @@ test("codex-rescue hook reports empty tokenless returns without a success signal }); test("codex-rescue hook reports Bash auto-background returns as still running detached", () => { - const result = runHook({ - hook_event_name: "PostToolUse", - tool_name: "Agent", - tool_input: { - subagent_type: "codex:codex-rescue" - }, - tool_response: { - status: "completed", - agentId: "agent-auto-backgrounded", - content: [ - { - type: "text", - text: - "Command running in background with ID: bash-123. Output is being written to: /tmp/codex-rescue-output.log. You will be notified when it completes. To check interim output, use Read on that file path." - } - ] + const workspace = makeTempDir(); + const pluginDataDir = makeTempDir(); + seedHookState(workspace, pluginDataDir, [ + { + id: "task-unrelated-running", + status: "running", + title: "Codex Task", + jobClass: "task", + sessionId: "session-main", + updatedAt: "2026-05-25T10:05:00.000Z" } - }); + ]); + + const result = runHook( + { + hook_event_name: "PostToolUse", + tool_name: "Agent", + session_id: "session-main", + cwd: workspace, + tool_input: { + subagent_type: "codex:codex-rescue" + }, + tool_response: { + status: "completed", + agentId: "agent-auto-backgrounded", + content: [ + { + type: "text", + text: + "Command running in background with ID: bash-123. Output is being written to: /tmp/codex-rescue-output.log. You will be notified when it completes. To check interim output, use Read on that file path." + } + ] + } + }, + { pluginDataDir } + ); const payload = parseHookOutput(result); assert.equal( @@ -372,6 +435,7 @@ test("codex-rescue hook reports Bash auto-background returns as still running de "codex-rescue's Codex run exceeded the foreground time cap and was auto-backgrounded by the Bash tool; it is STILL RUNNING detached. No completion notification will arrive. Do not wait passively — re-check `git status` (if it ran with --write) and/or read the streamed output at /tmp/codex-rescue-output.log until the run lands, then act on the result." ); assert.doesNotMatch(payload.hookSpecificOutput.additionalContext, /COMPLETED and exited/); + assert.doesNotMatch(payload.hookSpecificOutput.additionalContext, /background job task-unrelated-running/); }); test("codex-rescue hook injects a dispatched line when the dispatched token is present", () => { diff --git a/tests/runtime.test.mjs b/tests/runtime.test.mjs index e8d59bd4..fb9f3984 100644 --- a/tests/runtime.test.mjs +++ b/tests/runtime.test.mjs @@ -2,7 +2,7 @@ import fs from "node:fs"; import path from "node:path"; import test from "node:test"; import assert from "node:assert/strict"; -import { spawn } from "node:child_process"; +import { spawn, spawnSync } from "node:child_process"; import { fileURLToPath } from "node:url"; import { buildEnv, installFakeCodex } from "./fake-codex-fixture.mjs"; @@ -16,6 +16,17 @@ const SCRIPT = path.join(PLUGIN_ROOT, "scripts", "codex-companion.mjs"); const STOP_HOOK = path.join(PLUGIN_ROOT, "scripts", "stop-review-gate-hook.mjs"); const SESSION_HOOK = path.join(PLUGIN_ROOT, "scripts", "session-lifecycle-hook.mjs"); +function writePreloadScript(dir, source) { + const preloadPath = path.join(dir, "preload.cjs"); + fs.writeFileSync(preloadPath, source, "utf8"); + return preloadPath; +} + +function extendNodeOptions(preloadPath) { + const previous = process.env.NODE_OPTIONS ? `${process.env.NODE_OPTIONS} ` : ""; + return `${previous}--require=${preloadPath}`; +} + async function waitFor(predicate, { timeoutMs = 5000, intervalMs = 50 } = {}) { const start = Date.now(); while (Date.now() - start < timeoutMs) { @@ -620,6 +631,54 @@ test("foreground task worker wait is not bounded by the default status timeout", assert.match(match[0], /pollIntervalMs:\s*FOREGROUND_TASK_POLL_INTERVAL_MS/); }); +test("foreground task reports an error when the detached worker pid is already dead", () => { + const repo = makeTempDir(); + const binDir = makeTempDir(); + installFakeCodex(binDir); + initGitRepo(repo); + fs.writeFileSync(path.join(repo, "README.md"), "hello\n"); + run("git", ["add", "README.md"], { cwd: repo }); + run("git", ["commit", "-m", "init"], { cwd: repo }); + + const preloadPath = writePreloadScript( + binDir, + ` +const childProcess = require("node:child_process"); +const { syncBuiltinESMExports } = require("node:module"); +const originalSpawn = childProcess.spawn; +childProcess.spawn = function patchedSpawn(command, args, options) { + if (Array.isArray(args) && args[1] === "task-worker") { + return { pid: 987654321, unref() {} }; + } + return originalSpawn.apply(this, arguments); +}; +syncBuiltinESMExports(); +` + ); + + const result = spawnSync(process.execPath, [SCRIPT, "task", "--write", "fix the failing test"], { + cwd: repo, + env: { + ...buildEnv(binDir), + NODE_OPTIONS: extendNodeOptions(preloadPath) + }, + encoding: "utf8", + timeout: 3000, + windowsHide: true + }); + + assert.notEqual(result.error?.code, "ETIMEDOUT", "foreground task hung waiting for a dead worker pid"); + assert.notEqual(result.status, 0); + assert.match(result.stderr, /background worker exited before completing; check \/codex:status/); + + const stateDir = resolveStateDir(repo); + const state = JSON.parse(fs.readFileSync(path.join(stateDir, "state.json"), "utf8")); + const job = state.jobs.find((candidate) => candidate.jobClass === "task"); + assert.equal(job.status, "failed"); + assert.equal(job.pid, null); + assert.match(job.errorMessage, /background worker exited before completing/); +}); + test("task --resume acts like --resume-last without leaking the flag into the prompt", () => { const repo = makeTempDir(); const binDir = makeTempDir(); @@ -884,6 +943,123 @@ test("task --background enqueues a detached worker and exposes per-job status", assert.match(resultPayload.storedJob.rendered, /Handled the requested task/); }); +test("task --background skips spawning when the queued job was cancelled before worker launch", () => { + const repo = makeTempDir(); + const binDir = makeTempDir(); + const spawnMarker = path.join(binDir, "task-worker-spawned.json"); + installFakeCodex(binDir, "slow-task"); + initGitRepo(repo); + fs.writeFileSync(path.join(repo, "README.md"), "hello\n"); + run("git", ["add", "README.md"], { cwd: repo }); + run("git", ["commit", "-m", "init"], { cwd: repo }); + + const preloadPath = writePreloadScript( + binDir, + ` +const fs = require("node:fs"); +const path = require("node:path"); +const childProcess = require("node:child_process"); +const { syncBuiltinESMExports } = require("node:module"); +const spawnMarker = ${JSON.stringify(spawnMarker)}; +const originalWriteFileSync = fs.writeFileSync; +const originalSpawn = childProcess.spawn; +let rewriting = false; + +function cancelQueuedTask(job) { + return { + ...job, + status: "cancelled", + phase: "cancelled", + pid: null, + errorMessage: "Cancelled before worker launch.", + cancelledAt: new Date().toISOString() + }; +} + +function rewriteJson(filePath, payload) { + rewriting = true; + try { + originalWriteFileSync(filePath, JSON.stringify(payload, null, 2) + "\\n", "utf8"); + } finally { + rewriting = false; + } +} + +fs.writeFileSync = function patchedWriteFileSync(file, data, options) { + originalWriteFileSync.apply(this, arguments); + if (rewriting) { + return; + } + + const filePath = String(file); + if (!filePath.endsWith(".json")) { + return; + } + + let parsed; + try { + parsed = JSON.parse(String(data)); + } catch { + return; + } + + if (filePath.includes(path.sep + "jobs" + path.sep) && parsed?.jobClass === "task" && parsed.status === "queued") { + rewriteJson(filePath, cancelQueuedTask(parsed)); + return; + } + + if (filePath.endsWith(path.sep + "state.json") && Array.isArray(parsed.jobs)) { + let changed = false; + const jobs = parsed.jobs.map((job) => { + if (job?.jobClass !== "task" || job.status !== "queued") { + return job; + } + changed = true; + return cancelQueuedTask(job); + }); + if (changed) { + rewriteJson(filePath, { ...parsed, jobs }); + } + } +}; + +childProcess.spawn = function patchedSpawn(command, args, options) { + if (Array.isArray(args) && args[1] === "task-worker") { + originalWriteFileSync(spawnMarker, JSON.stringify({ command, args }, null, 2) + "\\n", "utf8"); + return { pid: 424242, unref() {} }; + } + return originalSpawn.apply(this, arguments); +}; + +syncBuiltinESMExports(); +` + ); + + const launched = run("node", [SCRIPT, "task", "--background", "--json", "investigate the failing test"], { + cwd: repo, + env: { + ...buildEnv(binDir), + NODE_OPTIONS: extendNodeOptions(preloadPath) + } + }); + + assert.equal(launched.status, 0, launched.stderr); + const launchPayload = JSON.parse(launched.stdout); + assert.equal(launchPayload.status, "cancelled"); + assert.equal(fs.existsSync(spawnMarker), false, "expected cancellation guard to skip task-worker spawn"); + + const stateDir = resolveStateDir(repo); + const job = JSON.parse(fs.readFileSync(path.join(stateDir, "state.json"), "utf8")).jobs.find( + (candidate) => candidate.id === launchPayload.jobId + ); + assert.equal(job.status, "cancelled"); + assert.equal(job.pid, null); + + const storedJob = JSON.parse(fs.readFileSync(path.join(stateDir, "jobs", `${launchPayload.jobId}.json`), "utf8")); + assert.equal(storedJob.status, "cancelled"); + assert.equal(storedJob.pid, null); +}); + test("task --background emits a dispatched token without promising notification", () => { const repo = makeTempDir(); const binDir = makeTempDir(); From a7aaff1ebd7c2abba3fc4ac2bcb8fbbd14f38732 Mon Sep 17 00:00:00 2001 From: zknpr Date: Mon, 25 May 2026 13:00:31 +0200 Subject: [PATCH 10/18] fix(codex-rescue): make launch/cancel path atomic and crash-safe (PR #346 review) Resolve the launch/cancel race class and spawn-error crash flagged on 186e103 by routing every launch/cancel job-record mutation through a single atomic updateState read-modify-write instead of read-then-write checks: - P1 cancel-resurrect race (persistSpawnedTaskWorkerPid): the worker pid is now written and cancellation re-checked inside one updateState transition. If a concurrent /codex:cancel already made the job terminal, the pid is NOT written and the just-spawned worker is killed (terminateProcessTree) instead of being resurrected by stale launch data. - P1 unhandled detached spawn error (spawnDetachedTaskWorker): wrap spawn in try/catch for sync failures, attach child.once('error') for async failures, and treat a missing child.pid as a launch failure. Any spawn failure atomically marks the job failed (markTaskLaunchFailed) so the foreground waiter surfaces a structured error instead of crashing or hanging. - P2 dispatched sentinel on terminal launch (renderQueuedTaskLaunch): a terminal enqueue payload (cancelled/failed before worker launch) now renders a plain terminal message with NO dispatched sentinel or watcher guidance, so hooks and humans never treat a non-started job as live background work. New helpers isTerminalJobRecord/resolveLaunchStatus check both the summary state row and the per-job file, since either can be observed first during a race. Tests: 3 new runtime cases (cancel-after-spawn kills the orphan and keeps the job cancelled with no pid; spawn failure yields a structured failed payload with no hang/crash; cancel-before-launch renders terminal text with no dispatched token). Full suite 107/107 green. Co-Authored-By: Claude Opus 4.7 (1M context) --- plugins/codex/scripts/codex-companion.mjs | 271 +++++++++++++++---- tests/runtime.test.mjs | 302 ++++++++++++++++++++++ 2 files changed, 529 insertions(+), 44 deletions(-) diff --git a/plugins/codex/scripts/codex-companion.mjs b/plugins/codex/scripts/codex-companion.mjs index 8c946fb4..be162d9e 100644 --- a/plugins/codex/scripts/codex-companion.mjs +++ b/plugins/codex/scripts/codex-companion.mjs @@ -29,6 +29,7 @@ import { getConfig, listJobs, setConfig, + updateState, upsertJob, writeJobFile } from "./lib/state.mjs"; @@ -323,6 +324,22 @@ function isActiveJobStatus(status) { return status === "queued" || status === "running"; } +function isTerminalJobRecord(stateJob, storedJob = null) { + // PR #346 review: launch/cancel races can expose either the summary state row + // or the per-job file first, so both records must agree the job is active + // before a launcher writes worker-owned fields such as pid. + return !isActiveJobStatus(stateJob?.status) || Boolean(storedJob && !isActiveJobStatus(storedJob.status)); +} + +function resolveLaunchStatus(stateJob, storedJob = null, fallback = "queued") { + for (const status of [stateJob?.status, storedJob?.status]) { + if (status && !isActiveJobStatus(status)) { + return status; + } + } + return stateJob?.status ?? storedJob?.status ?? fallback; +} + function getCurrentClaudeSessionId() { return process.env[SESSION_ID_ENV] ?? null; } @@ -619,6 +636,17 @@ function buildTaskRunMetadata({ prompt, resumeLast = false }) { } function renderQueuedTaskLaunch(payload) { + // PR #346 review: a terminal enqueue payload means launch did not dispatch a + // worker, so emitting the dispatched sentinel would make hooks and humans + // believe there is live background work to poll. + if (!isActiveJobStatus(payload.status)) { + if (payload.status === "cancelled") { + return `Codex task ${payload.jobId} was cancelled before a worker launched; no work started.\n`; + } + const detail = payload.errorMessage ? `: ${payload.errorMessage}` : "."; + return `Codex task ${payload.jobId} failed before a worker launched${detail}\n`; + } + const statusToken = buildTaskDispatchedStatusToken(payload.jobId); return [ statusToken, @@ -712,17 +740,153 @@ async function runForegroundCommand(job, runner, options = {}) { return execution; } -function spawnDetachedTaskWorker(cwd, jobId) { - const scriptPath = path.join(ROOT_DIR, "scripts", "codex-companion.mjs"); - const child = spawn(process.execPath, [scriptPath, "task-worker", "--cwd", cwd, "--job-id", jobId], { - cwd, - env: process.env, - detached: true, - stdio: "ignore", - windowsHide: true +function formatSpawnFailureMessage(error) { + const detail = error instanceof Error ? error.message : String(error ?? ""); + return detail ? `Failed to launch background worker: ${detail}` : "Failed to launch background worker."; +} + +function persistQueuedTaskRecord(workspaceRoot, queuedRecord) { + // PR #346 review: the launch record is inserted through updateState so the + // summary row and stored job file are created from the same read-modify-write + // transition before any worker is allowed to observe the request. + updateState(workspaceRoot, (state) => { + const existingIndex = state.jobs.findIndex((candidate) => candidate.id === queuedRecord.id); + const nextRecord = { + ...queuedRecord, + updatedAt: nowIso() + }; + if (existingIndex === -1) { + state.jobs.unshift(nextRecord); + } else { + state.jobs[existingIndex] = { + ...state.jobs[existingIndex], + ...nextRecord + }; + } + writeJobFile(workspaceRoot, queuedRecord.id, nextRecord); + }); +} + +function markTaskLaunchFailed(workspaceRoot, jobId, errorMessage, fallbackLogFile = null) { + let failedJob = null; + let preservedJob = null; + + // PR #346 review: launch failures must become a durable failed job via one + // state read-modify-write, otherwise foreground waiters can hang on a queued + // record whose worker never existed. + updateState(workspaceRoot, (state) => { + const jobIndex = state.jobs.findIndex((candidate) => candidate.id === jobId); + if (jobIndex === -1) { + return; + } + + const stateJob = state.jobs[jobIndex]; + const storedJob = readStoredJob(workspaceRoot, jobId); + if (isTerminalJobRecord(stateJob, storedJob)) { + preservedJob = { + ...stateJob, + ...(storedJob ?? {}) + }; + return; + } + + const completedAt = nowIso(); + failedJob = { + ...(storedJob ?? {}), + ...stateJob, + status: "failed", + phase: "failed", + pid: null, + completedAt, + updatedAt: completedAt, + errorMessage, + logFile: stateJob.logFile ?? storedJob?.logFile ?? fallbackLogFile + }; + state.jobs[jobIndex] = failedJob; + writeJobFile(workspaceRoot, jobId, failedJob); + }); + + if (failedJob) { + appendLogLine(failedJob.logFile, errorMessage); + return failedJob; + } + return preservedJob; +} + +function persistSpawnedTaskWorkerPid(workspaceRoot, jobId, childPid) { + let shouldKillWorker = false; + let launchStatus = "queued"; + let updatedJob = null; + + // PR #346 review: pid persistence must re-check cancellation inside the same + // state read-modify-write that writes the pid. A cancel that already made the + // job terminal wins, and the just-spawned worker is killed after the state + // update instead of being resurrected by stale launch data. + updateState(workspaceRoot, (state) => { + const jobIndex = state.jobs.findIndex((candidate) => candidate.id === jobId); + if (jobIndex === -1) { + shouldKillWorker = true; + launchStatus = "cancelled"; + return; + } + + const stateJob = state.jobs[jobIndex]; + const storedJob = readStoredJob(workspaceRoot, jobId); + if (isTerminalJobRecord(stateJob, storedJob)) { + shouldKillWorker = true; + launchStatus = resolveLaunchStatus(stateJob, storedJob, "cancelled"); + updatedJob = { + ...stateJob, + ...(storedJob ?? {}) + }; + return; + } + + const updatedAt = nowIso(); + updatedJob = { + ...(storedJob ?? {}), + ...stateJob, + pid: childPid, + updatedAt + }; + state.jobs[jobIndex] = updatedJob; + writeJobFile(workspaceRoot, jobId, updatedJob); + launchStatus = updatedJob.status; }); + + return { shouldKillWorker, launchStatus, job: updatedJob }; +} + +function spawnDetachedTaskWorker(cwd, jobId, options = {}) { + const scriptPath = path.join(ROOT_DIR, "scripts", "codex-companion.mjs"); + let child = null; + + try { + child = spawn(process.execPath, [scriptPath, "task-worker", "--cwd", cwd, "--job-id", jobId], { + cwd, + env: process.env, + detached: true, + stdio: "ignore", + windowsHide: true + }); + } catch (error) { + return { child: null, error }; + } + + const handleSpawnError = (error) => { + options.onError?.(error); + }; + if (typeof child.once === "function") { + child.once("error", handleSpawnError); + } else if (typeof child.on === "function") { + child.on("error", handleSpawnError); + } + child.unref(); - return child; + if (!normalizeWorkerPid(child.pid)) { + return { child, error: new Error("missing worker pid") }; + } + return { child, error: null }; } function enqueueBackgroundTask(cwd, job, request) { @@ -739,8 +903,7 @@ function enqueueBackgroundTask(cwd, job, request) { logFile, request }; - writeJobFile(job.workspaceRoot, job.id, queuedRecord); - upsertJob(job.workspaceRoot, queuedRecord); + persistQueuedTaskRecord(job.workspaceRoot, queuedRecord); // PR #346 review: /codex:cancel can land after the durable queued record is // written but before worker launch; re-read the job and do not spawn a worker @@ -760,24 +923,36 @@ function enqueueBackgroundTask(cwd, job, request) { }; } - const child = spawnDetachedTaskWorker(cwd, job.id); - // PR #346 review: keep the pre-spawn record, then persist the worker pid for cancellation. - const spawnedRecord = readStoredJob(job.workspaceRoot, job.id) ?? latestQueuedRecord; - if (isActiveJobStatus(spawnedRecord.status)) { - writeJobFile(job.workspaceRoot, job.id, { - ...spawnedRecord, - pid: child.pid ?? null - }); - upsertJob(job.workspaceRoot, { - id: job.id, - pid: child.pid ?? null - }); + const recordLaunchFailure = (error) => + markTaskLaunchFailed(job.workspaceRoot, job.id, formatSpawnFailureMessage(error), logFile); + const { child, error: spawnError } = spawnDetachedTaskWorker(cwd, job.id, { + onError: recordLaunchFailure + }); + if (spawnError || !child) { + const failedJob = recordLaunchFailure(spawnError ?? new Error("missing child process")); + return { + payload: { + jobId: job.id, + status: failedJob?.status ?? "failed", + title: job.title, + summary: job.summary, + logFile, + errorMessage: failedJob?.errorMessage ?? formatSpawnFailureMessage(spawnError) + }, + logFile + }; + } + + const { shouldKillWorker, launchStatus } = persistSpawnedTaskWorkerPid(job.workspaceRoot, job.id, child.pid); + if (shouldKillWorker) { + appendLogLine(logFile, `Terminating background worker because job is ${launchStatus}.`); + terminateProcessTree(child.pid); } return { payload: { jobId: job.id, - status: "queued", + status: launchStatus, title: job.title, summary: job.summary, logFile @@ -1184,27 +1359,35 @@ async function handleCancel(argv) { appendLogLine(job.logFile, "Cancelled by user."); const completedAt = nowIso(); - const nextJob = { - ...job, - status: "cancelled", - phase: "cancelled", - pid: null, - completedAt, - errorMessage: "Cancelled by user." - }; + let nextJob = null; + // PR #346 review: cancellation is the terminal launch/cancel transition, so + // the state row and stored job file are updated from one read-modify-write + // instead of a stale stored-job write followed by a separate state patch. + updateState(workspaceRoot, (state) => { + const jobIndex = state.jobs.findIndex((candidate) => candidate.id === job.id); + const stateJob = jobIndex === -1 ? job : state.jobs[jobIndex]; + const latestStoredJob = readStoredJob(workspaceRoot, job.id) ?? existing; + nextJob = { + ...latestStoredJob, + ...stateJob, + status: "cancelled", + phase: "cancelled", + pid: null, + completedAt, + updatedAt: completedAt, + errorMessage: "Cancelled by user." + }; - writeJobFile(workspaceRoot, job.id, { - ...existing, - ...nextJob, - cancelledAt: completedAt - }); - upsertJob(workspaceRoot, { - id: job.id, - status: "cancelled", - phase: "cancelled", - pid: null, - errorMessage: "Cancelled by user.", - completedAt + if (jobIndex === -1) { + state.jobs.unshift(nextJob); + } else { + state.jobs[jobIndex] = nextJob; + } + + writeJobFile(workspaceRoot, job.id, { + ...nextJob, + cancelledAt: completedAt + }); }); const payload = { diff --git a/tests/runtime.test.mjs b/tests/runtime.test.mjs index fb9f3984..a18a48ce 100644 --- a/tests/runtime.test.mjs +++ b/tests/runtime.test.mjs @@ -1060,6 +1060,308 @@ syncBuiltinESMExports(); assert.equal(storedJob.pid, null); }); +test("task --background does not persist a worker pid when cancellation wins after spawn", () => { + const repo = makeTempDir(); + const binDir = makeTempDir(); + const killMarker = path.join(binDir, "task-worker-killed.json"); + installFakeCodex(binDir, "slow-task"); + initGitRepo(repo); + fs.writeFileSync(path.join(repo, "README.md"), "hello\n"); + run("git", ["add", "README.md"], { cwd: repo }); + run("git", ["commit", "-m", "init"], { cwd: repo }); + + const preloadPath = writePreloadScript( + binDir, + ` +const fs = require("node:fs"); +const path = require("node:path"); +const childProcess = require("node:child_process"); +const processModule = require("node:process"); +const { syncBuiltinESMExports } = require("node:module"); +const killMarker = ${JSON.stringify(killMarker)}; +const originalReadFileSync = fs.readFileSync; +const originalWriteFileSync = fs.writeFileSync; +const originalSpawn = childProcess.spawn; +const originalKill = processModule.kill; +let spawnedTaskWorker = false; +let injectedCancellation = false; +let rewriting = false; + +function encodeLikeOriginal(value, options) { + return options === "utf8" || options?.encoding === "utf8" ? value : Buffer.from(value); +} + +function cancelTaskJob(job) { + return { + ...job, + status: "cancelled", + phase: "cancelled", + pid: null, + errorMessage: "Cancelled after worker spawn.", + cancelledAt: new Date().toISOString() + }; +} + +function writeJson(filePath, payload) { + rewriting = true; + try { + originalWriteFileSync(filePath, JSON.stringify(payload, null, 2) + "\\n", "utf8"); + } finally { + rewriting = false; + } +} + +fs.readFileSync = function patchedReadFileSync(file, options) { + const filePath = String(file); + const data = originalReadFileSync.apply(this, arguments); + if (!spawnedTaskWorker || injectedCancellation || rewriting || !filePath.endsWith(path.sep + "state.json")) { + return data; + } + + let parsed; + try { + parsed = JSON.parse(String(data)); + } catch { + return data; + } + + if (!Array.isArray(parsed.jobs)) { + return data; + } + + let cancelledJob = null; + const jobs = parsed.jobs.map((job) => { + if (!cancelledJob && job?.jobClass === "task" && job.status === "queued") { + cancelledJob = cancelTaskJob(job); + return cancelledJob; + } + return job; + }); + if (!cancelledJob) { + return data; + } + + injectedCancellation = true; + const cancelledState = { ...parsed, jobs }; + writeJson(filePath, cancelledState); + writeJson(path.join(path.dirname(filePath), "jobs", cancelledJob.id + ".json"), cancelledJob); + return encodeLikeOriginal(JSON.stringify(cancelledState, null, 2) + "\\n", options); +}; + +childProcess.spawn = function patchedSpawn(command, args, options) { + if (Array.isArray(args) && args[1] === "task-worker") { + spawnedTaskWorker = true; + return { + pid: 424242, + unref() {}, + once() { + return this; + }, + on() { + return this; + } + }; + } + return originalSpawn.apply(this, arguments); +}; + +processModule.kill = function patchedKill(pid, signal) { + if (pid === -424242 || pid === 424242) { + originalWriteFileSync(killMarker, JSON.stringify({ pid, signal }, null, 2) + "\\n", "utf8"); + return true; + } + return originalKill.apply(this, arguments); +}; + +syncBuiltinESMExports(); +` + ); + + const launched = run("node", [SCRIPT, "task", "--background", "--json", "investigate the failing test"], { + cwd: repo, + env: { + ...buildEnv(binDir), + NODE_OPTIONS: extendNodeOptions(preloadPath) + } + }); + + assert.equal(launched.status, 0, launched.stderr); + const launchPayload = JSON.parse(launched.stdout); + assert.equal(launchPayload.status, "cancelled"); + assert.equal(fs.existsSync(killMarker), true, "expected the orphaned task-worker process group to be killed"); + + const stateDir = resolveStateDir(repo); + const job = JSON.parse(fs.readFileSync(path.join(stateDir, "state.json"), "utf8")).jobs.find( + (candidate) => candidate.id === launchPayload.jobId + ); + assert.equal(job.status, "cancelled"); + assert.equal(job.pid, null); + + const storedJob = JSON.parse(fs.readFileSync(path.join(stateDir, "jobs", `${launchPayload.jobId}.json`), "utf8")); + assert.equal(storedJob.status, "cancelled"); + assert.equal(storedJob.pid, null); +}); + +test("foreground task reports a structured failure when the detached worker spawn fails", () => { + const repo = makeTempDir(); + const binDir = makeTempDir(); + installFakeCodex(binDir); + initGitRepo(repo); + fs.writeFileSync(path.join(repo, "README.md"), "hello\n"); + run("git", ["add", "README.md"], { cwd: repo }); + run("git", ["commit", "-m", "init"], { cwd: repo }); + + const preloadPath = writePreloadScript( + binDir, + ` +const childProcess = require("node:child_process"); +const { EventEmitter } = require("node:events"); +const { syncBuiltinESMExports } = require("node:module"); +const originalSpawn = childProcess.spawn; + +childProcess.spawn = function patchedSpawn(command, args, options) { + if (Array.isArray(args) && args[1] === "task-worker") { + const child = new EventEmitter(); + child.pid = undefined; + child.unref = function unref() {}; + process.nextTick(() => { + const error = new Error("spawn ENOENT task-worker"); + error.code = "ENOENT"; + child.emit("error", error); + }); + return child; + } + return originalSpawn.apply(this, arguments); +}; + +syncBuiltinESMExports(); +` + ); + + const result = spawnSync(process.execPath, [SCRIPT, "task", "--write", "--json", "fix the failing test"], { + cwd: repo, + env: { + ...buildEnv(binDir), + NODE_OPTIONS: extendNodeOptions(preloadPath) + }, + encoding: "utf8", + timeout: 3000, + windowsHide: true + }); + + assert.notEqual(result.error?.code, "ETIMEDOUT", "foreground task hung waiting for a worker that never launched"); + assert.notEqual(result.status, 0); + assert.doesNotMatch(result.stderr, /Unhandled 'error' event/); + + const payload = JSON.parse(result.stdout); + assert.equal(payload.job.status, "failed"); + assert.equal(payload.job.pid, null); + assert.match(payload.storedJob.errorMessage, /Failed to launch background worker|missing worker pid|spawn ENOENT/); +}); + +test("task --background cancelled before launch renders terminal text without a dispatched token", () => { + const repo = makeTempDir(); + const binDir = makeTempDir(); + installFakeCodex(binDir, "slow-task"); + initGitRepo(repo); + fs.writeFileSync(path.join(repo, "README.md"), "hello\n"); + run("git", ["add", "README.md"], { cwd: repo }); + run("git", ["commit", "-m", "init"], { cwd: repo }); + + const preloadPath = writePreloadScript( + binDir, + ` +const fs = require("node:fs"); +const path = require("node:path"); +const childProcess = require("node:child_process"); +const { syncBuiltinESMExports } = require("node:module"); +const originalWriteFileSync = fs.writeFileSync; +const originalSpawn = childProcess.spawn; +let rewriting = false; + +function cancelQueuedTask(job) { + return { + ...job, + status: "cancelled", + phase: "cancelled", + pid: null, + errorMessage: "Cancelled before worker launch.", + cancelledAt: new Date().toISOString() + }; +} + +function rewriteJson(filePath, payload) { + rewriting = true; + try { + originalWriteFileSync(filePath, JSON.stringify(payload, null, 2) + "\\n", "utf8"); + } finally { + rewriting = false; + } +} + +fs.writeFileSync = function patchedWriteFileSync(file, data, options) { + originalWriteFileSync.apply(this, arguments); + if (rewriting) { + return; + } + + const filePath = String(file); + if (!filePath.endsWith(".json")) { + return; + } + + let parsed; + try { + parsed = JSON.parse(String(data)); + } catch { + return; + } + + if (filePath.includes(path.sep + "jobs" + path.sep) && parsed?.jobClass === "task" && parsed.status === "queued") { + rewriteJson(filePath, cancelQueuedTask(parsed)); + return; + } + + if (filePath.endsWith(path.sep + "state.json") && Array.isArray(parsed.jobs)) { + let changed = false; + const jobs = parsed.jobs.map((job) => { + if (job?.jobClass !== "task" || job.status !== "queued") { + return job; + } + changed = true; + return cancelQueuedTask(job); + }); + if (changed) { + rewriteJson(filePath, { ...parsed, jobs }); + } + } +}; + +childProcess.spawn = function patchedSpawn(command, args, options) { + if (Array.isArray(args) && args[1] === "task-worker") { + throw new Error("task-worker should not launch after cancellation"); + } + return originalSpawn.apply(this, arguments); +}; + +syncBuiltinESMExports(); +` + ); + + const launched = run("node", [SCRIPT, "task", "--background", "investigate the failing test"], { + cwd: repo, + env: { + ...buildEnv(binDir), + NODE_OPTIONS: extendNodeOptions(preloadPath) + } + }); + + assert.equal(launched.status, 0, launched.stderr); + assert.doesNotMatch(launched.stdout, /\[\[codex-task status=dispatched/); + assert.doesNotMatch(launched.stdout, /dispatched as background job/); + assert.match(launched.stdout, /was cancelled before a worker launched; no work started\./); +}); + test("task --background emits a dispatched token without promising notification", () => { const repo = makeTempDir(); const binDir = makeTempDir(); From 5feffb03bc2616d60852ee6314329fb7722d9d1b Mon Sep 17 00:00:00 2001 From: zknpr Date: Mon, 25 May 2026 13:26:48 +0200 Subject: [PATCH 11/18] fix(codex-rescue): status --wait reconciles a dead background worker instead of hanging MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The background-completion watcher reaches detached workers through `status --wait`, but that path called waitForSingleJobSnapshot WITHOUT failWhenWorkerExits — the dead-worker liveness check (hasExitedActiveWorker) was only wired into the foreground task path. So when a --background worker died without writing a terminal status (crash/kill/teardown), the job stayed `running` forever, the watcher polled that stale status until --timeout-ms elapsed (e.g. 30 min) and returned a misleading timeout, and the same stale `running` blocked /codex:result and kept the completion hook treating the job as active. Fix (reuse the proven foreground helpers, no new machinery): - handleStatus --wait now passes failWhenWorkerExits: true; on workerExited it reconciles via failActiveWorkerJob (persists status=failed, pid=null, errorMessage) and renders the crash, mirroring runForegroundTaskWorker. - A plain status read also reconciles a dead active worker, so result, hooks, and later status callers see the durable failure (failActiveWorkerJob persists globally, so the first read repairs state for everyone). No false positives: hasExitedActiveWorker requires a recorded pid that is not alive, so pid-null/queued and live-pid jobs are never reconciled. Tests (+3, suite 110/110): a dead-pid running job makes status --wait return promptly as failed (no ETIMEDOUT) and persists failed/pid null; a plain status read reconciles the same; a live-pid (process.pid) running job still times out and is NOT reconciled (false-positive guard). Co-Authored-By: Claude Opus 4.7 (1M context) --- plugins/codex/scripts/codex-companion.mjs | 26 ++++- tests/runtime.test.mjs | 111 ++++++++++++++++++++++ 2 files changed, 135 insertions(+), 2 deletions(-) diff --git a/plugins/codex/scripts/codex-companion.mjs b/plugins/codex/scripts/codex-companion.mjs index be162d9e..10b6a0d7 100644 --- a/plugins/codex/scripts/codex-companion.mjs +++ b/plugins/codex/scripts/codex-companion.mjs @@ -1252,6 +1252,16 @@ async function handleTaskWorker(argv) { ); } +function reconcileExitedStatusWorker(snapshot) { + if (!snapshot.workerExited && !hasExitedActiveWorker(snapshot.job)) { + return snapshot.job; + } + + // Watcher dead-worker reliability fix: status/status --wait are the first + // readers that can convert a stale running worker into a durable failure. + return failActiveWorkerJob(snapshot.workspaceRoot, snapshot.job, buildWorkerExitedError(snapshot.job.id)); +} + async function handleStatus(argv) { const { options, positionals } = parseCommandInput(argv, { valueOptions: ["cwd", "timeout-ms", "poll-interval-ms"], @@ -1264,10 +1274,22 @@ async function handleStatus(argv) { const snapshot = options.wait ? await waitForSingleJobSnapshot(cwd, reference, { timeoutMs: options["timeout-ms"], - pollIntervalMs: options["poll-interval-ms"] + pollIntervalMs: options["poll-interval-ms"], + // Watcher dead-worker reliability fix: the background completion + // watcher reaches detached workers through status --wait. + failWhenWorkerExits: true }) : buildSingleJobSnapshot(cwd, reference); - outputCommandResult(snapshot, renderJobStatusReport(snapshot.job), options.json); + if (snapshot.workerExited) { + const failedJob = reconcileExitedStatusWorker(snapshot); + outputCommandResult({ ...snapshot, job: failedJob }, renderJobStatusReport(failedJob), options.json); + return; + } + + // Watcher dead-worker reliability fix: a plain status read also repairs + // stale running state for result, hooks, and later status callers. + const reconciledJob = reconcileExitedStatusWorker(snapshot); + outputCommandResult({ ...snapshot, job: reconciledJob }, renderJobStatusReport(reconciledJob), options.json); return; } diff --git a/tests/runtime.test.mjs b/tests/runtime.test.mjs index a18a48ce..4111d9f8 100644 --- a/tests/runtime.test.mjs +++ b/tests/runtime.test.mjs @@ -39,6 +39,39 @@ async function waitFor(predicate, { timeoutMs = 5000, intervalMs = 50 } = {}) { throw new Error("Timed out waiting for condition."); } +function seedRunningTaskJob(workspace, { id, pid }) { + // Status reads the summary state row first and the reconciler re-reads the + // stored job file before persisting a terminal state. + const stateDir = resolveStateDir(workspace); + const jobsDir = path.join(stateDir, "jobs"); + fs.mkdirSync(jobsDir, { recursive: true }); + + const logFile = path.join(jobsDir, `${id}.log`); + const job = { + id, + status: "running", + phase: "running", + title: "Codex Task", + jobClass: "task", + summary: "Investigate flaky test", + pid, + logFile, + createdAt: "2026-03-18T15:30:00.000Z", + startedAt: "2026-03-18T15:30:01.000Z", + updatedAt: "2026-03-18T15:30:02.000Z" + }; + + fs.writeFileSync(logFile, "[2026-03-18T15:30:00.000Z] Starting Codex Task.\n", "utf8"); + fs.writeFileSync(path.join(jobsDir, `${id}.json`), `${JSON.stringify(job, null, 2)}\n`, "utf8"); + fs.writeFileSync( + path.join(stateDir, "state.json"), + `${JSON.stringify({ version: 1, config: { stopReviewGate: false }, jobs: [job] }, null, 2)}\n`, + "utf8" + ); + + return { stateDir, jobsDir, job, logFile }; +} + test("setup reports ready when fake codex is installed and authenticated", () => { const binDir = makeTempDir(); installFakeCodex(binDir); @@ -1794,6 +1827,84 @@ test("status --wait times out cleanly when a job is still active", () => { assert.equal(payload.waitTimedOut, true); }); +test("status --wait reports a crashed worker instead of hanging when the worker pid is dead", () => { + const workspace = makeTempDir(); + const { stateDir } = seedRunningTaskJob(workspace, { id: "task-dead-wait", pid: 987654321 }); + + const result = spawnSync( + process.execPath, + [SCRIPT, "status", "task-dead-wait", "--wait", "--timeout-ms", "600000", "--json"], + { + cwd: workspace, + encoding: "utf8", + timeout: 3000, + windowsHide: true + } + ); + + assert.notEqual(result.error?.code, "ETIMEDOUT", "status --wait hung waiting for a dead worker pid"); + assert.equal(result.status, 0, result.stderr); + const payload = JSON.parse(result.stdout); + assert.equal(payload.job.id, "task-dead-wait"); + assert.equal(payload.job.status, "failed"); + assert.match(payload.job.errorMessage, /background worker exited before completing/); + + const state = JSON.parse(fs.readFileSync(path.join(stateDir, "state.json"), "utf8")); + const job = state.jobs.find((candidate) => candidate.id === "task-dead-wait"); + assert.equal(job.status, "failed"); + assert.equal(job.pid, null); + assert.match(job.errorMessage, /background worker exited before completing/); +}); + +test("status (no --wait) reconciles a dead-worker job to failed", () => { + const workspace = makeTempDir(); + const { stateDir } = seedRunningTaskJob(workspace, { id: "task-dead-nowait", pid: 987654321 }); + + const result = run("node", [SCRIPT, "status", "task-dead-nowait", "--json"], { + cwd: workspace + }); + + assert.equal(result.status, 0, result.stderr); + const payload = JSON.parse(result.stdout); + assert.equal(payload.job.id, "task-dead-nowait"); + assert.equal(payload.job.status, "failed"); + assert.match(payload.job.errorMessage, /background worker exited before completing/); + + const state = JSON.parse(fs.readFileSync(path.join(stateDir, "state.json"), "utf8")); + const job = state.jobs.find((candidate) => candidate.id === "task-dead-nowait"); + assert.equal(job.status, "failed"); + assert.equal(job.pid, null); + assert.match(job.errorMessage, /background worker exited before completing/); +}); + +test("status --wait keeps waiting when the active worker pid is alive", () => { + const workspace = makeTempDir(); + const { stateDir } = seedRunningTaskJob(workspace, { id: "task-live-pid", pid: process.pid }); + + const result = spawnSync( + process.execPath, + [SCRIPT, "status", "task-live-pid", "--wait", "--timeout-ms", "60", "--json"], + { + cwd: workspace, + encoding: "utf8", + timeout: 3000, + windowsHide: true + } + ); + + assert.notEqual(result.error?.code, "ETIMEDOUT", "status --wait did not honor the short timeout"); + assert.equal(result.status, 0, result.stderr); + const payload = JSON.parse(result.stdout); + assert.equal(payload.job.id, "task-live-pid"); + assert.equal(payload.job.status, "running"); + assert.equal(payload.waitTimedOut, true); + + const state = JSON.parse(fs.readFileSync(path.join(stateDir, "state.json"), "utf8")); + const job = state.jobs.find((candidate) => candidate.id === "task-live-pid"); + assert.equal(job.status, "running"); + assert.equal(job.pid, process.pid); +}); + test("result returns the stored output for the latest finished job by default", () => { const workspace = makeTempDir(); const stateDir = resolveStateDir(workspace); From bb53f47f46a4bd6761972e13c43bcd232e3e2f65 Mon Sep 17 00:00:00 2001 From: zknpr Date: Mon, 25 May 2026 13:40:51 +0200 Subject: [PATCH 12/18] fix(codex-rescue): trust Agent tool status over echoed tokens; exit non-zero on failed launch (PR #346 review) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Address the 4 findings from the Codex review of a7aaff1: - P1 (hook): the completion hook honored a [[codex-task status=complete]] / [[codex-task status=dispatched]] line in the response body BEFORE checking the Agent tool status, so a failed return whose text merely echoed the literal token (model output or prompt echo) was misclassified as success / active dispatch — suppressing retries and pointing watchers at a phantom job. The structured tool status is authoritative: a known failure status now short-circuits to the failure context before any token handling. A genuine success (status completed/success/none) still honors the complete token. - P2 (hook): isSynchronousAgentReturn treated every status except async_launched/running as terminal, including sub_agent_entered (interactive subagent handoff). The hook now treats sub_agent_entered as non-terminal and stays silent so it never tells the parent "exited" while the subagent is live. - P1 (companion): handleTask --background printed the launch payload and always returned exit 0, even when enqueueBackgroundTask reported status=failed (worker spawn/pid failure). It now sets a non-zero exit code on a failed launch so callers don't treat a failed dispatch as success (cancelled stays exit 0). Tests (+4, suite 114/114): a failed return echoing the complete token yields the failure context (not COMPLETED); a failed return echoing a dispatched token yields the failure context with no watcher instruction; a sub_agent_entered return emits no context; task --background exits non-zero with status=failed on spawn failure. Co-Authored-By: Claude Opus 4.7 (1M context) --- plugins/codex/scripts/codex-companion.mjs | 5 ++ .../scripts/codex-rescue-completion-hook.mjs | 17 ++++- tests/codex-rescue-hook.test.mjs | 74 +++++++++++++++++++ tests/runtime.test.mjs | 55 ++++++++++++++ 4 files changed, 150 insertions(+), 1 deletion(-) diff --git a/plugins/codex/scripts/codex-companion.mjs b/plugins/codex/scripts/codex-companion.mjs index 10b6a0d7..e87376d2 100644 --- a/plugins/codex/scripts/codex-companion.mjs +++ b/plugins/codex/scripts/codex-companion.mjs @@ -1183,6 +1183,11 @@ async function handleTask(argv) { }); const { payload } = enqueueBackgroundTask(cwd, job, request); outputCommandResult(payload, renderQueuedTaskLaunch(payload), options.json); + // PR #346 review (P1): a failed launch must not exit 0, or callers treat a failed + // dispatch as a successful one and skip retry/escalation. + if (payload.status === "failed") { + process.exitCode = 1; + } return; } diff --git a/plugins/codex/scripts/codex-rescue-completion-hook.mjs b/plugins/codex/scripts/codex-rescue-completion-hook.mjs index 2c7eaddd..31168501 100644 --- a/plugins/codex/scripts/codex-rescue-completion-hook.mjs +++ b/plugins/codex/scripts/codex-rescue-completion-hook.mjs @@ -19,6 +19,7 @@ const FAILURE_CONTEXT = // run succeeded (PR #346 review P1). const FAILURE_STATUSES = new Set(["failed", "fail", "error", "errored", "cancelled", "canceled", "timed_out", "timeout", "aborted"]); const ACTIVE_TASK_STATUSES = new Set(["queued", "running"]); +const NON_TERMINAL_AGENT_STATUSES = new Set(["async_launched", "running", "sub_agent_entered"]); const BASH_AUTO_BACKGROUND_MARKER = "Command running in background with ID:"; const BASH_AUTO_BACKGROUND_OUTPUT_PREFIX = "Output is being written to:"; // PR #346 review: state fallback is only safe when this synchronous return @@ -89,7 +90,9 @@ function isCodexRescueAgentToolUse(input) { function isSynchronousAgentReturn(toolResponse) { const status = String(toolResponse?.status ?? "").toLowerCase(); - return status !== "async_launched" && status !== "running"; + // PR #346 review (P2): sub_agent_entered is an interactive handoff, not a terminal + // return — the rescue agent is still active, so the completion hook must stay silent. + return !NON_TERMINAL_AGENT_STATUSES.has(status); } function getHookCwd(input) { @@ -125,6 +128,10 @@ function isFailedOrEmptyReturn(toolResponse, responseText) { return FAILURE_STATUSES.has(status); } +function hasFailureStatus(toolResponse) { + return FAILURE_STATUSES.has(String(toolResponse?.status ?? "").toLowerCase()); +} + function collectText(value) { if (value == null) { return ""; @@ -164,6 +171,14 @@ function buildCompletionContext(input) { } const responseText = collectText(toolResponse); + // PR #346 review (P1): the structured Agent tool status is authoritative over any + // [[codex-task ...]] line in the body. A failed return whose text echoes a complete/ + // dispatched token must not be reclassified as success or an active dispatch, so a + // known failure status short-circuits before token handling. + if (hasFailureStatus(toolResponse)) { + return FAILURE_CONTEXT; + } + const token = parseTaskStatusToken(responseText); // The complete sentinel is the only positive proof of a successful run; the // companion stamps it solely on real completion, so it is the lone path that diff --git a/tests/codex-rescue-hook.test.mjs b/tests/codex-rescue-hook.test.mjs index 3193f8bf..5567481e 100644 --- a/tests/codex-rescue-hook.test.mjs +++ b/tests/codex-rescue-hook.test.mjs @@ -193,6 +193,31 @@ test("codex-rescue hook injects a complete line when the completion token is pre }); }); +test("codex-rescue hook reports failure (not success) when a failed return echoes the complete token", () => { + const result = runHook({ + hook_event_name: "PostToolUse", + tool_name: "Agent", + tool_input: { + subagent_type: "codex:codex-rescue" + }, + tool_response: { + status: "failed", + agentId: "agent-failed-echo-complete", + content: [ + { + type: "text", + text: "The original prompt mentioned this literal token:\n[[codex-task status=complete]]\nThe run still failed.\n" + } + ] + } + }); + + const payload = parseHookOutput(result); + const additionalContext = payload.hookSpecificOutput.additionalContext; + assert.match(additionalContext, /exited WITHOUT a success signal/); + assert.doesNotMatch(additionalContext, /COMPLETED and exited/); +}); + test("codex-rescue hook treats the completion token as authoritative over active task state", () => { const workspace = makeTempDir(); const pluginDataDir = makeTempDir(); @@ -365,6 +390,29 @@ test("codex-rescue hook reports failed tokenless returns without a success signa ); }); +test("codex-rescue hook stays silent on sub_agent_entered handoff", () => { + const result = runHook({ + hook_event_name: "PostToolUse", + tool_name: "Agent", + tool_input: { + subagent_type: "codex:codex-rescue" + }, + tool_response: { + status: "sub_agent_entered", + agentId: "agent-interactive-handoff", + content: [ + { + type: "text", + text: "codex-rescue entered an interactive subagent handoff.\n" + } + ] + } + }); + + assert.equal(result.status, 0, result.stderr); + assert.equal(result.stdout, ""); +}); + test("codex-rescue hook reports empty tokenless returns without a success signal", () => { const result = runHook({ hook_event_name: "PostToolUse", @@ -466,6 +514,32 @@ test("codex-rescue hook injects a dispatched line when the dispatched token is p }); }); +test("codex-rescue hook reports failure (not dispatch) when a failed return echoes a dispatched token", () => { + const result = runHook({ + hook_event_name: "PostToolUse", + tool_name: "Agent", + tool_input: { + subagent_type: "codex:codex-rescue" + }, + tool_response: { + status: "failed", + agentId: "agent-failed-echo-dispatched", + content: [ + { + type: "text", + text: + "The model output echoed a dispatch example:\n[[codex-task status=dispatched id=task-xxxx]]\nNo worker was actually launched.\n" + } + ] + } + }); + + const payload = parseHookOutput(result); + const additionalContext = payload.hookSpecificOutput.additionalContext; + assert.match(additionalContext, /exited WITHOUT a success signal/); + assert.doesNotMatch(additionalContext, /status .* --wait/); +}); + test("codex-rescue hook ignores other subagent types", () => { const result = runHook({ hook_event_name: "PostToolUse", diff --git a/tests/runtime.test.mjs b/tests/runtime.test.mjs index 4111d9f8..d7934136 100644 --- a/tests/runtime.test.mjs +++ b/tests/runtime.test.mjs @@ -1292,6 +1292,61 @@ syncBuiltinESMExports(); assert.match(payload.storedJob.errorMessage, /Failed to launch background worker|missing worker pid|spawn ENOENT/); }); +test("task --background exits non-zero when the worker spawn fails", () => { + const repo = makeTempDir(); + const binDir = makeTempDir(); + installFakeCodex(binDir); + initGitRepo(repo); + fs.writeFileSync(path.join(repo, "README.md"), "hello\n"); + run("git", ["add", "README.md"], { cwd: repo }); + run("git", ["commit", "-m", "init"], { cwd: repo }); + + const preloadPath = writePreloadScript( + binDir, + ` +const childProcess = require("node:child_process"); +const { EventEmitter } = require("node:events"); +const { syncBuiltinESMExports } = require("node:module"); +const originalSpawn = childProcess.spawn; + +childProcess.spawn = function patchedSpawn(command, args, options) { + if (Array.isArray(args) && args[1] === "task-worker") { + const child = new EventEmitter(); + child.pid = undefined; + child.unref = function unref() {}; + process.nextTick(() => { + const error = new Error("spawn ENOENT task-worker"); + error.code = "ENOENT"; + child.emit("error", error); + }); + return child; + } + return originalSpawn.apply(this, arguments); +}; + +syncBuiltinESMExports(); +` + ); + + const result = spawnSync(process.execPath, [SCRIPT, "task", "--background", "--json", "fix the failing test"], { + cwd: repo, + env: { + ...buildEnv(binDir), + NODE_OPTIONS: extendNodeOptions(preloadPath) + }, + encoding: "utf8", + timeout: 3000, + windowsHide: true + }); + + assert.notEqual(result.error?.code, "ETIMEDOUT", "background task hung after a worker launch failure"); + assert.notEqual(result.status, 0); + assert.doesNotMatch(result.stderr, /Unhandled 'error' event/); + + const payload = JSON.parse(result.stdout); + assert.equal(payload.status, "failed"); +}); + test("task --background cancelled before launch renders terminal text without a dispatched token", () => { const repo = makeTempDir(); const binDir = makeTempDir(); From b854a767671ad74dd419664cff9e706b6e65e370 Mon Sep 17 00:00:00 2001 From: zknpr Date: Mon, 25 May 2026 20:54:35 +0200 Subject: [PATCH 13/18] fix(codex): reap locally-spawned codex app-server on host exit (process leak) A SpawnedCodexAppServerClient spawns `codex app-server` as a NON-detached child but only killed it via a deferred, unref'd setTimeout in close(). On a normal host exit (or process.exit()) before close() ran or that timer fired, the OS does not reap the child, so the app-server orphaned. When the shared broker can't start (resource pressure / after /reload-plugins), every command falls back to a local spawn, so orphans snowballed (observed 564 processes), which then exhausted resources and crashed new workers at ~2 min. Fix: each client reaps ONLY its own spawned child on host exit via a process.once("exit") SIGKILL backstop, removed on child error/exit and in close() (idempotent). No process scanning (parallel sessions own their own app-servers) and no SIGTERM/SIGINT handlers (a non-exiting signal listener would keep plain CLI hosts alive; the existing process-tree kill already covers signals). The only gap closed is normal host exit. Tests (+2): a locally-spawned app-server is dead after its host exits WITHOUT close(); close() restores process.listenerCount("exit") to baseline (no leak). Suite 88/88. Co-Authored-By: Claude Opus 4.7 (1M context) --- plugins/codex/scripts/lib/app-server.mjs | 33 ++++++ tests/runtime.test.mjs | 129 ++++++++++++++++++++++- 2 files changed, 161 insertions(+), 1 deletion(-) diff --git a/plugins/codex/scripts/lib/app-server.mjs b/plugins/codex/scripts/lib/app-server.mjs index 127c8376..53625311 100644 --- a/plugins/codex/scripts/lib/app-server.mjs +++ b/plugins/codex/scripts/lib/app-server.mjs @@ -183,6 +183,8 @@ class SpawnedCodexAppServerClient extends AppServerClientBase { constructor(cwd, options = {}) { super(cwd, options); this.transport = "direct"; + // Holds the per-child process exit listener so close() and child shutdown can remove it exactly once. + this._exitReaper = null; } async initialize() { @@ -194,6 +196,23 @@ class SpawnedCodexAppServerClient extends AppServerClientBase { windowsHide: true }); + // Leak fix: a non-detached codex app-server child is NOT reaped by the OS when + // this host process exits normally. Guarantee we kill OUR OWN child on host exit + // (exit handlers must be synchronous, so SIGKILL). We never scan/kill processes we + // did not spawn -- parallel sessions own their own app-servers. + this._exitReaper = () => { + try { + if (this.proc && this.proc.exitCode === null && !this.proc.killed) { + this.proc.kill("SIGKILL"); + } + } catch { + // best-effort during host teardown + } + }; + process.once("exit", this._exitReaper); + // Do not add SIGTERM/SIGINT handlers here: non-exiting signal listeners would keep + // plain CLI hosts alive, while existing process-tree shutdown already covers signals. + this.proc.stdout.setEncoding("utf8"); this.proc.stderr.setEncoding("utf8"); @@ -202,10 +221,12 @@ class SpawnedCodexAppServerClient extends AppServerClientBase { }); this.proc.on("error", (error) => { + this.removeExitReaper(); this.handleExit(error); }); this.proc.on("exit", (code, signal) => { + this.removeExitReaper(); const detail = code === 0 ? null @@ -225,9 +246,20 @@ class SpawnedCodexAppServerClient extends AppServerClientBase { this.notify("initialized", {}); } + removeExitReaper() { + // The exit listener belongs to this spawned child only; removing it prevents + // listener buildup in long-lived hosts after the child has already stopped. + if (!this._exitReaper) { + return; + } + process.removeListener("exit", this._exitReaper); + this._exitReaper = null; + } + async close() { if (this.closed) { await this.exitPromise; + this.removeExitReaper(); return; } @@ -259,6 +291,7 @@ class SpawnedCodexAppServerClient extends AppServerClientBase { } await this.exitPromise; + this.removeExitReaper(); } sendMessage(message) { diff --git a/tests/runtime.test.mjs b/tests/runtime.test.mjs index d7934136..a72844d2 100644 --- a/tests/runtime.test.mjs +++ b/tests/runtime.test.mjs @@ -3,10 +3,11 @@ import path from "node:path"; import test from "node:test"; import assert from "node:assert/strict"; import { spawn, spawnSync } from "node:child_process"; -import { fileURLToPath } from "node:url"; +import { fileURLToPath, pathToFileURL } from "node:url"; import { buildEnv, installFakeCodex } from "./fake-codex-fixture.mjs"; import { initGitRepo, makeTempDir, run } from "./helpers.mjs"; +import { CodexAppServerClient } from "../plugins/codex/scripts/lib/app-server.mjs"; import { loadBrokerSession, saveBrokerSession } from "../plugins/codex/scripts/lib/broker-lifecycle.mjs"; import { resolveStateDir } from "../plugins/codex/scripts/lib/state.mjs"; @@ -72,6 +73,18 @@ function seedRunningTaskJob(workspace, { id, pid }) { return { stateDir, jobsDir, job, logFile }; } +function isPidDead(pid) { + try { + process.kill(pid, 0); + return false; + } catch (error) { + if (error?.code === "ESRCH") { + return true; + } + throw error; + } +} + test("setup reports ready when fake codex is installed and authenticated", () => { const binDir = makeTempDir(); installFakeCodex(binDir); @@ -2724,6 +2737,120 @@ test("stop hook runs the actual task when auth status looks stale", () => { assert.match(payload.reason, /Missing empty-state guard/i); }); +test("locally-spawned codex app-server is reaped when the host process exits without close()", async (t) => { + const repo = makeTempDir(); + const binDir = makeTempDir(); + const hostScript = path.join(repo, "host-exits-with-direct-app-server.mjs"); + const appServerModule = pathToFileURL(path.join(PLUGIN_ROOT, "scripts", "lib", "app-server.mjs")).href; + let appServerPid = null; + + installFakeCodex(binDir, "interruptible-slow-task"); + + fs.writeFileSync( + hostScript, + `import { CodexAppServerClient } from ${JSON.stringify(appServerModule)}; + +const cwd = ${JSON.stringify(repo)}; +const client = await CodexAppServerClient.connect(cwd, { disableBroker: true, env: process.env }); +const thread = await client.request("thread/start", { cwd, ephemeral: true }); +await client.request("turn/start", { + threadId: thread.thread.id, + input: [{ type: "text", text: "keep the fake app-server alive until the host exits" }] +}); + +process.stdout.write(String(client.proc.pid) + "\\n", () => { + process.exit(0); +}); +`, + "utf8" + ); + + t.after(() => { + if (appServerPid !== null) { + try { + process.kill(appServerPid, "SIGKILL"); + } catch { + // Ignore a process that was already reaped by the exit handler under test. + } + } + }); + + const host = spawn(process.execPath, [hostScript], { + cwd: repo, + env: buildEnv(binDir), + stdio: ["ignore", "pipe", "pipe"] + }); + let stdout = ""; + let stderr = ""; + host.stdout.setEncoding("utf8"); + host.stderr.setEncoding("utf8"); + host.stdout.on("data", (chunk) => { + stdout += chunk; + }); + host.stderr.on("data", (chunk) => { + stderr += chunk; + }); + + const hostResult = await new Promise((resolve, reject) => { + const timeout = setTimeout(() => { + host.kill("SIGKILL"); + reject(new Error("Timed out waiting for host process to exit.")); + }, 5000); + host.on("error", (error) => { + clearTimeout(timeout); + reject(error); + }); + host.on("close", (code, signal) => { + clearTimeout(timeout); + resolve({ code, signal }); + }); + }); + + assert.equal(hostResult.code, 0, stderr); + assert.equal(hostResult.signal, null, stderr); + appServerPid = Number.parseInt(stdout.trim(), 10); + assert.equal(Number.isInteger(appServerPid), true, `invalid app-server pid output: ${stdout}`); + + await waitFor(() => isPidDead(appServerPid), { timeoutMs: 4000, intervalMs: 50 }); +}); + +test("close() removes the exit reaper for locally-spawned codex app-server clients", async () => { + const repo = makeTempDir(); + const binDir = makeTempDir(); + const beforeExitListeners = process.listenerCount("exit"); + let client = null; + let appServerPid = null; + + installFakeCodex(binDir); + + try { + client = await CodexAppServerClient.connect(repo, { + disableBroker: true, + env: buildEnv(binDir) + }); + appServerPid = client.proc.pid; + + assert.equal(client.transport, "direct"); + assert.equal(process.listenerCount("exit"), beforeExitListeners + 1); + + await client.close(); + + await waitFor(() => isPidDead(appServerPid), { timeoutMs: 4000, intervalMs: 50 }); + assert.equal(process.listenerCount("exit"), beforeExitListeners); + } finally { + if (client) { + await client.close().catch(() => {}); + } + if (appServerPid !== null) { + try { + process.kill(appServerPid, "SIGKILL"); + } catch { + // Ignore a process that close() or the child exit event already reaped. + } + } + } +}); + test("commands lazily start and reuse one shared app-server after first use", async () => { const repo = makeTempDir(); const binDir = makeTempDir(); From 4de4d9c095c29c2086ef6a7c8fabc4db34550fea Mon Sep 17 00:00:00 2001 From: zknpr Date: Mon, 25 May 2026 21:08:16 +0200 Subject: [PATCH 14/18] feat(codex): alert that a write task can only edit within its cwd When a write-capable `task` run is dispatched against a target outside the run's cwd, Codex had no up-front signal that its workspace-write sandbox is rooted at the cwd, so it burned the whole run probing the sandbox (test -w / ls -ldOe / stat / id) and then failed. (This bites the Agent-tool codex-rescue dispatch, which inherits the session cwd and cannot change it.) - Implementer-facing: executeTaskRun now injects a concise workspace-boundary notice as a leading, EPHEMERAL element of the Codex turn input for write runs only ("write access ONLY within ; do NOT probe the sandbox; if you must write outside it, stop and report it needs --cwd "). It is added via buildTurnInput(prompt, { boundaryNote }) so it never touches the stored request.prompt, the persistent thread name, or --resume-last. Read-only and review runs are unchanged (boundaryNote null). - Dispatcher-facing: commands/rescue.md documents that codex-rescue writes are confined to the run's cwd, that the Agent-tool dispatch inherits the session cwd, and that cross-repo work must use the companion with --cwd . Tests (+3): the notice is prepended to write-task turn input; it is absent from the stored prompt and thread name (ephemeral); a read-only run gets no notice. Suite 89/89. Co-Authored-By: Claude Opus 4.7 (1M context) --- plugins/codex/commands/rescue.md | 6 ++ plugins/codex/scripts/codex-companion.mjs | 6 ++ plugins/codex/scripts/lib/codex.mjs | 13 +++- tests/fake-codex-fixture.mjs | 1 + tests/runtime.test.mjs | 95 +++++++++++++++++++++++ 5 files changed, 118 insertions(+), 3 deletions(-) diff --git a/plugins/codex/commands/rescue.md b/plugins/codex/commands/rescue.md index dc6468ae..d74b4860 100644 --- a/plugins/codex/commands/rescue.md +++ b/plugins/codex/commands/rescue.md @@ -51,3 +51,9 @@ Operating rules: - Leave `--resume` and `--fresh` in the forwarded request. The subagent handles that routing when it builds the `task` command. - If the helper reports that Codex is missing or unauthenticated, stop and tell the user to run `/codex:setup`. - If the user did not supply a request, ask what Codex should investigate or fix. + +Workspace boundary: + +- A Codex `task` run can only create or modify files **within its working directory** (the Codex `workspace-write` sandbox is rooted at the run's `--cwd`). Files outside that directory are read-only. +- The `Agent`-tool `codex:codex-rescue` dispatch inherits **this Claude session's** cwd, and the `Agent` tool cannot change it. So a rescue request that needs to write to a *different* repository will silently fail to write (Codex can only probe, not edit) — do not route cross-repo write work through the subagent. +- To target a different repository, either run from a Claude session inside that repo, or invoke the companion directly with `--cwd `: `node "${CLAUDE_PLUGIN_ROOT}/scripts/codex-companion.mjs" task --cwd --write ...`. diff --git a/plugins/codex/scripts/codex-companion.mjs b/plugins/codex/scripts/codex-companion.mjs index e87376d2..855025ad 100644 --- a/plugins/codex/scripts/codex-companion.mjs +++ b/plugins/codex/scripts/codex-companion.mjs @@ -572,6 +572,8 @@ async function executeTaskRun(request) { sandbox: request.write ? "workspace-write" : "read-only", onProgress: request.onProgress, persistThread: true, + // Keep the workspace-boundary notice ephemeral by adding it only to turn input, not stored prompts or thread names. + boundaryNote: request.write ? buildWorkspaceBoundaryNotice(workspaceRoot) : null, threadName: resumeThreadId ? null : buildPersistentTaskThreadName(request.prompt || DEFAULT_CONTINUE_PROMPT) }); @@ -712,6 +714,10 @@ function buildTaskRequest({ cwd, model, effort, prompt, write, resumeLast, jobId }; } +function buildWorkspaceBoundaryNotice(cwd) { + return `WORKSPACE: this run has write access ONLY within ${cwd}. Everything outside ${cwd} is read-only (Codex workspace-write sandbox). Do NOT probe, test, or investigate the sandbox boundary. If completing this task requires creating or modifying files outside ${cwd}, stop immediately and report that it must be re-dispatched with \`--cwd \` — do not attempt workarounds.`; +} + function readTaskPrompt(cwd, options, positionals) { if (options["prompt-file"]) { return fs.readFileSync(path.resolve(cwd, options["prompt-file"]), "utf8"); diff --git a/plugins/codex/scripts/lib/codex.mjs b/plugins/codex/scripts/lib/codex.mjs index f2fe88bd..3c101fc4 100644 --- a/plugins/codex/scripts/lib/codex.mjs +++ b/plugins/codex/scripts/lib/codex.mjs @@ -77,8 +77,15 @@ function buildResumeParams(threadId, cwd, options = {}) { } /** @returns {UserInput[]} */ -function buildTurnInput(prompt) { - return [{ type: "text", text: prompt, text_elements: [] }]; +function buildTurnInput(prompt, options = {}) { + const input = []; + const boundaryNote = String(options.boundaryNote ?? "").trim(); + if (boundaryNote) { + // The workspace-boundary notice is ephemeral and exists only in the turn input sent to Codex. + input.push({ type: "text", text: boundaryNote, text_elements: [] }); + } + input.push({ type: "text", text: prompt, text_elements: [] }); + return input; } function shorten(text, limit = 72) { @@ -1004,7 +1011,7 @@ export async function runAppServerTurn(cwd, options = {}) { () => client.request("turn/start", { threadId, - input: buildTurnInput(prompt), + input: buildTurnInput(prompt, { boundaryNote: options.boundaryNote }), model: options.model ?? null, effort: options.effort ?? null, outputSchema: options.outputSchema ?? null diff --git a/tests/fake-codex-fixture.mjs b/tests/fake-codex-fixture.mjs index debcadce..c1938c9b 100644 --- a/tests/fake-codex-fixture.mjs +++ b/tests/fake-codex-fixture.mjs @@ -380,6 +380,7 @@ rl.on("line", (line) => { turnId, model: message.params.model ?? null, effort: message.params.effort ?? null, + input: message.params.input ?? [], prompt }; saveState(state); diff --git a/tests/runtime.test.mjs b/tests/runtime.test.mjs index a72844d2..a0286b1b 100644 --- a/tests/runtime.test.mjs +++ b/tests/runtime.test.mjs @@ -725,6 +725,101 @@ syncBuiltinESMExports(); assert.match(job.errorMessage, /background worker exited before completing/); }); +test("write task run prepends the workspace-boundary notice to the turn input", () => { + const repo = makeTempDir(); + const binDir = makeTempDir(); + const statePath = path.join(binDir, "fake-codex-state.json"); + installFakeCodex(binDir); + initGitRepo(repo); + fs.writeFileSync(path.join(repo, "README.md"), "hello\n"); + run("git", ["add", "README.md"], { cwd: repo }); + run("git", ["commit", "-m", "init"], { cwd: repo }); + + const result = run("node", [SCRIPT, "task", "--write", "fix the failing test"], { + cwd: repo, + env: buildEnv(binDir) + }); + + assert.equal(result.status, 0, result.stderr); + const fakeState = JSON.parse(fs.readFileSync(statePath, "utf8")); + assert.equal(fakeState.lastTurnStart.input.length, 2); + assert.equal(fakeState.lastTurnStart.input[0].type, "text"); + assert.match(fakeState.lastTurnStart.input[0].text, /write access ONLY within/); + assert.match(fakeState.lastTurnStart.input[0].text, /Do NOT probe/); + assert.match(fakeState.lastTurnStart.input[0].text, /--cwd/); + assert.match(fakeState.lastTurnStart.input[0].text, new RegExp(repo.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"))); + assert.deepEqual(fakeState.lastTurnStart.input[1], { + type: "text", + text: "fix the failing test", + text_elements: [] + }); +}); + +test("the workspace-boundary notice is ephemeral for write task runs", async () => { + const repo = makeTempDir(); + const binDir = makeTempDir(); + const statePath = path.join(binDir, "fake-codex-state.json"); + installFakeCodex(binDir, "slow-task"); + initGitRepo(repo); + fs.writeFileSync(path.join(repo, "README.md"), "hello\n"); + run("git", ["add", "README.md"], { cwd: repo }); + run("git", ["commit", "-m", "init"], { cwd: repo }); + + const launched = run("node", [SCRIPT, "task", "--background", "--write", "--json", "implement scoped update"], { + cwd: repo, + env: buildEnv(binDir) + }); + + assert.equal(launched.status, 0, launched.stderr); + const launchPayload = JSON.parse(launched.stdout); + const jobFile = path.join(resolveStateDir(repo), "jobs", `${launchPayload.jobId}.json`); + const queuedJob = JSON.parse(fs.readFileSync(jobFile, "utf8")); + assert.equal(queuedJob.request.prompt, "implement scoped update"); + assert.doesNotMatch(queuedJob.request.prompt, /WORKSPACE:/); + assert.doesNotMatch(queuedJob.request.prompt, /write access ONLY within/); + + await waitFor(() => { + const storedJob = JSON.parse(fs.readFileSync(jobFile, "utf8")); + return storedJob.status === "completed" ? storedJob : null; + }, { timeoutMs: 15000 }); + + const completedJob = JSON.parse(fs.readFileSync(jobFile, "utf8")); + assert.equal(completedJob.request.prompt, "implement scoped update"); + assert.doesNotMatch(completedJob.request.prompt, /WORKSPACE:/); + + const fakeState = JSON.parse(fs.readFileSync(statePath, "utf8")); + const taskThread = fakeState.threads.find((thread) => thread.id === fakeState.lastTurnStart.threadId); + assert.match(taskThread.name, /^Codex Companion Task: implement scoped update/); + assert.doesNotMatch(taskThread.name, /WORKSPACE:/); + assert.doesNotMatch(taskThread.name, /write access ONLY within/); +}); + +test("read-only task run does not get the workspace-boundary notice", () => { + const repo = makeTempDir(); + const binDir = makeTempDir(); + const statePath = path.join(binDir, "fake-codex-state.json"); + installFakeCodex(binDir); + initGitRepo(repo); + fs.writeFileSync(path.join(repo, "README.md"), "hello\n"); + run("git", ["add", "README.md"], { cwd: repo }); + run("git", ["commit", "-m", "init"], { cwd: repo }); + + const result = run("node", [SCRIPT, "task", "diagnose the failing test"], { + cwd: repo, + env: buildEnv(binDir) + }); + + assert.equal(result.status, 0, result.stderr); + const fakeState = JSON.parse(fs.readFileSync(statePath, "utf8")); + assert.deepEqual(fakeState.lastTurnStart.input, [ + { + type: "text", + text: "diagnose the failing test", + text_elements: [] + } + ]); +}); + test("task --resume acts like --resume-last without leaking the flag into the prompt", () => { const repo = makeTempDir(); const binDir = makeTempDir(); From 93e29aafc004e75d9898825931120c9318631cbb Mon Sep 17 00:00:00 2001 From: zknpr Date: Mon, 25 May 2026 22:28:43 +0200 Subject: [PATCH 15/18] fix(codex-rescue): default to --background; foreground inside the subagent is reaped at ~143s MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Root cause (reproduced cleanly, not the daemon leak): a foreground (blocking) `node companion task` call running INSIDE the codex-rescue Agent subagent is auto-backgrounded by the harness and then reaped when the subagent ends — observed at ~143s — which kills the detached Codex worker mid-run and silently loses the work. Confirmed by lifetimes: foreground-via-subagent died at 142.1s / 143.3s (multiwarm) and 143.9s (controlled repro), while --background via the subagent survived 309s and via direct Bash 461s / 492s. --background returns immediately, so the worker reparents to launchd and survives. Fix: codex-rescue now defaults to `task --background` whenever the user did not explicitly pass `--wait`; foreground is reserved for short, clearly-bounded `--wait` requests (well under ~140s), and substantial/long/write/uncertain work always goes `--background`. The agent md and rescue.md document why (the subagent reaping at ~143s, not the ~600s direct-Bash cap that the docs wrongly cited). The main thread continues to arm the status --wait watcher for background completion — the pattern proven reliable across this work. Tests: assertions updated to the new default-background guidance + that the incorrect ~600s foreground-cap claim is gone. Suite 119/119. Co-Authored-By: Claude Opus 4.7 (1M context) --- plugins/codex/agents/codex-rescue.md | 7 +++---- plugins/codex/commands/rescue.md | 4 ++-- tests/commands.test.mjs | 21 ++++++++++++++------- 3 files changed, 19 insertions(+), 13 deletions(-) diff --git a/plugins/codex/agents/codex-rescue.md b/plugins/codex/agents/codex-rescue.md index a856e66b..e42c8d39 100644 --- a/plugins/codex/agents/codex-rescue.md +++ b/plugins/codex/agents/codex-rescue.md @@ -21,10 +21,9 @@ Forwarding rules: - Use exactly one `Bash` call to invoke `node "${CLAUDE_PLUGIN_ROOT}/scripts/codex-companion.mjs" task ...`. - Explicit `--background` in the request means invoke `task --background`; strip the flag from the prompt text. -- Explicit `--wait` in the request means invoke foreground `task`; strip the flag from the prompt text. -- No explicit choice and the task is short and clearly bounded (quick fix, single-file edit, focused diagnosis) means foreground. -- No explicit choice and the task is long, open-ended, multi-step, or likely to exceed the foreground cap means `task --background`. Prefer background for substantial work. -- Foreground task runs are capped by the Bash tool at roughly 600 seconds; past that, they can be auto-backgrounded and orphaned. +- Default to `task --background` whenever the user did NOT explicitly pass `--wait`. Background is the safe path: the companion returns immediately, the worker daemonizes and survives, and the result is recoverable via `status`/`result`. +- Explicit `--wait` means invoke foreground `task` — but ONLY for a short, clearly bounded request (quick fix, single-file edit, focused diagnosis that finishes well under ~140s); strip the flag from the prompt text. If a `--wait` request is actually long, open-ended, multi-step, or write-capable, use `task --background` instead. When in doubt, `--background`. +- Why background is the default: a foreground (blocking) `task` call running inside THIS subagent is auto-backgrounded by the harness and then reaped when the subagent ends (observed at ~143s), which kills the Codex worker mid-run and silently loses the work. `task --background` avoids this entirely, so never run substantial work foreground from here. - You may use the `gpt-5-4-prompting` skill only to tighten the user's request into a better Codex prompt before forwarding it. - Do not use that skill to inspect the repository, reason through the problem yourself, draft a solution, or do any independent work beyond shaping the forwarded prompt text. - Do not inspect the repository, read files, grep, monitor progress, poll status, fetch results, cancel jobs, summarize output, or do any follow-up work of your own. diff --git a/plugins/codex/commands/rescue.md b/plugins/codex/commands/rescue.md index d74b4860..1eed1f81 100644 --- a/plugins/codex/commands/rescue.md +++ b/plugins/codex/commands/rescue.md @@ -15,9 +15,9 @@ Execution mode: - If the request includes `--background`, pass `--background` to the `codex:codex-rescue` subagent so it invokes companion `task --background`. - If the request includes `--wait`, pass `--wait` to the `codex:codex-rescue` subagent so it invokes foreground `task`. -- If neither flag is present, foreground stays for short, bounded rescues that return the result directly. +- If neither flag is present, default to `--background` (the safe path: the worker daemonizes and survives; collect the result via `/codex:status` + `/codex:result`). Only stay foreground when `--wait` is explicit AND the rescue is short and clearly bounded. - For long, substantial, or open-ended rescues, pass `--background`; the companion returns `[[codex-task status=dispatched id=]]`. A dispatched background job has no automatic push notification, so arm a watcher by running `node "${CLAUDE_PLUGIN_ROOT}/scripts/codex-companion.mjs" status --wait --timeout-ms 1800000` via the Bash tool with `run_in_background=true`; it exits and re-invokes Claude when the job reaches a terminal status. If it returns while the job is still running, re-arm the same command. Fetch `/codex:result ` only after the watcher reports a terminal status. -- A foreground rescue is capped by the Bash tool at roughly 600 seconds and can be auto-backgrounded past that cap, which orphans the detached Codex result. +- A foreground rescue running inside the subagent is auto-backgrounded by the harness and then reaped when the subagent ends (observed at ~143s — much sooner than the ~600s direct-Bash cap), which kills the Codex worker mid-run and loses the result. Route anything substantial to `--background`. - `--background` and `--wait` are execution flags for Claude Code and the rescue subagent. Preserve the execution choice, and strip them only from the natural-language task text before Codex sees the prompt. - `--model` and `--effort` are runtime-selection flags. Preserve them for the forwarded `task` call, but do not treat them as part of the natural-language task text. - If the request includes `--resume`, do not ask whether to continue. The user already chose. diff --git a/tests/commands.test.mjs b/tests/commands.test.mjs index 431ddc1f..66de748a 100644 --- a/tests/commands.test.mjs +++ b/tests/commands.test.mjs @@ -113,9 +113,16 @@ test("rescue command absorbs continue semantics", () => { assert.match(rescue, /status --wait --timeout-ms 1800000/); assert.match(rescue, /run_in_background=true/); assert.match(rescue, /`\/codex:result `/); - assert.match(rescue, /Foreground stays for short, bounded rescues/i); - assert.match(rescue, /foreground rescue is capped by the Bash tool/i); - assert.match(rescue, /auto-backgrounded past that cap/i); + // Fix: a foreground task call inside the subagent is auto-backgrounded and + // reaped at ~143s (kills the worker mid-run). Default must be --background. + assert.match(rescue, /default to `--background`/i); + assert.match(rescue, /Only stay foreground when `--wait` is explicit/i); + assert.match(rescue, /running inside the subagent is auto-backgrounded by the harness and then reaped/i); + assert.match(rescue, /observed at ~143s/i); + assert.doesNotMatch(rescue, /foreground rescue is capped by the Bash tool at roughly 600 seconds/i); + assert.match(agent, /Default to `task --background` whenever the user did NOT explicitly pass `--wait`/i); + assert.match(agent, /reaped when the subagent ends \(observed at ~143s\)/i); + assert.doesNotMatch(agent, /capped by the Bash tool at roughly 600 seconds/i); assert.match(rescue, /Preserve the execution choice/i); assert.match(rescue, /strip them only from the natural-language task text/i); assert.match(rescue, /`--model` and `--effort` are runtime-selection flags/i); @@ -137,10 +144,10 @@ test("rescue command absorbs continue semantics", () => { assert.match(agent, /--fresh/); assert.match(agent, /thin forwarding wrapper/i); assert.match(agent, /Explicit `--background` in the request means invoke `task --background`/i); - assert.match(agent, /Explicit `--wait` in the request means invoke foreground `task`/i); - assert.match(agent, /No explicit choice and the task is short and clearly bounded .* means foreground/i); - assert.match(agent, /No explicit choice and the task is long, open-ended, multi-step, or likely to exceed the foreground cap means `task --background`/i); - assert.match(agent, /Prefer background for substantial work/i); + assert.match(agent, /Explicit `--wait` means invoke foreground `task`/i); + assert.match(agent, /ONLY for a short, clearly bounded request/i); + assert.match(agent, /Default to `task --background` whenever the user did NOT explicitly pass `--wait`/i); + assert.match(agent, /never run substantial work foreground from here/i); assert.match(agent, /Use exactly one `Bash` call/i); assert.match(agent, /Do not inspect the repository, read files, grep, monitor progress, poll status, fetch results, cancel jobs, summarize output, or do any follow-up work of your own/i); assert.match(agent, /Do not call `review`, `adversarial-review`, `status`, `result`, or `cancel`/i); From 4ebc407a7c836824184d3459fd50448f51a3424b Mon Sep 17 00:00:00 2001 From: zknpr Date: Mon, 25 May 2026 22:36:23 +0200 Subject: [PATCH 16/18] fix(codex-rescue): dead-worker failure warns edits may still land via the app-server MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The worker drives the turn but the shared app-server applies file edits, so edits in flight when the worker dies can land AFTER the worker pid is dead and after a status snapshot (the "failed-but-wrote" race — confirmed in the wild: a reaped worker's edits surfaced after the job already read dead/failed). The dead-worker reconciliation marks such a job `failed` promptly, which is correct (no result was produced) but can fire mid-write. buildWorkerExitedError now carries the verify-on-disk discipline in its message: "... files it was editing may have already landed or may still be landing via the shared app-server — re-check disk state (`git status`) before concluding, and do not trust a single snapshot." So any caller (foreground waiter, status --wait reconcile, result) is warned to re-verify rather than act on one snapshot. This is now rare in practice: default-`--background` (93e29aa) stops workers being reaped mid-turn, so they complete normally; this covers the residual genuine-crash case. Message keeps the existing prefix so callers/tests are unaffected. Suite 119/119. Co-Authored-By: Claude Opus 4.7 (1M context) --- plugins/codex/scripts/codex-companion.mjs | 6 +++++- tests/runtime.test.mjs | 4 ++++ 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/plugins/codex/scripts/codex-companion.mjs b/plugins/codex/scripts/codex-companion.mjs index 855025ad..7aba49c9 100644 --- a/plugins/codex/scripts/codex-companion.mjs +++ b/plugins/codex/scripts/codex-companion.mjs @@ -968,7 +968,11 @@ function enqueueBackgroundTask(cwd, job, request) { } function buildWorkerExitedError(jobId) { - return `background worker exited before completing; check /codex:status ${jobId}`; + // The worker drives the turn, but the shared app-server applies file edits, so + // edits in flight when the worker died can still land AFTER this point. Carry + // the verify-on-disk discipline in the message so callers re-check rather than + // trust a single (possibly mid-write) snapshot. + return `background worker exited before completing; check /codex:status ${jobId}. Files it was editing may have already landed or may still be landing via the shared app-server — re-check disk state (\`git status\`) before concluding, and do not trust a single snapshot.`; } function failActiveWorkerJob(workspaceRoot, job, errorMessage) { diff --git a/tests/runtime.test.mjs b/tests/runtime.test.mjs index a0286b1b..c75c55b3 100644 --- a/tests/runtime.test.mjs +++ b/tests/runtime.test.mjs @@ -2011,6 +2011,10 @@ test("status --wait reports a crashed worker instead of hanging when the worker assert.equal(payload.job.id, "task-dead-wait"); assert.equal(payload.job.status, "failed"); assert.match(payload.job.errorMessage, /background worker exited before completing/); + // The failure message must carry the verify-on-disk warning: edits can still + // land via the shared app-server after the worker pid dies, so a single + // snapshot is not trustworthy. + assert.match(payload.job.errorMessage, /still be landing via the shared app-server|re-check disk state/i); const state = JSON.parse(fs.readFileSync(path.join(stateDir, "state.json"), "utf8")); const job = state.jobs.find((candidate) => candidate.id === "task-dead-wait"); From 0a8ecc61f58ba27268ea41a3eea882bbd97d0c0a Mon Sep 17 00:00:00 2001 From: zknpr Date: Mon, 25 May 2026 23:01:25 +0200 Subject: [PATCH 17/18] fix(codex): exit reaper kills the whole app-server tree on Windows (PR #346 review P1) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The leak-fix exit reaper called this.proc.kill("SIGKILL") unconditionally, but on Windows the client is spawned with shell:true, so this.proc is cmd.exe and the real codex app-server is its child — a flat SIGKILL kills only the shell and leaks the app-server. Mirror close()'s platform branch: on Windows use terminateProcessTree (synchronous taskkill /T /F, safe in an exit handler) to kill the whole tree; non-Windows keeps the direct-child SIGKILL. (The P2 zombie note is addressed in-thread: the SIGKILL backstop is the right tradeoff — close() reaps in the normal path, the orphan is reaped by init on any normal host, and not killing reinstates the worse live-process leak.) Suite 119/119. Co-Authored-By: Claude Opus 4.7 (1M context) --- plugins/codex/scripts/lib/app-server.mjs | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/plugins/codex/scripts/lib/app-server.mjs b/plugins/codex/scripts/lib/app-server.mjs index 53625311..3324497a 100644 --- a/plugins/codex/scripts/lib/app-server.mjs +++ b/plugins/codex/scripts/lib/app-server.mjs @@ -198,12 +198,23 @@ class SpawnedCodexAppServerClient extends AppServerClientBase { // Leak fix: a non-detached codex app-server child is NOT reaped by the OS when // this host process exits normally. Guarantee we kill OUR OWN child on host exit - // (exit handlers must be synchronous, so SIGKILL). We never scan/kill processes we - // did not spawn -- parallel sessions own their own app-servers. + // (exit handlers must be synchronous). We never scan/kill processes we did not + // spawn -- parallel sessions own their own app-servers. this._exitReaper = () => { try { if (this.proc && this.proc.exitCode === null && !this.proc.killed) { - this.proc.kill("SIGKILL"); + if (process.platform === "win32") { + // PR #346 review (P1): with shell:true, this.proc is cmd.exe and the + // real codex app-server is its child; a flat SIGKILL would orphan it. + // terminateProcessTree is synchronous (taskkill /T /F), so it is safe + // in an exit handler and kills the whole tree. + terminateProcessTree(this.proc.pid); + } else { + // Direct (non-shell) child: SIGKILL it; the orphan is reparented to + // PID 1 and reaped by init (launchd/systemd) — see the reply on the + // P2 zombie note for the no-reaper-container caveat. + this.proc.kill("SIGKILL"); + } } } catch { // best-effort during host teardown From 6f6ec98d65ef6356b8c43a9879c134e6ea814a00 Mon Sep 17 00:00:00 2001 From: zknpr Date: Mon, 25 May 2026 23:19:20 +0200 Subject: [PATCH 18/18] fix(codex): task --background exits non-zero on a cancelled launch too (PR #346 review P2) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit handleTask --background exited non-zero only on status=failed, but enqueueBackgroundTask can also return a terminal `cancelled` payload when a concurrent /codex:cancel wins before worker spawn. In that race no worker launched, yet the command exited 0 — so a caller keying off exit status treats dispatch as successful and skips retry/escalation. Generalize to the clean invariant: exit 0 iff a live job was dispatched (queued/running); any terminal launch status (failed OR cancelled) exits non-zero. Replaces the `status === "failed"` check with `!isActiveJobStatus`. Tests: the three cancelled-launch cases now assert non-zero exit (the dispatched/queued case still asserts 0). Suite 119/119. Co-Authored-By: Claude Opus 4.7 (1M context) --- plugins/codex/scripts/codex-companion.mjs | 9 ++++++--- tests/runtime.test.mjs | 9 ++++++--- 2 files changed, 12 insertions(+), 6 deletions(-) diff --git a/plugins/codex/scripts/codex-companion.mjs b/plugins/codex/scripts/codex-companion.mjs index 7aba49c9..adf0c65e 100644 --- a/plugins/codex/scripts/codex-companion.mjs +++ b/plugins/codex/scripts/codex-companion.mjs @@ -1193,9 +1193,12 @@ async function handleTask(argv) { }); const { payload } = enqueueBackgroundTask(cwd, job, request); outputCommandResult(payload, renderQueuedTaskLaunch(payload), options.json); - // PR #346 review (P1): a failed launch must not exit 0, or callers treat a failed - // dispatch as a successful one and skip retry/escalation. - if (payload.status === "failed") { + // PR #346 review: exit non-zero unless a live background job was actually + // dispatched (queued/running). A terminal launch — `failed`, or `cancelled` + // when a concurrent cancel won before worker spawn — means there is no job to + // poll, so callers keying off exit status must not treat it as a successful + // dispatch (they would skip retry/escalation). + if (!isActiveJobStatus(payload.status)) { process.exitCode = 1; } return; diff --git a/tests/runtime.test.mjs b/tests/runtime.test.mjs index c75c55b3..9e3012ca 100644 --- a/tests/runtime.test.mjs +++ b/tests/runtime.test.mjs @@ -1184,7 +1184,8 @@ syncBuiltinESMExports(); } }); - assert.equal(launched.status, 0, launched.stderr); + // PR #346 review: a cancelled launch dispatched no live job, so exit is non-zero. + assert.notEqual(launched.status, 0, launched.stderr); const launchPayload = JSON.parse(launched.stdout); assert.equal(launchPayload.status, "cancelled"); assert.equal(fs.existsSync(spawnMarker), false, "expected cancellation guard to skip task-worker spawn"); @@ -1326,7 +1327,8 @@ syncBuiltinESMExports(); } }); - assert.equal(launched.status, 0, launched.stderr); + // PR #346 review: a cancelled launch dispatched no live job, so exit is non-zero. + assert.notEqual(launched.status, 0, launched.stderr); const launchPayload = JSON.parse(launched.stdout); assert.equal(launchPayload.status, "cancelled"); assert.equal(fs.existsSync(killMarker), true, "expected the orphaned task-worker process group to be killed"); @@ -1552,7 +1554,8 @@ syncBuiltinESMExports(); } }); - assert.equal(launched.status, 0, launched.stderr); + // PR #346 review: a cancelled launch dispatched no live job, so exit is non-zero. + assert.notEqual(launched.status, 0, launched.stderr); assert.doesNotMatch(launched.stdout, /\[\[codex-task status=dispatched/); assert.doesNotMatch(launched.stdout, /dispatched as background job/); assert.match(launched.stdout, /was cancelled before a worker launched; no work started\./);