diff --git a/plugins/codex/commands/adversarial-review.md b/plugins/codex/commands/adversarial-review.md
index da440ab4..bbe9b945 100644
--- a/plugins/codex/commands/adversarial-review.md
+++ b/plugins/codex/commands/adversarial-review.md
@@ -1,6 +1,6 @@
 ---
 description: Run a Codex review that challenges the implementation approach and design choices
-argument-hint: '[--wait|--background] [--base <ref>] [--scope auto|working-tree|branch] [focus ...]'
+argument-hint: '[--wait|--background] [--base <ref>] [--scope auto|working-tree|branch] [--max-investigation-turns N] [focus ...]'
 disable-model-invocation: true
 allowed-tools: Read, Glob, Grep, Bash(node:*), Bash(git:*), AskUserQuestion
 ---
@@ -43,6 +43,7 @@ Argument handling:
 - It supports working-tree review, branch review, and `--base <ref>`.
 - It does not support `--scope staged` or `--scope unstaged`.
 - Unlike `/codex:review`, it can still take extra focus text after the flags.
+- For very large diffs that exceed the inline threshold, Codex investigates the diff with read-only commands across multiple turns. Use `--max-investigation-turns N` (default 10) to raise or lower the cap.
 
 Foreground flow:
 - Run:
diff --git a/plugins/codex/prompts/adversarial-review-finalize.md b/plugins/codex/prompts/adversarial-review-finalize.md
new file mode 100644
index 00000000..c4d0097b
--- /dev/null
+++ b/plugins/codex/prompts/adversarial-review-finalize.md
@@ -0,0 +1,54 @@
+<role>
+You have just completed an investigation of a code change. Now produce the structured adversarial review.
+</role>
+
+<task>
+Based on your investigation in the prior turns of this thread, write up your findings as a structured review.
+Target: {{TARGET_LABEL}}
+User focus: {{USER_FOCUS}}
+</task>
+
+<finding_bar>
+Report only material findings.
+Do not include style feedback, naming feedback, low-value cleanup, or speculative concerns without evidence.
+A finding should answer:
+1. What can go wrong?
+2. Why is this code path vulnerable?
+3. What is the likely impact?
+4. What concrete change would reduce the risk?
+</finding_bar>
+
+<structured_output_contract>
+This is the finalization turn. Do NOT run any shell commands or tool calls in this turn — your investigation is already complete and you have all the context you need from the prior turns of this thread.
+Return only valid JSON matching the provided schema. Your entire output must be that JSON — no prose before or after, no shell commands, no tool-call payloads.
+Keep the output compact and specific.
+Use `needs-attention` if there is any material risk worth blocking on.
+Use `approve` only if you cannot support any substantive adversarial finding from your investigation.
+Every finding must include:
+- the affected file
+- `line_start` and `line_end`
+- a confidence score from 0 to 1
+- a concrete recommendation
+Write the summary like a terse ship/no-ship assessment, not a neutral recap.
+</structured_output_contract>
+
+<grounding_rules>
+Be aggressive, but stay grounded.
+Every finding must be defensible from what you read during the investigation.
+Do not invent files, lines, code paths, incidents, attack chains, or runtime behavior you cannot support.
+If a conclusion depends on an inference, state that explicitly in the finding body and keep the confidence honest.
+</grounding_rules>
+
+<calibration_rules>
+Prefer one strong finding over several weak ones.
+Do not dilute serious issues with filler.
+If the change looks safe, say so directly and return no findings.
+</calibration_rules>
+
+<final_check>
+Before finalizing, check that each finding is:
+- adversarial rather than stylistic
+- tied to a concrete code location
+- plausible under a real failure scenario
+- actionable for an engineer fixing the issue
+</final_check>
diff --git a/plugins/codex/prompts/adversarial-review-investigate.md b/plugins/codex/prompts/adversarial-review-investigate.md
new file mode 100644
index 00000000..54d6dcc9
--- /dev/null
+++ b/plugins/codex/prompts/adversarial-review-investigate.md
@@ -0,0 +1,48 @@
+<role>
+You are Codex performing an adversarial software review.
+Your job is to break confidence in the change, not to validate it.
+This is the investigation phase: gather evidence with read-only commands before producing any structured output.
+</role>
+
+<task>
+Investigate the change so you can later produce a confident adversarial assessment.
+Target: {{TARGET_LABEL}}
+User focus: {{USER_FOCUS}}
+</task>
+
+<operating_stance>
+Default to skepticism.
+Assume the change can fail in subtle, high-cost, or user-visible ways until the evidence says otherwise.
+Do not give credit for good intent, partial fixes, or likely follow-up work.
+If something only works on the happy path, treat that as a real weakness.
+</operating_stance>
+
+<attack_surface>
+Prioritize the kinds of failures that are expensive, dangerous, or hard to detect:
+- auth, permissions, tenant isolation, and trust boundaries
+- data loss, corruption, duplication, and irreversible state changes
+- rollback safety, retries, partial failure, and idempotency gaps
+- race conditions, ordering assumptions, stale state, and re-entrancy
+- empty-state, null, timeout, and degraded dependency behavior
+- version skew, schema drift, migration hazards, and compatibility regressions
+- observability gaps that would hide failure or make recovery harder
+</attack_surface>
+
+<investigation_method>
+Use read-only shell commands to inspect the diff and the surrounding code.
+Useful starting points: `git diff`, `git log`, `git show`, `git blame`, `cat`, `rg`/`grep`.
+Read the changed files, follow references, and confirm or refute hypotheses with evidence from the code.
+Do not modify any files. Your sandbox is read-only.
+{{REVIEW_COLLECTION_GUIDANCE}}
+</investigation_method>
+
+<convergence>
+Continue investigating until you can defend a confident adversarial assessment.
+When you have seen enough, emit a brief summary message describing what you found and stop running commands.
+A summary message with no further command calls signals that you are ready for the finalization phase.
+Do not produce a structured review yet — that comes in the next phase.
+</convergence>
+
+<repository_context>
+{{REVIEW_INPUT}}
+</repository_context>
diff --git a/plugins/codex/prompts/adversarial-review.md b/plugins/codex/prompts/adversarial-review.md
index 78668af6..036e66f7 100644
--- a/plugins/codex/prompts/adversarial-review.md
+++ b/plugins/codex/prompts/adversarial-review.md
@@ -46,7 +46,8 @@ A finding should answer:
 </finding_bar>
 
 <structured_output_contract>
-Return only valid JSON matching the provided schema.
+This is a single-turn review. Do NOT run any shell commands or tool calls — the file contents you need are already embedded in the repository_context block below. Do not emit a tool-use stub like `{"cmd": "..."}` instead of the review; that is not the schema.
+Return only valid JSON matching the provided schema. Your entire output must be that JSON — no prose before or after, no shell commands, no tool-call payloads.
 Keep the output compact and specific.
 Use `needs-attention` if there is any material risk worth blocking on.
 Use `approve` only if you cannot support any substantive adversarial finding from the provided context.
diff --git a/plugins/codex/scripts/codex-companion.mjs b/plugins/codex/scripts/codex-companion.mjs
index 35222fd5..78e65c3d 100644
--- a/plugins/codex/scripts/codex-companion.mjs
+++ b/plugins/codex/scripts/codex-companion.mjs
@@ -17,6 +17,7 @@ import {
     interruptAppServerTurn,
     parseStructuredOutput,
     readOutputSchema,
+    runAppServerInvestigation,
     runAppServerReview,
     runAppServerTurn
   } from "./lib/codex.mjs";
@@ -246,6 +247,24 @@ function buildAdversarialReviewPrompt(context, focusText) {
   });
 }
 
+function buildAdversarialInvestigatePrompt(context, focusText) {
+  const template = loadPromptTemplate(ROOT_DIR, "adversarial-review-investigate");
+  return interpolateTemplate(template, {
+    TARGET_LABEL: context.target.label,
+    USER_FOCUS: focusText || "No extra focus provided.",
+    REVIEW_COLLECTION_GUIDANCE: context.collectionGuidance,
+    REVIEW_INPUT: context.content
+  });
+}
+
+function buildAdversarialFinalizePrompt(context, focusText) {
+  const template = loadPromptTemplate(ROOT_DIR, "adversarial-review-finalize");
+  return interpolateTemplate(template, {
+    TARGET_LABEL: context.target.label,
+    USER_FOCUS: focusText || "No extra focus provided."
+  });
+}
+
 function ensureCodexAvailable(cwd) {
   const availability = getCodexAvailability(cwd);
   if (!availability.available) {
@@ -404,14 +423,29 @@ async function executeReviewRun(request) {
   }
 
   const context = collectReviewContext(request.cwd, target);
-  const prompt = buildAdversarialReviewPrompt(context, focusText);
-  const result = await runAppServerTurn(context.repoRoot, {
-    prompt,
-    model: request.model,
-    sandbox: "read-only",
-    outputSchema: readOutputSchema(REVIEW_SCHEMA),
-    onProgress: request.onProgress
-  });
+  let result;
+  if (context.inputMode === "self-collect") {
+    const investigatePrompt = buildAdversarialInvestigatePrompt(context, focusText);
+    const finalizePrompt = buildAdversarialFinalizePrompt(context, focusText);
+    result = await runAppServerInvestigation(context.repoRoot, {
+      investigatePrompt,
+      finalizePrompt,
+      outputSchema: readOutputSchema(REVIEW_SCHEMA),
+      model: request.model,
+      sandbox: "read-only",
+      maxInvestigationTurns: request.maxInvestigationTurns,
+      onProgress: request.onProgress
+    });
+  } else {
+    const prompt = buildAdversarialReviewPrompt(context, focusText);
+    result = await runAppServerTurn(context.repoRoot, {
+      prompt,
+      model: request.model,
+      sandbox: "read-only",
+      outputSchema: readOutputSchema(REVIEW_SCHEMA),
+      onProgress: request.onProgress
+    });
+  }
   const parsed = parseStructuredOutput(result.finalMessage, {
     status: result.status,
     failureMessage: result.error?.message ?? result.stderr
@@ -436,6 +470,9 @@ async function executeReviewRun(request) {
     parseError: parsed.parseError,
     reasoningSummary: result.reasoningSummary
   };
+  if (result.investigation) {
+    payload.investigation = result.investigation;
+  }
 
   return {
     exitStatus: result.status,
@@ -445,7 +482,8 @@ async function executeReviewRun(request) {
     rendered: renderReviewResult(parsed, {
       reviewLabel: reviewName,
       targetLabel: context.target.label,
-      reasoningSummary: result.reasoningSummary
+      reasoningSummary: result.reasoningSummary,
+      investigation: result.investigation ?? null
     }),
     summary: parsed.parsed?.summary ?? parsed.parseError ?? firstMeaningfulLine(result.finalMessage, `${reviewName} finished.`),
     jobTitle: `Codex ${reviewName}`,
@@ -681,13 +719,22 @@ function enqueueBackgroundTask(cwd, job, request) {
 
 async function handleReviewCommand(argv, config) {
   const { options, positionals } = parseCommandInput(argv, {
-    valueOptions: ["base", "scope", "model", "cwd"],
+    valueOptions: ["base", "scope", "model", "cwd", "max-investigation-turns"],
     booleanOptions: ["json", "background", "wait"],
     aliasMap: {
       m: "model"
     }
   });
 
+  const rawMaxTurns = options["max-investigation-turns"];
+  let maxInvestigationTurns;
+  if (rawMaxTurns !== undefined) {
+    if (!/^[1-9][0-9]*$/.test(String(rawMaxTurns))) {
+      throw new Error(`--max-investigation-turns must be a positive integer (got: ${rawMaxTurns})`);
+    }
+    maxInvestigationTurns = Number(rawMaxTurns);
+  }
+
   const cwd = resolveCommandCwd(options);
   const workspaceRoot = resolveCommandWorkspace(options);
   const focusText = positionals.join(" ").trim();
@@ -716,6 +763,7 @@ async function handleReviewCommand(argv, config) {
         model: options.model,
         focusText,
         reviewName: config.reviewName,
+        maxInvestigationTurns,
         onProgress: progress
       }),
     { json: options.json }
diff --git a/plugins/codex/scripts/lib/codex.mjs b/plugins/codex/scripts/lib/codex.mjs
index f2fe88bd..9a64f530 100644
--- a/plugins/codex/scripts/lib/codex.mjs
+++ b/plugins/codex/scripts/lib/codex.mjs
@@ -658,6 +658,9 @@ async function resumeThread(client, threadId, cwd, options = {}) {
 }
 
 function buildResultStatus(turnState) {
+  if (turnState.error) {
+    return 1;
+  }
   return turnState.finalTurn?.status === "completed" ? 0 : 1;
 }
 
@@ -1028,6 +1031,224 @@ export async function runAppServerTurn(cwd, options = {}) {
   });
 }
 
+const DEFAULT_MAX_INVESTIGATION_TURNS = 10;
+const INVESTIGATION_CONTINUATION_CUE = "Continue your investigation.";
+
+export async function runAppServerInvestigation(cwd, options = {}) {
+  const availability = getCodexAvailability(cwd);
+  if (!availability.available) {
+    throw new Error("Codex CLI is not installed or is missing required runtime support. Install it with `npm install -g @openai/codex`, then rerun `/codex:setup`.");
+  }
+
+  const investigatePrompt = options.investigatePrompt?.trim();
+  const finalizePrompt = options.finalizePrompt?.trim();
+  if (!investigatePrompt) {
+    throw new Error("runAppServerInvestigation requires investigatePrompt.");
+  }
+  if (!finalizePrompt) {
+    throw new Error("runAppServerInvestigation requires finalizePrompt.");
+  }
+  const maxInvestigationTurns = Number.isFinite(options.maxInvestigationTurns) && options.maxInvestigationTurns > 0
+    ? Math.floor(options.maxInvestigationTurns)
+    : DEFAULT_MAX_INVESTIGATION_TURNS;
+  const sandbox = options.sandbox ?? "read-only";
+
+  return withAppServer(cwd, async (client) => {
+    emitProgress(options.onProgress, "Starting Codex investigation thread.", "starting");
+    const startResponse = await startThread(client, cwd, {
+      model: options.model,
+      sandbox,
+      ephemeral: true,
+      threadName: null
+    });
+    const threadId = startResponse.thread.id;
+    emitProgress(options.onProgress, `Thread ready (${threadId}).`, "starting", { threadId });
+
+    let turnCount = 0;
+    let truncated = false;
+    let totalCommandsRun = 0;
+    const aggregatedCommandExecutions = [];
+    const aggregatedFileChanges = [];
+
+    for (let i = 1; i <= maxInvestigationTurns; i += 1) {
+      const promptText = i === 1 ? investigatePrompt : INVESTIGATION_CONTINUATION_CUE;
+      emitProgress(options.onProgress, `Investigation turn ${i}.`, "investigating");
+
+      let turnState;
+      try {
+        turnState = await captureTurn(
+          client,
+          threadId,
+          () =>
+            client.request("turn/start", {
+              threadId,
+              input: buildTurnInput(promptText),
+              model: options.model ?? null,
+              effort: options.effort ?? null,
+              outputSchema: null
+            }),
+          { onProgress: options.onProgress }
+        );
+      } catch (transportError) {
+        return {
+          status: 1,
+          threadId,
+          turnId: null,
+          finalMessage: "",
+          reasoningSummary: [],
+          turn: null,
+          error: { message: transportError?.message ?? String(transportError) },
+          stderr: cleanCodexStderr(client.stderr),
+          fileChanges: aggregatedFileChanges,
+          touchedFiles: collectTouchedFiles(aggregatedFileChanges),
+          commandExecutions: aggregatedCommandExecutions,
+          investigation: { turnCount, truncated: false }
+        };
+      }
+
+      turnCount = i;
+      const turnCommandCount = turnState.commandExecutions.length;
+      totalCommandsRun += turnCommandCount;
+      for (const cmd of turnState.commandExecutions) {
+        aggregatedCommandExecutions.push(cmd);
+      }
+      for (const change of turnState.fileChanges) {
+        aggregatedFileChanges.push(change);
+      }
+
+      if (turnState.error) {
+        return {
+          status: buildResultStatus(turnState),
+          threadId,
+          turnId: turnState.turnId,
+          finalMessage: turnState.lastAgentMessage,
+          reasoningSummary: turnState.reasoningSummary,
+          turn: turnState.finalTurn,
+          error: turnState.error,
+          stderr: cleanCodexStderr(client.stderr),
+          fileChanges: aggregatedFileChanges,
+          touchedFiles: collectTouchedFiles(aggregatedFileChanges),
+          commandExecutions: aggregatedCommandExecutions,
+          investigation: { turnCount, truncated: false }
+        };
+      }
+
+      // Convergence: a turn that produces no commands and emits an agent
+      // message is the contract the investigate prompt teaches the model
+      // ("a summary message with no further command calls signals readiness").
+      // The legacy check required `phase: "final_answer"`, but recon turns
+      // run with outputSchema=null so the app-server does not always tag
+      // messages with that phase — leading to runaway turns where the model
+      // keeps insisting it has converged but the loop refuses to listen.
+      const turnHadAgentMessage = Boolean(turnState.lastAgentMessage);
+      const converged = turnCommandCount === 0 && turnHadAgentMessage;
+      if (converged) {
+        break;
+      }
+
+      if (i === maxInvestigationTurns) {
+        truncated = true;
+      }
+    }
+
+    if (totalCommandsRun === 0) {
+      truncated = true;
+    }
+
+    emitProgress(options.onProgress, "Investigation complete; finalizing structured output.", "finalizing");
+
+    // The finalize turn is supposed to emit only the structured JSON. In
+    // practice the model sometimes emits a tool-call stub instead (e.g.
+    // {"cmd": "wc -l ..."}) — if any commands ran during finalize, treat
+    // that as a contract violation and retry once with a sharper prompt.
+    const STRICT_FINALIZE_REMINDER =
+      "STRICT FINALIZE: do not run any shell commands. Output ONLY the JSON " +
+      "matching the schema, with no prose, no tool calls, and nothing else.\n\n";
+    let finalizeState;
+    let finalizeAttempts = 0;
+    const MAX_FINALIZE_ATTEMPTS = 2;
+    while (finalizeAttempts < MAX_FINALIZE_ATTEMPTS) {
+      finalizeAttempts += 1;
+      const promptText = finalizeAttempts === 1
+        ? finalizePrompt
+        : STRICT_FINALIZE_REMINDER + finalizePrompt;
+      try {
+        finalizeState = await captureTurn(
+          client,
+          threadId,
+          () =>
+            client.request("turn/start", {
+              threadId,
+              input: buildTurnInput(promptText),
+              model: options.model ?? null,
+              effort: options.effort ?? null,
+              outputSchema: options.outputSchema ?? null
+            }),
+          { onProgress: options.onProgress }
+        );
+      } catch (transportError) {
+        return {
+          status: 1,
+          threadId,
+          turnId: null,
+          finalMessage: "",
+          reasoningSummary: [],
+          turn: null,
+          error: { message: transportError?.message ?? String(transportError) },
+          stderr: cleanCodexStderr(client.stderr),
+          fileChanges: aggregatedFileChanges,
+          touchedFiles: collectTouchedFiles(aggregatedFileChanges),
+          commandExecutions: aggregatedCommandExecutions,
+          investigation: { turnCount, truncated }
+        };
+      }
+
+      // If the finalize turn ran commands, the model violated the contract.
+      // Retry once with a stricter prompt; if it still fails, accept the
+      // (likely-malformed) output and let the parser surface the error.
+      if (finalizeState.commandExecutions.length === 0) {
+        break;
+      }
+      if (finalizeAttempts < MAX_FINALIZE_ATTEMPTS) {
+        emitProgress(
+          options.onProgress,
+          "Finalize turn ran commands; retrying with stricter prompt.",
+          "finalizing"
+        );
+      }
+      // Aggregate the wasted commands so the caller still sees what happened.
+      for (const cmd of finalizeState.commandExecutions) {
+        aggregatedCommandExecutions.push(cmd);
+      }
+      for (const change of finalizeState.fileChanges) {
+        aggregatedFileChanges.push(change);
+      }
+    }
+
+    for (const cmd of finalizeState.commandExecutions) {
+      aggregatedCommandExecutions.push(cmd);
+    }
+    for (const change of finalizeState.fileChanges) {
+      aggregatedFileChanges.push(change);
+    }
+
+    return {
+      status: buildResultStatus(finalizeState),
+      threadId,
+      turnId: finalizeState.turnId,
+      finalMessage: finalizeState.lastAgentMessage,
+      reasoningSummary: finalizeState.reasoningSummary,
+      turn: finalizeState.finalTurn,
+      error: finalizeState.error,
+      stderr: cleanCodexStderr(client.stderr),
+      fileChanges: aggregatedFileChanges,
+      touchedFiles: collectTouchedFiles(aggregatedFileChanges),
+      commandExecutions: aggregatedCommandExecutions,
+      investigation: { turnCount, truncated }
+    };
+  });
+}
+
 export async function findLatestTaskThread(cwd) {
   const availability = getCodexAvailability(cwd);
   if (!availability.available) {
diff --git a/plugins/codex/scripts/lib/git.mjs b/plugins/codex/scripts/lib/git.mjs
index 1749cfc8..c06c599b 100644
--- a/plugins/codex/scripts/lib/git.mjs
+++ b/plugins/codex/scripts/lib/git.mjs
@@ -5,7 +5,12 @@ import { isProbablyText } from "./fs.mjs";
 import { formatCommandFailure, runCommand, runCommandChecked } from "./process.mjs";
 
 const MAX_UNTRACKED_BYTES = 24 * 1024;
-const DEFAULT_INLINE_DIFF_MAX_FILES = 2;
+// Inline-diff embeds full file contents into the prompt and pins outputSchema
+// on a single turn — there is no recovery if the model wants to investigate
+// before producing the verdict. Keep this path narrow: only single-file
+// reviews of small diffs use it. Anything larger falls through to the
+// two-phase self-collect path which can tolerate exploratory turns.
+const DEFAULT_INLINE_DIFF_MAX_FILES = 1;
 const DEFAULT_INLINE_DIFF_MAX_BYTES = 256 * 1024;
 
 function git(cwd, args, options = {}) {
diff --git a/plugins/codex/scripts/lib/render.mjs b/plugins/codex/scripts/lib/render.mjs
index 2ec18523..f4591c65 100644
--- a/plugins/codex/scripts/lib/render.mjs
+++ b/plugins/codex/scripts/lib/render.mjs
@@ -208,6 +208,16 @@ export function renderSetupReport(report) {
   return `${lines.join("\n").trimEnd()}\n`;
 }
 
+function appendInvestigationBanner(lines, meta) {
+  if (meta.investigation?.truncated === true) {
+    const turns = meta.investigation.turnCount ?? "?";
+    lines.push(
+      `Investigation truncated at ${turns} turns; findings may be shallow. Use --max-investigation-turns to raise the cap.`,
+      ""
+    );
+  }
+}
+
 export function renderReviewResult(parsedResult, meta) {
   if (!parsedResult.parsed) {
     const lines = [
@@ -217,6 +227,7 @@ export function renderReviewResult(parsedResult, meta) {
       "",
       `- Parse error: ${parsedResult.parseError}`
     ];
+    appendInvestigationBanner(lines, meta);
 
     if (parsedResult.rawOutput) {
       lines.push("", "Raw final message:", "", "```text", parsedResult.rawOutput, "```");
@@ -237,6 +248,7 @@ export function renderReviewResult(parsedResult, meta) {
       "",
       `- Validation error: ${validationError}`
     ];
+    appendInvestigationBanner(lines, meta);
 
     if (parsedResult.rawOutput) {
       lines.push("", "Raw final message:", "", "```text", parsedResult.rawOutput, "```");
@@ -258,6 +270,7 @@ export function renderReviewResult(parsedResult, meta) {
     data.summary,
     ""
   ];
+  appendInvestigationBanner(lines, meta);
 
   if (findings.length === 0) {
     lines.push("No material findings.");
diff --git a/tests/commands.test.mjs b/tests/commands.test.mjs
index 3724ffa4..496a740e 100644
--- a/tests/commands.test.mjs
+++ b/tests/commands.test.mjs
@@ -49,7 +49,9 @@ test("adversarial review command uses AskUserQuestion and background Bash while
   assert.match(source, /```bash/);
   assert.match(source, /```typescript/);
   assert.match(source, /adversarial-review "\$ARGUMENTS"/);
-  assert.match(source, /\[--scope auto\|working-tree\|branch\] \[focus \.\.\.\]/);
+  assert.match(source, /\[--scope auto\|working-tree\|branch\]/);
+  assert.match(source, /\[--max-investigation-turns N\]/);
+  assert.match(source, /\[focus \.\.\.\]/);
   assert.match(source, /run_in_background:\s*true/);
   assert.match(source, /command:\s*`node "\$\{CLAUDE_PLUGIN_ROOT\}\/scripts\/codex-companion\.mjs" adversarial-review "\$ARGUMENTS"`/);
   assert.match(source, /description:\s*"Codex adversarial review"/);
diff --git a/tests/fake-codex-fixture.mjs b/tests/fake-codex-fixture.mjs
index debcadce..e1ac99cd 100644
--- a/tests/fake-codex-fixture.mjs
+++ b/tests/fake-codex-fixture.mjs
@@ -2,7 +2,7 @@ import fs from "node:fs";
 import path from "node:path";
 import process from "node:process";
 
-import { writeExecutable } from "./helpers.mjs";
+import { makeTempDir, writeExecutable } from "./helpers.mjs";
 
 export function installFakeCodex(binDir, behavior = "review-ok") {
   const statePath = path.join(binDir, "fake-codex-state.json");
@@ -369,6 +369,50 @@ rl.on("line", (line) => {
 
 	      case "turn/start": {
 	        const thread = ensureThread(state, message.params.threadId);
+
+        if (BEHAVIOR === "queue-driven") {
+          if (!state.requests) { state.requests = []; }
+          state.requests.push({ method: "turn/start", params: message.params });
+
+          const turnId = nextTurnId(state);
+          thread.updatedAt = now();
+
+          const entry = (state.queue && state.queue.length > 0) ? state.queue.shift() : null;
+          saveState(state);
+
+          if (entry && entry.rpcError) {
+            send({ id: message.id, error: { code: -32000, message: entry.rpcError.message } });
+            break;
+          }
+
+          send({ id: message.id, result: { turn: buildTurn(turnId) } });
+          send({ method: "turn/started", params: { threadId: thread.id, turn: buildTurn(turnId) } });
+
+          const commands = (entry && entry.commands) || [];
+          let cmdCounter = 0;
+          for (const cmd of commands) {
+            const itemId = "cmd_" + turnId + "_" + (cmdCounter++);
+            send({ method: "item/started", params: { threadId: thread.id, turnId, item: { type: "commandExecution", id: itemId, command: cmd.command, status: "in_progress" } } });
+            send({ method: "item/completed", params: { threadId: thread.id, turnId, item: { type: "commandExecution", id: itemId, command: cmd.command, exitCode: cmd.exitCode ?? 0, status: "completed" } } });
+          }
+
+          if (entry && entry.finalAnswer) {
+            const phase = entry.finalAnswer.phase ?? "final_answer";
+            send({ method: "item/completed", params: { threadId: thread.id, turnId, item: { type: "agentMessage", id: "msg_" + turnId, text: entry.finalAnswer.text, phase } } });
+          }
+
+          if (entry && entry.turnError) {
+            send({ method: "error", params: { threadId: thread.id, turnId, error: { message: entry.turnError.message } } });
+          }
+
+          if (!entry) {
+            send({ method: "item/completed", params: { threadId: thread.id, turnId, item: { type: "agentMessage", id: "msg_" + turnId, text: "", phase: "agent_message" } } });
+          }
+
+          send({ method: "turn/completed", params: { threadId: thread.id, turn: buildTurn(turnId, "completed") } });
+          break;
+        }
+
 	        const prompt = (message.params.input || [])
           .filter((item) => item.type === "text")
           .map((item) => item.text)
@@ -587,3 +631,71 @@ export function buildEnv(binDir) {
     PATH: `${binDir}${sep}${process.env.PATH}`
   };
 }
+
+/**
+ * Sets up a queue-driven fake Codex harness for multi-turn tests.
+ * Returns a handle with helpers for scripting turn responses and
+ * inspecting captured requests.
+ */
+export function setupFakeCodex({ cwd } = {}) {
+  const binDir = makeTempDir("codex-queue-driven-");
+  installFakeCodex(binDir, "queue-driven");
+
+  const statePath = path.join(binDir, "fake-codex-state.json");
+  const initialState = {
+    nextThreadId: 1,
+    nextTurnId: 1,
+    appServerStarts: 0,
+    threads: [],
+    capabilities: null,
+    lastInterrupt: null,
+    queue: [],
+    requests: []
+  };
+  fs.writeFileSync(statePath, JSON.stringify(initialState, null, 2));
+
+  const sep = process.platform === "win32" ? ";" : ":";
+  process.env.PATH = `${binDir}${sep}${process.env.PATH}`;
+
+  const env = buildEnv(binDir);
+  const resolvedCwd = cwd || process.cwd();
+
+  function readState() {
+    return JSON.parse(fs.readFileSync(statePath, "utf8"));
+  }
+
+  function writeState(state) {
+    fs.writeFileSync(statePath, JSON.stringify(state, null, 2));
+  }
+
+  return {
+    cwd: resolvedCwd,
+    env,
+    binDir,
+    queueTurnResponse(entry) {
+      const state = readState();
+      if (!state.queue) { state.queue = []; }
+      state.queue.push(entry);
+      writeState(state);
+    },
+    queueTurnRpcError({ message }) {
+      const state = readState();
+      if (!state.queue) { state.queue = []; }
+      state.queue.push({ rpcError: { message } });
+      writeState(state);
+    },
+    // `requests` re-reads the state file each access; assign to a local variable for repeated use.
+    get requests() {
+      const state = readState();
+      return state.requests ?? [];
+    },
+    close() {
+      const sep = process.platform === "win32" ? ";" : ":";
+      process.env.PATH = (process.env.PATH ?? "")
+        .split(sep)
+        .filter((entry) => entry !== binDir)
+        .join(sep);
+      fs.rmSync(binDir, { recursive: true, force: true });
+    }
+  };
+}
diff --git a/tests/fake-codex-fixture.test.mjs b/tests/fake-codex-fixture.test.mjs
new file mode 100644
index 00000000..5876460d
--- /dev/null
+++ b/tests/fake-codex-fixture.test.mjs
@@ -0,0 +1,107 @@
+import test from "node:test";
+import assert from "node:assert/strict";
+
+import { setupFakeCodex } from "./fake-codex-fixture.mjs";
+import { runAppServerTurn } from "../plugins/codex/scripts/lib/codex.mjs";
+import { makeTempDir } from "./helpers.mjs";
+
+test("queue-driven fake: final answer is returned via runAppServerTurn", async () => {
+  const cwd = makeTempDir("codex-queue-test-");
+  const handle = setupFakeCodex({ cwd });
+  try {
+    handle.queueTurnResponse({ finalAnswer: { text: "hi" } });
+
+    const result = await runAppServerTurn(cwd, {
+      prompt: "say hi"
+    });
+
+    assert.equal(result.finalMessage, "hi");
+    assert.equal(result.status, 0);
+  } finally {
+    handle.close();
+  }
+});
+
+test("queue-driven fake: requests are captured with params", async () => {
+  const cwd = makeTempDir("codex-queue-test-");
+  const handle = setupFakeCodex({ cwd });
+  try {
+    handle.queueTurnResponse({ finalAnswer: { text: "captured" } });
+
+    await runAppServerTurn(cwd, {
+      prompt: "check capture"
+    });
+
+    const turnStarts = handle.requests.filter((r) => r.method === "turn/start");
+    assert.equal(turnStarts.length, 1);
+    // Verify the captured params include the input text
+    const inputTexts = turnStarts[0].params.input
+      .filter((item) => item.type === "text")
+      .map((item) => item.text);
+    assert.ok(inputTexts.some((text) => text.includes("check capture")));
+  } finally {
+    handle.close();
+  }
+});
+
+test("queue-driven fake: commandExecution items are emitted and captured", async () => {
+  const cwd = makeTempDir("codex-queue-test-");
+  const handle = setupFakeCodex({ cwd });
+  try {
+    handle.queueTurnResponse({
+      commands: [{ command: "git diff", exitCode: 0 }],
+      finalAnswer: { text: "done" }
+    });
+
+    const result = await runAppServerTurn(cwd, {
+      prompt: "run commands"
+    });
+
+    assert.equal(result.finalMessage, "done");
+    assert.equal(result.commandExecutions.length, 1);
+    assert.equal(result.commandExecutions[0].command, "git diff");
+    assert.equal(result.commandExecutions[0].exitCode, 0);
+  } finally {
+    handle.close();
+  }
+});
+
+test("queue-driven fake: RPC error causes runAppServerTurn to reject", async () => {
+  const cwd = makeTempDir("codex-queue-test-");
+  const handle = setupFakeCodex({ cwd });
+  try {
+    handle.queueTurnRpcError({ message: "boom" });
+
+    await assert.rejects(
+      runAppServerTurn(cwd, { prompt: "should fail" }),
+      (error) => {
+        assert.ok(error.message.includes("boom"));
+        return true;
+      }
+    );
+  } finally {
+    handle.close();
+  }
+});
+
+test("queue-driven fake: soft error (turnError) is captured", async () => {
+  const cwd = makeTempDir("codex-queue-test-");
+  const handle = setupFakeCodex({ cwd });
+  try {
+    handle.queueTurnResponse({
+      finalAnswer: { text: "partial" },
+      turnError: { message: "soft failure" }
+    });
+
+    const result = await runAppServerTurn(cwd, {
+      prompt: "trigger soft error"
+    });
+
+    // The turn still completes, but state.error is set
+    assert.equal(result.finalMessage, "partial");
+    assert.ok(result.error);
+    assert.equal(result.error.message, "soft failure");
+  } finally {
+    handle.close();
+  }
+});
diff --git a/tests/git.test.mjs b/tests/git.test.mjs
index 14ff2576..9b567e4d 100644
--- a/tests/git.test.mjs
+++ b/tests/git.test.mjs
@@ -86,6 +86,28 @@ test("collectReviewContext keeps inline diffs for tiny adversarial reviews", ()
   assert.match(context.content, /INLINE_MARKER/);
 });
 
+test("collectReviewContext routes 2-file changes to self-collect (inline cap is 1)", () => {
+  // Regression guard: a 2-file change used to slip into inline-diff because
+  // the cap was 2, embedding both files into a single-turn schema-pinned
+  // prompt — and the model often responded with a tool-call stub instead
+  // of the review JSON. Two files now go through the two-phase self-collect
+  // path which tolerates exploratory turns.
+  const cwd = makeTempDir();
+  initGitRepo(cwd);
+  fs.writeFileSync(path.join(cwd, "seed.js"), "export const value = 'seed';\n");
+  run("git", ["add", "seed.js"], { cwd });
+  run("git", ["commit", "-m", "init"], { cwd });
+  fs.writeFileSync(path.join(cwd, "doc-one.md"), "# planning doc\n".repeat(50));
+  fs.writeFileSync(path.join(cwd, "doc-two.md"), "# spec doc\n".repeat(50));
+
+  const target = resolveReviewTarget(cwd, {});
+  const context = collectReviewContext(cwd, target);
+
+  assert.equal(context.fileCount, 2);
+  assert.equal(context.inputMode, "self-collect",
+    "2-file changes must NOT be inlined; they hit the schema-pinned single-turn bug otherwise");
+});
+
 test("collectReviewContext skips untracked directories in working tree review", () => {
   const cwd = makeTempDir();
   initGitRepo(cwd);
diff --git a/tests/investigation.test.mjs b/tests/investigation.test.mjs
new file mode 100644
index 00000000..4ee571ec
--- /dev/null
+++ b/tests/investigation.test.mjs
@@ -0,0 +1,883 @@
+import test from "node:test";
+import assert from "node:assert/strict";
+
+import { setupFakeCodex } from "./fake-codex-fixture.mjs";
+import { runAppServerInvestigation } from "../plugins/codex/scripts/lib/codex.mjs";
+import { makeTempDir } from "./helpers.mjs";
+
+// Structured JSON payloads used by multiple tests.
+const STRUCTURED_REVIEW = JSON.stringify({
+  verdict: "needs-attention",
+  summary: "Concern X.",
+  findings: [{
+    severity: "high",
+    title: "Race",
+    file: "a.js",
+    line_start: 10,
+    line_end: 12,
+    confidence: 0.8,
+    body: "Potential race condition.",
+    recommendation: "Add a mutex."
+  }],
+  next_steps: []
+});
+
+const APPROVE_REVIEW = JSON.stringify({
+  verdict: "approve",
+  summary: "No material issues found.",
+  findings: [],
+  next_steps: []
+});
+
+test("converges when Codex emits a final-answer turn with no commands", async () => {
+  const cwd = makeTempDir("codex-inv-test-");
+  const fake = setupFakeCodex({ cwd });
+  try {
+    // Recon turn 1: commands, no final answer
+    fake.queueTurnResponse({
+      commands: [{ command: "git diff HEAD~1", exitCode: 0 }],
+      finalAnswer: null
+    });
+    // Recon turn 2: no commands, final answer => converges
+    fake.queueTurnResponse({
+      commands: [],
+      finalAnswer: { text: "Investigation done." }
+    });
+    // Finalize turn 3
+    fake.queueTurnResponse({
+      finalAnswer: { text: STRUCTURED_REVIEW }
+    });
+
+    const result = await runAppServerInvestigation(fake.cwd, {
+      investigatePrompt: "Investigate the changes.",
+      finalizePrompt: "Produce your structured verdict.",
+      outputSchema: { type: "object", required: ["verdict"] }
+    });
+
+    assert.equal(result.investigation.turnCount, 2);
+    assert.equal(result.investigation.truncated, false);
+
+    const requests = fake.requests;
+    const starts = requests.filter((r) => r.method === "turn/start");
+    assert.equal(starts.length, 3, "should have 3 turn/start requests (2 recon + 1 finalize)");
+  } finally {
+    fake.close();
+  }
+});
+
+test("converges when agentMessage has no `final_answer` phase tag (real-world case)", async () => {
+  // In production, recon turns run with outputSchema=null, and the
+  // app-server does NOT always tag agent messages with phase="final_answer".
+  // The convergence detector must treat any 0-command turn that emits an
+  // agent message as convergence — otherwise the model keeps insisting it
+  // has converged but the loop refuses to stop.
+  const cwd = makeTempDir("codex-inv-test-");
+  const fake = setupFakeCodex({ cwd });
+  try {
+    fake.queueTurnResponse({
+      commands: [{ command: "git diff HEAD~1", exitCode: 0 }],
+      finalAnswer: null
+    });
+    // Recon turn 2: no commands, agent message WITHOUT final_answer phase
+    // => must still converge (this is what real codex sends in recon mode).
+    fake.queueTurnResponse({
+      commands: [],
+      finalAnswer: { text: "My investigation is complete.", phase: "agent_message" }
+    });
+    fake.queueTurnResponse({
+      finalAnswer: { text: STRUCTURED_REVIEW }
+    });
+
+    const result = await runAppServerInvestigation(fake.cwd, {
+      investigatePrompt: "Investigate.",
+      finalizePrompt: "Finalize.",
+      outputSchema: { type: "object", required: ["verdict"] }
+    });
+
+    assert.equal(result.investigation.turnCount, 2,
+      "convergence on the no-command + agentMessage turn must not be missed");
+    assert.equal(result.investigation.truncated, false);
+
+    const requests = fake.requests;
+    const starts = requests.filter((r) => r.method === "turn/start");
+    assert.equal(starts.length, 3, "2 recon + 1 finalize");
+  } finally {
+    fake.close();
+  }
+});
+
+test("respects maxInvestigationTurns and marks truncated", async () => {
+  const cwd = makeTempDir("codex-inv-test-");
+  const fake = setupFakeCodex({ cwd });
+  try {
+    // Queue 3 recon turns: all have commands, no final answer.
+    // (With maxInvestigationTurns=3 the loop will exhaust here.)
+    for (let i = 0; i < 3; i++) {
+      fake.queueTurnResponse({
+        commands: [{ command: `check-${i}`, exitCode: 0 }]
+      });
+    }
+    // Finalize turn: pure JSON, zero commands.
+    fake.queueTurnResponse({
+      finalAnswer: { text: APPROVE_REVIEW }
+    });
+
+    const result = await runAppServerInvestigation(fake.cwd, {
+      investigatePrompt: "Investigate.",
+      finalizePrompt: "Finalize.",
+      maxInvestigationTurns: 3
+    });
+
+    assert.equal(result.investigation.turnCount, 3);
+    assert.equal(result.investigation.truncated, true);
+
+    const requests = fake.requests;
+    const starts = requests.filter((r) => r.method === "turn/start");
+    assert.equal(starts.length, 4, "3 recon + 1 finalize");
+  } finally {
+    fake.close();
+  }
+});
+
+test("turn with both finalAnswer and commands does not converge", async () => {
+  const cwd = makeTempDir("codex-inv-test-");
+  const fake = setupFakeCodex({ cwd });
+  try {
+    // Recon turn 1: commands AND final answer => does NOT converge
+    fake.queueTurnResponse({
+      commands: [{ command: "grep -r TODO", exitCode: 0 }],
+      finalAnswer: { text: "Partial finding." }
+    });
+    // Recon turn 2: only final answer, no commands => converges
+    fake.queueTurnResponse({
+      commands: [],
+      finalAnswer: { text: "Investigation done." }
+    });
+    // Finalize turn
+    fake.queueTurnResponse({
+      finalAnswer: { text: APPROVE_REVIEW }
+    });
+
+    const result = await runAppServerInvestigation(fake.cwd, {
+      investigatePrompt: "Investigate.",
+      finalizePrompt: "Finalize."
+    });
+
+    assert.equal(result.investigation.turnCount, 2);
+    assert.equal(result.investigation.truncated, false);
+  } finally {
+    fake.close();
+  }
+});
+
+test("outputSchema is null on recon turns and set on finalize turn", async () => {
+  const cwd = makeTempDir("codex-inv-test-");
+  const fake = setupFakeCodex({ cwd });
+  try {
+    // Recon turn 1: commands
+    fake.queueTurnResponse({
+      commands: [{ command: "cat file.js", exitCode: 0 }]
+    });
+    // Recon turn 2: final answer, no commands => converges
+    fake.queueTurnResponse({
+      commands: [],
+      finalAnswer: { text: "Done investigating." }
+    });
+    // Finalize turn
+    fake.queueTurnResponse({
+      finalAnswer: { text: STRUCTURED_REVIEW }
+    });
+
+    const schema = { type: "object", required: ["verdict"] };
+    const result = await runAppServerInvestigation(fake.cwd, {
+      investigatePrompt: "Investigate.",
+      finalizePrompt: "Finalize.",
+      outputSchema: schema
+    });
+
+    const requests = fake.requests;
+    const starts = requests.filter((r) => r.method === "turn/start");
+    assert.equal(starts.length, 3);
+
+    // Recon turns must have outputSchema === null
+    assert.equal(starts[0].params.outputSchema, null, "recon turn 1 outputSchema should be null");
+    assert.equal(starts[1].params.outputSchema, null, "recon turn 2 outputSchema should be null");
+
+    // Finalize turn must have the schema
+    assert.deepEqual(starts[2].params.outputSchema, schema, "finalize turn should have the outputSchema");
+  } finally {
+    fake.close();
+  }
+});
+
+test("phase-1 soft error (turn/failed) aborts before phase-2 finalize", async () => {
+  const cwd = makeTempDir("codex-inv-test-");
+  const fake = setupFakeCodex({ cwd });
+  try {
+    // Recon turn 1: commands, normal
+    fake.queueTurnResponse({
+      commands: [{ command: "git log --oneline", exitCode: 0 }]
+    });
+    // Recon turn 2: soft error
+    fake.queueTurnResponse({
+      turnError: { message: "model produced unrenderable response" }
+    });
+
+    const result = await runAppServerInvestigation(fake.cwd, {
+      investigatePrompt: "Investigate.",
+      finalizePrompt: "Finalize."
+    });
+
+    assert.ok(result.error, "result should have an error");
+    assert.match(result.error.message, /model produced unrenderable response/);
+    assert.equal(result.investigation.turnCount, 2, "soft-error turn IS counted");
+    // A turn/completed with status="completed" can still arrive after an
+    // error notification, so result.status must be derived from the error,
+    // not from finalTurn.status. CI/automation relies on this.
+    assert.equal(result.status, 1, "soft-error path returns numeric status 1");
+
+    const requests = fake.requests;
+    const starts = requests.filter((r) => r.method === "turn/start");
+    assert.equal(starts.length, 2, "NO finalize turn should be attempted");
+  } finally {
+    fake.close();
+  }
+});
+
+test("phase-1 hard error (transport throw) aborts before phase-2 finalize", async () => {
+  const cwd = makeTempDir("codex-inv-test-");
+  const fake = setupFakeCodex({ cwd });
+  try {
+    // Recon turn 1: commands, normal
+    fake.queueTurnResponse({
+      commands: [{ command: "git status", exitCode: 0 }]
+    });
+    // Recon turn 2: RPC error (transport throw)
+    fake.queueTurnRpcError({ message: "ECONNRESET" });
+
+    const result = await runAppServerInvestigation(fake.cwd, {
+      investigatePrompt: "Investigate.",
+      finalizePrompt: "Finalize."
+    });
+
+    assert.ok(result.error, "result should have an error");
+    assert.match(result.error.message, /ECONNRESET/);
+    assert.equal(result.investigation.turnCount, 1, "hard error returns BEFORE incrementing turnCount");
+    assert.equal(result.status, 1, "transport-error path returns numeric status");
+
+    const requests = fake.requests;
+    const starts = requests.filter((r) => r.method === "turn/start");
+    assert.equal(starts.length, 2, "the failing turn was still attempted");
+  } finally {
+    fake.close();
+  }
+});
+
+test("finalize turn that runs commands triggers one strict-prompt retry", async () => {
+  // Production observation: the model occasionally emits a tool-call stub
+  // during finalize (e.g. {"cmd": "wc -l ..."}) instead of the structured
+  // JSON. When that happens the finalize turn has commandExecutions.length > 0.
+  // The orchestrator should detect this contract violation and retry once
+  // with a stricter prompt.
+  const cwd = makeTempDir("codex-inv-test-");
+  const fake = setupFakeCodex({ cwd });
+  try {
+    // Recon: converge.
+    fake.queueTurnResponse({
+      commands: [],
+      finalAnswer: { text: "Investigation done." }
+    });
+    // First finalize attempt: model misbehaves and runs a command.
+    fake.queueTurnResponse({
+      commands: [{ command: "wc -l README.md", exitCode: 0 }],
+      finalAnswer: { text: "{\"cmd\":\"wc -l README.md\"}" }
+    });
+    // Second finalize attempt: model behaves and emits proper JSON.
+    fake.queueTurnResponse({
+      commands: [],
+      finalAnswer: { text: APPROVE_REVIEW }
+    });
+
+    const result = await runAppServerInvestigation(fake.cwd, {
+      investigatePrompt: "Investigate.",
+      finalizePrompt: "Finalize.",
+      outputSchema: { type: "object", required: ["verdict"] }
+    });
+
+    assert.equal(result.finalMessage, APPROVE_REVIEW,
+      "the second (well-behaved) finalize attempt should be the final message");
+    const requests = fake.requests;
+    const starts = requests.filter((r) => r.method === "turn/start");
+    assert.equal(starts.length, 3, "1 recon + 2 finalize attempts");
+
+    // The retry must use a stricter prompt distinct from the first try.
+    const finalizeStarts = starts.slice(1); // first start is recon
+    assert.match(
+      finalizeStarts[1].params.input?.[0]?.text ?? "",
+      /STRICT FINALIZE/,
+      "retry prompt must include the stricter directive"
+    );
+    assert.doesNotMatch(
+      finalizeStarts[0].params.input?.[0]?.text ?? "",
+      /STRICT FINALIZE/,
+      "first finalize attempt uses the normal prompt"
+    );
+  } finally {
+    fake.close();
+  }
+});
+
+test("finalize retry gives up after the second attempt and surfaces the output", async () => {
+  // If the model misbehaves twice in a row, the orchestrator must not loop
+  // forever — it should accept the second attempt's output and let the
+  // upstream parser produce a useful validation error.
+  const cwd = makeTempDir("codex-inv-test-");
+  const fake = setupFakeCodex({ cwd });
+  try {
+    fake.queueTurnResponse({
+      commands: [],
+      finalAnswer: { text: "Investigation done." }
+    });
+    // Both finalize attempts misbehave.
+    fake.queueTurnResponse({
+      commands: [{ command: "wc -l a", exitCode: 0 }],
+      finalAnswer: { text: "{\"cmd\":\"wc -l a\"}" }
+    });
+    fake.queueTurnResponse({
+      commands: [{ command: "wc -l b", exitCode: 0 }],
+      finalAnswer: { text: "{\"cmd\":\"wc -l b\"}" }
+    });
+
+    const result = await runAppServerInvestigation(fake.cwd, {
+      investigatePrompt: "Investigate.",
+      finalizePrompt: "Finalize.",
+      outputSchema: { type: "object", required: ["verdict"] }
+    });
+
+    const requests = fake.requests;
+    const starts = requests.filter((r) => r.method === "turn/start");
+    assert.equal(starts.length, 3, "1 recon + 2 finalize attempts only — no infinite loop");
+    assert.equal(result.finalMessage, "{\"cmd\":\"wc -l b\"}",
+      "the second-attempt output is surfaced so the parser can flag it");
+  } finally {
+    fake.close();
+  }
+});
+
+test("phase-2 finalize transport error preserves investigation metadata", async () => {
+  const cwd = makeTempDir("codex-inv-test-");
+  const fake = setupFakeCodex({ cwd });
+  try {
+    // Recon turn 1: commands
+    fake.queueTurnResponse({
+      commands: [{ command: "git diff", exitCode: 0 }]
+    });
+    // Recon turn 2: converge with final answer + no commands
+    fake.queueTurnResponse({
+      commands: [],
+      finalAnswer: { text: "Investigation done." }
+    });
+    // Finalize turn: RPC error
+    fake.queueTurnRpcError({ message: "ETIMEDOUT during finalize" });
+
+    const result = await runAppServerInvestigation(fake.cwd, {
+      investigatePrompt: "Investigate.",
+      finalizePrompt: "Finalize."
+    });
+
+    assert.ok(result.error, "result should carry the finalize error");
+    assert.match(result.error.message, /ETIMEDOUT during finalize/);
+    assert.equal(result.investigation.turnCount, 2, "investigation completed both recon turns before finalize failed");
+    assert.equal(result.investigation.truncated, false, "investigation converged; not truncated");
+    assert.equal(result.status, 1, "finalize-error path returns numeric status");
+
+    const requests = fake.requests;
+    const starts = requests.filter((r) => r.method === "turn/start");
+    assert.equal(starts.length, 3, "2 recon + 1 finalize attempted");
+  } finally {
+    fake.close();
+  }
+});
+
+test("converges with zero commands flags truncated=true", async () => {
+  const cwd = makeTempDir("codex-inv-test-");
+  const fake = setupFakeCodex({ cwd });
+  try {
+    // Recon turn 1: only final answer, no commands => converges immediately
+    fake.queueTurnResponse({
+      commands: [],
+      finalAnswer: { text: "Nothing to investigate." }
+    });
+    // Finalize turn
+    fake.queueTurnResponse({
+      finalAnswer: { text: APPROVE_REVIEW }
+    });
+
+    const result = await runAppServerInvestigation(fake.cwd, {
+      investigatePrompt: "Investigate.",
+      finalizePrompt: "Finalize."
+    });
+
+    assert.equal(result.investigation.turnCount, 1);
+    assert.equal(result.investigation.truncated, true, "zero commands across investigation => truncated");
+
+    const requests = fake.requests;
+    const starts = requests.filter((r) => r.method === "turn/start");
+    assert.equal(starts.length, 2, "1 recon + 1 finalize");
+  } finally {
+    fake.close();
+  }
+});
+
+test("recon turn 1 sends the investigate prompt; turn 2+ sends the continuation cue", async () => {
+  const cwd = makeTempDir("codex-inv-test-");
+  const fake = setupFakeCodex({ cwd });
+  try {
+    // Recon turn 1: commands
+    fake.queueTurnResponse({
+      commands: [{ command: "ls", exitCode: 0 }]
+    });
+    // Recon turn 2: commands
+    fake.queueTurnResponse({
+      commands: [{ command: "cat a.js", exitCode: 0 }]
+    });
+    // Recon turn 3: final answer, no commands => converges
+    fake.queueTurnResponse({
+      commands: [],
+      finalAnswer: { text: "All done." }
+    });
+    // Finalize turn
+    fake.queueTurnResponse({
+      finalAnswer: { text: APPROVE_REVIEW }
+    });
+
+    const result = await runAppServerInvestigation(fake.cwd, {
+      investigatePrompt: "FULL INVESTIGATE PROMPT: look at the code",
+      finalizePrompt: "FINALIZE PROMPT: produce verdict"
+    });
+
+    const requests = fake.requests;
+    const starts = requests.filter((r) => r.method === "turn/start");
+    assert.equal(starts.length, 4, "3 recon + 1 finalize");
+
+    // Extract input text from each turn/start
+    const inputTexts = starts.map((s) =>
+      (s.params.input || [])
+        .filter((p) => p.type === "text")
+        .map((p) => p.text)
+        .join("")
+    );
+
+    // Turn 1: investigate prompt
+    assert.match(inputTexts[0], /FULL INVESTIGATE PROMPT/, "turn 1 should use investigate prompt");
+    // Turn 2 and 3: continuation cue
+    assert.equal(inputTexts[1], "Continue your investigation.", "turn 2 should use continuation cue");
+    assert.equal(inputTexts[2], "Continue your investigation.", "turn 3 should use continuation cue");
+    // Turn 4: finalize prompt
+    assert.match(inputTexts[3], /FINALIZE PROMPT/, "turn 4 should use finalize prompt");
+  } finally {
+    fake.close();
+  }
+});
+
+// -------------------------------------------------------------------
+// Integration tests: subprocess-based end-to-end companion tests
+// -------------------------------------------------------------------
+
+import { mkdtempSync, writeFileSync, mkdirSync, rmSync } from "node:fs";
+import { tmpdir } from "node:os";
+import path from "node:path";
+import { spawnSync } from "node:child_process";
+import { fileURLToPath } from "node:url";
+
+const COMPANION_PATH = fileURLToPath(
+  new URL("../plugins/codex/scripts/codex-companion.mjs", import.meta.url)
+);
+
+function makeSelfCollectGitFixture() {
+  // 3+ changed files triggers self-collect (DEFAULT_INLINE_DIFF_MAX_FILES = 2)
+  const root = mkdtempSync(path.join(tmpdir(), "codex-self-collect-test-"));
+  spawnSync("git", ["init", "-q", "-b", "main"], { cwd: root });
+  spawnSync("git", ["config", "user.email", "test@example.com"], { cwd: root });
+  spawnSync("git", ["config", "user.name", "Test"], { cwd: root });
+  spawnSync("git", ["config", "commit.gpgsign", "false"], { cwd: root });
+  writeFileSync(path.join(root, "README.md"), "# repo\n");
+  spawnSync("git", ["add", "."], { cwd: root });
+  spawnSync("git", ["commit", "-q", "-m", "init"], { cwd: root });
+  spawnSync("git", ["checkout", "-q", "-b", "feature"], { cwd: root });
+  mkdirSync(path.join(root, "src"), { recursive: true });
+  for (let i = 0; i < 5; i += 1) {
+    writeFileSync(path.join(root, "src", `f${i}.js`), `export const v${i} = ${i};\n`);
+  }
+  spawnSync("git", ["add", "."], { cwd: root });
+  spawnSync("git", ["commit", "-q", "-m", "feature"], { cwd: root });
+  return root;
+}
+
+function makeInlineGitFixture() {
+  // 1 changed file stays on inline-diff path
+  const root = mkdtempSync(path.join(tmpdir(), "codex-inline-test-"));
+  spawnSync("git", ["init", "-q", "-b", "main"], { cwd: root });
+  spawnSync("git", ["config", "user.email", "test@example.com"], { cwd: root });
+  spawnSync("git", ["config", "user.name", "Test"], { cwd: root });
+  spawnSync("git", ["config", "commit.gpgsign", "false"], { cwd: root });
+  writeFileSync(path.join(root, "README.md"), "# repo\n");
+  spawnSync("git", ["add", "."], { cwd: root });
+  spawnSync("git", ["commit", "-q", "-m", "init"], { cwd: root });
+  spawnSync("git", ["checkout", "-q", "-b", "feature"], { cwd: root });
+  writeFileSync(path.join(root, "one.js"), "export const v = 1;\n");
+  spawnSync("git", ["add", "."], { cwd: root });
+  spawnSync("git", ["commit", "-q", "-m", "tiny"], { cwd: root });
+  return root;
+}
+
+function runCompanion(args, env) {
+  return spawnSync("node", [COMPANION_PATH, ...args], {
+    env: { ...process.env, ...env },
+    encoding: "utf8",
+    timeout: 30000
+  });
+}
+
+test("self-collect path uses runAppServerInvestigation end-to-end", async () => {
+  const cwd = makeSelfCollectGitFixture();
+  const fake = setupFakeCodex({ cwd });
+  try {
+    // Recon turn 1: runs a command, no convergence
+    fake.queueTurnResponse({
+      commands: [{ command: "git diff main...HEAD", exitCode: 0 }],
+      finalAnswer: null
+    });
+    // Recon turn 2: no commands, final answer => converges
+    fake.queueTurnResponse({
+      commands: [],
+      finalAnswer: { text: "Investigation done." }
+    });
+    // Finalize turn: structured output
+    fake.queueTurnResponse({
+      finalAnswer: {
+        text: JSON.stringify({
+          verdict: "needs-attention",
+          summary: "Found risk in src/f1.js.",
+          findings: [{
+            severity: "high",
+            title: "Unguarded export",
+            file: "src/f1.js",
+            line_start: 1,
+            line_end: 1,
+            confidence: 0.7,
+            body: "Module exports v1 with no validation.",
+            recommendation: "Add validation."
+          }],
+          next_steps: []
+        })
+      }
+    });
+
+    const result = runCompanion(
+      ["adversarial-review", "--base", "main", "--scope", "branch", "--cwd", cwd, "--json"],
+      fake.env
+    );
+
+    assert.equal(result.status, 0, `expected exit 0, stderr: ${result.stderr}`);
+
+    // stdout may contain progress lines followed by JSON; parse the last JSON object
+    const stdout = result.stdout.trim();
+    const payload = JSON.parse(stdout);
+    assert.ok(payload.investigation, "self-collect payload must have investigation field");
+    assert.equal(payload.investigation.turnCount, 2);
+    assert.equal(payload.investigation.truncated, false);
+    assert.equal(payload.result?.verdict, "needs-attention");
+  } finally {
+    fake.close();
+    rmSync(cwd, { recursive: true, force: true });
+  }
+});
+
+test("inline-diff path does not call runAppServerInvestigation", async () => {
+  const cwd = makeInlineGitFixture();
+  const fake = setupFakeCodex({ cwd });
+  try {
+    // Single turn: inline path uses runAppServerTurn
+    fake.queueTurnResponse({
+      finalAnswer: {
+        text: JSON.stringify({
+          verdict: "approve",
+          summary: "No material issues found.",
+          findings: [],
+          next_steps: []
+        })
+      }
+    });
+
+    const result = runCompanion(
+      ["adversarial-review", "--base", "main", "--scope", "branch", "--cwd", cwd, "--json"],
+      fake.env
+    );
+
+    assert.equal(result.status, 0, `expected exit 0, stderr: ${result.stderr}`);
+    const payload = JSON.parse(result.stdout.trim());
+    assert.equal(payload.investigation, undefined,
+      "inline-path payload must not carry the investigation field");
+
+    const requests = fake.requests;
+    const starts = requests.filter((r) => r.method === "turn/start");
+    assert.equal(starts.length, 1, "inline path is single-turn");
+  } finally {
+    fake.close();
+    rmSync(cwd, { recursive: true, force: true });
+  }
+});
+
+// -------------------------------------------------------------------
+// Unit tests for runAppServerInvestigation (continued from above)
+// -------------------------------------------------------------------
+
+test("outputSchema-set finalize turn produces schema-conformant final message", async () => {
+  const cwd = makeTempDir("codex-inv-test-");
+  const fake = setupFakeCodex({ cwd });
+  try {
+    // Recon turn 1: commands, no final answer
+    fake.queueTurnResponse({
+      commands: [{ command: "git diff HEAD~1", exitCode: 0 }],
+      finalAnswer: null
+    });
+    // Recon turn 2: no commands, final answer => converges
+    fake.queueTurnResponse({
+      commands: [],
+      finalAnswer: { text: "Investigation done." }
+    });
+    // Finalize turn with structured output
+    fake.queueTurnResponse({
+      finalAnswer: { text: STRUCTURED_REVIEW }
+    });
+
+    const schema = {
+      type: "object",
+      required: ["verdict"],
+      properties: {
+        verdict: { type: "string" },
+        summary: { type: "string" },
+        findings: { type: "array" },
+        next_steps: { type: "array" }
+      }
+    };
+
+    const result = await runAppServerInvestigation(fake.cwd, {
+      investigatePrompt: "Investigate the changes.",
+      finalizePrompt: "Produce your structured verdict.",
+      outputSchema: schema
+    });
+
+    // The finalMessage should be parseable JSON with a verdict field
+    const parsed = JSON.parse(result.finalMessage);
+    assert.equal(parsed.verdict, "needs-attention");
+    assert.equal(parsed.findings.length, 1);
+    assert.equal(parsed.findings[0].severity, "high");
+
+    // The finalize turn should have received the schema
+    const requests = fake.requests;
+    const starts = requests.filter((r) => r.method === "turn/start");
+    assert.deepEqual(starts[starts.length - 1].params.outputSchema, schema);
+
+    // Only the finalize turn's reasoningSummary is returned
+    assert.ok(Array.isArray(result.reasoningSummary));
+  } finally {
+    fake.close();
+  }
+});
+
+// -------------------------------------------------------------------
+// Task 5: renderer truncation banner
+// -------------------------------------------------------------------
+
+import { renderReviewResult } from "../plugins/codex/scripts/lib/render.mjs";
+
+test("renderer prepends truncation banner when investigation.truncated is true", () => {
+  const parsed = {
+    parsed: {
+      verdict: "needs-attention",
+      summary: "Risk identified.",
+      findings: [],
+      next_steps: []
+    }
+  };
+  const out = renderReviewResult(parsed, {
+    reviewLabel: "Adversarial Review",
+    targetLabel: "branch:feature",
+    reasoningSummary: [],
+    investigation: { turnCount: 10, truncated: true }
+  });
+  assert.match(out, /Investigation truncated at 10 turns; findings may be shallow\./);
+});
+
+test("renderer omits truncation banner when investigation is null or not truncated", () => {
+  const parsed = {
+    parsed: {
+      verdict: "approve",
+      summary: "Looks fine.",
+      findings: [],
+      next_steps: []
+    }
+  };
+  const outNull = renderReviewResult(parsed, {
+    reviewLabel: "Adversarial Review",
+    targetLabel: "branch:feature",
+    reasoningSummary: [],
+    investigation: null
+  });
+  assert.doesNotMatch(outNull, /Investigation truncated/);
+
+  const outOk = renderReviewResult(parsed, {
+    reviewLabel: "Adversarial Review",
+    targetLabel: "branch:feature",
+    reasoningSummary: [],
+    investigation: { turnCount: 4, truncated: false }
+  });
+  assert.doesNotMatch(outOk, /Investigation truncated/);
+});
+
+test("renderer shows truncation banner on parse-error and validation-error paths", () => {
+  const parseErrorOut = renderReviewResult(
+    { parsed: null, parseError: "Unexpected token", rawOutput: "{not json" },
+    {
+      reviewLabel: "Adversarial Review",
+      targetLabel: "branch:feature",
+      reasoningSummary: [],
+      investigation: { turnCount: 10, truncated: true }
+    }
+  );
+  assert.match(parseErrorOut, /Investigation truncated at 10 turns/,
+    "banner must appear when output is unparseable AND investigation was truncated");
+
+  const validationErrorOut = renderReviewResult(
+    { parsed: { not: "review-shaped" } },
+    {
+      reviewLabel: "Adversarial Review",
+      targetLabel: "branch:feature",
+      reasoningSummary: [],
+      investigation: { turnCount: 10, truncated: true }
+    }
+  );
+  assert.match(validationErrorOut, /Investigation truncated at 10 turns/,
+    "banner must appear when output has wrong shape AND investigation was truncated");
+});
+
+// -------------------------------------------------------------------
+// Task 6: --max-investigation-turns CLI flag
+// -------------------------------------------------------------------
+
+import { parseArgs } from "../plugins/codex/scripts/lib/args.mjs";
+
+test("parseArgs accepts --max-investigation-turns as a value option", () => {
+  const { options } = parseArgs(
+    ["--base", "main", "--max-investigation-turns", "15", "auth"],
+    {
+      valueOptions: ["base", "scope", "model", "cwd", "max-investigation-turns"],
+      booleanOptions: ["json", "background", "wait"]
+    }
+  );
+  assert.equal(options["max-investigation-turns"], "15");
+});
+
+test("--max-investigation-turns propagates from CLI to runAppServerInvestigation", async () => {
+  const cwd = makeSelfCollectGitFixture();
+  const fake = setupFakeCodex({ cwd });
+  try {
+    for (let i = 0; i < 6; i += 1) {
+      fake.queueTurnResponse({
+        commands: [{ command: "git diff", exitCode: 0 }],
+        finalAnswer: null
+      });
+    }
+    fake.queueTurnResponse({
+      finalAnswer: { text: JSON.stringify({ verdict: "approve", summary: "ok", findings: [], next_steps: [] }) }
+    });
+
+    const result = runCompanion(
+      ["adversarial-review",
+       "--base", "main", "--scope", "branch", "--cwd", cwd,
+       "--max-investigation-turns", "5",
+       "--json"],
+      fake.env
+    );
+
+    assert.equal(result.status, 0, `expected exit 0, stderr: ${result.stderr}`);
+    const payload = JSON.parse(result.stdout.trim());
+    assert.equal(payload.investigation.turnCount, 5);
+    assert.equal(payload.investigation.truncated, true);
+  } finally {
+    fake.close();
+    rmSync(cwd, { recursive: true, force: true });
+  }
+});
+
+test("invalid --max-investigation-turns raises a clear error", () => {
+  const r = spawnSync("node",
+    [COMPANION_PATH, "adversarial-review", "--max-investigation-turns", "abc"],
+    { encoding: "utf8", timeout: 30000 }
+  );
+  assert.notEqual(r.status, 0, "should exit non-zero on invalid flag value");
+  assert.match(
+    r.stderr,
+    /must be a positive integer/,
+    "error message should explain the validation failure"
+  );
+});
+
+test("--max-investigation-turns rejects malformed numeric tokens", () => {
+  // parseInt-style salvage must NOT accept these — they are typos, not valid input.
+  const cases = [
+    "1.5",      // parseInt would yield 1
+    "10abc",    // parseInt would yield 10
+    "1e2",      // exponential notation: Number(...) yields 100, but the contract is "integer literal"
+    "  5",      // leading whitespace from accidental quoting
+    "0",        // zero is not positive
+    "-3",       // negative integer
+    ""          // empty string
+  ];
+  for (const value of cases) {
+    const r = spawnSync("node",
+      [COMPANION_PATH, "adversarial-review", "--max-investigation-turns", value],
+      { encoding: "utf8", timeout: 30000 }
+    );
+    assert.notEqual(r.status, 0, `value ${JSON.stringify(value)} should exit non-zero`);
+    assert.match(
+      r.stderr,
+      /must be a positive integer/,
+      `value ${JSON.stringify(value)} should trigger the validation error`
+    );
+  }
+});
+
+// -------------------------------------------------------------------
+// Prompt contract: inline + finalize prompts must forbid tool-call stubs
+// -------------------------------------------------------------------
+
+import { readFileSync } from "node:fs";
+import { fileURLToPath as fileURLToPathPromptCheck } from "node:url";
+
+const PROMPT_DIR = fileURLToPathPromptCheck(
+  new URL("../plugins/codex/prompts/", import.meta.url)
+);
+
+test("inline adversarial-review prompt forbids tool-call stub output", () => {
+  const text = readFileSync(`${PROMPT_DIR}adversarial-review.md`, "utf8");
+  // The model used to respond with payloads like {"cmd": "wc -l ..."}
+  // instead of the review JSON — the prompt must explicitly disallow it
+  // since this path is single-turn and has no recovery.
+  assert.match(text, /tool[- ]?call|tool[- ]?use/i,
+    "inline prompt must mention tool-call/tool-use");
+  assert.match(text, /\{"cmd"|stub/i,
+    "inline prompt must show or name the tool-call stub anti-pattern");
+  assert.match(text, /Do NOT run any shell commands/,
+    "inline prompt must explicitly forbid running shell commands");
+});
+
+test("finalize prompt forbids tool-call stub output", () => {
+  const text = readFileSync(`${PROMPT_DIR}adversarial-review-finalize.md`, "utf8");
+  assert.match(text, /Do NOT run any shell commands/,
+    "finalize prompt must explicitly forbid running shell commands");
+  assert.match(text, /no tool-call payloads|no shell commands/i,
+    "finalize prompt must spell out the no-tool-call rule");
+});
diff --git a/tests/runtime.test.mjs b/tests/runtime.test.mjs
index 90408372..1961a592 100644
--- a/tests/runtime.test.mjs
+++ b/tests/runtime.test.mjs
@@ -295,8 +295,13 @@ test("adversarial review asks Codex to inspect larger diffs itself", () => {
 
   assert.equal(result.status, 0, result.stderr);
   const state = JSON.parse(fs.readFileSync(path.join(binDir, "fake-codex-state.json"), "utf8"));
-  assert.match(state.lastTurnStart.prompt, /lightweight summary/i);
-  assert.match(state.lastTurnStart.prompt, /read-only git commands/i);
+  // With the two-phase investigation flow, the default fake converges on turn 1
+  // (no commands, finalAnswer), then the finalize turn fires. lastTurnStart
+  // captures the finalize turn, which uses buildAdversarialFinalizePrompt.
+  // The investigate prompt (turn 1) contained the self-collect guidance; the
+  // finalize prompt (turn 2) references the investigation completed in prior turns.
+  assert.match(state.lastTurnStart.prompt, /investigation|structured review/i);
+  // File contents must not be inlined in either prompt (self-collect mode)
   assert.doesNotMatch(state.lastTurnStart.prompt, /PROMPT_SELF_COLLECT_[ABC]/);
 });