From 3ca49067c3e34c56bbc4477eaa053afb9405e548 Mon Sep 17 00:00:00 2001
From: Charles Vien <charles.v@posthog.com>
Date: Sat, 6 Jun 2026 16:07:11 -0700
Subject: [PATCH 1/4] update claude adapter with upstream

---
 packages/agent/package.json                   |   4 +-
 packages/agent/src/adapters/claude/SKILL.md   | 164 +++++++++
 .../agent/src/adapters/claude/UPSTREAM.md     |  70 +++-
 .../agent/src/adapters/claude/claude-agent.ts | 315 ++++++++++++++----
 .../adapters/claude/conversion/sdk-to-acp.ts  | 138 ++++++--
 .../src/adapters/claude/session/models.ts     |   6 +
 packages/agent/src/adapters/claude/types.ts   |  10 +
 .../agent/src/adapters/codex/codex-agent.ts   |   8 +-
 packages/agent/src/adapters/codex/models.ts   |  12 +
 packages/agent/src/test/mocks/claude-sdk.ts   |   1 +
 pnpm-lock.yaml                                | 108 +++++-
 11 files changed, 735 insertions(+), 101 deletions(-)
 create mode 100644 packages/agent/src/adapters/claude/SKILL.md
diff --git a/packages/agent/package.json b/packages/agent/package.json
index 5b20023755..1680eb6290 100644
--- a/packages/agent/package.json
+++ b/packages/agent/package.json
@@ -128,8 +128,8 @@
     "vitest": "^2.1.8"
   },
   "dependencies": {
-    "@agentclientprotocol/sdk": "0.22.1",
-    "@anthropic-ai/claude-agent-sdk": "0.3.156",
+    "@agentclientprotocol/sdk": "0.25.0",
+    "@anthropic-ai/claude-agent-sdk": "0.3.165",
     "@anthropic-ai/sdk": "0.100.1",
     "@hono/node-server": "^1.19.9",
     "@opentelemetry/api-logs": "^0.208.0",
diff --git a/packages/agent/src/adapters/claude/SKILL.md b/packages/agent/src/adapters/claude/SKILL.md
new file mode 100644
index 0000000000..ce8ab70e5b
--- /dev/null
+++ b/packages/agent/src/adapters/claude/SKILL.md
@@ -0,0 +1,164 @@
+---
+name: upgrade-claude-adapter
+description: >-
+  Sync this fork of @anthropic-ai/claude-agent-acp (packages/agent/src/adapters/claude)
+  with a newer upstream release: bump the claude-agent-sdk / @agentclientprotocol/sdk,
+  port upstream bug fixes and new SDK message handling, preserve the fork's divergences,
+  verify, and update UPSTREAM.md. Use when asked to "upgrade/sync the claude adapter",
+  "bump the agent SDK", or "port upstream claude-agent-acp changes".
+---
+
+# Upgrade the Claude ACP adapter (upstream sync)
+
+This is a runbook for syncing our **fork** of `@anthropic-ai/claude-agent-acp` (the upstream
+Zed/agentclientprotocol ACP agent) that lives in `packages/agent/src/adapters/claude/` with a newer
+upstream release. The fork is heavily diverged. The job is to port the *valuable* upstream changes
+(SDK bumps, bug fixes, new SDK-message handling) while preserving every intentional divergence — not
+to make the fork identical to upstream.
+
+`UPSTREAM.md` (this directory) is the source of truth for the **fork point**, **last-synced
+version/commit**, the **file mapping**, the **PostHog-only code**, and the **intentional
+divergences**. Read it first, update it last.
+
+> This file is a runbook, not an auto-registered slash command. Invoke it by telling Claude to
+> "follow the upgrade skill in the claude adapter dir." Move it to `.claude/skills/<name>/SKILL.md`
+> if you ever want it runnable as `/<name>`.
+
+## Inputs you need before starting
+
+1. **Upstream source checkout** — a local git clone of `github.com/agentclientprotocol/claude-agent-acp`.
+   You need its history to diff. If the user hasn't given the path, **ask for it** (it's usually
+   somewhere like `~/Cloud/claude-agent-acp`). Do not guess.
+2. **This repo** — the fork under `packages/agent/`.
+
+## Process
+
+### 0. Orient (read, don't write)
+
+- Read `UPSTREAM.md`. Note **Last sync** (commit + version), the pinned **SDK** versions, the
+  **File Mapping**, **PostHog Code-Only Code (Do Not Sync)**, and **Intentional Divergences**.
+- In the upstream checkout, list the change set since the last sync and skim the changelog:
+  - `git -C <upstream> log --oneline <last-sync-sha>..HEAD`
+  - `git -C <upstream> show HEAD:CHANGELOG.md` (or just read `CHANGELOG.md`)
+- Confirm the new target version + HEAD sha and the target SDK versions from the upstream
+  `package.json`.
+
+### 1. Triage every commit
+
+Bucket each commit since the last sync:
+
+- **Port** — bug fixes and new feature / SDK-message handling that are *not* in the PostHog-only
+  list and don't fight a divergence.
+- **Dep bump** — record the target SDK versions; the diff tells you if code changes ride along.
+- **Skip** — `chore(main): release …`, `actions/*` CI bumps, pure dependabot **dev**-dep bumps, and
+  anything matching the PostHog-only / divergence lists.
+
+Read intent from source diffs (exclude tests + JSON first):
+
+```
+git -C <upstream> show <sha> -- src/ ':(exclude)src/tests/*' ':(exclude)*.json'
+```
+
+A dependabot SDK-bump commit often *also* carries real code (new message handling). Don't assume
+"deps" == "no code".
+
+### 2. Map upstream → fork
+
+Upstream is one large `src/acp-agent.ts`; our fork is split. Use the File Mapping in `UPSTREAM.md`.
+Rough guide:
+
+| Upstream | Fork |
+| --- | --- |
+| `acp-agent.ts` prompt loop, lifecycle, cancel | `claude-agent.ts` |
+| inline message/stream/result/system conversion | `conversion/sdk-to-acp.ts` |
+| inline prompt→SDK conversion | `conversion/acp-to-sdk.ts` |
+| `tools.ts` (tool_use→ACP, PostToolUse hook) | `conversion/tool-use-to-acp.ts`, `hooks.ts` |
+| model alias resolution | `session/models.ts`, `session/model-config.ts` |
+| options / system prompt | `session/options.ts` |
+| permissions | `permissions/*` |
+
+For each upstream change, `rg` the fork for the touched symbol first — the fork usually already has a
+diverged version of it, so you're editing, not adding.
+
+### 3. Bump dependencies
+
+In `packages/agent/package.json`, set `@anthropic-ai/claude-agent-sdk`, `@agentclientprotocol/sdk`,
+and `@anthropic-ai/sdk` to the upstream `package.json` versions, then `pnpm install` from the repo
+root. (`packages/shared` pins its own older `@agentclientprotocol/sdk`; leave it unless a
+cross-package type error forces a bump.)
+
+### 4. Find the breaking-change surface
+
+Run `pnpm --filter agent typecheck`. The errors are your ACP/SDK breaking-change list. Gotchas seen
+in past syncs:
+
+- **The ACP SDK ships name-mangled generated types.** `dist/schema/*.gen.d.ts` shows enum literals as
+  `n` (e.g. `StopReason = "…" | "n" | "cancelled"`). Don't trust grep there. Read the hand-written
+  `dist/acp.d.ts`, or download the exact target to inspect cleanly:
+  ```
+  cd "$(mktemp -d)" && npm pack @agentclientprotocol/sdk@<ver> && tar xzf *.tgz
+  rg -n "type StopReason|deleteSession|SessionModelState" package/dist/schema/types.gen.d.ts package/dist/acp.d.ts
+  ```
+- **`node -e "require('<pkg>/package.json')"` may fail** on the SDKs (exports map blocks the subpath).
+  Read `node_modules/<pkg>/package.json` directly for the installed version.
+- **An ACP SDK bump can break code outside the claude adapter.** The whole `packages/agent` package
+  must typecheck — expect to also fix `adapters/codex/*` and `server/agent-server.ts`. Keep those
+  fixes minimal and behavior-preserving (e.g. when ACP removed the `models` response field, the codex
+  adapter derived the model id from `configOptions` instead).
+
+### 5. Port in phases — bug fixes first, then features
+
+For each ported change:
+
+- **Preserve divergences** (see `UPSTREAM.md` → Intentional Divergences + PostHog-only). The big ones:
+  single-session `this.session` (not `this.sessions[id]`); `interruptReason` on cancel; gateway models
+  via `fetchGatewayModels` (not `initializationResult.models`); `_posthog/*` ext notifications;
+  the "Unsupported slash command" gate on `knownSlashCommands`; `SYSTEM_REMINDER` stripping; plan /
+  questions / MCP-metadata machinery.
+- **New SDK `system` subtypes are safe by default.** `handleSystemMessage` ends in `default: break`,
+  and the prompt-loop top-level `switch (message.type)` only `unreachable()`s unknown top-level
+  *types*. So a new subtype won't crash the loop — port real handling only where there's user value
+  (e.g. `permission_denied` → failed tool_call, `tool_progress` → in_progress, `commands_changed` →
+  available_commands_update, `mirror_error` → log).
+- When upstream reads new fields (`stop_details`, `getContextUsage`, `thinking`), confirm the
+  installed SDK `.d.ts` actually has them before porting. Skip ports the fork can't use (e.g. the
+  fork doesn't read `MAX_THINKING_TOKENS`, so upstream's `resolveThinkingConfig` was N/A).
+- Typecheck after each logical group, not just at the end.
+
+### 6. Verify (all of it)
+
+```
+pnpm --filter agent typecheck
+pnpm --filter agent build
+npx biome check --write <changed files>      # biome is the formatter/linter, not prettier/eslint
+pnpm typecheck                                # whole repo: confirms apps/code compiles vs the new ACP SDK
+pnpm --filter agent test
+pnpm --filter code test
+```
+
+- The `apps/code` renderer unit tests `analytics.test.ts` and `panelLayoutStore.test.ts` are **flaky**
+  — they sometimes throw in `getElectronTRPC` / electron-trpc `ipcLink` depending on test ordering. If
+  they fail, re-run; a clean rerun (or `git stash` + run on the clean tree) passing confirms it's the
+  known flake, not your change.
+
+### 7. Update `UPSTREAM.md` (do this last)
+
+- Bump **Last sync** (version + HEAD sha + date) and the pinned **SDK** versions.
+- Add `## Changes Ported in v<X> Sync` (one bullet per change, with PR # and short sha) and
+  `## Skipped in v<X> Sync` (with the reason for each skip).
+- If a port made a former divergence match upstream, move it out of the Intentional Divergences table.
+
+## Fork facts worth remembering
+
+- **Single session.** The agent owns one `this.session` (from `BaseAcpAgent`), not a `sessions` map.
+  Upstream's per-session refactors usually collapse to "just use `this.session`".
+- **Renderer uses config options only.** Model/mode/effort selection is `SessionConfigOption` end to
+  end; the renderer never reads the legacy `models` response field or calls `unstable_setSessionModel`.
+  That's why upstream's ACP-0.24/0.25 model-state removals are safe to follow.
+- **`toolUseCache` is never cleared** in the fork (created once in the constructor), so long sessions
+  accumulate — keep the prune-at-tool_result behavior, and make any PostToolUse hook close over the
+  data it needs rather than re-reading the cache.
+- **Conversion is split out.** `claude-agent.ts` calls `handleSystemMessage` / `handleStreamEvent` /
+  `handleResultMessage` / `handleUserAssistantMessage` from `conversion/sdk-to-acp.ts`. Upstream
+  inlines all of this in `acp-agent.ts`.
+- **Don't commit or push** unless the user explicitly asks. Leave the work on the current branch.
diff --git a/packages/agent/src/adapters/claude/UPSTREAM.md b/packages/agent/src/adapters/claude/UPSTREAM.md
index 46ab27ccc3..2c694f9b30 100644
--- a/packages/agent/src/adapters/claude/UPSTREAM.md
+++ b/packages/agent/src/adapters/claude/UPSTREAM.md
@@ -5,8 +5,8 @@ Fork of `@anthropic-ai/claude-agent-acp`. Upstream repo: https://github.com/anth
 ## Fork Point
 
 - **Forked**: v0.10.9, commit `5411e0f4`, Dec 2 2025
-- **Last sync**: v0.39.0, commit `51a370e`, May 29 2026
-- **SDK**: `@anthropic-ai/claude-agent-sdk` 0.3.156, `@agentclientprotocol/sdk` 0.22.1, `@anthropic-ai/sdk` 0.100.1
+- **Last sync**: v0.42.0, commit `0dbccf5`, Jun 5 2026
+- **SDK**: `@anthropic-ai/claude-agent-sdk` 0.3.165, `@agentclientprotocol/sdk` 0.25.0, `@anthropic-ai/sdk` 0.100.1
 
 ## File Mapping
 
@@ -55,6 +55,70 @@ Fork of `@anthropic-ai/claude-agent-acp`. Upstream repo: https://github.com/anth
 | Shutdown on ACP close | Process exits | No standalone process | Agent is embedded in server |
 | Unsupported slash commands | Loops silently on early idle | Emits "Unsupported slash command" chunk, gated on `initializationResult().commands` so plugin/skill commands (e.g. `/skills-store`) whose echoes use a fresh uuid are not false-flagged | The SDK consumes some slash commands without producing output (e.g. `/plugin` in non-interactive mode); without this we hang. The known-commands gate avoids racing plugin/skill loads where idle can arrive before the transformed user-message echo. |
 
+## Changes Ported in v0.42.0 Sync
+
+- **SDK bumps**: claude-agent-sdk 0.3.156 -> 0.3.165, ACP SDK 0.22.1 -> 0.25.0, anthropic SDK
+  unchanged at 0.100.1.
+- **ACP SDK 0.25.0 model-state removal** (#737, 32175b8): 0.24.0 deleted `SessionModelState`,
+  `SetSessionModelRequest/Response`, `ModelInfo`, and the `models` field on every session lifecycle
+  response; model selection moved entirely into `SessionConfigOption` (category "model"). Our fork
+  already drove model selection through config options, so this just removed the vestigial legacy
+  path: dropped those imports, the `unstable_setSessionModel` method, and the `models` build/return
+  in `createSession` / `getExistingSessionState` / `loadSession`. The codex adapter's
+  `response.models?.currentModelId` read was replaced with a `modelIdFromConfigOptions()` helper
+  (codex `models.ts`). Verified the renderer reads only `configOptions`, never `.models`.
+- **ACP SDK 0.25.0 `deleteSession` rename** (#753, 0dbccf5): No-op for us — our fork never
+  implemented `unstable_deleteSession`, and the method is optional on the `Agent` interface.
+- **Refusal handling** (SDK 0.3.162, #740, add7e31): Capture the refused assistant message's
+  `stop_details.explanation`; the terminal `result` (stop_reason "refusal") emits it as an
+  `agent_message_chunk` and returns ACP's dedicated `refusal` stop reason instead of letting the
+  `is_error` path surface it as an internal error.
+- **commands_changed** (SDK 0.3.162, #740, add7e31): New `system` subtype handled inline in the
+  prompt loop — pushes `available_commands_update` straight from `message.commands` (rather than
+  re-querying `supportedCommands()`, which only ever reflects the init list) and refreshes
+  `session.knownSlashCommands` so the unsupported-slash-command gate stays accurate.
+- **Optimized marker stripping** (#738, 895422c): `stripMarkerTags` rewritten as a single-pass
+  scanner in `conversion/sdk-to-acp.ts`, removing the `[\s\S]*?` backtracking risk on pathological
+  input.
+- **Force-cancel backstop** (#742, cffea4b): Added per-turn `cancelController` + `forceCancelTimer`
+  on `Session` and a mutable `forceCancelGraceMs` (30s) on the agent. The prompt loop races
+  `query.next()` against the cancel signal; `interrupt()` arms a grace-period timer that aborts it,
+  so a wedged SDK that never yields after interrupt (issue #680, e.g. a blocking `TaskOutput` poll)
+  returns "cancelled" instead of hanging. Adapted to our single-session model; preserves the
+  `interruptReason` meta on the forced return.
+- **Cross-family model match fix** (#731, f4704c1): `scoreModelMatch` (session/models.ts) now
+  returns 0 when only the context-hint token matched, so `claude-opus-4-6[1m]` can't resolve to
+  `sonnet[1m]` purely on the shared "1m" token. Layers on top of our existing
+  `modelVersionsCompatible` filter.
+- **compact_boundary getContextUsage** (#747, 398f763): compact_boundary now fetches the
+  authoritative post-compaction `used` via `query.getContextUsage()` (helper
+  `fetchContextUsedTokens`), falling back to 0 on failure. `size` still comes from the
+  gateway-learned window (getContextUsage under-reports 1M windows). Our fork-specific
+  `promptReplayed = true` side effect is preserved.
+- **New SDK message handling** (#747, 398f763): `tool_progress` -> `tool_call_update` `in_progress`
+  with `elapsedTimeSeconds`; `rate_limit_event` -> `usage_update` carrying `_claude/rateLimit`;
+  `permission_denied` -> `tool_call_update` `failed` (in `handleSystemMessage`); `mirror_error` ->
+  logged (history-persistence failure / potential data loss on resume).
+- **Prune tool cache** (#748, ec14211): `toolUseCache` was never cleared in our fork (set once in
+  the constructor, accumulated for the whole agent lifetime). Now pruned at `tool_result` time. The
+  PostToolUse hook closes over the tool name + bash command instead of re-reading the cache, so the
+  Edit/Write diff survives any hook/result reordering. We did NOT adopt upstream's per-session cache
+  move (we are single-session) or its `backgroundTerminals` deletion.
+- **Test mock**: added `reloadSkills` to the SDK `MockQuery` (new method on the SDK `Query`
+  interface in 0.3.165).
+
+## Skipped in v0.42.0 Sync
+
+- **Message ids** (#750, 18516a3): Upstream records an ACP `messageId` -> SDK uuid map for a future
+  fork/rewind feature, explicitly "NOT READ YET". We don't consume it, it adds a `Session` field and
+  threads `messageId` through many `toAcpNotifications` call sites, so it is deferred until we wire
+  up rewind. (ACP 0.25.0 does expose the `messageId` field, so the port is unblocked when wanted.)
+- **resolveThinkingConfig** (#747, 398f763): Upstream maps the legacy `MAX_THINKING_TOKENS` env var
+  to the SDK's new `thinking` option. Our fork never reads `MAX_THINKING_TOKENS` (model setup is
+  gateway-driven via `session/options.ts`), so there is nothing to migrate.
+- **Pure dep-group / release / CI bumps** (#736, #741, #745, #728, #743): No fork-relevant code
+  beyond the SDK versions captured above.
+
 ## Changes Ported in v0.30.0 Sync
 
 - **SDK bumps**: claude-agent-sdk 0.2.112 -> 0.2.114, ACP SDK 0.16.1 -> 0.19.0, anthropic SDK -> 0.89.0
@@ -165,7 +229,7 @@ Fork of `@anthropic-ai/claude-agent-acp`. Upstream repo: https://github.com/anth
 
 ## Next Sync
 
-1. Check upstream changelog since v0.37.0
+1. Check upstream changelog since v0.42.0
 2. Diff upstream source against PostHog Code using the file mapping above
 3. Port in phases: bug fixes first, then features
 4. After each phase: `pnpm --filter agent typecheck && pnpm --filter agent build && pnpm lint`
diff --git a/packages/agent/src/adapters/claude/claude-agent.ts b/packages/agent/src/adapters/claude/claude-agent.ts
index 95d2afb286..217f06e278 100644
--- a/packages/agent/src/adapters/claude/claude-agent.ts
+++ b/packages/agent/src/adapters/claude/claude-agent.ts
@@ -3,7 +3,6 @@ import * as fs from "node:fs";
 import * as os from "node:os";
 import * as path from "node:path";
 import {
-  type ModelInfo as AcpModelInfo,
   type AgentSideConnection,
   type ClientCapabilities,
   type ForkSessionRequest,
@@ -24,12 +23,9 @@ import {
   type SessionConfigOption,
   type SessionConfigOptionCategory,
   type SessionConfigSelectOption,
-  type SessionModelState,
   type SessionModeState,
   type SetSessionConfigOptionRequest,
   type SetSessionConfigOptionResponse,
-  type SetSessionModelRequest,
-  type SetSessionModelResponse,
   type SetSessionModeRequest,
   type SetSessionModeResponse,
   type Usage,
@@ -42,6 +38,7 @@ import {
   type Options,
   type Query,
   query,
+  type SDKMessage,
   type SDKUserMessage,
   type SlashCommand,
 } from "@anthropic-ai/claude-agent-sdk";
@@ -130,11 +127,24 @@ import type {
   NewSessionMeta,
   SDKMessageFilter,
   Session,
+  ToolUpdateMeta,
   ToolUseCache,
   ToolUseStreamCache,
 } from "./types";
 
 const SESSION_VALIDATION_TIMEOUT_MS = 30_000;
+
+/** Grace period after `session/cancel` before the adapter forces a wedged
+ *  prompt loop to return "cancelled". `query.interrupt()` normally makes the SDK
+ *  yield a trailing idle within milliseconds and the loop returns through its
+ *  usual path, so this timer is armed and cleared (never fired) on healthy
+ *  cancels. It only trips when the SDK is genuinely wedged (e.g. a
+ *  `TaskOutput { block: true }` poll against a hung background task — issue
+ *  #680) and never yields. Deliberately loose: an "obviously stuck" ceiling,
+ *  not a guess at interrupt latency, so it can't pre-empt a slow-but-healthy
+ *  interrupt. */
+const DEFAULT_FORCE_CANCEL_GRACE_MS = 30_000;
+
 const MAX_TITLE_LENGTH = 256;
 const LOCAL_ONLY_COMMANDS = new Set(["/context", "/heapdump", "/extra-usage"]);
 
@@ -189,6 +199,28 @@ function shouldEmitRawMessage(
   );
 }
 
+/** Fetch the SDK's authoritative context-window occupancy via the
+ *  `getContextUsage` control request. Unlike the per-message API usage numbers
+ *  (which only count message tokens), `totalTokens` includes the system prompt,
+ *  tool schemas, MCP tools, and memory-file overhead — the real occupancy the
+ *  user sees. Returns `null` on any control-request failure.
+ *
+ *  We deliberately do NOT use this response's window fields for `size`: they
+ *  have been observed to under-report extended (1M) context windows, so the
+ *  window keeps coming from the gateway / model heuristic. */
+async function fetchContextUsedTokens(
+  sdkQuery: Query,
+  logger: Logger,
+): Promise<number | null> {
+  try {
+    const usage = await sdkQuery.getContextUsage();
+    return usage.totalTokens;
+  } catch (error) {
+    logger.error("Failed to fetch context usage from SDK:", error);
+    return null;
+  }
+}
+
 export interface ClaudeAcpAgentOptions {
   onProcessSpawned?: (info: ProcessSpawnedInfo) => void;
   onProcessExited?: (pid: number) => void;
@@ -204,6 +236,10 @@ export class ClaudeAcpAgent extends BaseAcpAgent {
   toolUseStreamCache: ToolUseStreamCache;
   backgroundTerminals: { [key: string]: BackgroundTerminal } = {};
   clientCapabilities?: ClientCapabilities;
+  /** Grace period before a `session/cancel` forces a wedged prompt loop to
+   *  return "cancelled". See {@link DEFAULT_FORCE_CANCEL_GRACE_MS}. Mutable so
+   *  tests can shrink it. */
+  forceCancelGraceMs: number = DEFAULT_FORCE_CANCEL_GRACE_MS;
   private options?: ClaudeAcpAgentOptions;
   private enrichment?: Enrichment;
   private enrichedReadCache: EnrichedReadCache = new Map();
@@ -357,7 +393,6 @@ export class ClaudeAcpAgent extends BaseAcpAgent {
 
     return {
       modes: response.modes,
-      models: response.models,
       configOptions: response.configOptions,
     };
   }
@@ -446,9 +481,24 @@ export class ClaudeAcpAgent extends BaseAcpAgent {
     await this.broadcastUserMessage(params);
 
     this.session.promptRunning = true;
+    // Wake-up channel so cancel() can force this loop to return "cancelled" even
+    // when query.next() is wedged and never yields again (issue #680). The
+    // force-cancel backstop armed in interrupt() aborts this controller.
+    const cancelController = new AbortController();
+    this.session.cancelController = cancelController;
+    const cancelled = new Promise<void>((resolve) => {
+      cancelController.signal.addEventListener("abort", () => resolve(), {
+        once: true,
+      });
+    });
     let handedOff = false;
     let errored = false;
     let lastAssistantTotalUsage: number | null = null;
+    // When a streaming classifier refuses a turn, the assistant message carries
+    // stop_reason "refusal" and structured stop_details. We capture the
+    // human-readable explanation here so the terminal `result` can surface it to
+    // the user (the refused assistant message itself usually has no content).
+    let lastRefusalExplanation: string | null = null;
     let lastStreamUsage = {
       input_tokens: 0,
       output_tokens: 0,
@@ -494,7 +544,25 @@ export class ClaudeAcpAgent extends BaseAcpAgent {
 
     try {
       while (true) {
-        const { value: message, done } = await this.session.query.next();
+        const nextMessage = this.session.query.next();
+        const next = await Promise.race([nextMessage, cancelled]);
+        if (cancelController.signal.aborted) {
+          // The SDK never yielded after interrupt() (e.g. a wedged TaskOutput
+          // block). Abandon the in-flight next() — swallowing any later
+          // rejection so it can't surface as an unhandled rejection — and honor
+          // the cancel per the ACP contract.
+          void nextMessage.catch(() => {});
+          return {
+            stopReason: "cancelled",
+            _meta: this.session.interruptReason
+              ? { interruptReason: this.session.interruptReason }
+              : undefined,
+          };
+        }
+        const { value: message, done } = next as IteratorResult<
+          SDKMessage,
+          void
+        >;
 
         if (done || !message) {
           if (this.session.cancelled) {
@@ -521,19 +589,51 @@ export class ClaudeAcpAgent extends BaseAcpAgent {
         switch (message.type) {
           case "system":
             if (message.subtype === "compact_boundary") {
-              // Send used:0 immediately so the client doesn't keep showing
-              // the stale pre-compaction context size until the next turn.
-              lastAssistantTotalUsage = 0;
+              // Refresh the displayed usage immediately so the client doesn't
+              // keep showing the stale pre-compaction size right after the user
+              // sees "Compacting completed". Prefer the SDK's authoritative
+              // post-compaction `used` via getContextUsage — it reflects the
+              // real retained context (system prompt + tools + surviving
+              // messages), which per-message API usage can't give us until the
+              // next turn. Fall back to 0 on failure: directionally correct
+              // (context just dropped) and replaced within seconds by the next
+              // result. `size` keeps coming from the gateway-learned window
+              // (getContextUsage under-reports extended 1M windows).
+              const usedTokens = await fetchContextUsedTokens(
+                this.session.query,
+                this.logger,
+              );
+              lastAssistantTotalUsage = usedTokens ?? 0;
               promptReplayed = true;
               await this.client.sessionUpdate({
                 sessionId: params.sessionId,
                 update: {
                   sessionUpdate: "usage_update",
-                  used: 0,
+                  used: lastAssistantTotalUsage,
                   size: lastContextWindowSize,
                 },
               });
             }
+            if (message.subtype === "commands_changed") {
+              // Mid-session command-list change (e.g. skills discovered as the
+              // agent works in a subdirectory). Push the new list straight from
+              // the message rather than re-querying (supportedCommands() only
+              // ever reflects the init list), and refresh the known-commands
+              // gate used to flag unsupported slash commands.
+              this.session.knownSlashCommands = collectKnownSlashCommands(
+                message.commands,
+              );
+              await this.client.sessionUpdate({
+                sessionId: params.sessionId,
+                update: {
+                  sessionUpdate: "available_commands_update",
+                  availableCommands: getAvailableSlashCommands(
+                    message.commands,
+                  ),
+                },
+              });
+              break;
+            }
             if (message.subtype === "local_command_output") {
               promptReplayed = true;
             }
@@ -748,6 +848,27 @@ export class ClaudeAcpAgent extends BaseAcpAgent {
                 this.session.accumulatedUsage.cachedWriteTokens,
             };
 
+            // A refusal can arrive on any result subtype (and may even set
+            // is_error), so handle it before handleResultMessage — otherwise the
+            // is_error path would surface it as an internal error. The refused
+            // assistant message carries no visible content, so surface the
+            // classifier's explanation (when available) and report ACP's
+            // dedicated `refusal` stop reason.
+            if (
+              (message as { stop_reason?: string }).stop_reason === "refusal"
+            ) {
+              if (lastRefusalExplanation) {
+                await this.client.sessionUpdate({
+                  sessionId: params.sessionId,
+                  update: {
+                    sessionUpdate: "agent_message_chunk",
+                    content: { type: "text", text: lastRefusalExplanation },
+                  },
+                });
+              }
+              return { stopReason: "refusal", usage };
+            }
+
             const result = handleResultMessage(message);
             if (result.error) throw result.error;
 
@@ -865,6 +986,21 @@ export class ClaudeAcpAgent extends BaseAcpAgent {
               break;
             }
 
+            // Capture a refusal explanation from the assistant message so the
+            // terminal `result` can surface it (the refused message itself has
+            // no visible content). stop_reason/stop_details live on the inner
+            // Anthropic message; read them via cast like the usage block below.
+            if (message.type === "assistant") {
+              const inner = message.message as unknown as {
+                stop_reason?: string | null;
+                stop_details?: { explanation?: string | null } | null;
+              };
+              if (inner.stop_reason === "refusal") {
+                lastRefusalExplanation =
+                  inner.stop_details?.explanation ?? null;
+              }
+            }
+
             // Store latest assistant usage (excluding subagents)
             // Sum all token types as a proxy for post-turn context occupancy:
             // current turn's output will become next turn's input.
@@ -908,11 +1044,46 @@ export class ClaudeAcpAgent extends BaseAcpAgent {
             break;
           }
 
-          case "tool_progress":
+          case "tool_progress": {
+            // Surface "still working" progress on a long-running tool call so
+            // the client can show elapsed time instead of a stalled spinner.
+            await this.client.sessionUpdate({
+              sessionId: message.session_id,
+              update: {
+                sessionUpdate: "tool_call_update",
+                toolCallId: message.tool_use_id,
+                status: "in_progress",
+                _meta: {
+                  claudeCode: {
+                    toolName: message.tool_name,
+                    toolResponse: {
+                      elapsedTimeSeconds: message.elapsed_time_seconds,
+                    },
+                  },
+                } satisfies ToolUpdateMeta,
+              },
+            });
+            break;
+          }
+          case "rate_limit_event": {
+            // Re-emit the current usage carrying the subscription rate-limit
+            // info so the client can warn before the limit bites.
+            if (lastAssistantTotalUsage !== null) {
+              await this.client.sessionUpdate({
+                sessionId: message.session_id,
+                update: {
+                  sessionUpdate: "usage_update",
+                  used: lastAssistantTotalUsage,
+                  size: lastContextWindowSize,
+                  _meta: { "_claude/rateLimit": message.rate_limit_info },
+                },
+              });
+            }
+            break;
+          }
           case "auth_status":
           case "tool_use_summary":
           case "prompt_suggestion":
-          case "rate_limit_event":
             break;
 
           default:
@@ -976,6 +1147,16 @@ export class ClaudeAcpAgent extends BaseAcpAgent {
       }
       throw error;
     } finally {
+      // The loop is returning — interrupt() succeeded or the prompt finished —
+      // so disarm the force-cancel backstop and release the wake-up channel
+      // (only if we still own it; a handoff installs the next prompt's).
+      if (this.session.forceCancelTimer) {
+        clearTimeout(this.session.forceCancelTimer);
+        this.session.forceCancelTimer = undefined;
+      }
+      if (this.session.cancelController === cancelController) {
+        this.session.cancelController = undefined;
+      }
       // Drop any leftover streaming-input buffers. Normally cleared per index
       // on `content_block_stop`, but a cancelled or errored turn may leave
       // entries behind; without this they'd carry over into the next turn
@@ -1014,6 +1195,31 @@ export class ClaudeAcpAgent extends BaseAcpAgent {
       pending.resolve(true);
     }
     this.session.pendingMessages.clear();
+
+    // Arm a backstop before interrupting: if a prompt is actively consuming the
+    // query and interrupt() doesn't make the SDK yield (e.g. a wedged TaskOutput
+    // block — issue #680), force the loop to return "cancelled" after the grace
+    // period so the pending prompt() resolves per the ACP cancellation contract
+    // instead of hanging forever. The loop's `finally` clears this timer when
+    // interrupt() works and it returns through the normal idle path, so on
+    // healthy cancels it is armed but never fires. Arm at most once per turn:
+    // the floor is an absolute ceiling from the first cancel, so a client that
+    // re-sends cancel can't keep pushing the deadline out.
+    if (
+      this.session.promptRunning &&
+      this.session.cancelController &&
+      !this.session.cancelController.signal.aborted &&
+      !this.session.forceCancelTimer
+    ) {
+      const cancelController = this.session.cancelController;
+      this.session.forceCancelTimer = setTimeout(() => {
+        this.logger.error(
+          `Session ${this.sessionId}: cancel floor elapsed without the SDK yielding; forcing "cancelled". The underlying query may still be wedged — a new session may be required.`,
+        );
+        cancelController.abort();
+      }, this.forceCancelGraceMs);
+    }
+
     await this.session.query.interrupt();
   }
 
@@ -1148,19 +1354,6 @@ export class ClaudeAcpAgent extends BaseAcpAgent {
     this.deferBackgroundFetches(newQuery);
   }
 
-  async unstable_setSessionModel(
-    params: SetSessionModelRequest,
-  ): Promise<SetSessionModelResponse | undefined> {
-    await this.session.query.setModel(toSdkModelId(params.modelId));
-    this.session.modelId = params.modelId;
-    this.session.lastContextWindowSize = this.getContextWindowForModel(
-      params.modelId,
-    );
-    this.rebuildEffortConfigOption(params.modelId);
-    await this.updateConfigOption("model", params.modelId);
-    return {};
-  }
-
   async setSessionMode(
     params: SetSessionModeRequest,
   ): Promise<SetSessionModeResponse> {
@@ -1309,6 +1502,38 @@ export class ClaudeAcpAgent extends BaseAcpAgent {
     }
   }
 
+  /**
+   * Ensures `cwd` is an absolute path to an existing directory before session
+   * creation, throwing `invalidParams` with an actionable message rather than
+   * failing later with an opaque "native binary failed to launch" SDK error.
+   */
+  private async validateCwd(cwd: string): Promise<void> {
+    if (!path.isAbsolute(cwd)) {
+      throw RequestError.invalidParams(
+        { cwd },
+        `\`cwd\` must be an absolute path, but received: ${cwd}`,
+      );
+    }
+
+    let stats: fs.Stats;
+    try {
+      stats = await fs.promises.stat(cwd);
+    } catch (error) {
+      // Only ENOENT means "missing"; name other failures (EACCES, ELOOP, …).
+      const code = (error as NodeJS.ErrnoException | undefined)?.code;
+      const reason =
+        code === undefined || code === "ENOENT"
+          ? "does not exist on the machine running the agent"
+          : `could not be accessed (${code})`;
+      throw RequestError.invalidParams({ cwd }, `\`cwd\` ${reason}: ${cwd}`);
+    }
+
+    if (!stats.isDirectory()) {
+      const message = `\`cwd\` is not a directory: ${cwd}`;
+      throw RequestError.invalidParams({ cwd }, message);
+    }
+  }
+
   private async createSession(
     params: {
       cwd: string;
@@ -1325,6 +1550,11 @@ export class ClaudeAcpAgent extends BaseAcpAgent {
     const { cwd } = params;
     const { resume, forkSession } = creationOpts;
 
+    // Validate `cwd` up front. The ACP spec requires an absolute path, and the
+    // directory must exist on the machine running the agent. Without this the
+    // failure only surfaces later as a confusing SDK launch error (issue #749).
+    await this.validateCwd(cwd);
+
     const isResume = !!resume;
 
     const meta = params._meta as NewSessionMeta | undefined;
@@ -1606,17 +1836,6 @@ export class ClaudeAcpAgent extends BaseAcpAgent {
       })),
     };
 
-    const models: SessionModelState = {
-      currentModelId: resolvedModelId,
-      availableModels: modelOptions.options.map(
-        (opt): AcpModelInfo => ({
-          modelId: opt.value,
-          name: opt.name,
-          description: opt.description,
-        }),
-      ),
-    };
-
     const configOptions = this.buildConfigOptions(
       permissionMode,
       modelOptions,
@@ -1628,7 +1847,7 @@ export class ClaudeAcpAgent extends BaseAcpAgent {
       this.deferBackgroundFetches(q);
     }
 
-    return { sessionId, modes, models, configOptions };
+    return { sessionId, modes, configOptions };
   }
 
   private createCanUseTool(
@@ -1713,31 +1932,9 @@ export class ClaudeAcpAgent extends BaseAcpAgent {
       })),
     };
 
-    const modelOptions = this.session.configOptions.find(
-      (o) => o.id === "model",
-    );
-    const models: SessionModelState = {
-      currentModelId: this.session.modelId ?? DEFAULT_MODEL,
-      availableModels:
-        modelOptions && "options" in modelOptions
-          ? (
-              modelOptions.options as Array<{
-                value: string;
-                name: string;
-                description?: string;
-              }>
-            ).map((opt) => ({
-              modelId: opt.value,
-              name: opt.name,
-              description: opt.description,
-            }))
-          : [],
-    };
-
     return {
       sessionId,
       modes,
-      models,
       configOptions: this.session.configOptions,
     };
   }
diff --git a/packages/agent/src/adapters/claude/conversion/sdk-to-acp.ts b/packages/agent/src/adapters/claude/conversion/sdk-to-acp.ts
index 633aa20ae9..807c39f990 100644
--- a/packages/agent/src/adapters/claude/conversion/sdk-to-acp.ts
+++ b/packages/agent/src/adapters/claude/conversion/sdk-to-acp.ts
@@ -190,34 +190,34 @@ function handleToolUseChunk(
   }
 
   if (!alreadyCached && ctx.registerHooks !== false) {
+    // Capture what the hook needs in the closure rather than re-reading the
+    // cache when it fires. The cache entry is pruned at tool_result time, and a
+    // PostToolUse hook can fire after that, so closing over the name and bash
+    // command keeps the diff working without depending on (or pinning) the
+    // cache entry's lifetime.
+    const toolName = chunk.name;
+    const bashCommand = bashCommandFromToolUse(chunk);
     registerHookCallback(chunk.id, {
       onPostToolUseHook: async (toolUseId, _toolInput, toolResponse) => {
-        const toolUse = ctx.toolUseCache[toolUseId];
-        if (toolUse) {
-          const editUpdate =
-            toolUse.name === "Edit" || toolUse.name === "Write"
-              ? toolUpdateFromEditToolResponse(toolResponse)
-              : null;
-
-          await ctx.client.sessionUpdate({
-            sessionId: ctx.sessionId,
-            update: {
-              _meta: toolMeta(
-                toolUse.name,
-                toolResponse,
-                ctx.parentToolCallId,
-                bashCommandFromToolUse(toolUse),
-              ),
-              toolCallId: toolUseId,
-              sessionUpdate: "tool_call_update",
-              ...(editUpdate ? editUpdate : {}),
-            },
-          });
-        } else {
-          ctx.logger.error(
-            `Got a tool response for tool use that wasn't tracked: ${toolUseId}`,
-          );
-        }
+        const editUpdate =
+          toolName === "Edit" || toolName === "Write"
+            ? toolUpdateFromEditToolResponse(toolResponse)
+            : null;
+
+        await ctx.client.sessionUpdate({
+          sessionId: ctx.sessionId,
+          update: {
+            _meta: toolMeta(
+              toolName,
+              toolResponse,
+              ctx.parentToolCallId,
+              bashCommand,
+            ),
+            toolCallId: toolUseId,
+            sessionUpdate: "tool_call_update",
+            ...(editUpdate ? editUpdate : {}),
+          },
+        });
       },
     });
   }
@@ -344,6 +344,12 @@ function handleToolResultChunk(
     return [];
   }
 
+  // The tool_use is fully resolved now — drop it so a long-running session
+  // doesn't retain every tool call for its whole lifetime. Everything below uses
+  // the captured `toolUse` local, and the PostToolUse hook closes over the tool
+  // name/bash command, so pruning here is safe regardless of hook/result order.
+  delete ctx.toolUseCache[chunk.tool_use_id];
+
   if (
     toolUse.name === "TaskCreate" ||
     toolUse.name === "TaskUpdate" ||
@@ -772,6 +778,46 @@ export async function handleSystemMessage(
       });
       break;
     }
+    case "mirror_error":
+      // The SDK failed to persist session history (append rejected/timed out
+      // after retry) — potential data loss on resume the user should know about
+      // rather than a silent gap. Log it; no user-facing chunk.
+      logger.error(
+        `Session ${sessionId}: failed to persist history: ${message.error}`,
+      );
+      break;
+    case "permission_denied": {
+      // A tool call was auto-denied (by a rule, the classifier, dontAsk mode,
+      // etc.) before running. The tool_use block was already emitted as a
+      // tool_call, so mark it failed with the rejection reason — otherwise the
+      // client shows a tool call that silently never resolves.
+      const reason = message.decision_reason ?? message.message;
+      await client.sessionUpdate({
+        sessionId: message.session_id,
+        update: {
+          sessionUpdate: "tool_call_update",
+          toolCallId: message.tool_use_id,
+          status: "failed",
+          content: [
+            {
+              type: "content",
+              content: { type: "text", text: `Permission denied: ${reason}` },
+            },
+          ],
+          _meta: {
+            claudeCode: {
+              toolName: message.tool_name,
+              toolResponse: {
+                decisionReasonType: message.decision_reason_type,
+                decisionReason: message.decision_reason,
+                message: message.message,
+              },
+            },
+          } satisfies ToolUpdateMeta,
+        },
+      });
+      break;
+    }
     default:
       break;
   }
@@ -949,11 +995,43 @@ function isSdkLocalCommandMessage(content: AnthropicMessageContent): boolean {
 // that the CLI uses for its own display. The live prompt loop must strip them
 // so they don't leak into the UI, while preserving any real prose mixed in
 // alongside.
-const LOCAL_COMMAND_TAG_PATTERN =
-  /<(command-name|command-message|command-args|local-command-stdout|local-command-stderr)>[\s\S]*?<\/\1>/g;
-
+const LOCAL_COMMAND_MARKERS = [
+  "command-name",
+  "command-message",
+  "command-args",
+  "local-command-stdout",
+  "local-command-stderr",
+].map((tag) => ({ open: `<${tag}>`, close: `</${tag}>` }));
+
+// Single-pass scanner that removes each `<tag>…</tag>` marker (matching the
+// nearest closing tag of the same name, like a lazy regex would) without the
+// catastrophic-backtracking risk of `[\s\S]*?` over pathological input.
 function stripMarkerTags(text: string): string {
-  return text.replace(LOCAL_COMMAND_TAG_PATTERN, "");
+  const dead = new Set<string>();
+  let result = "";
+  let copiedUpTo = 0;
+  let i = 0;
+  while (i < text.length) {
+    if (text[i] === "<") {
+      const marker = LOCAL_COMMAND_MARKERS.find(
+        (m) => !dead.has(m.open) && text.startsWith(m.open, i),
+      );
+      if (marker) {
+        const end = text.indexOf(marker.close, i + marker.open.length);
+        if (end !== -1) {
+          result += text.slice(copiedUpTo, i);
+          i = copiedUpTo = end + marker.close.length;
+          continue;
+        }
+        // No closing marker remains anywhere ahead, and `indexOf` only ever
+        // searches forward from here on, so stop treating this tag as an
+        // opener — that avoids rescanning the tail for it on every match.
+        dead.add(marker.open);
+      }
+    }
+    i++;
+  }
+  return result + text.slice(copiedUpTo);
 }
 
 /**
diff --git a/packages/agent/src/adapters/claude/session/models.ts b/packages/agent/src/adapters/claude/session/models.ts
index ec2a561246..58bc797b4d 100644
--- a/packages/agent/src/adapters/claude/session/models.ts
+++ b/packages/agent/src/adapters/claude/session/models.ts
@@ -138,11 +138,17 @@ function scoreModelMatch(
 ): number {
   const haystack = `${model.value} ${model.name ?? ""}`.toLowerCase();
   let score = 0;
+  let nonHintMatched = false;
   for (const token of tokens) {
     if (haystack.includes(token)) {
+      if (token !== contextHint) nonHintMatched = true;
       score += token === contextHint ? 3 : 1;
     }
   }
+  // A context hint alone (e.g. "1m") must not carry a match across model
+  // families: without a real family/name token also matching, "opus[1m]" would
+  // otherwise score against "sonnet[1m]" purely on the shared "1m" token.
+  if (contextHint && !nonHintMatched) return 0;
   return score;
 }
 
diff --git a/packages/agent/src/adapters/claude/types.ts b/packages/agent/src/adapters/claude/types.ts
index da46b327f0..69407afdea 100644
--- a/packages/agent/src/adapters/claude/types.ts
+++ b/packages/agent/src/adapters/claude/types.ts
@@ -70,6 +70,16 @@ export type Session = BaseSession & {
   /** Persists across prompt() calls so SDK-reported values survive turn boundaries */
   lastContextWindowSize?: number;
   promptRunning: boolean;
+  /** Per-turn signal the active prompt loop races `query.next()` against.
+   *  Aborted by the force-cancel backstop when the SDK wedges and never yields
+   *  after `interrupt()` (issue #680), forcing the loop to return "cancelled"
+   *  instead of hanging. Distinct from `abortController`: this only wakes the
+   *  loop; it does not touch the SDK query/subprocess. Undefined when no prompt
+   *  is actively consuming the query. */
+  cancelController?: AbortController;
+  /** Pending grace-period timer that aborts `cancelController`. Cleared when the
+   *  loop returns normally so the backstop never fires after a clean cancel. */
+  forceCancelTimer?: ReturnType<typeof setTimeout>;
   pendingMessages: Map<string, PendingMessage>;
   nextPendingOrder: number;
   emitRawSDKMessages: boolean | SDKMessageFilter[];
diff --git a/packages/agent/src/adapters/codex/codex-agent.ts b/packages/agent/src/adapters/codex/codex-agent.ts
index 5fa5829e7d..3976edd7ab 100644
--- a/packages/agent/src/adapters/codex/codex-agent.ts
+++ b/packages/agent/src/adapters/codex/codex-agent.ts
@@ -79,7 +79,10 @@ import {
 } from "../local-tools";
 import { resolveTaskId } from "../session-meta";
 import { createCodexClient } from "./codex-client";
-import { normalizeCodexConfigOptions } from "./models";
+import {
+  modelIdFromConfigOptions,
+  normalizeCodexConfigOptions,
+} from "./models";
 import {
   type CodexSessionState,
   createSessionState,
@@ -421,7 +424,7 @@ export class CodexAcpAgent extends BaseAcpAgent {
       taskRunId: meta?.taskRunId,
       taskId: resolveTaskId(meta),
       modeId: response.modes?.currentModeId ?? "auto",
-      modelId: response.models?.currentModelId,
+      modelId: modelIdFromConfigOptions(response.configOptions),
       permissionMode: requestedPermissionMode,
     });
     this.sessionId = response.sessionId;
@@ -537,7 +540,6 @@ export class CodexAcpAgent extends BaseAcpAgent {
 
     return {
       modes: loadResponse.modes,
-      models: loadResponse.models,
       configOptions: loadResponse.configOptions,
     };
   }
diff --git a/packages/agent/src/adapters/codex/models.ts b/packages/agent/src/adapters/codex/models.ts
index 3264974fc0..054db3932f 100644
--- a/packages/agent/src/adapters/codex/models.ts
+++ b/packages/agent/src/adapters/codex/models.ts
@@ -25,6 +25,18 @@ export function formatCodexModelName(value: string): string {
   return value.toLowerCase();
 }
 
+/** Current model id from the "model" config option (matched by category, else
+ *  by id); supersedes `response.models.currentModelId`, gone in ACP 0.25.0. */
+export function modelIdFromConfigOptions(
+  configOptions: SessionConfigOption[] | null | undefined,
+): string | undefined {
+  const modelOption =
+    configOptions?.find((o) => o.category === "model") ??
+    configOptions?.find((o) => o.id === "model");
+  const modelId = modelOption?.currentValue;
+  return typeof modelId === "string" ? modelId : undefined;
+}
+
 export function normalizeCodexConfigOptions(
   configOptions: SessionConfigOption[] | null | undefined,
 ): SessionConfigOption[] | null | undefined {
diff --git a/packages/agent/src/test/mocks/claude-sdk.ts b/packages/agent/src/test/mocks/claude-sdk.ts
index e54cb05ccf..8f6228aeb2 100644
--- a/packages/agent/src/test/mocks/claude-sdk.ts
+++ b/packages/agent/src/test/mocks/claude-sdk.ts
@@ -104,6 +104,7 @@ export function createMockQuery(
     applyFlagSettings: vi.fn().mockResolvedValue(undefined),
     getContextUsage: vi.fn().mockResolvedValue({}),
     reloadPlugins: vi.fn().mockResolvedValue(undefined),
+    reloadSkills: vi.fn().mockResolvedValue(undefined),
     seedReadState: vi.fn().mockResolvedValue(undefined),
     readFile: vi.fn().mockResolvedValue(""),
     backgroundTasks: vi.fn().mockResolvedValue([]),
diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml
index b9822d6afe..a0540d4136 100644
--- a/pnpm-lock.yaml
+++ b/pnpm-lock.yaml
@@ -707,11 +707,11 @@ importers:
   packages/agent:
     dependencies:
       '@agentclientprotocol/sdk':
-        specifier: 0.22.1
-        version: 0.22.1(zod@4.3.6)
+        specifier: 0.25.0
+        version: 0.25.0(zod@4.3.6)
       '@anthropic-ai/claude-agent-sdk':
-        specifier: 0.3.156
-        version: 0.3.156(@anthropic-ai/sdk@0.100.1(zod@4.3.6))(@modelcontextprotocol/sdk@1.29.0(zod@4.3.6))(zod@4.3.6)
+        specifier: 0.3.165
+        version: 0.3.165(@anthropic-ai/sdk@0.100.1(zod@4.3.6))(@modelcontextprotocol/sdk@1.29.0(zod@4.3.6))(zod@4.3.6)
       '@anthropic-ai/sdk':
         specifier: 0.100.1
         version: 0.100.1(zod@4.3.6)
@@ -1557,6 +1557,11 @@ packages:
     peerDependencies:
       zod: 4.3.6
 
+  '@agentclientprotocol/sdk@0.25.0':
+    resolution: {integrity: sha512-wU1VgXNtMvdVotX49txc3WJUDV+/QbLpsgjMvFhlRmp37osdLbI7L7y+iwAlQATwfjLxcv1r1p3ZxZBcXlGhcQ==}
+    peerDependencies:
+      zod: 4.3.6
+
   '@alloc/quick-lru@5.2.0':
     resolution: {integrity: sha512-UrcABB+4bUrFABwbluTIBErXwvbsU/V7TZWfmbgJfbkwiBuziS9gxdODUyuiecfdGQ85jglMW6juS3+z5TsKLw==}
     engines: {node: '>=10'}
@@ -1570,45 +1575,89 @@ packages:
     cpu: [arm64]
     os: [darwin]
 
+  '@anthropic-ai/claude-agent-sdk-darwin-arm64@0.3.165':
+    resolution: {integrity: sha512-obVodJmppNc6lgcM6Y5y3VCQLrYO2curOXrRaziKtjxYbuZP7kYsUhnonMvGoVAQh3uHKz2tivQDeztvWe3f9w==}
+    cpu: [arm64]
+    os: [darwin]
+
   '@anthropic-ai/claude-agent-sdk-darwin-x64@0.3.156':
     resolution: {integrity: sha512-6PKi5fPmGRuzXu+Em/iwLmPG3mqg0hl92wcTU8fmChqyNtxhxsjCw7LTbdFqp/05o5NeZVVV4k3p7YUv5IFD6g==}
     cpu: [x64]
     os: [darwin]
 
+  '@anthropic-ai/claude-agent-sdk-darwin-x64@0.3.165':
+    resolution: {integrity: sha512-0jc1tlYLXzPvZIkHKGHzsEEKq2YqTS8oHSNFroqLgbhrIk1Zy05ZXbciI289VDAe1Fq2a+qcUhkXct8Parx1Rg==}
+    cpu: [x64]
+    os: [darwin]
+
   '@anthropic-ai/claude-agent-sdk-linux-arm64-musl@0.3.156':
     resolution: {integrity: sha512-R7KEVjxkR4rYgIQoHGBzwPdUJYxRTO8I4vHjRbMLH1eW4FS7BJvVs7ogfKR/NnHFBvMVqtC+l6jHLQv8bobUiw==}
     cpu: [arm64]
     os: [linux]
     libc: [musl]
 
+  '@anthropic-ai/claude-agent-sdk-linux-arm64-musl@0.3.165':
+    resolution: {integrity: sha512-Rccmr5chZdZJVRvoB0nildB5PTKX+amatUho9JIcNOf1iX/6ej39fwf8q9W1MRHYP7AEc4t9GrSAGLcn7/JO4w==}
+    cpu: [arm64]
+    os: [linux]
+    libc: [musl]
+
   '@anthropic-ai/claude-agent-sdk-linux-arm64@0.3.156':
     resolution: {integrity: sha512-H0Nfd41iw5isto9uQI1FlVSZ0eaDttr8rBpJMR25oK/mj3egMO5EmZ6aAxeeUYSLn2mSU50HA5VNxlGUE118TQ==}
     cpu: [arm64]
     os: [linux]
     libc: [glibc]
 
+  '@anthropic-ai/claude-agent-sdk-linux-arm64@0.3.165':
+    resolution: {integrity: sha512-t87HgDPPaRYMTTB5cqA0M36Fyq4DOny89yk71BMgA8hAzhOjV9bla8pMVZTuX3xYYPjsa/TOmxSzwI8GZLf4Aw==}
+    cpu: [arm64]
+    os: [linux]
+    libc: [glibc]
+
   '@anthropic-ai/claude-agent-sdk-linux-x64-musl@0.3.156':
     resolution: {integrity: sha512-/Q6WUizI6a+hqZZ6ElwRU0PEuFhOoN4v6CuU35HHbiZ/7uaocGht4A8ZIgK1Fw6wOGtZzGLbc00CA1OU1Zg8EA==}
     cpu: [x64]
     os: [linux]
     libc: [musl]
 
+  '@anthropic-ai/claude-agent-sdk-linux-x64-musl@0.3.165':
+    resolution: {integrity: sha512-Y9Acr1RmydfEX+t+3mFn0K9VOx6nfyo08QuQH9R6ap1YYZWuobze++pNUY/rzwbQjXqcbjORtPKbO/kLQtSr9w==}
+    cpu: [x64]
+    os: [linux]
+    libc: [musl]
+
   '@anthropic-ai/claude-agent-sdk-linux-x64@0.3.156':
     resolution: {integrity: sha512-ymhrdlbWoYvTACUdaGdhrEv+ZMfwXLsf0BRLkr/IvY5aqybP7URzWmmZGOtDQpqkT/8xu/UCGqUYH3woJwUxfg==}
     cpu: [x64]
     os: [linux]
     libc: [glibc]
 
+  '@anthropic-ai/claude-agent-sdk-linux-x64@0.3.165':
+    resolution: {integrity: sha512-Y8fEW0zKBn0XZI5AOQWHep0Srz0qsCauynTWkhsC6J2vSPxkTiOxv2hmb7qdfiNlFn0k1etCWVFoRkhhFJzGfg==}
+    cpu: [x64]
+    os: [linux]
+    libc: [glibc]
+
   '@anthropic-ai/claude-agent-sdk-win32-arm64@0.3.156':
     resolution: {integrity: sha512-5sAeNObQQrMy4NF9HwxewrMnU7mVxZDHh+/MfJVQSz0GSTvXQ6gOuRH8helMlfspoU6VOdekPxVLRooX/3foEw==}
     cpu: [arm64]
     os: [win32]
 
+  '@anthropic-ai/claude-agent-sdk-win32-arm64@0.3.165':
+    resolution: {integrity: sha512-4Q01L3xaDDCvlOhABf2MnO7v7yJxKwwDyiMr+DaneUSvuh1qH0YE7qErSYLf6D9VfH8TdRwKZXwQplVVwCoHWw==}
+    cpu: [arm64]
+    os: [win32]
+
   '@anthropic-ai/claude-agent-sdk-win32-x64@0.3.156':
     resolution: {integrity: sha512-/PofeTWoiKgnWNSNk0wG4SsRn22GGLmnLhg2R94WcNhCRFOyOTmiZcYH2DBlWZBIRVTZDsSfa/Pl1DyPvYCGKw==}
     cpu: [x64]
     os: [win32]
 
+  '@anthropic-ai/claude-agent-sdk-win32-x64@0.3.165':
+    resolution: {integrity: sha512-Y0uOx7b7ZnkguvFFI5T5fSLnRA/e0uvMC++gSnyz6XMpNekgWc3+Mny7Dv2NO22nKbV2YiFsj6MkYYFEd51BDw==}
+    cpu: [x64]
+    os: [win32]
+
   '@anthropic-ai/claude-agent-sdk@0.3.156':
     resolution: {integrity: sha512-6nM/Dj+VMds52UXJ2YaV4IKhYamlUqN0HtdDrFzYz5lvPMpDS935qD8YZDAUpy+ltdoD6PJMd1V/CKFY3/oWCQ==}
     engines: {node: '>=18.0.0'}
@@ -1617,6 +1666,14 @@ packages:
       '@modelcontextprotocol/sdk': ^1.29.0
       zod: 4.3.6
 
+  '@anthropic-ai/claude-agent-sdk@0.3.165':
+    resolution: {integrity: sha512-wEUJNTAWkE6KMV35abqGi30lwhZz+jQLMtLh4SuTN2Hllzsysq8kmQFgcWulza3FLHG/GHzGHPi0+Sp2fb8xlw==}
+    engines: {node: '>=18.0.0'}
+    peerDependencies:
+      '@anthropic-ai/sdk': '>=0.93.0'
+      '@modelcontextprotocol/sdk': ^1.29.0
+      zod: 4.3.6
+
   '@anthropic-ai/sdk@0.100.1':
     resolution: {integrity: sha512-RANcEe7LpiLczkKGOwoXOTuFdPhuubS0i4xaAKOMpcqc55YO0mukgxppV7eygx3DXNjxWT6RYOLPyOy0aIAmwg==}
     hasBin: true
@@ -13420,6 +13477,10 @@ snapshots:
     dependencies:
       zod: 4.4.3
 
+  '@agentclientprotocol/sdk@0.25.0(zod@4.3.6)':
+    dependencies:
+      zod: 4.3.6
+
   '@alloc/quick-lru@5.2.0': {}
 
   '@ampproject/remapping@2.3.0':
@@ -13430,27 +13491,51 @@ snapshots:
   '@anthropic-ai/claude-agent-sdk-darwin-arm64@0.3.156':
     optional: true
 
+  '@anthropic-ai/claude-agent-sdk-darwin-arm64@0.3.165':
+    optional: true
+
   '@anthropic-ai/claude-agent-sdk-darwin-x64@0.3.156':
     optional: true
 
+  '@anthropic-ai/claude-agent-sdk-darwin-x64@0.3.165':
+    optional: true
+
   '@anthropic-ai/claude-agent-sdk-linux-arm64-musl@0.3.156':
     optional: true
 
+  '@anthropic-ai/claude-agent-sdk-linux-arm64-musl@0.3.165':
+    optional: true
+
   '@anthropic-ai/claude-agent-sdk-linux-arm64@0.3.156':
     optional: true
 
+  '@anthropic-ai/claude-agent-sdk-linux-arm64@0.3.165':
+    optional: true
+
   '@anthropic-ai/claude-agent-sdk-linux-x64-musl@0.3.156':
     optional: true
 
+  '@anthropic-ai/claude-agent-sdk-linux-x64-musl@0.3.165':
+    optional: true
+
   '@anthropic-ai/claude-agent-sdk-linux-x64@0.3.156':
     optional: true
 
+  '@anthropic-ai/claude-agent-sdk-linux-x64@0.3.165':
+    optional: true
+
   '@anthropic-ai/claude-agent-sdk-win32-arm64@0.3.156':
     optional: true
 
+  '@anthropic-ai/claude-agent-sdk-win32-arm64@0.3.165':
+    optional: true
+
   '@anthropic-ai/claude-agent-sdk-win32-x64@0.3.156':
     optional: true
 
+  '@anthropic-ai/claude-agent-sdk-win32-x64@0.3.165':
+    optional: true
+
   '@anthropic-ai/claude-agent-sdk@0.3.156(@anthropic-ai/sdk@0.100.1(zod@4.3.6))(@modelcontextprotocol/sdk@1.29.0(zod@4.3.6))(zod@4.3.6)':
     dependencies:
       '@anthropic-ai/sdk': 0.100.1(zod@4.3.6)
@@ -13466,6 +13551,21 @@ snapshots:
       '@anthropic-ai/claude-agent-sdk-win32-arm64': 0.3.156
       '@anthropic-ai/claude-agent-sdk-win32-x64': 0.3.156
 
+  '@anthropic-ai/claude-agent-sdk@0.3.165(@anthropic-ai/sdk@0.100.1(zod@4.3.6))(@modelcontextprotocol/sdk@1.29.0(zod@4.3.6))(zod@4.3.6)':
+    dependencies:
+      '@anthropic-ai/sdk': 0.100.1(zod@4.3.6)
+      '@modelcontextprotocol/sdk': 1.29.0(zod@4.3.6)
+      zod: 4.3.6
+    optionalDependencies:
+      '@anthropic-ai/claude-agent-sdk-darwin-arm64': 0.3.165
+      '@anthropic-ai/claude-agent-sdk-darwin-x64': 0.3.165
+      '@anthropic-ai/claude-agent-sdk-linux-arm64': 0.3.165
+      '@anthropic-ai/claude-agent-sdk-linux-arm64-musl': 0.3.165
+      '@anthropic-ai/claude-agent-sdk-linux-x64': 0.3.165
+      '@anthropic-ai/claude-agent-sdk-linux-x64-musl': 0.3.165
+      '@anthropic-ai/claude-agent-sdk-win32-arm64': 0.3.165
+      '@anthropic-ai/claude-agent-sdk-win32-x64': 0.3.165
+
   '@anthropic-ai/sdk@0.100.1(zod@4.3.6)':
     dependencies:
       json-schema-to-ts: 3.1.1

From 50d25b236990af6aaa2c876dd0248b509a81fef1 Mon Sep 17 00:00:00 2001
From: Charles Vien <charles.v@posthog.com>
Date: Sat, 6 Jun 2026 17:16:47 -0700
Subject: [PATCH 2/4] fix acp session id routing and skills baseline

---
 .../agent/src/adapters/claude/claude-agent.ts | 22 ++++++++++++++-----
 .../adapters/claude/conversion/sdk-to-acp.ts  |  5 ++++-
 2 files changed, 20 insertions(+), 7 deletions(-)

diff --git a/packages/agent/src/adapters/claude/claude-agent.ts b/packages/agent/src/adapters/claude/claude-agent.ts
index 217f06e278..cd09aa6360 100644
--- a/packages/agent/src/adapters/claude/claude-agent.ts
+++ b/packages/agent/src/adapters/claude/claude-agent.ts
@@ -623,15 +623,21 @@ export class ClaudeAcpAgent extends BaseAcpAgent {
               this.session.knownSlashCommands = collectKnownSlashCommands(
                 message.commands,
               );
+              const available = getAvailableSlashCommands(message.commands);
               await this.client.sessionUpdate({
                 sessionId: params.sessionId,
                 update: {
                   sessionUpdate: "available_commands_update",
-                  availableCommands: getAvailableSlashCommands(
-                    message.commands,
-                  ),
+                  availableCommands: available,
                 },
               });
+              // Keep the context-breakdown skills estimate in sync with the new
+              // command list (mirrors sendAvailableCommandsUpdate), so later
+              // usage breakdowns don't report stale skills context.
+              this.updateBreakdownCategory(
+                "skills",
+                estimateSkillsTokens(available),
+              );
               break;
             }
             if (message.subtype === "local_command_output") {
@@ -1047,8 +1053,11 @@ export class ClaudeAcpAgent extends BaseAcpAgent {
           case "tool_progress": {
             // Surface "still working" progress on a long-running tool call so
             // the client can show elapsed time instead of a stalled spinner.
+            // Route by the ACP session id (params.sessionId) like every other
+            // update in this loop — the client renders by ACP session, not the
+            // SDK's message.session_id.
             await this.client.sessionUpdate({
-              sessionId: message.session_id,
+              sessionId: params.sessionId,
               update: {
                 sessionUpdate: "tool_call_update",
                 toolCallId: message.tool_use_id,
@@ -1067,10 +1076,11 @@ export class ClaudeAcpAgent extends BaseAcpAgent {
           }
           case "rate_limit_event": {
             // Re-emit the current usage carrying the subscription rate-limit
-            // info so the client can warn before the limit bites.
+            // info so the client can warn before the limit bites. Route by the
+            // ACP session id (params.sessionId) like every other update here.
             if (lastAssistantTotalUsage !== null) {
               await this.client.sessionUpdate({
-                sessionId: message.session_id,
+                sessionId: params.sessionId,
                 update: {
                   sessionUpdate: "usage_update",
                   used: lastAssistantTotalUsage,
diff --git a/packages/agent/src/adapters/claude/conversion/sdk-to-acp.ts b/packages/agent/src/adapters/claude/conversion/sdk-to-acp.ts
index 807c39f990..5b333da989 100644
--- a/packages/agent/src/adapters/claude/conversion/sdk-to-acp.ts
+++ b/packages/agent/src/adapters/claude/conversion/sdk-to-acp.ts
@@ -792,8 +792,11 @@ export async function handleSystemMessage(
       // tool_call, so mark it failed with the rejection reason — otherwise the
       // client shows a tool call that silently never resolves.
       const reason = message.decision_reason ?? message.message;
+      // Route by the ACP session id (context) — the original tool_call was
+      // emitted under it, so the failed update must match or the client drops it
+      // and the tool call hangs unresolved.
       await client.sessionUpdate({
-        sessionId: message.session_id,
+        sessionId,
         update: {
           sessionUpdate: "tool_call_update",
           toolCallId: message.tool_use_id,

From 2196e9fa69e10033fe5f96fff299297fe5e04864 Mon Sep 17 00:00:00 2001
From: Charles Vien <charles.v@posthog.com>
Date: Sun, 7 Jun 2026 17:33:13 -0700
Subject: [PATCH 3/4] fix compact-boundary cancel race and add tests

---
 .../claude/claude-agent.slash-command.test.ts | 58 +++++++++++++++++++
 .../agent/src/adapters/claude/claude-agent.ts | 37 ++++++++----
 .../claude/conversion/sdk-to-acp.test.ts      | 50 ++++++++++++++++
 .../adapters/claude/conversion/sdk-to-acp.ts  |  3 +-
 .../adapters/claude/session/models.test.ts    | 21 +++++++
 .../agent/src/adapters/codex/models.test.ts   | 43 +++++++++++++-
 6 files changed, 200 insertions(+), 12 deletions(-)
 create mode 100644 packages/agent/src/adapters/claude/conversion/sdk-to-acp.test.ts

diff --git a/packages/agent/src/adapters/claude/claude-agent.slash-command.test.ts b/packages/agent/src/adapters/claude/claude-agent.slash-command.test.ts
index 810f34e4d8..7ca63c83b0 100644
--- a/packages/agent/src/adapters/claude/claude-agent.slash-command.test.ts
+++ b/packages/agent/src/adapters/claude/claude-agent.slash-command.test.ts
@@ -172,3 +172,61 @@ describe("ClaudeAcpAgent.prompt — early idle handling", () => {
     }
   });
 });
+
+describe("ClaudeAcpAgent.prompt — force-cancel backstop", () => {
+  beforeEach(() => {
+    vi.clearAllMocks();
+  });
+
+  it("returns 'cancelled' when the SDK never yields after interrupt (issue #680)", async () => {
+    const { agent } = makeAgent();
+    const sessionId = "s-wedged";
+    const query = installFakeSession(agent, sessionId);
+    // Simulate a wedged SDK: interrupt() resolves but never makes next() yield.
+    query.interrupt.mockImplementation(async () => {});
+    // Shrink the grace period so the backstop fires promptly under real timers.
+    (agent as unknown as { forceCancelGraceMs: number }).forceCancelGraceMs = 5;
+
+    const promptPromise = agent.prompt({
+      sessionId,
+      prompt: [{ type: "text", text: "do something slow" }],
+    });
+
+    // Let the loop reach `await query.next()`, which stays pending forever.
+    await new Promise((resolve) => setImmediate(resolve));
+
+    // Arms the backstop and calls the (no-op) interrupt; the timer must drive
+    // the loop to return rather than hanging on the wedged next().
+    await agent.cancel({ sessionId });
+
+    const result = await promptPromise;
+    expect(result.stopReason).toBe("cancelled");
+  });
+
+  it("clears the backstop timer on a healthy cancel (interrupt yields)", async () => {
+    const { agent } = makeAgent();
+    const sessionId = "s-healthy";
+    installFakeSession(agent, sessionId);
+    // Large grace so the test can only pass via the normal idle/done path, not
+    // the timer; the loop must clear the armed timer in its finally.
+    (agent as unknown as { forceCancelGraceMs: number }).forceCancelGraceMs =
+      50_000;
+
+    const promptPromise = agent.prompt({
+      sessionId,
+      prompt: [{ type: "text", text: "do something" }],
+    });
+    await new Promise((resolve) => setImmediate(resolve));
+
+    // The mock's default interrupt() resolves next() with done, so the loop
+    // returns through its normal path well before the 50s backstop.
+    await agent.cancel({ sessionId });
+
+    const result = await promptPromise;
+    expect(result.stopReason).toBe("cancelled");
+    expect(
+      (agent as unknown as { session: { forceCancelTimer?: unknown } }).session
+        .forceCancelTimer,
+    ).toBeUndefined();
+  });
+});
diff --git a/packages/agent/src/adapters/claude/claude-agent.ts b/packages/agent/src/adapters/claude/claude-agent.ts
index cd09aa6360..654e868761 100644
--- a/packages/agent/src/adapters/claude/claude-agent.ts
+++ b/packages/agent/src/adapters/claude/claude-agent.ts
@@ -486,7 +486,10 @@ export class ClaudeAcpAgent extends BaseAcpAgent {
     // force-cancel backstop armed in interrupt() aborts this controller.
     const cancelController = new AbortController();
     this.session.cancelController = cancelController;
-    const cancelled = new Promise<void>((resolve) => {
+    // Resolves when the backstop aborts the controller. Named distinctly from
+    // the `cancelled` boolean above (the queue-handoff result) to avoid two
+    // variables named `cancelled` in this method.
+    const cancelWake = new Promise<void>((resolve) => {
       cancelController.signal.addEventListener("abort", () => resolve(), {
         once: true,
       });
@@ -545,13 +548,19 @@ export class ClaudeAcpAgent extends BaseAcpAgent {
     try {
       while (true) {
         const nextMessage = this.session.query.next();
-        const next = await Promise.race([nextMessage, cancelled]);
+        const next = await Promise.race([nextMessage, cancelWake]);
         if (cancelController.signal.aborted) {
           // The SDK never yielded after interrupt() (e.g. a wedged TaskOutput
-          // block). Abandon the in-flight next() — swallowing any later
-          // rejection so it can't surface as an unhandled rejection — and honor
-          // the cancel per the ACP contract.
-          void nextMessage.catch(() => {});
+          // block). Abandon the in-flight next(); log any later rejection (an
+          // auth/process error the SDK threw at cancel time would otherwise be
+          // lost) but swallow it so it can't surface as an unhandled rejection,
+          // then honor the cancel per the ACP contract.
+          void nextMessage.catch((err) =>
+            this.logger.warn("in-flight query.next() rejected after cancel", {
+              sessionId: params.sessionId,
+              error: err instanceof Error ? err.message : String(err),
+            }),
+          );
           return {
             stopReason: "cancelled",
             _meta: this.session.interruptReason
@@ -599,10 +608,16 @@ export class ClaudeAcpAgent extends BaseAcpAgent {
               // (context just dropped) and replaced within seconds by the next
               // result. `size` keeps coming from the gateway-learned window
               // (getContextUsage under-reports extended 1M windows).
-              const usedTokens = await fetchContextUsedTokens(
-                this.session.query,
-                this.logger,
-              );
+              // Race the control request against the force-cancel wake: the
+              // loop only observes cancelWake at its top, so a wedged
+              // getContextUsage() awaited here would otherwise re-introduce the
+              // exact hang the backstop exists to break (issue #680). On a
+              // forced cancel usedTokens is null and the next iteration returns
+              // "cancelled".
+              const usedTokens = await Promise.race([
+                fetchContextUsedTokens(this.session.query, this.logger),
+                cancelWake.then(() => null),
+              ]);
               lastAssistantTotalUsage = usedTokens ?? 0;
               promptReplayed = true;
               await this.client.sessionUpdate({
@@ -613,6 +628,8 @@ export class ClaudeAcpAgent extends BaseAcpAgent {
                   size: lastContextWindowSize,
                 },
               });
+              // No break: intentionally falls through to handleSystemMessage so
+              // the COMPACT_BOUNDARY ext notification still fires.
             }
             if (message.subtype === "commands_changed") {
               // Mid-session command-list change (e.g. skills discovered as the
diff --git a/packages/agent/src/adapters/claude/conversion/sdk-to-acp.test.ts b/packages/agent/src/adapters/claude/conversion/sdk-to-acp.test.ts
new file mode 100644
index 0000000000..ee6846d8cf
--- /dev/null
+++ b/packages/agent/src/adapters/claude/conversion/sdk-to-acp.test.ts
@@ -0,0 +1,50 @@
+import { describe, expect, it } from "vitest";
+import { stripMarkerTags } from "./sdk-to-acp";
+
+describe("stripMarkerTags", () => {
+  it("strips a single marker and keeps surrounding prose", () => {
+    expect(
+      stripMarkerTags("before<command-name>/model</command-name>after"),
+    ).toBe("beforeafter");
+  });
+
+  it("strips multiple different markers in one pass", () => {
+    const input =
+      "a<command-args>x</command-args>b<local-command-stdout>out</local-command-stdout>c";
+    expect(stripMarkerTags(input)).toBe("abc");
+  });
+
+  it("leaves text without markers unchanged", () => {
+    expect(stripMarkerTags("")).toBe("");
+    expect(stripMarkerTags("plain prose with < and > but no tags")).toBe(
+      "plain prose with < and > but no tags",
+    );
+  });
+
+  it("passes an unclosed opener through verbatim (dead-set path)", () => {
+    const input = "<command-name>no closing tag, prose continues";
+    expect(stripMarkerTags(input)).toBe(input);
+  });
+
+  it("does not treat an orphan closing tag as an opener", () => {
+    expect(
+      stripMarkerTags("</command-name>text<command-name>real</command-name>"),
+    ).toBe("</command-name>text");
+  });
+
+  it("matches the nearest closing tag for a repeated opener", () => {
+    // Lazy match: the first opener pairs with the first close, swallowing the
+    // inner opener and its text, exactly like the original `[\s\S]*?` regex.
+    expect(
+      stripMarkerTags(
+        "<command-name>outer<command-name>inner</command-name>trailing",
+      ),
+    ).toBe("trailing");
+  });
+
+  it("stays linear on pathological unclosed input", () => {
+    // A long run of openers with no close must not catastrophically backtrack.
+    const input = `${"<command-name>".repeat(20000)}tail`;
+    expect(stripMarkerTags(input)).toBe(input);
+  });
+});
diff --git a/packages/agent/src/adapters/claude/conversion/sdk-to-acp.ts b/packages/agent/src/adapters/claude/conversion/sdk-to-acp.ts
index 5b333da989..7b29dbfb58 100644
--- a/packages/agent/src/adapters/claude/conversion/sdk-to-acp.ts
+++ b/packages/agent/src/adapters/claude/conversion/sdk-to-acp.ts
@@ -1009,7 +1009,8 @@ const LOCAL_COMMAND_MARKERS = [
 // Single-pass scanner that removes each `<tag>…</tag>` marker (matching the
 // nearest closing tag of the same name, like a lazy regex would) without the
 // catastrophic-backtracking risk of `[\s\S]*?` over pathological input.
-function stripMarkerTags(text: string): string {
+// Exported for unit testing.
+export function stripMarkerTags(text: string): string {
   const dead = new Set<string>();
   let result = "";
   let copiedUpTo = 0;
diff --git a/packages/agent/src/adapters/claude/session/models.test.ts b/packages/agent/src/adapters/claude/session/models.test.ts
index 9b6c0649bb..bbb0d67869 100644
--- a/packages/agent/src/adapters/claude/session/models.test.ts
+++ b/packages/agent/src/adapters/claude/session/models.test.ts
@@ -133,6 +133,27 @@ describe("resolveModelPreference", () => {
     expect(resolveModelPreference("gpt-5", options)).toBeNull();
   });
 
+  it("does not inherit a cross-family match from the context hint alone", () => {
+    // `opus[1m]` must not resolve to a sonnet entry purely because both share
+    // the "1m" hint token, with no real family token matching (#731).
+    const sonnetOnly = [
+      { value: "claude-sonnet-4-6", name: "Claude Sonnet 4.6 (1M context)" },
+    ];
+    expect(resolveModelPreference("opus[1m]", sonnetOnly)).toBeNull();
+  });
+
+  it("resolves a hinted alias to the right family when a family token matches", () => {
+    // Both entries carry the "1m" hint; the "opus" token must break the tie so
+    // the hint alone can't pull the match onto sonnet.
+    const withHints = [
+      { value: "claude-opus-4-8", name: "Claude Opus 4.8 (1M context)" },
+      { value: "claude-sonnet-4-6", name: "Claude Sonnet 4.6 (1M context)" },
+    ];
+    expect(resolveModelPreference("opus[1m]", withHints)).toBe(
+      "claude-opus-4-8",
+    );
+  });
+
   it("treats `best` and `default` as wildcards (no tokens contribute)", () => {
     expect(resolveModelPreference("best", options)).toBeNull();
     expect(resolveModelPreference("default", options)).toBeNull();
diff --git a/packages/agent/src/adapters/codex/models.test.ts b/packages/agent/src/adapters/codex/models.test.ts
index b31a039ac8..c898bd9dc4 100644
--- a/packages/agent/src/adapters/codex/models.test.ts
+++ b/packages/agent/src/adapters/codex/models.test.ts
@@ -1,8 +1,49 @@
+import type { SessionConfigOption } from "@agentclientprotocol/sdk";
 import { describe, expect, it } from "vitest";
-import { formatCodexModelName } from "./models";
+import { formatCodexModelName, modelIdFromConfigOptions } from "./models";
 
 describe("formatCodexModelName", () => {
   it("uses raw lowercase model ids", () => {
     expect(formatCodexModelName("GPT-5.5")).toBe("gpt-5.5");
   });
 });
+
+describe("modelIdFromConfigOptions", () => {
+  const modelOption = (currentValue: unknown): SessionConfigOption =>
+    ({
+      id: "model",
+      name: "Model",
+      type: "select",
+      category: "model",
+      currentValue,
+      options: [],
+    }) as unknown as SessionConfigOption;
+
+  it("returns the currentValue of the model-category option", () => {
+    expect(modelIdFromConfigOptions([modelOption("gpt-5.5-codex")])).toBe(
+      "gpt-5.5-codex",
+    );
+  });
+
+  it("ignores non-model categories", () => {
+    const modeOption = {
+      id: "mode",
+      name: "Mode",
+      type: "select",
+      category: "mode",
+      currentValue: "auto",
+      options: [],
+    } as unknown as SessionConfigOption;
+    expect(modelIdFromConfigOptions([modeOption])).toBeUndefined();
+  });
+
+  it("returns undefined when currentValue is not a string", () => {
+    expect(modelIdFromConfigOptions([modelOption(null)])).toBeUndefined();
+    expect(modelIdFromConfigOptions([modelOption(123)])).toBeUndefined();
+  });
+
+  it("returns undefined for null/undefined input", () => {
+    expect(modelIdFromConfigOptions(null)).toBeUndefined();
+    expect(modelIdFromConfigOptions(undefined)).toBeUndefined();
+  });
+});

From ae78cad1835599ccec65dfbda49a4eea3456cc0d Mon Sep 17 00:00:00 2001
From: Charles Vien <charles.v@posthog.com>
Date: Mon, 8 Jun 2026 08:31:29 -0700
Subject: [PATCH 4/4] remove verbose claude adapter comments

---
 .../claude/claude-agent.slash-command.test.ts |   9 --
 .../agent/src/adapters/claude/claude-agent.ts | 101 ------------------
 .../claude/conversion/sdk-to-acp.test.ts      |   3 -
 .../adapters/claude/conversion/sdk-to-acp.ts  |  26 -----
 .../adapters/claude/session/models.test.ts    |   4 -
 .../src/adapters/claude/session/models.ts     |   3 -
 packages/agent/src/adapters/claude/types.ts   |   8 --
 packages/agent/src/adapters/codex/models.ts   |   3 -
 8 files changed, 157 deletions(-)

diff --git a/packages/agent/src/adapters/claude/claude-agent.slash-command.test.ts b/packages/agent/src/adapters/claude/claude-agent.slash-command.test.ts
index 7ca63c83b0..776663417e 100644
--- a/packages/agent/src/adapters/claude/claude-agent.slash-command.test.ts
+++ b/packages/agent/src/adapters/claude/claude-agent.slash-command.test.ts
@@ -182,9 +182,7 @@ describe("ClaudeAcpAgent.prompt — force-cancel backstop", () => {
     const { agent } = makeAgent();
     const sessionId = "s-wedged";
     const query = installFakeSession(agent, sessionId);
-    // Simulate a wedged SDK: interrupt() resolves but never makes next() yield.
     query.interrupt.mockImplementation(async () => {});
-    // Shrink the grace period so the backstop fires promptly under real timers.
     (agent as unknown as { forceCancelGraceMs: number }).forceCancelGraceMs = 5;
 
     const promptPromise = agent.prompt({
@@ -192,11 +190,8 @@ describe("ClaudeAcpAgent.prompt — force-cancel backstop", () => {
       prompt: [{ type: "text", text: "do something slow" }],
     });
 
-    // Let the loop reach `await query.next()`, which stays pending forever.
     await new Promise((resolve) => setImmediate(resolve));
 
-    // Arms the backstop and calls the (no-op) interrupt; the timer must drive
-    // the loop to return rather than hanging on the wedged next().
     await agent.cancel({ sessionId });
 
     const result = await promptPromise;
@@ -207,8 +202,6 @@ describe("ClaudeAcpAgent.prompt — force-cancel backstop", () => {
     const { agent } = makeAgent();
     const sessionId = "s-healthy";
     installFakeSession(agent, sessionId);
-    // Large grace so the test can only pass via the normal idle/done path, not
-    // the timer; the loop must clear the armed timer in its finally.
     (agent as unknown as { forceCancelGraceMs: number }).forceCancelGraceMs =
       50_000;
 
@@ -218,8 +211,6 @@ describe("ClaudeAcpAgent.prompt — force-cancel backstop", () => {
     });
     await new Promise((resolve) => setImmediate(resolve));
 
-    // The mock's default interrupt() resolves next() with done, so the loop
-    // returns through its normal path well before the 50s backstop.
     await agent.cancel({ sessionId });
 
     const result = await promptPromise;
diff --git a/packages/agent/src/adapters/claude/claude-agent.ts b/packages/agent/src/adapters/claude/claude-agent.ts
index 654e868761..30423188f6 100644
--- a/packages/agent/src/adapters/claude/claude-agent.ts
+++ b/packages/agent/src/adapters/claude/claude-agent.ts
@@ -134,15 +134,6 @@ import type {
 
 const SESSION_VALIDATION_TIMEOUT_MS = 30_000;
 
-/** Grace period after `session/cancel` before the adapter forces a wedged
- *  prompt loop to return "cancelled". `query.interrupt()` normally makes the SDK
- *  yield a trailing idle within milliseconds and the loop returns through its
- *  usual path, so this timer is armed and cleared (never fired) on healthy
- *  cancels. It only trips when the SDK is genuinely wedged (e.g. a
- *  `TaskOutput { block: true }` poll against a hung background task — issue
- *  #680) and never yields. Deliberately loose: an "obviously stuck" ceiling,
- *  not a guess at interrupt latency, so it can't pre-empt a slow-but-healthy
- *  interrupt. */
 const DEFAULT_FORCE_CANCEL_GRACE_MS = 30_000;
 
 const MAX_TITLE_LENGTH = 256;
@@ -199,15 +190,6 @@ function shouldEmitRawMessage(
   );
 }
 
-/** Fetch the SDK's authoritative context-window occupancy via the
- *  `getContextUsage` control request. Unlike the per-message API usage numbers
- *  (which only count message tokens), `totalTokens` includes the system prompt,
- *  tool schemas, MCP tools, and memory-file overhead — the real occupancy the
- *  user sees. Returns `null` on any control-request failure.
- *
- *  We deliberately do NOT use this response's window fields for `size`: they
- *  have been observed to under-report extended (1M) context windows, so the
- *  window keeps coming from the gateway / model heuristic. */
 async function fetchContextUsedTokens(
   sdkQuery: Query,
   logger: Logger,
@@ -236,9 +218,6 @@ export class ClaudeAcpAgent extends BaseAcpAgent {
   toolUseStreamCache: ToolUseStreamCache;
   backgroundTerminals: { [key: string]: BackgroundTerminal } = {};
   clientCapabilities?: ClientCapabilities;
-  /** Grace period before a `session/cancel` forces a wedged prompt loop to
-   *  return "cancelled". See {@link DEFAULT_FORCE_CANCEL_GRACE_MS}. Mutable so
-   *  tests can shrink it. */
   forceCancelGraceMs: number = DEFAULT_FORCE_CANCEL_GRACE_MS;
   private options?: ClaudeAcpAgentOptions;
   private enrichment?: Enrichment;
@@ -481,14 +460,8 @@ export class ClaudeAcpAgent extends BaseAcpAgent {
     await this.broadcastUserMessage(params);
 
     this.session.promptRunning = true;
-    // Wake-up channel so cancel() can force this loop to return "cancelled" even
-    // when query.next() is wedged and never yields again (issue #680). The
-    // force-cancel backstop armed in interrupt() aborts this controller.
     const cancelController = new AbortController();
     this.session.cancelController = cancelController;
-    // Resolves when the backstop aborts the controller. Named distinctly from
-    // the `cancelled` boolean above (the queue-handoff result) to avoid two
-    // variables named `cancelled` in this method.
     const cancelWake = new Promise<void>((resolve) => {
       cancelController.signal.addEventListener("abort", () => resolve(), {
         once: true,
@@ -497,10 +470,6 @@ export class ClaudeAcpAgent extends BaseAcpAgent {
     let handedOff = false;
     let errored = false;
     let lastAssistantTotalUsage: number | null = null;
-    // When a streaming classifier refuses a turn, the assistant message carries
-    // stop_reason "refusal" and structured stop_details. We capture the
-    // human-readable explanation here so the terminal `result` can surface it to
-    // the user (the refused assistant message itself usually has no content).
     let lastRefusalExplanation: string | null = null;
     let lastStreamUsage = {
       input_tokens: 0,
@@ -550,11 +519,6 @@ export class ClaudeAcpAgent extends BaseAcpAgent {
         const nextMessage = this.session.query.next();
         const next = await Promise.race([nextMessage, cancelWake]);
         if (cancelController.signal.aborted) {
-          // The SDK never yielded after interrupt() (e.g. a wedged TaskOutput
-          // block). Abandon the in-flight next(); log any later rejection (an
-          // auth/process error the SDK threw at cancel time would otherwise be
-          // lost) but swallow it so it can't surface as an unhandled rejection,
-          // then honor the cancel per the ACP contract.
           void nextMessage.catch((err) =>
             this.logger.warn("in-flight query.next() rejected after cancel", {
               sessionId: params.sessionId,
@@ -598,22 +562,6 @@ export class ClaudeAcpAgent extends BaseAcpAgent {
         switch (message.type) {
           case "system":
             if (message.subtype === "compact_boundary") {
-              // Refresh the displayed usage immediately so the client doesn't
-              // keep showing the stale pre-compaction size right after the user
-              // sees "Compacting completed". Prefer the SDK's authoritative
-              // post-compaction `used` via getContextUsage — it reflects the
-              // real retained context (system prompt + tools + surviving
-              // messages), which per-message API usage can't give us until the
-              // next turn. Fall back to 0 on failure: directionally correct
-              // (context just dropped) and replaced within seconds by the next
-              // result. `size` keeps coming from the gateway-learned window
-              // (getContextUsage under-reports extended 1M windows).
-              // Race the control request against the force-cancel wake: the
-              // loop only observes cancelWake at its top, so a wedged
-              // getContextUsage() awaited here would otherwise re-introduce the
-              // exact hang the backstop exists to break (issue #680). On a
-              // forced cancel usedTokens is null and the next iteration returns
-              // "cancelled".
               const usedTokens = await Promise.race([
                 fetchContextUsedTokens(this.session.query, this.logger),
                 cancelWake.then(() => null),
@@ -628,15 +576,8 @@ export class ClaudeAcpAgent extends BaseAcpAgent {
                   size: lastContextWindowSize,
                 },
               });
-              // No break: intentionally falls through to handleSystemMessage so
-              // the COMPACT_BOUNDARY ext notification still fires.
             }
             if (message.subtype === "commands_changed") {
-              // Mid-session command-list change (e.g. skills discovered as the
-              // agent works in a subdirectory). Push the new list straight from
-              // the message rather than re-querying (supportedCommands() only
-              // ever reflects the init list), and refresh the known-commands
-              // gate used to flag unsupported slash commands.
               this.session.knownSlashCommands = collectKnownSlashCommands(
                 message.commands,
               );
@@ -648,9 +589,6 @@ export class ClaudeAcpAgent extends BaseAcpAgent {
                   availableCommands: available,
                 },
               });
-              // Keep the context-breakdown skills estimate in sync with the new
-              // command list (mirrors sendAvailableCommandsUpdate), so later
-              // usage breakdowns don't report stale skills context.
               this.updateBreakdownCategory(
                 "skills",
                 estimateSkillsTokens(available),
@@ -871,12 +809,6 @@ export class ClaudeAcpAgent extends BaseAcpAgent {
                 this.session.accumulatedUsage.cachedWriteTokens,
             };
 
-            // A refusal can arrive on any result subtype (and may even set
-            // is_error), so handle it before handleResultMessage — otherwise the
-            // is_error path would surface it as an internal error. The refused
-            // assistant message carries no visible content, so surface the
-            // classifier's explanation (when available) and report ACP's
-            // dedicated `refusal` stop reason.
             if (
               (message as { stop_reason?: string }).stop_reason === "refusal"
             ) {
@@ -1009,10 +941,6 @@ export class ClaudeAcpAgent extends BaseAcpAgent {
               break;
             }
 
-            // Capture a refusal explanation from the assistant message so the
-            // terminal `result` can surface it (the refused message itself has
-            // no visible content). stop_reason/stop_details live on the inner
-            // Anthropic message; read them via cast like the usage block below.
             if (message.type === "assistant") {
               const inner = message.message as unknown as {
                 stop_reason?: string | null;
@@ -1068,11 +996,6 @@ export class ClaudeAcpAgent extends BaseAcpAgent {
           }
 
           case "tool_progress": {
-            // Surface "still working" progress on a long-running tool call so
-            // the client can show elapsed time instead of a stalled spinner.
-            // Route by the ACP session id (params.sessionId) like every other
-            // update in this loop — the client renders by ACP session, not the
-            // SDK's message.session_id.
             await this.client.sessionUpdate({
               sessionId: params.sessionId,
               update: {
@@ -1092,9 +1015,6 @@ export class ClaudeAcpAgent extends BaseAcpAgent {
             break;
           }
           case "rate_limit_event": {
-            // Re-emit the current usage carrying the subscription rate-limit
-            // info so the client can warn before the limit bites. Route by the
-            // ACP session id (params.sessionId) like every other update here.
             if (lastAssistantTotalUsage !== null) {
               await this.client.sessionUpdate({
                 sessionId: params.sessionId,
@@ -1174,9 +1094,6 @@ export class ClaudeAcpAgent extends BaseAcpAgent {
       }
       throw error;
     } finally {
-      // The loop is returning — interrupt() succeeded or the prompt finished —
-      // so disarm the force-cancel backstop and release the wake-up channel
-      // (only if we still own it; a handoff installs the next prompt's).
       if (this.session.forceCancelTimer) {
         clearTimeout(this.session.forceCancelTimer);
         this.session.forceCancelTimer = undefined;
@@ -1223,15 +1140,6 @@ export class ClaudeAcpAgent extends BaseAcpAgent {
     }
     this.session.pendingMessages.clear();
 
-    // Arm a backstop before interrupting: if a prompt is actively consuming the
-    // query and interrupt() doesn't make the SDK yield (e.g. a wedged TaskOutput
-    // block — issue #680), force the loop to return "cancelled" after the grace
-    // period so the pending prompt() resolves per the ACP cancellation contract
-    // instead of hanging forever. The loop's `finally` clears this timer when
-    // interrupt() works and it returns through the normal idle path, so on
-    // healthy cancels it is armed but never fires. Arm at most once per turn:
-    // the floor is an absolute ceiling from the first cancel, so a client that
-    // re-sends cancel can't keep pushing the deadline out.
     if (
       this.session.promptRunning &&
       this.session.cancelController &&
@@ -1529,12 +1437,6 @@ export class ClaudeAcpAgent extends BaseAcpAgent {
     }
   }
 
-  /**
-   * Ensures the requested `cwd` is an absolute path that points at an existing
-   * directory before we create a session. Throws an `invalidParams` error with
-   * an actionable message so clients can surface it to the user instead of
-   * failing later with an opaque "native binary failed to launch" SDK error.
-   */
   private async validateCwd(cwd: string): Promise<void> {
     if (!path.isAbsolute(cwd)) {
       throw RequestError.invalidParams(
@@ -1577,9 +1479,6 @@ export class ClaudeAcpAgent extends BaseAcpAgent {
     const { cwd } = params;
     const { resume, forkSession } = creationOpts;
 
-    // Validate `cwd` up front. The ACP spec requires an absolute path, and the
-    // directory must exist on the machine running the agent. Without this the
-    // failure only surfaces later as a confusing SDK launch error (issue #749).
     await this.validateCwd(cwd);
 
     const isResume = !!resume;
diff --git a/packages/agent/src/adapters/claude/conversion/sdk-to-acp.test.ts b/packages/agent/src/adapters/claude/conversion/sdk-to-acp.test.ts
index ee6846d8cf..76b4d06b2c 100644
--- a/packages/agent/src/adapters/claude/conversion/sdk-to-acp.test.ts
+++ b/packages/agent/src/adapters/claude/conversion/sdk-to-acp.test.ts
@@ -33,8 +33,6 @@ describe("stripMarkerTags", () => {
   });
 
   it("matches the nearest closing tag for a repeated opener", () => {
-    // Lazy match: the first opener pairs with the first close, swallowing the
-    // inner opener and its text, exactly like the original `[\s\S]*?` regex.
     expect(
       stripMarkerTags(
         "<command-name>outer<command-name>inner</command-name>trailing",
@@ -43,7 +41,6 @@ describe("stripMarkerTags", () => {
   });
 
   it("stays linear on pathological unclosed input", () => {
-    // A long run of openers with no close must not catastrophically backtrack.
     const input = `${"<command-name>".repeat(20000)}tail`;
     expect(stripMarkerTags(input)).toBe(input);
   });
diff --git a/packages/agent/src/adapters/claude/conversion/sdk-to-acp.ts b/packages/agent/src/adapters/claude/conversion/sdk-to-acp.ts
index 7b29dbfb58..e17ab7ffee 100644
--- a/packages/agent/src/adapters/claude/conversion/sdk-to-acp.ts
+++ b/packages/agent/src/adapters/claude/conversion/sdk-to-acp.ts
@@ -190,11 +190,6 @@ function handleToolUseChunk(
   }
 
   if (!alreadyCached && ctx.registerHooks !== false) {
-    // Capture what the hook needs in the closure rather than re-reading the
-    // cache when it fires. The cache entry is pruned at tool_result time, and a
-    // PostToolUse hook can fire after that, so closing over the name and bash
-    // command keeps the diff working without depending on (or pinning) the
-    // cache entry's lifetime.
     const toolName = chunk.name;
     const bashCommand = bashCommandFromToolUse(chunk);
     registerHookCallback(chunk.id, {
@@ -344,10 +339,6 @@ function handleToolResultChunk(
     return [];
   }
 
-  // The tool_use is fully resolved now — drop it so a long-running session
-  // doesn't retain every tool call for its whole lifetime. Everything below uses
-  // the captured `toolUse` local, and the PostToolUse hook closes over the tool
-  // name/bash command, so pruning here is safe regardless of hook/result order.
   delete ctx.toolUseCache[chunk.tool_use_id];
 
   if (
@@ -779,22 +770,12 @@ export async function handleSystemMessage(
       break;
     }
     case "mirror_error":
-      // The SDK failed to persist session history (append rejected/timed out
-      // after retry) — potential data loss on resume the user should know about
-      // rather than a silent gap. Log it; no user-facing chunk.
       logger.error(
         `Session ${sessionId}: failed to persist history: ${message.error}`,
       );
       break;
     case "permission_denied": {
-      // A tool call was auto-denied (by a rule, the classifier, dontAsk mode,
-      // etc.) before running. The tool_use block was already emitted as a
-      // tool_call, so mark it failed with the rejection reason — otherwise the
-      // client shows a tool call that silently never resolves.
       const reason = message.decision_reason ?? message.message;
-      // Route by the ACP session id (context) — the original tool_call was
-      // emitted under it, so the failed update must match or the client drops it
-      // and the tool call hangs unresolved.
       await client.sessionUpdate({
         sessionId,
         update: {
@@ -1006,10 +987,6 @@ const LOCAL_COMMAND_MARKERS = [
   "local-command-stderr",
 ].map((tag) => ({ open: `<${tag}>`, close: `</${tag}>` }));
 
-// Single-pass scanner that removes each `<tag>…</tag>` marker (matching the
-// nearest closing tag of the same name, like a lazy regex would) without the
-// catastrophic-backtracking risk of `[\s\S]*?` over pathological input.
-// Exported for unit testing.
 export function stripMarkerTags(text: string): string {
   const dead = new Set<string>();
   let result = "";
@@ -1027,9 +1004,6 @@ export function stripMarkerTags(text: string): string {
           i = copiedUpTo = end + marker.close.length;
           continue;
         }
-        // No closing marker remains anywhere ahead, and `indexOf` only ever
-        // searches forward from here on, so stop treating this tag as an
-        // opener — that avoids rescanning the tail for it on every match.
         dead.add(marker.open);
       }
     }
diff --git a/packages/agent/src/adapters/claude/session/models.test.ts b/packages/agent/src/adapters/claude/session/models.test.ts
index bbb0d67869..39b812e40d 100644
--- a/packages/agent/src/adapters/claude/session/models.test.ts
+++ b/packages/agent/src/adapters/claude/session/models.test.ts
@@ -134,8 +134,6 @@ describe("resolveModelPreference", () => {
   });
 
   it("does not inherit a cross-family match from the context hint alone", () => {
-    // `opus[1m]` must not resolve to a sonnet entry purely because both share
-    // the "1m" hint token, with no real family token matching (#731).
     const sonnetOnly = [
       { value: "claude-sonnet-4-6", name: "Claude Sonnet 4.6 (1M context)" },
     ];
@@ -143,8 +141,6 @@ describe("resolveModelPreference", () => {
   });
 
   it("resolves a hinted alias to the right family when a family token matches", () => {
-    // Both entries carry the "1m" hint; the "opus" token must break the tie so
-    // the hint alone can't pull the match onto sonnet.
     const withHints = [
       { value: "claude-opus-4-8", name: "Claude Opus 4.8 (1M context)" },
       { value: "claude-sonnet-4-6", name: "Claude Sonnet 4.6 (1M context)" },
diff --git a/packages/agent/src/adapters/claude/session/models.ts b/packages/agent/src/adapters/claude/session/models.ts
index 58bc797b4d..2d87eaa8d8 100644
--- a/packages/agent/src/adapters/claude/session/models.ts
+++ b/packages/agent/src/adapters/claude/session/models.ts
@@ -145,9 +145,6 @@ function scoreModelMatch(
       score += token === contextHint ? 3 : 1;
     }
   }
-  // A context hint alone (e.g. "1m") must not carry a match across model
-  // families: without a real family/name token also matching, "opus[1m]" would
-  // otherwise score against "sonnet[1m]" purely on the shared "1m" token.
   if (contextHint && !nonHintMatched) return 0;
   return score;
 }
diff --git a/packages/agent/src/adapters/claude/types.ts b/packages/agent/src/adapters/claude/types.ts
index 69407afdea..6246da57b8 100644
--- a/packages/agent/src/adapters/claude/types.ts
+++ b/packages/agent/src/adapters/claude/types.ts
@@ -70,15 +70,7 @@ export type Session = BaseSession & {
   /** Persists across prompt() calls so SDK-reported values survive turn boundaries */
   lastContextWindowSize?: number;
   promptRunning: boolean;
-  /** Per-turn signal the active prompt loop races `query.next()` against.
-   *  Aborted by the force-cancel backstop when the SDK wedges and never yields
-   *  after `interrupt()` (issue #680), forcing the loop to return "cancelled"
-   *  instead of hanging. Distinct from `abortController`: this only wakes the
-   *  loop; it does not touch the SDK query/subprocess. Undefined when no prompt
-   *  is actively consuming the query. */
   cancelController?: AbortController;
-  /** Pending grace-period timer that aborts `cancelController`. Cleared when the
-   *  loop returns normally so the backstop never fires after a clean cancel. */
   forceCancelTimer?: ReturnType<typeof setTimeout>;
   pendingMessages: Map<string, PendingMessage>;
   nextPendingOrder: number;
diff --git a/packages/agent/src/adapters/codex/models.ts b/packages/agent/src/adapters/codex/models.ts
index 054db3932f..b0598ddfa7 100644
--- a/packages/agent/src/adapters/codex/models.ts
+++ b/packages/agent/src/adapters/codex/models.ts
@@ -25,9 +25,6 @@ export function formatCodexModelName(value: string): string {
   return value.toLowerCase();
 }
 
-/** Derive the current model id from the "model" config option's currentValue.
- *  Replaces the legacy `response.models.currentModelId` lookup that ACP SDK
- *  0.25.0 removed (model selection moved entirely into config options). */
 export function modelIdFromConfigOptions(
   configOptions: SessionConfigOption[] | null | undefined,
 ): string | undefined {