From 3ca49067c3e34c56bbc4477eaa053afb9405e548 Mon Sep 17 00:00:00 2001 From: Charles Vien Date: Sat, 6 Jun 2026 16:07:11 -0700 Subject: [PATCH 1/4] update claude adapter with upstream --- packages/agent/package.json | 4 +- packages/agent/src/adapters/claude/SKILL.md | 164 +++++++++ .../agent/src/adapters/claude/UPSTREAM.md | 70 +++- .../agent/src/adapters/claude/claude-agent.ts | 315 ++++++++++++++---- .../adapters/claude/conversion/sdk-to-acp.ts | 138 ++++++-- .../src/adapters/claude/session/models.ts | 6 + packages/agent/src/adapters/claude/types.ts | 10 + .../agent/src/adapters/codex/codex-agent.ts | 8 +- packages/agent/src/adapters/codex/models.ts | 12 + packages/agent/src/test/mocks/claude-sdk.ts | 1 + pnpm-lock.yaml | 108 +++++- 11 files changed, 735 insertions(+), 101 deletions(-) create mode 100644 packages/agent/src/adapters/claude/SKILL.md diff --git a/packages/agent/package.json b/packages/agent/package.json index 5b20023755..1680eb6290 100644 --- a/packages/agent/package.json +++ b/packages/agent/package.json @@ -128,8 +128,8 @@ "vitest": "^2.1.8" }, "dependencies": { - "@agentclientprotocol/sdk": "0.22.1", - "@anthropic-ai/claude-agent-sdk": "0.3.156", + "@agentclientprotocol/sdk": "0.25.0", + "@anthropic-ai/claude-agent-sdk": "0.3.165", "@anthropic-ai/sdk": "0.100.1", "@hono/node-server": "^1.19.9", "@opentelemetry/api-logs": "^0.208.0", diff --git a/packages/agent/src/adapters/claude/SKILL.md b/packages/agent/src/adapters/claude/SKILL.md new file mode 100644 index 0000000000..ce8ab70e5b --- /dev/null +++ b/packages/agent/src/adapters/claude/SKILL.md @@ -0,0 +1,164 @@ +--- +name: upgrade-claude-adapter +description: >- + Sync this fork of @anthropic-ai/claude-agent-acp (packages/agent/src/adapters/claude) + with a newer upstream release: bump the claude-agent-sdk / @agentclientprotocol/sdk, + port upstream bug fixes and new SDK message handling, preserve the fork's divergences, + verify, and update UPSTREAM.md. 
Use when asked to "upgrade/sync the claude adapter", + "bump the agent SDK", or "port upstream claude-agent-acp changes". +--- + +# Upgrade the Claude ACP adapter (upstream sync) + +This is a runbook for syncing our **fork** of `@anthropic-ai/claude-agent-acp` (the upstream +Zed/agentclientprotocol ACP agent) that lives in `packages/agent/src/adapters/claude/` with a newer +upstream release. The fork is heavily diverged. The job is to port the *valuable* upstream changes +(SDK bumps, bug fixes, new SDK-message handling) while preserving every intentional divergence — not +to make the fork identical to upstream. + +`UPSTREAM.md` (this directory) is the source of truth for the **fork point**, **last-synced +version/commit**, the **file mapping**, the **PostHog-only code**, and the **intentional +divergences**. Read it first, update it last. + +> This file is a runbook, not an auto-registered slash command. Invoke it by telling Claude to +> "follow the upgrade skill in the claude adapter dir." Move it to `.claude/skills/<name>/SKILL.md` +> if you ever want it runnable as `/<name>`. + +## Inputs you need before starting + +1. **Upstream source checkout** — a local git clone of `github.com/agentclientprotocol/claude-agent-acp`. + You need its history to diff. If the user hasn't given the path, **ask for it** (it's usually + somewhere like `~/Cloud/claude-agent-acp`). Do not guess. +2. **This repo** — the fork under `packages/agent/`. + +## Process + +### 0. Orient (read, don't write) + +- Read `UPSTREAM.md`. Note **Last sync** (commit + version), the pinned **SDK** versions, the + **File Mapping**, **PostHog Code-Only Code (Do Not Sync)**, and **Intentional Divergences**. +- In the upstream checkout, list the change set since the last sync and skim the changelog: + - `git -C <upstream> log --oneline <last-sync-sha>..HEAD` + - `git -C <upstream> show <ref>:CHANGELOG.md` (or just read `CHANGELOG.md`) +- Confirm the new target version + HEAD sha and the target SDK versions from the upstream + `package.json`. 
+ +### 1. Triage every commit + +Bucket each commit since the last sync: + +- **Port** — bug fixes and new feature / SDK-message handling that are *not* in the PostHog-only + list and don't fight a divergence. +- **Dep bump** — record the target SDK versions; the diff tells you if code changes ride along. +- **Skip** — `chore(main): release …`, `actions/* ` CI bumps, pure dependabot **dev**-dep bumps, and + anything matching the PostHog-only / divergence lists. + +Read intent from source diffs (exclude tests + JSON first): + +``` +git -C <upstream> show <sha> -- src/ ':(exclude)src/tests/*' ':(exclude)*.json' +``` + +A dependabot SDK-bump commit often *also* carries real code (new message handling). Don't assume +"deps" == "no code". + +### 2. Map upstream → fork + +Upstream is one large `src/acp-agent.ts`; our fork is split. Use the File Mapping in `UPSTREAM.md`. +Rough guide: + +| Upstream | Fork | +| --- | --- | +| `acp-agent.ts` prompt loop, lifecycle, cancel | `claude-agent.ts` | +| inline message/stream/result/system conversion | `conversion/sdk-to-acp.ts` | +| inline prompt→SDK conversion | `conversion/acp-to-sdk.ts` | +| `tools.ts` (tool_use→ACP, PostToolUse hook) | `conversion/tool-use-to-acp.ts`, `hooks.ts` | +| model alias resolution | `session/models.ts`, `session/model-config.ts` | +| options / system prompt | `session/options.ts` | +| permissions | `permissions/*` | + +For each upstream change, `rg` the fork for the touched symbol first — the fork usually already has a +diverged version of it, so you're editing, not adding. + +### 3. Bump dependencies + +In `packages/agent/package.json`, set `@anthropic-ai/claude-agent-sdk`, `@agentclientprotocol/sdk`, +and `@anthropic-ai/sdk` to the upstream `package.json` versions, then `pnpm install` from the repo +root. (`packages/shared` pins its own older `@agentclientprotocol/sdk`; leave it unless a +cross-package type error forces a bump.) + +### 4. Find the breaking-change surface + +Run `pnpm --filter agent typecheck`. 
The errors are your ACP/SDK breaking-change list. Gotchas seen +in past syncs: + +- **The ACP SDK ships name-mangled generated types.** `dist/schema/*.gen.d.ts` shows enum literals as + `n` (e.g. `StopReason = "…" | "n" | "cancelled"`). Don't trust grep there. Read the hand-written + `dist/acp.d.ts`, or download the exact target to inspect cleanly: + ``` + cd /tmp && npm pack @agentclientprotocol/sdk@<version> && tar xzf *.tgz + rg -n "type StopReason|deleteSession|SessionModelState" package/dist/schema/types.gen.d.ts package/dist/acp.d.ts + ``` +- **`node -e "require('<pkg>/package.json')"` may fail** on the SDKs (exports map blocks the subpath). + Read `node_modules/<pkg>/package.json` directly for the installed version. +- **An ACP SDK bump can break code outside the claude adapter.** The whole `packages/agent` package + must typecheck — expect to also fix `adapters/codex/*` and `server/agent-server.ts`. Keep those + fixes minimal and behavior-preserving (e.g. when ACP removed the `models` response field, the codex + adapter derived the model id from `configOptions` instead). + +### 5. Port in phases — bug fixes first, then features + +For each ported change: + +- **Preserve divergences** (see `UPSTREAM.md` → Intentional Divergences + PostHog-only). The big ones: + single-session `this.session` (not `this.sessions[id]`); `interruptReason` on cancel; gateway models + via `fetchGatewayModels` (not `initializationResult.models`); `_posthog/*` ext notifications; + the "Unsupported slash command" gate on `knownSlashCommands`; `SYSTEM_REMINDER` stripping; plan / + questions / MCP-metadata machinery. +- **New SDK `system` subtypes are safe by default.** `handleSystemMessage` ends in `default: break`, + and the prompt-loop top-level `switch (message.type)` only `unreachable()`s unknown top-level + *types*. So a new subtype won't crash the loop — port real handling only where there's user value + (e.g. 
`permission_denied` → failed tool_call, `tool_progress` → in_progress, `commands_changed` → + available_commands_update, `mirror_error` → log). +- When upstream reads new fields (`stop_details`, `getContextUsage`, `thinking`), confirm the + installed SDK `.d.ts` actually has them before porting. Skip ports the fork can't use (e.g. the + fork doesn't read `MAX_THINKING_TOKENS`, so upstream's `resolveThinkingConfig` was N/A). +- Typecheck after each logical group, not just at the end. + +### 6. Verify (all of it) + +``` +pnpm --filter agent typecheck +pnpm --filter agent build +npx biome check --write # biome is the formatter/linter, not prettier/eslint +pnpm typecheck # whole repo: confirms apps/code compiles vs the new ACP SDK +pnpm --filter agent test +pnpm --filter code test +``` + +- The `apps/code` renderer unit tests `analytics.test.ts` and `panelLayoutStore.test.ts` are **flaky** + — they sometimes throw in `getElectronTRPC` / electron-trpc `ipcLink` depending on test ordering. If + they fail, re-run; a clean rerun (or `git stash` + run on the clean tree) passing confirms it's the + known flake, not your change. + +### 7. Update `UPSTREAM.md` (do this last) + +- Bump **Last sync** (version + HEAD sha + date) and the pinned **SDK** versions. +- Add `## Changes Ported in v<version> Sync` (one bullet per change, with PR # and short sha) and + `## Skipped in v<version> Sync` (with the reason for each skip). +- If a port made a former divergence match upstream, move it out of the Intentional Divergences table. + +## Fork facts worth remembering + +- **Single session.** The agent owns one `this.session` (from `BaseAcpAgent`), not a `sessions` map. + Upstream's per-session refactors usually collapse to "just use `this.session`". +- **Renderer uses config options only.** Model/mode/effort selection is `SessionConfigOption` end to + end; the renderer never reads the legacy `models` response field or calls `unstable_setSessionModel`. 
+ That's why upstream's ACP-0.24/0.25 model-state removals are safe to follow. +- **`toolUseCache` is never cleared** in the fork (created once in the constructor), so long sessions + accumulate — keep the prune-at-tool_result behavior, and make any PostToolUse hook close over the + data it needs rather than re-reading the cache. +- **Conversion is split out.** `claude-agent.ts` calls `handleSystemMessage` / `handleStreamEvent` / + `handleResultMessage` / `handleUserAssistantMessage` from `conversion/sdk-to-acp.ts`. Upstream + inlines all of this in `acp-agent.ts`. +- **Don't commit or push** unless the user explicitly asks. Leave the work on the current branch. diff --git a/packages/agent/src/adapters/claude/UPSTREAM.md b/packages/agent/src/adapters/claude/UPSTREAM.md index 46ab27ccc3..2c694f9b30 100644 --- a/packages/agent/src/adapters/claude/UPSTREAM.md +++ b/packages/agent/src/adapters/claude/UPSTREAM.md @@ -5,8 +5,8 @@ Fork of `@anthropic-ai/claude-agent-acp`. Upstream repo: https://github.com/anth ## Fork Point - **Forked**: v0.10.9, commit `5411e0f4`, Dec 2 2025 -- **Last sync**: v0.39.0, commit `51a370e`, May 29 2026 -- **SDK**: `@anthropic-ai/claude-agent-sdk` 0.3.156, `@agentclientprotocol/sdk` 0.22.1, `@anthropic-ai/sdk` 0.100.1 +- **Last sync**: v0.42.0, commit `0dbccf5`, Jun 5 2026 +- **SDK**: `@anthropic-ai/claude-agent-sdk` 0.3.165, `@agentclientprotocol/sdk` 0.25.0, `@anthropic-ai/sdk` 0.100.1 ## File Mapping @@ -55,6 +55,70 @@ Fork of `@anthropic-ai/claude-agent-acp`. Upstream repo: https://github.com/anth | Shutdown on ACP close | Process exits | No standalone process | Agent is embedded in server | | Unsupported slash commands | Loops silently on early idle | Emits "Unsupported slash command" chunk, gated on `initializationResult().commands` so plugin/skill commands (e.g. `/skills-store`) whose echoes use a fresh uuid are not false-flagged | The SDK consumes some slash commands without producing output (e.g. 
`/plugin` in non-interactive mode); without this we hang. The known-commands gate avoids racing plugin/skill loads where idle can arrive before the transformed user-message echo. | +## Changes Ported in v0.42.0 Sync + +- **SDK bumps**: claude-agent-sdk 0.3.156 -> 0.3.165, ACP SDK 0.22.1 -> 0.25.0, anthropic SDK + unchanged at 0.100.1. +- **ACP SDK 0.25.0 model-state removal** (#737, 32175b8): 0.24.0 deleted `SessionModelState`, + `SetSessionModelRequest/Response`, `ModelInfo`, and the `models` field on every session lifecycle + response; model selection moved entirely into `SessionConfigOption` (category "model"). Our fork + already drove model selection through config options, so this just removed the vestigial legacy + path: dropped those imports, the `unstable_setSessionModel` method, and the `models` build/return + in `createSession` / `getExistingSessionState` / `loadSession`. The codex adapter's + `response.models?.currentModelId` read was replaced with a `modelIdFromConfigOptions()` helper + (codex `models.ts`). Verified the renderer reads only `configOptions`, never `.models`. +- **ACP SDK 0.25.0 `deleteSession` rename** (#753, 0dbccf5): No-op for us — our fork never + implemented `unstable_deleteSession`, and the method is optional on the `Agent` interface. +- **Refusal handling** (SDK 0.3.162, #740, add7e31): Capture the refused assistant message's + `stop_details.explanation`; the terminal `result` (stop_reason "refusal") emits it as an + `agent_message_chunk` and returns ACP's dedicated `refusal` stop reason instead of letting the + `is_error` path surface it as an internal error. +- **commands_changed** (SDK 0.3.162, #740, add7e31): New `system` subtype handled inline in the + prompt loop — pushes `available_commands_update` straight from `message.commands` (rather than + re-querying `supportedCommands()`, which only ever reflects the init list) and refreshes + `session.knownSlashCommands` so the unsupported-slash-command gate stays accurate. 
+- **Optimized marker stripping** (#738, 895422c): `stripMarkerTags` rewritten as a single-pass + scanner in `conversion/sdk-to-acp.ts`, removing the `[\s\S]*?` backtracking risk on pathological + input. +- **Force-cancel backstop** (#742, cffea4b): Added per-turn `cancelController` + `forceCancelTimer` + on `Session` and a mutable `forceCancelGraceMs` (30s) on the agent. The prompt loop races + `query.next()` against the cancel signal; `interrupt()` arms a grace-period timer that aborts it, + so a wedged SDK that never yields after interrupt (issue #680, e.g. a blocking `TaskOutput` poll) + returns "cancelled" instead of hanging. Adapted to our single-session model; preserves the + `interruptReason` meta on the forced return. +- **Cross-family model match fix** (#731, f4704c1): `scoreModelMatch` (session/models.ts) now + returns 0 when only the context-hint token matched, so `claude-opus-4-6[1m]` can't resolve to + `sonnet[1m]` purely on the shared "1m" token. Layers on top of our existing + `modelVersionsCompatible` filter. +- **compact_boundary getContextUsage** (#747, 398f763): compact_boundary now fetches the + authoritative post-compaction `used` via `query.getContextUsage()` (helper + `fetchContextUsedTokens`), falling back to 0 on failure. `size` still comes from the + gateway-learned window (getContextUsage under-reports 1M windows). Our fork-specific + `promptReplayed = true` side effect is preserved. +- **New SDK message handling** (#747, 398f763): `tool_progress` -> `tool_call_update` `in_progress` + with `elapsedTimeSeconds`; `rate_limit_event` -> `usage_update` carrying `_claude/rateLimit`; + `permission_denied` -> `tool_call_update` `failed` (in `handleSystemMessage`); `mirror_error` -> + logged (history-persistence failure / potential data loss on resume). +- **Prune tool cache** (#748, ec14211): `toolUseCache` was never cleared in our fork (set once in + the constructor, accumulated for the whole agent lifetime). Now pruned at `tool_result` time. 
The + PostToolUse hook closes over the tool name + bash command instead of re-reading the cache, so the + Edit/Write diff survives any hook/result reordering. We did NOT adopt upstream's per-session cache + move (we are single-session) or its `backgroundTerminals` deletion. +- **Test mock**: added `reloadSkills` to the SDK `MockQuery` (new method on the SDK `Query` + interface in 0.3.165). + +## Skipped in v0.42.0 Sync + +- **Message ids** (#750, 18516a3): Upstream records an ACP `messageId` -> SDK uuid map for a future + fork/rewind feature, explicitly "NOT READ YET". We don't consume it, it adds a `Session` field and + threads `messageId` through many `toAcpNotifications` call sites, so it is deferred until we wire + up rewind. (ACP 0.25.0 does expose the `messageId` field, so the port is unblocked when wanted.) +- **resolveThinkingConfig** (#747, 398f763): Upstream maps the legacy `MAX_THINKING_TOKENS` env var + to the SDK's new `thinking` option. Our fork never reads `MAX_THINKING_TOKENS` (model setup is + gateway-driven via `session/options.ts`), so there is nothing to migrate. +- **Pure dep-group / release / CI bumps** (#736, #741, #745, #728, #743): No fork-relevant code + beyond the SDK versions captured above. + ## Changes Ported in v0.30.0 Sync - **SDK bumps**: claude-agent-sdk 0.2.112 -> 0.2.114, ACP SDK 0.16.1 -> 0.19.0, anthropic SDK -> 0.89.0 @@ -165,7 +229,7 @@ Fork of `@anthropic-ai/claude-agent-acp`. Upstream repo: https://github.com/anth ## Next Sync -1. Check upstream changelog since v0.37.0 +1. Check upstream changelog since v0.42.0 2. Diff upstream source against PostHog Code using the file mapping above 3. Port in phases: bug fixes first, then features 4. 
After each phase: `pnpm --filter agent typecheck && pnpm --filter agent build && pnpm lint` diff --git a/packages/agent/src/adapters/claude/claude-agent.ts b/packages/agent/src/adapters/claude/claude-agent.ts index 95d2afb286..217f06e278 100644 --- a/packages/agent/src/adapters/claude/claude-agent.ts +++ b/packages/agent/src/adapters/claude/claude-agent.ts @@ -3,7 +3,6 @@ import * as fs from "node:fs"; import * as os from "node:os"; import * as path from "node:path"; import { - type ModelInfo as AcpModelInfo, type AgentSideConnection, type ClientCapabilities, type ForkSessionRequest, @@ -24,12 +23,9 @@ import { type SessionConfigOption, type SessionConfigOptionCategory, type SessionConfigSelectOption, - type SessionModelState, type SessionModeState, type SetSessionConfigOptionRequest, type SetSessionConfigOptionResponse, - type SetSessionModelRequest, - type SetSessionModelResponse, type SetSessionModeRequest, type SetSessionModeResponse, type Usage, @@ -42,6 +38,7 @@ import { type Options, type Query, query, + type SDKMessage, type SDKUserMessage, type SlashCommand, } from "@anthropic-ai/claude-agent-sdk"; @@ -130,11 +127,24 @@ import type { NewSessionMeta, SDKMessageFilter, Session, + ToolUpdateMeta, ToolUseCache, ToolUseStreamCache, } from "./types"; const SESSION_VALIDATION_TIMEOUT_MS = 30_000; + +/** Grace period after `session/cancel` before the adapter forces a wedged + * prompt loop to return "cancelled". `query.interrupt()` normally makes the SDK + * yield a trailing idle within milliseconds and the loop returns through its + * usual path, so this timer is armed and cleared (never fired) on healthy + * cancels. It only trips when the SDK is genuinely wedged (e.g. a + * `TaskOutput { block: true }` poll against a hung background task — issue + * #680) and never yields. Deliberately loose: an "obviously stuck" ceiling, + * not a guess at interrupt latency, so it can't pre-empt a slow-but-healthy + * interrupt. 
*/ +const DEFAULT_FORCE_CANCEL_GRACE_MS = 30_000; + const MAX_TITLE_LENGTH = 256; const LOCAL_ONLY_COMMANDS = new Set(["/context", "/heapdump", "/extra-usage"]); @@ -189,6 +199,28 @@ function shouldEmitRawMessage( ); } +/** Fetch the SDK's authoritative context-window occupancy via the + * `getContextUsage` control request. Unlike the per-message API usage numbers + * (which only count message tokens), `totalTokens` includes the system prompt, + * tool schemas, MCP tools, and memory-file overhead — the real occupancy the + * user sees. Returns `null` on any control-request failure. + * + * We deliberately do NOT use this response's window fields for `size`: they + * have been observed to under-report extended (1M) context windows, so the + * window keeps coming from the gateway / model heuristic. */ +async function fetchContextUsedTokens( + sdkQuery: Query, + logger: Logger, +): Promise { + try { + const usage = await sdkQuery.getContextUsage(); + return usage.totalTokens; + } catch (error) { + logger.error("Failed to fetch context usage from SDK:", error); + return null; + } +} + export interface ClaudeAcpAgentOptions { onProcessSpawned?: (info: ProcessSpawnedInfo) => void; onProcessExited?: (pid: number) => void; @@ -204,6 +236,10 @@ export class ClaudeAcpAgent extends BaseAcpAgent { toolUseStreamCache: ToolUseStreamCache; backgroundTerminals: { [key: string]: BackgroundTerminal } = {}; clientCapabilities?: ClientCapabilities; + /** Grace period before a `session/cancel` forces a wedged prompt loop to + * return "cancelled". See {@link DEFAULT_FORCE_CANCEL_GRACE_MS}. Mutable so + * tests can shrink it. 
*/ + forceCancelGraceMs: number = DEFAULT_FORCE_CANCEL_GRACE_MS; private options?: ClaudeAcpAgentOptions; private enrichment?: Enrichment; private enrichedReadCache: EnrichedReadCache = new Map(); @@ -357,7 +393,6 @@ export class ClaudeAcpAgent extends BaseAcpAgent { return { modes: response.modes, - models: response.models, configOptions: response.configOptions, }; } @@ -446,9 +481,24 @@ export class ClaudeAcpAgent extends BaseAcpAgent { await this.broadcastUserMessage(params); this.session.promptRunning = true; + // Wake-up channel so cancel() can force this loop to return "cancelled" even + // when query.next() is wedged and never yields again (issue #680). The + // force-cancel backstop armed in interrupt() aborts this controller. + const cancelController = new AbortController(); + this.session.cancelController = cancelController; + const cancelled = new Promise((resolve) => { + cancelController.signal.addEventListener("abort", () => resolve(), { + once: true, + }); + }); let handedOff = false; let errored = false; let lastAssistantTotalUsage: number | null = null; + // When a streaming classifier refuses a turn, the assistant message carries + // stop_reason "refusal" and structured stop_details. We capture the + // human-readable explanation here so the terminal `result` can surface it to + // the user (the refused assistant message itself usually has no content). + let lastRefusalExplanation: string | null = null; let lastStreamUsage = { input_tokens: 0, output_tokens: 0, @@ -494,7 +544,25 @@ export class ClaudeAcpAgent extends BaseAcpAgent { try { while (true) { - const { value: message, done } = await this.session.query.next(); + const nextMessage = this.session.query.next(); + const next = await Promise.race([nextMessage, cancelled]); + if (cancelController.signal.aborted) { + // The SDK never yielded after interrupt() (e.g. a wedged TaskOutput + // block). 
Abandon the in-flight next() — swallowing any later + // rejection so it can't surface as an unhandled rejection — and honor + // the cancel per the ACP contract. + void nextMessage.catch(() => {}); + return { + stopReason: "cancelled", + _meta: this.session.interruptReason + ? { interruptReason: this.session.interruptReason } + : undefined, + }; + } + const { value: message, done } = next as IteratorResult< + SDKMessage, + void + >; if (done || !message) { if (this.session.cancelled) { @@ -521,19 +589,51 @@ export class ClaudeAcpAgent extends BaseAcpAgent { switch (message.type) { case "system": if (message.subtype === "compact_boundary") { - // Send used:0 immediately so the client doesn't keep showing - // the stale pre-compaction context size until the next turn. - lastAssistantTotalUsage = 0; + // Refresh the displayed usage immediately so the client doesn't + // keep showing the stale pre-compaction size right after the user + // sees "Compacting completed". Prefer the SDK's authoritative + // post-compaction `used` via getContextUsage — it reflects the + // real retained context (system prompt + tools + surviving + // messages), which per-message API usage can't give us until the + // next turn. Fall back to 0 on failure: directionally correct + // (context just dropped) and replaced within seconds by the next + // result. `size` keeps coming from the gateway-learned window + // (getContextUsage under-reports extended 1M windows). + const usedTokens = await fetchContextUsedTokens( + this.session.query, + this.logger, + ); + lastAssistantTotalUsage = usedTokens ?? 0; promptReplayed = true; await this.client.sessionUpdate({ sessionId: params.sessionId, update: { sessionUpdate: "usage_update", - used: 0, + used: lastAssistantTotalUsage, size: lastContextWindowSize, }, }); } + if (message.subtype === "commands_changed") { + // Mid-session command-list change (e.g. skills discovered as the + // agent works in a subdirectory). 
Push the new list straight from + // the message rather than re-querying (supportedCommands() only + // ever reflects the init list), and refresh the known-commands + // gate used to flag unsupported slash commands. + this.session.knownSlashCommands = collectKnownSlashCommands( + message.commands, + ); + await this.client.sessionUpdate({ + sessionId: params.sessionId, + update: { + sessionUpdate: "available_commands_update", + availableCommands: getAvailableSlashCommands( + message.commands, + ), + }, + }); + break; + } if (message.subtype === "local_command_output") { promptReplayed = true; } @@ -748,6 +848,27 @@ export class ClaudeAcpAgent extends BaseAcpAgent { this.session.accumulatedUsage.cachedWriteTokens, }; + // A refusal can arrive on any result subtype (and may even set + // is_error), so handle it before handleResultMessage — otherwise the + // is_error path would surface it as an internal error. The refused + // assistant message carries no visible content, so surface the + // classifier's explanation (when available) and report ACP's + // dedicated `refusal` stop reason. + if ( + (message as { stop_reason?: string }).stop_reason === "refusal" + ) { + if (lastRefusalExplanation) { + await this.client.sessionUpdate({ + sessionId: params.sessionId, + update: { + sessionUpdate: "agent_message_chunk", + content: { type: "text", text: lastRefusalExplanation }, + }, + }); + } + return { stopReason: "refusal", usage }; + } + const result = handleResultMessage(message); if (result.error) throw result.error; @@ -865,6 +986,21 @@ export class ClaudeAcpAgent extends BaseAcpAgent { break; } + // Capture a refusal explanation from the assistant message so the + // terminal `result` can surface it (the refused message itself has + // no visible content). stop_reason/stop_details live on the inner + // Anthropic message; read them via cast like the usage block below. 
+ if (message.type === "assistant") { + const inner = message.message as unknown as { + stop_reason?: string | null; + stop_details?: { explanation?: string | null } | null; + }; + if (inner.stop_reason === "refusal") { + lastRefusalExplanation = + inner.stop_details?.explanation ?? null; + } + } + // Store latest assistant usage (excluding subagents) // Sum all token types as a proxy for post-turn context occupancy: // current turn's output will become next turn's input. @@ -908,11 +1044,46 @@ export class ClaudeAcpAgent extends BaseAcpAgent { break; } - case "tool_progress": + case "tool_progress": { + // Surface "still working" progress on a long-running tool call so + // the client can show elapsed time instead of a stalled spinner. + await this.client.sessionUpdate({ + sessionId: message.session_id, + update: { + sessionUpdate: "tool_call_update", + toolCallId: message.tool_use_id, + status: "in_progress", + _meta: { + claudeCode: { + toolName: message.tool_name, + toolResponse: { + elapsedTimeSeconds: message.elapsed_time_seconds, + }, + }, + } satisfies ToolUpdateMeta, + }, + }); + break; + } + case "rate_limit_event": { + // Re-emit the current usage carrying the subscription rate-limit + // info so the client can warn before the limit bites. 
+ if (lastAssistantTotalUsage !== null) { + await this.client.sessionUpdate({ + sessionId: message.session_id, + update: { + sessionUpdate: "usage_update", + used: lastAssistantTotalUsage, + size: lastContextWindowSize, + _meta: { "_claude/rateLimit": message.rate_limit_info }, + }, + }); + } + break; + } case "auth_status": case "tool_use_summary": case "prompt_suggestion": - case "rate_limit_event": break; default: @@ -976,6 +1147,16 @@ export class ClaudeAcpAgent extends BaseAcpAgent { } throw error; } finally { + // The loop is returning — interrupt() succeeded or the prompt finished — + // so disarm the force-cancel backstop and release the wake-up channel + // (only if we still own it; a handoff installs the next prompt's). + if (this.session.forceCancelTimer) { + clearTimeout(this.session.forceCancelTimer); + this.session.forceCancelTimer = undefined; + } + if (this.session.cancelController === cancelController) { + this.session.cancelController = undefined; + } // Drop any leftover streaming-input buffers. Normally cleared per index // on `content_block_stop`, but a cancelled or errored turn may leave // entries behind; without this they'd carry over into the next turn @@ -1014,6 +1195,31 @@ export class ClaudeAcpAgent extends BaseAcpAgent { pending.resolve(true); } this.session.pendingMessages.clear(); + + // Arm a backstop before interrupting: if a prompt is actively consuming the + // query and interrupt() doesn't make the SDK yield (e.g. a wedged TaskOutput + // block — issue #680), force the loop to return "cancelled" after the grace + // period so the pending prompt() resolves per the ACP cancellation contract + // instead of hanging forever. The loop's `finally` clears this timer when + // interrupt() works and it returns through the normal idle path, so on + // healthy cancels it is armed but never fires. 
Arm at most once per turn: + // the floor is an absolute ceiling from the first cancel, so a client that + // re-sends cancel can't keep pushing the deadline out. + if ( + this.session.promptRunning && + this.session.cancelController && + !this.session.cancelController.signal.aborted && + !this.session.forceCancelTimer + ) { + const cancelController = this.session.cancelController; + this.session.forceCancelTimer = setTimeout(() => { + this.logger.error( + `Session ${this.sessionId}: cancel floor elapsed without the SDK yielding; forcing "cancelled". The underlying query may still be wedged — a new session may be required.`, + ); + cancelController.abort(); + }, this.forceCancelGraceMs); + } + await this.session.query.interrupt(); } @@ -1148,19 +1354,6 @@ export class ClaudeAcpAgent extends BaseAcpAgent { this.deferBackgroundFetches(newQuery); } - async unstable_setSessionModel( - params: SetSessionModelRequest, - ): Promise { - await this.session.query.setModel(toSdkModelId(params.modelId)); - this.session.modelId = params.modelId; - this.session.lastContextWindowSize = this.getContextWindowForModel( - params.modelId, - ); - this.rebuildEffortConfigOption(params.modelId); - await this.updateConfigOption("model", params.modelId); - return {}; - } - async setSessionMode( params: SetSessionModeRequest, ): Promise { @@ -1309,6 +1502,38 @@ export class ClaudeAcpAgent extends BaseAcpAgent { } } + /** + * Ensures the requested `cwd` is an absolute path that points at an existing + * directory before we create a session. Throws an `invalidParams` error with + * an actionable message so clients can surface it to the user instead of + * failing later with an opaque "native binary failed to launch" SDK error. 
+ */ + private async validateCwd(cwd: string): Promise { + if (!path.isAbsolute(cwd)) { + throw RequestError.invalidParams( + { cwd }, + `\`cwd\` must be an absolute path, but received: ${cwd}`, + ); + } + + let stats: fs.Stats; + try { + stats = await fs.promises.stat(cwd); + } catch { + throw RequestError.invalidParams( + { cwd }, + `\`cwd\` does not exist on the machine running the agent: ${cwd}`, + ); + } + + if (!stats.isDirectory()) { + throw RequestError.invalidParams( + { cwd }, + `\`cwd\` is not a directory: ${cwd}`, + ); + } + } + private async createSession( params: { cwd: string; @@ -1325,6 +1550,11 @@ export class ClaudeAcpAgent extends BaseAcpAgent { const { cwd } = params; const { resume, forkSession } = creationOpts; + // Validate `cwd` up front. The ACP spec requires an absolute path, and the + // directory must exist on the machine running the agent. Without this the + // failure only surfaces later as a confusing SDK launch error (issue #749). + await this.validateCwd(cwd); + const isResume = !!resume; const meta = params._meta as NewSessionMeta | undefined; @@ -1606,17 +1836,6 @@ export class ClaudeAcpAgent extends BaseAcpAgent { })), }; - const models: SessionModelState = { - currentModelId: resolvedModelId, - availableModels: modelOptions.options.map( - (opt): AcpModelInfo => ({ - modelId: opt.value, - name: opt.name, - description: opt.description, - }), - ), - }; - const configOptions = this.buildConfigOptions( permissionMode, modelOptions, @@ -1628,7 +1847,7 @@ export class ClaudeAcpAgent extends BaseAcpAgent { this.deferBackgroundFetches(q); } - return { sessionId, modes, models, configOptions }; + return { sessionId, modes, configOptions }; } private createCanUseTool( @@ -1713,31 +1932,9 @@ export class ClaudeAcpAgent extends BaseAcpAgent { })), }; - const modelOptions = this.session.configOptions.find( - (o) => o.id === "model", - ); - const models: SessionModelState = { - currentModelId: this.session.modelId ?? 
DEFAULT_MODEL, - availableModels: - modelOptions && "options" in modelOptions - ? ( - modelOptions.options as Array<{ - value: string; - name: string; - description?: string; - }> - ).map((opt) => ({ - modelId: opt.value, - name: opt.name, - description: opt.description, - })) - : [], - }; - return { sessionId, modes, - models, configOptions: this.session.configOptions, }; } diff --git a/packages/agent/src/adapters/claude/conversion/sdk-to-acp.ts b/packages/agent/src/adapters/claude/conversion/sdk-to-acp.ts index 633aa20ae9..807c39f990 100644 --- a/packages/agent/src/adapters/claude/conversion/sdk-to-acp.ts +++ b/packages/agent/src/adapters/claude/conversion/sdk-to-acp.ts @@ -190,34 +190,34 @@ function handleToolUseChunk( } if (!alreadyCached && ctx.registerHooks !== false) { + // Capture what the hook needs in the closure rather than re-reading the + // cache when it fires. The cache entry is pruned at tool_result time, and a + // PostToolUse hook can fire after that, so closing over the name and bash + // command keeps the diff working without depending on (or pinning) the + // cache entry's lifetime. + const toolName = chunk.name; + const bashCommand = bashCommandFromToolUse(chunk); registerHookCallback(chunk.id, { onPostToolUseHook: async (toolUseId, _toolInput, toolResponse) => { - const toolUse = ctx.toolUseCache[toolUseId]; - if (toolUse) { - const editUpdate = - toolUse.name === "Edit" || toolUse.name === "Write" - ? toolUpdateFromEditToolResponse(toolResponse) - : null; - - await ctx.client.sessionUpdate({ - sessionId: ctx.sessionId, - update: { - _meta: toolMeta( - toolUse.name, - toolResponse, - ctx.parentToolCallId, - bashCommandFromToolUse(toolUse), - ), - toolCallId: toolUseId, - sessionUpdate: "tool_call_update", - ...(editUpdate ? editUpdate : {}), - }, - }); - } else { - ctx.logger.error( - `Got a tool response for tool use that wasn't tracked: ${toolUseId}`, - ); - } + const editUpdate = + toolName === "Edit" || toolName === "Write" + ? 
toolUpdateFromEditToolResponse(toolResponse) + : null; + + await ctx.client.sessionUpdate({ + sessionId: ctx.sessionId, + update: { + _meta: toolMeta( + toolName, + toolResponse, + ctx.parentToolCallId, + bashCommand, + ), + toolCallId: toolUseId, + sessionUpdate: "tool_call_update", + ...(editUpdate ? editUpdate : {}), + }, + }); }, }); } @@ -344,6 +344,12 @@ function handleToolResultChunk( return []; } + // The tool_use is fully resolved now — drop it so a long-running session + // doesn't retain every tool call for its whole lifetime. Everything below uses + // the captured `toolUse` local, and the PostToolUse hook closes over the tool + // name/bash command, so pruning here is safe regardless of hook/result order. + delete ctx.toolUseCache[chunk.tool_use_id]; + if ( toolUse.name === "TaskCreate" || toolUse.name === "TaskUpdate" || @@ -772,6 +778,46 @@ export async function handleSystemMessage( }); break; } + case "mirror_error": + // The SDK failed to persist session history (append rejected/timed out + // after retry) — potential data loss on resume the user should know about + // rather than a silent gap. Log it; no user-facing chunk. + logger.error( + `Session ${sessionId}: failed to persist history: ${message.error}`, + ); + break; + case "permission_denied": { + // A tool call was auto-denied (by a rule, the classifier, dontAsk mode, + // etc.) before running. The tool_use block was already emitted as a + // tool_call, so mark it failed with the rejection reason — otherwise the + // client shows a tool call that silently never resolves. + const reason = message.decision_reason ?? 
message.message; + await client.sessionUpdate({ + sessionId: message.session_id, + update: { + sessionUpdate: "tool_call_update", + toolCallId: message.tool_use_id, + status: "failed", + content: [ + { + type: "content", + content: { type: "text", text: `Permission denied: ${reason}` }, + }, + ], + _meta: { + claudeCode: { + toolName: message.tool_name, + toolResponse: { + decisionReasonType: message.decision_reason_type, + decisionReason: message.decision_reason, + message: message.message, + }, + }, + } satisfies ToolUpdateMeta, + }, + }); + break; + } default: break; } @@ -949,11 +995,43 @@ function isSdkLocalCommandMessage(content: AnthropicMessageContent): boolean { // that the CLI uses for its own display. The live prompt loop must strip them // so they don't leak into the UI, while preserving any real prose mixed in // alongside. -const LOCAL_COMMAND_TAG_PATTERN = - /<(command-name|command-message|command-args|local-command-stdout|local-command-stderr)>[\s\S]*?<\/\1>/g; - +const LOCAL_COMMAND_MARKERS = [ + "command-name", + "command-message", + "command-args", + "local-command-stdout", + "local-command-stderr", +].map((tag) => ({ open: `<${tag}>`, close: `` })); + +// Single-pass scanner that removes each `` marker (matching the +// nearest closing tag of the same name, like a lazy regex would) without the +// catastrophic-backtracking risk of `[\s\S]*?` over pathological input. 
function stripMarkerTags(text: string): string { - return text.replace(LOCAL_COMMAND_TAG_PATTERN, ""); + const dead = new Set(); + let result = ""; + let copiedUpTo = 0; + let i = 0; + while (i < text.length) { + if (text[i] === "<") { + const marker = LOCAL_COMMAND_MARKERS.find( + (m) => !dead.has(m.open) && text.startsWith(m.open, i), + ); + if (marker) { + const end = text.indexOf(marker.close, i + marker.open.length); + if (end !== -1) { + result += text.slice(copiedUpTo, i); + i = copiedUpTo = end + marker.close.length; + continue; + } + // No closing marker remains anywhere ahead, and `indexOf` only ever + // searches forward from here on, so stop treating this tag as an + // opener — that avoids rescanning the tail for it on every match. + dead.add(marker.open); + } + } + i++; + } + return result + text.slice(copiedUpTo); } /** diff --git a/packages/agent/src/adapters/claude/session/models.ts b/packages/agent/src/adapters/claude/session/models.ts index ec2a561246..58bc797b4d 100644 --- a/packages/agent/src/adapters/claude/session/models.ts +++ b/packages/agent/src/adapters/claude/session/models.ts @@ -138,11 +138,17 @@ function scoreModelMatch( ): number { const haystack = `${model.value} ${model.name ?? ""}`.toLowerCase(); let score = 0; + let nonHintMatched = false; for (const token of tokens) { if (haystack.includes(token)) { + if (token !== contextHint) nonHintMatched = true; score += token === contextHint ? 3 : 1; } } + // A context hint alone (e.g. "1m") must not carry a match across model + // families: without a real family/name token also matching, "opus[1m]" would + // otherwise score against "sonnet[1m]" purely on the shared "1m" token. 
+ if (contextHint && !nonHintMatched) return 0; return score; } diff --git a/packages/agent/src/adapters/claude/types.ts b/packages/agent/src/adapters/claude/types.ts index da46b327f0..69407afdea 100644 --- a/packages/agent/src/adapters/claude/types.ts +++ b/packages/agent/src/adapters/claude/types.ts @@ -70,6 +70,16 @@ export type Session = BaseSession & { /** Persists across prompt() calls so SDK-reported values survive turn boundaries */ lastContextWindowSize?: number; promptRunning: boolean; + /** Per-turn signal the active prompt loop races `query.next()` against. + * Aborted by the force-cancel backstop when the SDK wedges and never yields + * after `interrupt()` (issue #680), forcing the loop to return "cancelled" + * instead of hanging. Distinct from `abortController`: this only wakes the + * loop; it does not touch the SDK query/subprocess. Undefined when no prompt + * is actively consuming the query. */ + cancelController?: AbortController; + /** Pending grace-period timer that aborts `cancelController`. Cleared when the + * loop returns normally so the backstop never fires after a clean cancel. 
*/ + forceCancelTimer?: ReturnType; pendingMessages: Map; nextPendingOrder: number; emitRawSDKMessages: boolean | SDKMessageFilter[]; diff --git a/packages/agent/src/adapters/codex/codex-agent.ts b/packages/agent/src/adapters/codex/codex-agent.ts index 5fa5829e7d..3976edd7ab 100644 --- a/packages/agent/src/adapters/codex/codex-agent.ts +++ b/packages/agent/src/adapters/codex/codex-agent.ts @@ -79,7 +79,10 @@ import { } from "../local-tools"; import { resolveTaskId } from "../session-meta"; import { createCodexClient } from "./codex-client"; -import { normalizeCodexConfigOptions } from "./models"; +import { + modelIdFromConfigOptions, + normalizeCodexConfigOptions, +} from "./models"; import { type CodexSessionState, createSessionState, @@ -421,7 +424,7 @@ export class CodexAcpAgent extends BaseAcpAgent { taskRunId: meta?.taskRunId, taskId: resolveTaskId(meta), modeId: response.modes?.currentModeId ?? "auto", - modelId: response.models?.currentModelId, + modelId: modelIdFromConfigOptions(response.configOptions), permissionMode: requestedPermissionMode, }); this.sessionId = response.sessionId; @@ -537,7 +540,6 @@ export class CodexAcpAgent extends BaseAcpAgent { return { modes: loadResponse.modes, - models: loadResponse.models, configOptions: loadResponse.configOptions, }; } diff --git a/packages/agent/src/adapters/codex/models.ts b/packages/agent/src/adapters/codex/models.ts index 3264974fc0..054db3932f 100644 --- a/packages/agent/src/adapters/codex/models.ts +++ b/packages/agent/src/adapters/codex/models.ts @@ -25,6 +25,18 @@ export function formatCodexModelName(value: string): string { return value.toLowerCase(); } +/** Derive the current model id from the "model" config option's currentValue. + * Replaces the legacy `response.models.currentModelId` lookup that ACP SDK + * 0.25.0 removed (model selection moved entirely into config options). 
*/ +export function modelIdFromConfigOptions( + configOptions: SessionConfigOption[] | null | undefined, +): string | undefined { + const modelOption = configOptions?.find((o) => o.category === "model"); + return typeof modelOption?.currentValue === "string" + ? modelOption.currentValue + : undefined; +} + export function normalizeCodexConfigOptions( configOptions: SessionConfigOption[] | null | undefined, ): SessionConfigOption[] | null | undefined { diff --git a/packages/agent/src/test/mocks/claude-sdk.ts b/packages/agent/src/test/mocks/claude-sdk.ts index e54cb05ccf..8f6228aeb2 100644 --- a/packages/agent/src/test/mocks/claude-sdk.ts +++ b/packages/agent/src/test/mocks/claude-sdk.ts @@ -104,6 +104,7 @@ export function createMockQuery( applyFlagSettings: vi.fn().mockResolvedValue(undefined), getContextUsage: vi.fn().mockResolvedValue({}), reloadPlugins: vi.fn().mockResolvedValue(undefined), + reloadSkills: vi.fn().mockResolvedValue(undefined), seedReadState: vi.fn().mockResolvedValue(undefined), readFile: vi.fn().mockResolvedValue(""), backgroundTasks: vi.fn().mockResolvedValue([]), diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index b9822d6afe..a0540d4136 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -707,11 +707,11 @@ importers: packages/agent: dependencies: '@agentclientprotocol/sdk': - specifier: 0.22.1 - version: 0.22.1(zod@4.3.6) + specifier: 0.25.0 + version: 0.25.0(zod@4.3.6) '@anthropic-ai/claude-agent-sdk': - specifier: 0.3.156 - version: 0.3.156(@anthropic-ai/sdk@0.100.1(zod@4.3.6))(@modelcontextprotocol/sdk@1.29.0(zod@4.3.6))(zod@4.3.6) + specifier: 0.3.165 + version: 0.3.165(@anthropic-ai/sdk@0.100.1(zod@4.3.6))(@modelcontextprotocol/sdk@1.29.0(zod@4.3.6))(zod@4.3.6) '@anthropic-ai/sdk': specifier: 0.100.1 version: 0.100.1(zod@4.3.6) @@ -1557,6 +1557,11 @@ packages: peerDependencies: zod: 4.3.6 + '@agentclientprotocol/sdk@0.25.0': + resolution: {integrity: 
sha512-wU1VgXNtMvdVotX49txc3WJUDV+/QbLpsgjMvFhlRmp37osdLbI7L7y+iwAlQATwfjLxcv1r1p3ZxZBcXlGhcQ==} + peerDependencies: + zod: 4.3.6 + '@alloc/quick-lru@5.2.0': resolution: {integrity: sha512-UrcABB+4bUrFABwbluTIBErXwvbsU/V7TZWfmbgJfbkwiBuziS9gxdODUyuiecfdGQ85jglMW6juS3+z5TsKLw==} engines: {node: '>=10'} @@ -1570,45 +1575,89 @@ packages: cpu: [arm64] os: [darwin] + '@anthropic-ai/claude-agent-sdk-darwin-arm64@0.3.165': + resolution: {integrity: sha512-obVodJmppNc6lgcM6Y5y3VCQLrYO2curOXrRaziKtjxYbuZP7kYsUhnonMvGoVAQh3uHKz2tivQDeztvWe3f9w==} + cpu: [arm64] + os: [darwin] + '@anthropic-ai/claude-agent-sdk-darwin-x64@0.3.156': resolution: {integrity: sha512-6PKi5fPmGRuzXu+Em/iwLmPG3mqg0hl92wcTU8fmChqyNtxhxsjCw7LTbdFqp/05o5NeZVVV4k3p7YUv5IFD6g==} cpu: [x64] os: [darwin] + '@anthropic-ai/claude-agent-sdk-darwin-x64@0.3.165': + resolution: {integrity: sha512-0jc1tlYLXzPvZIkHKGHzsEEKq2YqTS8oHSNFroqLgbhrIk1Zy05ZXbciI289VDAe1Fq2a+qcUhkXct8Parx1Rg==} + cpu: [x64] + os: [darwin] + '@anthropic-ai/claude-agent-sdk-linux-arm64-musl@0.3.156': resolution: {integrity: sha512-R7KEVjxkR4rYgIQoHGBzwPdUJYxRTO8I4vHjRbMLH1eW4FS7BJvVs7ogfKR/NnHFBvMVqtC+l6jHLQv8bobUiw==} cpu: [arm64] os: [linux] libc: [musl] + '@anthropic-ai/claude-agent-sdk-linux-arm64-musl@0.3.165': + resolution: {integrity: sha512-Rccmr5chZdZJVRvoB0nildB5PTKX+amatUho9JIcNOf1iX/6ej39fwf8q9W1MRHYP7AEc4t9GrSAGLcn7/JO4w==} + cpu: [arm64] + os: [linux] + libc: [musl] + '@anthropic-ai/claude-agent-sdk-linux-arm64@0.3.156': resolution: {integrity: sha512-H0Nfd41iw5isto9uQI1FlVSZ0eaDttr8rBpJMR25oK/mj3egMO5EmZ6aAxeeUYSLn2mSU50HA5VNxlGUE118TQ==} cpu: [arm64] os: [linux] libc: [glibc] + '@anthropic-ai/claude-agent-sdk-linux-arm64@0.3.165': + resolution: {integrity: sha512-t87HgDPPaRYMTTB5cqA0M36Fyq4DOny89yk71BMgA8hAzhOjV9bla8pMVZTuX3xYYPjsa/TOmxSzwI8GZLf4Aw==} + cpu: [arm64] + os: [linux] + libc: [glibc] + '@anthropic-ai/claude-agent-sdk-linux-x64-musl@0.3.156': resolution: {integrity: 
sha512-/Q6WUizI6a+hqZZ6ElwRU0PEuFhOoN4v6CuU35HHbiZ/7uaocGht4A8ZIgK1Fw6wOGtZzGLbc00CA1OU1Zg8EA==} cpu: [x64] os: [linux] libc: [musl] + '@anthropic-ai/claude-agent-sdk-linux-x64-musl@0.3.165': + resolution: {integrity: sha512-Y9Acr1RmydfEX+t+3mFn0K9VOx6nfyo08QuQH9R6ap1YYZWuobze++pNUY/rzwbQjXqcbjORtPKbO/kLQtSr9w==} + cpu: [x64] + os: [linux] + libc: [musl] + '@anthropic-ai/claude-agent-sdk-linux-x64@0.3.156': resolution: {integrity: sha512-ymhrdlbWoYvTACUdaGdhrEv+ZMfwXLsf0BRLkr/IvY5aqybP7URzWmmZGOtDQpqkT/8xu/UCGqUYH3woJwUxfg==} cpu: [x64] os: [linux] libc: [glibc] + '@anthropic-ai/claude-agent-sdk-linux-x64@0.3.165': + resolution: {integrity: sha512-Y8fEW0zKBn0XZI5AOQWHep0Srz0qsCauynTWkhsC6J2vSPxkTiOxv2hmb7qdfiNlFn0k1etCWVFoRkhhFJzGfg==} + cpu: [x64] + os: [linux] + libc: [glibc] + '@anthropic-ai/claude-agent-sdk-win32-arm64@0.3.156': resolution: {integrity: sha512-5sAeNObQQrMy4NF9HwxewrMnU7mVxZDHh+/MfJVQSz0GSTvXQ6gOuRH8helMlfspoU6VOdekPxVLRooX/3foEw==} cpu: [arm64] os: [win32] + '@anthropic-ai/claude-agent-sdk-win32-arm64@0.3.165': + resolution: {integrity: sha512-4Q01L3xaDDCvlOhABf2MnO7v7yJxKwwDyiMr+DaneUSvuh1qH0YE7qErSYLf6D9VfH8TdRwKZXwQplVVwCoHWw==} + cpu: [arm64] + os: [win32] + '@anthropic-ai/claude-agent-sdk-win32-x64@0.3.156': resolution: {integrity: sha512-/PofeTWoiKgnWNSNk0wG4SsRn22GGLmnLhg2R94WcNhCRFOyOTmiZcYH2DBlWZBIRVTZDsSfa/Pl1DyPvYCGKw==} cpu: [x64] os: [win32] + '@anthropic-ai/claude-agent-sdk-win32-x64@0.3.165': + resolution: {integrity: sha512-Y0uOx7b7ZnkguvFFI5T5fSLnRA/e0uvMC++gSnyz6XMpNekgWc3+Mny7Dv2NO22nKbV2YiFsj6MkYYFEd51BDw==} + cpu: [x64] + os: [win32] + '@anthropic-ai/claude-agent-sdk@0.3.156': resolution: {integrity: sha512-6nM/Dj+VMds52UXJ2YaV4IKhYamlUqN0HtdDrFzYz5lvPMpDS935qD8YZDAUpy+ltdoD6PJMd1V/CKFY3/oWCQ==} engines: {node: '>=18.0.0'} @@ -1617,6 +1666,14 @@ packages: '@modelcontextprotocol/sdk': ^1.29.0 zod: 4.3.6 + '@anthropic-ai/claude-agent-sdk@0.3.165': + resolution: {integrity: 
sha512-wEUJNTAWkE6KMV35abqGi30lwhZz+jQLMtLh4SuTN2Hllzsysq8kmQFgcWulza3FLHG/GHzGHPi0+Sp2fb8xlw==} + engines: {node: '>=18.0.0'} + peerDependencies: + '@anthropic-ai/sdk': '>=0.93.0' + '@modelcontextprotocol/sdk': ^1.29.0 + zod: 4.3.6 + '@anthropic-ai/sdk@0.100.1': resolution: {integrity: sha512-RANcEe7LpiLczkKGOwoXOTuFdPhuubS0i4xaAKOMpcqc55YO0mukgxppV7eygx3DXNjxWT6RYOLPyOy0aIAmwg==} hasBin: true @@ -13420,6 +13477,10 @@ snapshots: dependencies: zod: 4.4.3 + '@agentclientprotocol/sdk@0.25.0(zod@4.3.6)': + dependencies: + zod: 4.3.6 + '@alloc/quick-lru@5.2.0': {} '@ampproject/remapping@2.3.0': @@ -13430,27 +13491,51 @@ snapshots: '@anthropic-ai/claude-agent-sdk-darwin-arm64@0.3.156': optional: true + '@anthropic-ai/claude-agent-sdk-darwin-arm64@0.3.165': + optional: true + '@anthropic-ai/claude-agent-sdk-darwin-x64@0.3.156': optional: true + '@anthropic-ai/claude-agent-sdk-darwin-x64@0.3.165': + optional: true + '@anthropic-ai/claude-agent-sdk-linux-arm64-musl@0.3.156': optional: true + '@anthropic-ai/claude-agent-sdk-linux-arm64-musl@0.3.165': + optional: true + '@anthropic-ai/claude-agent-sdk-linux-arm64@0.3.156': optional: true + '@anthropic-ai/claude-agent-sdk-linux-arm64@0.3.165': + optional: true + '@anthropic-ai/claude-agent-sdk-linux-x64-musl@0.3.156': optional: true + '@anthropic-ai/claude-agent-sdk-linux-x64-musl@0.3.165': + optional: true + '@anthropic-ai/claude-agent-sdk-linux-x64@0.3.156': optional: true + '@anthropic-ai/claude-agent-sdk-linux-x64@0.3.165': + optional: true + '@anthropic-ai/claude-agent-sdk-win32-arm64@0.3.156': optional: true + '@anthropic-ai/claude-agent-sdk-win32-arm64@0.3.165': + optional: true + '@anthropic-ai/claude-agent-sdk-win32-x64@0.3.156': optional: true + '@anthropic-ai/claude-agent-sdk-win32-x64@0.3.165': + optional: true + '@anthropic-ai/claude-agent-sdk@0.3.156(@anthropic-ai/sdk@0.100.1(zod@4.3.6))(@modelcontextprotocol/sdk@1.29.0(zod@4.3.6))(zod@4.3.6)': dependencies: '@anthropic-ai/sdk': 0.100.1(zod@4.3.6) @@ -13466,6 
+13551,21 @@ snapshots: '@anthropic-ai/claude-agent-sdk-win32-arm64': 0.3.156 '@anthropic-ai/claude-agent-sdk-win32-x64': 0.3.156 + '@anthropic-ai/claude-agent-sdk@0.3.165(@anthropic-ai/sdk@0.100.1(zod@4.3.6))(@modelcontextprotocol/sdk@1.29.0(zod@4.3.6))(zod@4.3.6)': + dependencies: + '@anthropic-ai/sdk': 0.100.1(zod@4.3.6) + '@modelcontextprotocol/sdk': 1.29.0(zod@4.3.6) + zod: 4.3.6 + optionalDependencies: + '@anthropic-ai/claude-agent-sdk-darwin-arm64': 0.3.165 + '@anthropic-ai/claude-agent-sdk-darwin-x64': 0.3.165 + '@anthropic-ai/claude-agent-sdk-linux-arm64': 0.3.165 + '@anthropic-ai/claude-agent-sdk-linux-arm64-musl': 0.3.165 + '@anthropic-ai/claude-agent-sdk-linux-x64': 0.3.165 + '@anthropic-ai/claude-agent-sdk-linux-x64-musl': 0.3.165 + '@anthropic-ai/claude-agent-sdk-win32-arm64': 0.3.165 + '@anthropic-ai/claude-agent-sdk-win32-x64': 0.3.165 + '@anthropic-ai/sdk@0.100.1(zod@4.3.6)': dependencies: json-schema-to-ts: 3.1.1 From 50d25b236990af6aaa2c876dd0248b509a81fef1 Mon Sep 17 00:00:00 2001 From: Charles Vien Date: Sat, 6 Jun 2026 17:16:47 -0700 Subject: [PATCH 2/4] fix acp session id routing and skills baseline --- .../agent/src/adapters/claude/claude-agent.ts | 22 ++++++++++++++----- .../adapters/claude/conversion/sdk-to-acp.ts | 5 ++++- 2 files changed, 20 insertions(+), 7 deletions(-) diff --git a/packages/agent/src/adapters/claude/claude-agent.ts b/packages/agent/src/adapters/claude/claude-agent.ts index 217f06e278..cd09aa6360 100644 --- a/packages/agent/src/adapters/claude/claude-agent.ts +++ b/packages/agent/src/adapters/claude/claude-agent.ts @@ -623,15 +623,21 @@ export class ClaudeAcpAgent extends BaseAcpAgent { this.session.knownSlashCommands = collectKnownSlashCommands( message.commands, ); + const available = getAvailableSlashCommands(message.commands); await this.client.sessionUpdate({ sessionId: params.sessionId, update: { sessionUpdate: "available_commands_update", - availableCommands: getAvailableSlashCommands( - message.commands, - ), + 
availableCommands: available, }, }); + // Keep the context-breakdown skills estimate in sync with the new + // command list (mirrors sendAvailableCommandsUpdate), so later + // usage breakdowns don't report stale skills context. + this.updateBreakdownCategory( + "skills", + estimateSkillsTokens(available), + ); break; } if (message.subtype === "local_command_output") { @@ -1047,8 +1053,11 @@ export class ClaudeAcpAgent extends BaseAcpAgent { case "tool_progress": { // Surface "still working" progress on a long-running tool call so // the client can show elapsed time instead of a stalled spinner. + // Route by the ACP session id (params.sessionId) like every other + // update in this loop — the client renders by ACP session, not the + // SDK's message.session_id. await this.client.sessionUpdate({ - sessionId: message.session_id, + sessionId: params.sessionId, update: { sessionUpdate: "tool_call_update", toolCallId: message.tool_use_id, @@ -1067,10 +1076,11 @@ export class ClaudeAcpAgent extends BaseAcpAgent { } case "rate_limit_event": { // Re-emit the current usage carrying the subscription rate-limit - // info so the client can warn before the limit bites. + // info so the client can warn before the limit bites. Route by the + // ACP session id (params.sessionId) like every other update here. 
if (lastAssistantTotalUsage !== null) { await this.client.sessionUpdate({ - sessionId: message.session_id, + sessionId: params.sessionId, update: { sessionUpdate: "usage_update", used: lastAssistantTotalUsage, diff --git a/packages/agent/src/adapters/claude/conversion/sdk-to-acp.ts b/packages/agent/src/adapters/claude/conversion/sdk-to-acp.ts index 807c39f990..5b333da989 100644 --- a/packages/agent/src/adapters/claude/conversion/sdk-to-acp.ts +++ b/packages/agent/src/adapters/claude/conversion/sdk-to-acp.ts @@ -792,8 +792,11 @@ export async function handleSystemMessage( // tool_call, so mark it failed with the rejection reason — otherwise the // client shows a tool call that silently never resolves. const reason = message.decision_reason ?? message.message; + // Route by the ACP session id (context) — the original tool_call was + // emitted under it, so the failed update must match or the client drops it + // and the tool call hangs unresolved. await client.sessionUpdate({ - sessionId: message.session_id, + sessionId, update: { sessionUpdate: "tool_call_update", toolCallId: message.tool_use_id, From 2196e9fa69e10033fe5f96fff299297fe5e04864 Mon Sep 17 00:00:00 2001 From: Charles Vien Date: Sun, 7 Jun 2026 17:33:13 -0700 Subject: [PATCH 3/4] fix compact-boundary cancel race and add tests --- .../claude/claude-agent.slash-command.test.ts | 58 +++++++++++++++++++ .../agent/src/adapters/claude/claude-agent.ts | 37 ++++++++---- .../claude/conversion/sdk-to-acp.test.ts | 50 ++++++++++++++++ .../adapters/claude/conversion/sdk-to-acp.ts | 3 +- .../adapters/claude/session/models.test.ts | 21 +++++++ .../agent/src/adapters/codex/models.test.ts | 43 +++++++++++++- 6 files changed, 200 insertions(+), 12 deletions(-) create mode 100644 packages/agent/src/adapters/claude/conversion/sdk-to-acp.test.ts diff --git a/packages/agent/src/adapters/claude/claude-agent.slash-command.test.ts b/packages/agent/src/adapters/claude/claude-agent.slash-command.test.ts index 
810f34e4d8..7ca63c83b0 100644 --- a/packages/agent/src/adapters/claude/claude-agent.slash-command.test.ts +++ b/packages/agent/src/adapters/claude/claude-agent.slash-command.test.ts @@ -172,3 +172,61 @@ describe("ClaudeAcpAgent.prompt — early idle handling", () => { } }); }); + +describe("ClaudeAcpAgent.prompt — force-cancel backstop", () => { + beforeEach(() => { + vi.clearAllMocks(); + }); + + it("returns 'cancelled' when the SDK never yields after interrupt (issue #680)", async () => { + const { agent } = makeAgent(); + const sessionId = "s-wedged"; + const query = installFakeSession(agent, sessionId); + // Simulate a wedged SDK: interrupt() resolves but never makes next() yield. + query.interrupt.mockImplementation(async () => {}); + // Shrink the grace period so the backstop fires promptly under real timers. + (agent as unknown as { forceCancelGraceMs: number }).forceCancelGraceMs = 5; + + const promptPromise = agent.prompt({ + sessionId, + prompt: [{ type: "text", text: "do something slow" }], + }); + + // Let the loop reach `await query.next()`, which stays pending forever. + await new Promise((resolve) => setImmediate(resolve)); + + // Arms the backstop and calls the (no-op) interrupt; the timer must drive + // the loop to return rather than hanging on the wedged next(). + await agent.cancel({ sessionId }); + + const result = await promptPromise; + expect(result.stopReason).toBe("cancelled"); + }); + + it("clears the backstop timer on a healthy cancel (interrupt yields)", async () => { + const { agent } = makeAgent(); + const sessionId = "s-healthy"; + installFakeSession(agent, sessionId); + // Large grace so the test can only pass via the normal idle/done path, not + // the timer; the loop must clear the armed timer in its finally. 
+ (agent as unknown as { forceCancelGraceMs: number }).forceCancelGraceMs = + 50_000; + + const promptPromise = agent.prompt({ + sessionId, + prompt: [{ type: "text", text: "do something" }], + }); + await new Promise((resolve) => setImmediate(resolve)); + + // The mock's default interrupt() resolves next() with done, so the loop + // returns through its normal path well before the 50s backstop. + await agent.cancel({ sessionId }); + + const result = await promptPromise; + expect(result.stopReason).toBe("cancelled"); + expect( + (agent as unknown as { session: { forceCancelTimer?: unknown } }).session + .forceCancelTimer, + ).toBeUndefined(); + }); +}); diff --git a/packages/agent/src/adapters/claude/claude-agent.ts b/packages/agent/src/adapters/claude/claude-agent.ts index cd09aa6360..654e868761 100644 --- a/packages/agent/src/adapters/claude/claude-agent.ts +++ b/packages/agent/src/adapters/claude/claude-agent.ts @@ -486,7 +486,10 @@ export class ClaudeAcpAgent extends BaseAcpAgent { // force-cancel backstop armed in interrupt() aborts this controller. const cancelController = new AbortController(); this.session.cancelController = cancelController; - const cancelled = new Promise((resolve) => { + // Resolves when the backstop aborts the controller. Named distinctly from + // the `cancelled` boolean above (the queue-handoff result) to avoid two + // variables named `cancelled` in this method. + const cancelWake = new Promise((resolve) => { cancelController.signal.addEventListener("abort", () => resolve(), { once: true, }); @@ -545,13 +548,19 @@ export class ClaudeAcpAgent extends BaseAcpAgent { try { while (true) { const nextMessage = this.session.query.next(); - const next = await Promise.race([nextMessage, cancelled]); + const next = await Promise.race([nextMessage, cancelWake]); if (cancelController.signal.aborted) { // The SDK never yielded after interrupt() (e.g. a wedged TaskOutput - // block). 
Abandon the in-flight next() — swallowing any later - // rejection so it can't surface as an unhandled rejection — and honor - // the cancel per the ACP contract. - void nextMessage.catch(() => {}); + // block). Abandon the in-flight next(); log any later rejection (an + // auth/process error the SDK threw at cancel time would otherwise be + // lost) but swallow it so it can't surface as an unhandled rejection, + // then honor the cancel per the ACP contract. + void nextMessage.catch((err) => + this.logger.warn("in-flight query.next() rejected after cancel", { + sessionId: params.sessionId, + error: err instanceof Error ? err.message : String(err), + }), + ); return { stopReason: "cancelled", _meta: this.session.interruptReason @@ -599,10 +608,16 @@ export class ClaudeAcpAgent extends BaseAcpAgent { // (context just dropped) and replaced within seconds by the next // result. `size` keeps coming from the gateway-learned window // (getContextUsage under-reports extended 1M windows). - const usedTokens = await fetchContextUsedTokens( - this.session.query, - this.logger, - ); + // Race the control request against the force-cancel wake: the + // loop only observes cancelWake at its top, so a wedged + // getContextUsage() awaited here would otherwise re-introduce the + // exact hang the backstop exists to break (issue #680). On a + // forced cancel usedTokens is null and the next iteration returns + // "cancelled". + const usedTokens = await Promise.race([ + fetchContextUsedTokens(this.session.query, this.logger), + cancelWake.then(() => null), + ]); lastAssistantTotalUsage = usedTokens ?? 0; promptReplayed = true; await this.client.sessionUpdate({ @@ -613,6 +628,8 @@ export class ClaudeAcpAgent extends BaseAcpAgent { size: lastContextWindowSize, }, }); + // No break: intentionally falls through to handleSystemMessage so + // the COMPACT_BOUNDARY ext notification still fires. } if (message.subtype === "commands_changed") { // Mid-session command-list change (e.g. 
skills discovered as the diff --git a/packages/agent/src/adapters/claude/conversion/sdk-to-acp.test.ts b/packages/agent/src/adapters/claude/conversion/sdk-to-acp.test.ts new file mode 100644 index 0000000000..ee6846d8cf --- /dev/null +++ b/packages/agent/src/adapters/claude/conversion/sdk-to-acp.test.ts @@ -0,0 +1,50 @@ +import { describe, expect, it } from "vitest"; +import { stripMarkerTags } from "./sdk-to-acp"; + +describe("stripMarkerTags", () => { + it("strips a single marker and keeps surrounding prose", () => { + expect( + stripMarkerTags("before/modelafter"), + ).toBe("beforeafter"); + }); + + it("strips multiple different markers in one pass", () => { + const input = + "axboutc"; + expect(stripMarkerTags(input)).toBe("abc"); + }); + + it("leaves text without markers unchanged", () => { + expect(stripMarkerTags("")).toBe(""); + expect(stripMarkerTags("plain prose with < and > but no tags")).toBe( + "plain prose with < and > but no tags", + ); + }); + + it("passes an unclosed opener through verbatim (dead-set path)", () => { + const input = "no closing tag, prose continues"; + expect(stripMarkerTags(input)).toBe(input); + }); + + it("does not treat an orphan closing tag as an opener", () => { + expect( + stripMarkerTags("textreal"), + ).toBe("text"); + }); + + it("matches the nearest closing tag for a repeated opener", () => { + // Lazy match: the first opener pairs with the first close, swallowing the + // inner opener and its text, exactly like the original `[\s\S]*?` regex. + expect( + stripMarkerTags( + "outerinnertrailing", + ), + ).toBe("trailing"); + }); + + it("stays linear on pathological unclosed input", () => { + // A long run of openers with no close must not catastrophically backtrack. 
+ const input = `${"".repeat(20000)}tail`; + expect(stripMarkerTags(input)).toBe(input); + }); +}); diff --git a/packages/agent/src/adapters/claude/conversion/sdk-to-acp.ts b/packages/agent/src/adapters/claude/conversion/sdk-to-acp.ts index 5b333da989..7b29dbfb58 100644 --- a/packages/agent/src/adapters/claude/conversion/sdk-to-acp.ts +++ b/packages/agent/src/adapters/claude/conversion/sdk-to-acp.ts @@ -1009,7 +1009,8 @@ const LOCAL_COMMAND_MARKERS = [ // Single-pass scanner that removes each `` marker (matching the // nearest closing tag of the same name, like a lazy regex would) without the // catastrophic-backtracking risk of `[\s\S]*?` over pathological input. -function stripMarkerTags(text: string): string { +// Exported for unit testing. +export function stripMarkerTags(text: string): string { const dead = new Set(); let result = ""; let copiedUpTo = 0; diff --git a/packages/agent/src/adapters/claude/session/models.test.ts b/packages/agent/src/adapters/claude/session/models.test.ts index 9b6c0649bb..bbb0d67869 100644 --- a/packages/agent/src/adapters/claude/session/models.test.ts +++ b/packages/agent/src/adapters/claude/session/models.test.ts @@ -133,6 +133,27 @@ describe("resolveModelPreference", () => { expect(resolveModelPreference("gpt-5", options)).toBeNull(); }); + it("does not inherit a cross-family match from the context hint alone", () => { + // `opus[1m]` must not resolve to a sonnet entry purely because both share + // the "1m" hint token, with no real family token matching (#731). + const sonnetOnly = [ + { value: "claude-sonnet-4-6", name: "Claude Sonnet 4.6 (1M context)" }, + ]; + expect(resolveModelPreference("opus[1m]", sonnetOnly)).toBeNull(); + }); + + it("resolves a hinted alias to the right family when a family token matches", () => { + // Both entries carry the "1m" hint; the "opus" token must break the tie so + // the hint alone can't pull the match onto sonnet. 
+ const withHints = [ + { value: "claude-opus-4-8", name: "Claude Opus 4.8 (1M context)" }, + { value: "claude-sonnet-4-6", name: "Claude Sonnet 4.6 (1M context)" }, + ]; + expect(resolveModelPreference("opus[1m]", withHints)).toBe( + "claude-opus-4-8", + ); + }); + it("treats `best` and `default` as wildcards (no tokens contribute)", () => { expect(resolveModelPreference("best", options)).toBeNull(); expect(resolveModelPreference("default", options)).toBeNull(); diff --git a/packages/agent/src/adapters/codex/models.test.ts b/packages/agent/src/adapters/codex/models.test.ts index b31a039ac8..c898bd9dc4 100644 --- a/packages/agent/src/adapters/codex/models.test.ts +++ b/packages/agent/src/adapters/codex/models.test.ts @@ -1,8 +1,49 @@ +import type { SessionConfigOption } from "@agentclientprotocol/sdk"; import { describe, expect, it } from "vitest"; -import { formatCodexModelName } from "./models"; +import { formatCodexModelName, modelIdFromConfigOptions } from "./models"; describe("formatCodexModelName", () => { it("uses raw lowercase model ids", () => { expect(formatCodexModelName("GPT-5.5")).toBe("gpt-5.5"); }); }); + +describe("modelIdFromConfigOptions", () => { + const modelOption = (currentValue: unknown): SessionConfigOption => + ({ + id: "model", + name: "Model", + type: "select", + category: "model", + currentValue, + options: [], + }) as unknown as SessionConfigOption; + + it("returns the currentValue of the model-category option", () => { + expect(modelIdFromConfigOptions([modelOption("gpt-5.5-codex")])).toBe( + "gpt-5.5-codex", + ); + }); + + it("ignores non-model categories", () => { + const modeOption = { + id: "mode", + name: "Mode", + type: "select", + category: "mode", + currentValue: "auto", + options: [], + } as unknown as SessionConfigOption; + expect(modelIdFromConfigOptions([modeOption])).toBeUndefined(); + }); + + it("returns undefined when currentValue is not a string", () => { + 
expect(modelIdFromConfigOptions([modelOption(null)])).toBeUndefined(); + expect(modelIdFromConfigOptions([modelOption(123)])).toBeUndefined(); + }); + + it("returns undefined for null/undefined input", () => { + expect(modelIdFromConfigOptions(null)).toBeUndefined(); + expect(modelIdFromConfigOptions(undefined)).toBeUndefined(); + }); +}); From ae78cad1835599ccec65dfbda49a4eea3456cc0d Mon Sep 17 00:00:00 2001 From: Charles Vien Date: Mon, 8 Jun 2026 08:31:29 -0700 Subject: [PATCH 4/4] remove verbose claude adapter comments --- .../claude/claude-agent.slash-command.test.ts | 9 -- .../agent/src/adapters/claude/claude-agent.ts | 101 ------------------ .../claude/conversion/sdk-to-acp.test.ts | 3 - .../adapters/claude/conversion/sdk-to-acp.ts | 26 ----- .../adapters/claude/session/models.test.ts | 4 - .../src/adapters/claude/session/models.ts | 3 - packages/agent/src/adapters/claude/types.ts | 8 -- packages/agent/src/adapters/codex/models.ts | 3 - 8 files changed, 157 deletions(-) diff --git a/packages/agent/src/adapters/claude/claude-agent.slash-command.test.ts b/packages/agent/src/adapters/claude/claude-agent.slash-command.test.ts index 7ca63c83b0..776663417e 100644 --- a/packages/agent/src/adapters/claude/claude-agent.slash-command.test.ts +++ b/packages/agent/src/adapters/claude/claude-agent.slash-command.test.ts @@ -182,9 +182,7 @@ describe("ClaudeAcpAgent.prompt — force-cancel backstop", () => { const { agent } = makeAgent(); const sessionId = "s-wedged"; const query = installFakeSession(agent, sessionId); - // Simulate a wedged SDK: interrupt() resolves but never makes next() yield. query.interrupt.mockImplementation(async () => {}); - // Shrink the grace period so the backstop fires promptly under real timers. 
(agent as unknown as { forceCancelGraceMs: number }).forceCancelGraceMs = 5; const promptPromise = agent.prompt({ @@ -192,11 +190,8 @@ describe("ClaudeAcpAgent.prompt — force-cancel backstop", () => { prompt: [{ type: "text", text: "do something slow" }], }); - // Let the loop reach `await query.next()`, which stays pending forever. await new Promise((resolve) => setImmediate(resolve)); - // Arms the backstop and calls the (no-op) interrupt; the timer must drive - // the loop to return rather than hanging on the wedged next(). await agent.cancel({ sessionId }); const result = await promptPromise; @@ -207,8 +202,6 @@ describe("ClaudeAcpAgent.prompt — force-cancel backstop", () => { const { agent } = makeAgent(); const sessionId = "s-healthy"; installFakeSession(agent, sessionId); - // Large grace so the test can only pass via the normal idle/done path, not - // the timer; the loop must clear the armed timer in its finally. (agent as unknown as { forceCancelGraceMs: number }).forceCancelGraceMs = 50_000; @@ -218,8 +211,6 @@ describe("ClaudeAcpAgent.prompt — force-cancel backstop", () => { }); await new Promise((resolve) => setImmediate(resolve)); - // The mock's default interrupt() resolves next() with done, so the loop - // returns through its normal path well before the 50s backstop. await agent.cancel({ sessionId }); const result = await promptPromise; diff --git a/packages/agent/src/adapters/claude/claude-agent.ts b/packages/agent/src/adapters/claude/claude-agent.ts index 654e868761..30423188f6 100644 --- a/packages/agent/src/adapters/claude/claude-agent.ts +++ b/packages/agent/src/adapters/claude/claude-agent.ts @@ -134,15 +134,6 @@ import type { const SESSION_VALIDATION_TIMEOUT_MS = 30_000; -/** Grace period after `session/cancel` before the adapter forces a wedged - * prompt loop to return "cancelled". 
`query.interrupt()` normally makes the SDK - * yield a trailing idle within milliseconds and the loop returns through its - * usual path, so this timer is armed and cleared (never fired) on healthy - * cancels. It only trips when the SDK is genuinely wedged (e.g. a - * `TaskOutput { block: true }` poll against a hung background task — issue - * #680) and never yields. Deliberately loose: an "obviously stuck" ceiling, - * not a guess at interrupt latency, so it can't pre-empt a slow-but-healthy - * interrupt. */ const DEFAULT_FORCE_CANCEL_GRACE_MS = 30_000; const MAX_TITLE_LENGTH = 256; @@ -199,15 +190,6 @@ function shouldEmitRawMessage( ); } -/** Fetch the SDK's authoritative context-window occupancy via the - * `getContextUsage` control request. Unlike the per-message API usage numbers - * (which only count message tokens), `totalTokens` includes the system prompt, - * tool schemas, MCP tools, and memory-file overhead — the real occupancy the - * user sees. Returns `null` on any control-request failure. - * - * We deliberately do NOT use this response's window fields for `size`: they - * have been observed to under-report extended (1M) context windows, so the - * window keeps coming from the gateway / model heuristic. */ async function fetchContextUsedTokens( sdkQuery: Query, logger: Logger, @@ -236,9 +218,6 @@ export class ClaudeAcpAgent extends BaseAcpAgent { toolUseStreamCache: ToolUseStreamCache; backgroundTerminals: { [key: string]: BackgroundTerminal } = {}; clientCapabilities?: ClientCapabilities; - /** Grace period before a `session/cancel` forces a wedged prompt loop to - * return "cancelled". See {@link DEFAULT_FORCE_CANCEL_GRACE_MS}. Mutable so - * tests can shrink it. 
*/ forceCancelGraceMs: number = DEFAULT_FORCE_CANCEL_GRACE_MS; private options?: ClaudeAcpAgentOptions; private enrichment?: Enrichment; @@ -481,14 +460,8 @@ export class ClaudeAcpAgent extends BaseAcpAgent { await this.broadcastUserMessage(params); this.session.promptRunning = true; - // Wake-up channel so cancel() can force this loop to return "cancelled" even - // when query.next() is wedged and never yields again (issue #680). The - // force-cancel backstop armed in interrupt() aborts this controller. const cancelController = new AbortController(); this.session.cancelController = cancelController; - // Resolves when the backstop aborts the controller. Named distinctly from - // the `cancelled` boolean above (the queue-handoff result) to avoid two - // variables named `cancelled` in this method. const cancelWake = new Promise((resolve) => { cancelController.signal.addEventListener("abort", () => resolve(), { once: true, @@ -497,10 +470,6 @@ export class ClaudeAcpAgent extends BaseAcpAgent { let handedOff = false; let errored = false; let lastAssistantTotalUsage: number | null = null; - // When a streaming classifier refuses a turn, the assistant message carries - // stop_reason "refusal" and structured stop_details. We capture the - // human-readable explanation here so the terminal `result` can surface it to - // the user (the refused assistant message itself usually has no content). let lastRefusalExplanation: string | null = null; let lastStreamUsage = { input_tokens: 0, @@ -550,11 +519,6 @@ export class ClaudeAcpAgent extends BaseAcpAgent { const nextMessage = this.session.query.next(); const next = await Promise.race([nextMessage, cancelWake]); if (cancelController.signal.aborted) { - // The SDK never yielded after interrupt() (e.g. a wedged TaskOutput - // block). 
Abandon the in-flight next(); log any later rejection (an - // auth/process error the SDK threw at cancel time would otherwise be - // lost) but swallow it so it can't surface as an unhandled rejection, - // then honor the cancel per the ACP contract. void nextMessage.catch((err) => this.logger.warn("in-flight query.next() rejected after cancel", { sessionId: params.sessionId, @@ -598,22 +562,6 @@ export class ClaudeAcpAgent extends BaseAcpAgent { switch (message.type) { case "system": if (message.subtype === "compact_boundary") { - // Refresh the displayed usage immediately so the client doesn't - // keep showing the stale pre-compaction size right after the user - // sees "Compacting completed". Prefer the SDK's authoritative - // post-compaction `used` via getContextUsage — it reflects the - // real retained context (system prompt + tools + surviving - // messages), which per-message API usage can't give us until the - // next turn. Fall back to 0 on failure: directionally correct - // (context just dropped) and replaced within seconds by the next - // result. `size` keeps coming from the gateway-learned window - // (getContextUsage under-reports extended 1M windows). - // Race the control request against the force-cancel wake: the - // loop only observes cancelWake at its top, so a wedged - // getContextUsage() awaited here would otherwise re-introduce the - // exact hang the backstop exists to break (issue #680). On a - // forced cancel usedTokens is null and the next iteration returns - // "cancelled". const usedTokens = await Promise.race([ fetchContextUsedTokens(this.session.query, this.logger), cancelWake.then(() => null), @@ -628,15 +576,8 @@ export class ClaudeAcpAgent extends BaseAcpAgent { size: lastContextWindowSize, }, }); - // No break: intentionally falls through to handleSystemMessage so - // the COMPACT_BOUNDARY ext notification still fires. } if (message.subtype === "commands_changed") { - // Mid-session command-list change (e.g. 
skills discovered as the - // agent works in a subdirectory). Push the new list straight from - // the message rather than re-querying (supportedCommands() only - // ever reflects the init list), and refresh the known-commands - // gate used to flag unsupported slash commands. this.session.knownSlashCommands = collectKnownSlashCommands( message.commands, ); @@ -648,9 +589,6 @@ export class ClaudeAcpAgent extends BaseAcpAgent { availableCommands: available, }, }); - // Keep the context-breakdown skills estimate in sync with the new - // command list (mirrors sendAvailableCommandsUpdate), so later - // usage breakdowns don't report stale skills context. this.updateBreakdownCategory( "skills", estimateSkillsTokens(available), @@ -871,12 +809,6 @@ export class ClaudeAcpAgent extends BaseAcpAgent { this.session.accumulatedUsage.cachedWriteTokens, }; - // A refusal can arrive on any result subtype (and may even set - // is_error), so handle it before handleResultMessage — otherwise the - // is_error path would surface it as an internal error. The refused - // assistant message carries no visible content, so surface the - // classifier's explanation (when available) and report ACP's - // dedicated `refusal` stop reason. if ( (message as { stop_reason?: string }).stop_reason === "refusal" ) { @@ -1009,10 +941,6 @@ export class ClaudeAcpAgent extends BaseAcpAgent { break; } - // Capture a refusal explanation from the assistant message so the - // terminal `result` can surface it (the refused message itself has - // no visible content). stop_reason/stop_details live on the inner - // Anthropic message; read them via cast like the usage block below. 
if (message.type === "assistant") { const inner = message.message as unknown as { stop_reason?: string | null; @@ -1068,11 +996,6 @@ export class ClaudeAcpAgent extends BaseAcpAgent { } case "tool_progress": { - // Surface "still working" progress on a long-running tool call so - // the client can show elapsed time instead of a stalled spinner. - // Route by the ACP session id (params.sessionId) like every other - // update in this loop — the client renders by ACP session, not the - // SDK's message.session_id. await this.client.sessionUpdate({ sessionId: params.sessionId, update: { @@ -1092,9 +1015,6 @@ export class ClaudeAcpAgent extends BaseAcpAgent { break; } case "rate_limit_event": { - // Re-emit the current usage carrying the subscription rate-limit - // info so the client can warn before the limit bites. Route by the - // ACP session id (params.sessionId) like every other update here. if (lastAssistantTotalUsage !== null) { await this.client.sessionUpdate({ sessionId: params.sessionId, @@ -1174,9 +1094,6 @@ export class ClaudeAcpAgent extends BaseAcpAgent { } throw error; } finally { - // The loop is returning — interrupt() succeeded or the prompt finished — - // so disarm the force-cancel backstop and release the wake-up channel - // (only if we still own it; a handoff installs the next prompt's). if (this.session.forceCancelTimer) { clearTimeout(this.session.forceCancelTimer); this.session.forceCancelTimer = undefined; @@ -1223,15 +1140,6 @@ export class ClaudeAcpAgent extends BaseAcpAgent { } this.session.pendingMessages.clear(); - // Arm a backstop before interrupting: if a prompt is actively consuming the - // query and interrupt() doesn't make the SDK yield (e.g. a wedged TaskOutput - // block — issue #680), force the loop to return "cancelled" after the grace - // period so the pending prompt() resolves per the ACP cancellation contract - // instead of hanging forever. 
The loop's `finally` clears this timer when - // interrupt() works and it returns through the normal idle path, so on - // healthy cancels it is armed but never fires. Arm at most once per turn: - // the floor is an absolute ceiling from the first cancel, so a client that - // re-sends cancel can't keep pushing the deadline out. if ( this.session.promptRunning && this.session.cancelController && @@ -1529,12 +1437,6 @@ export class ClaudeAcpAgent extends BaseAcpAgent { } } - /** - * Ensures the requested `cwd` is an absolute path that points at an existing - * directory before we create a session. Throws an `invalidParams` error with - * an actionable message so clients can surface it to the user instead of - * failing later with an opaque "native binary failed to launch" SDK error. - */ private async validateCwd(cwd: string): Promise { if (!path.isAbsolute(cwd)) { throw RequestError.invalidParams( @@ -1577,9 +1479,6 @@ export class ClaudeAcpAgent extends BaseAcpAgent { const { cwd } = params; const { resume, forkSession } = creationOpts; - // Validate `cwd` up front. The ACP spec requires an absolute path, and the - // directory must exist on the machine running the agent. Without this the - // failure only surfaces later as a confusing SDK launch error (issue #749). await this.validateCwd(cwd); const isResume = !!resume; diff --git a/packages/agent/src/adapters/claude/conversion/sdk-to-acp.test.ts b/packages/agent/src/adapters/claude/conversion/sdk-to-acp.test.ts index ee6846d8cf..76b4d06b2c 100644 --- a/packages/agent/src/adapters/claude/conversion/sdk-to-acp.test.ts +++ b/packages/agent/src/adapters/claude/conversion/sdk-to-acp.test.ts @@ -33,8 +33,6 @@ describe("stripMarkerTags", () => { }); it("matches the nearest closing tag for a repeated opener", () => { - // Lazy match: the first opener pairs with the first close, swallowing the - // inner opener and its text, exactly like the original `[\s\S]*?` regex. 
expect( stripMarkerTags( "outerinnertrailing", @@ -43,7 +41,6 @@ describe("stripMarkerTags", () => { }); it("stays linear on pathological unclosed input", () => { - // A long run of openers with no close must not catastrophically backtrack. const input = `${"".repeat(20000)}tail`; expect(stripMarkerTags(input)).toBe(input); }); diff --git a/packages/agent/src/adapters/claude/conversion/sdk-to-acp.ts b/packages/agent/src/adapters/claude/conversion/sdk-to-acp.ts index 7b29dbfb58..e17ab7ffee 100644 --- a/packages/agent/src/adapters/claude/conversion/sdk-to-acp.ts +++ b/packages/agent/src/adapters/claude/conversion/sdk-to-acp.ts @@ -190,11 +190,6 @@ function handleToolUseChunk( } if (!alreadyCached && ctx.registerHooks !== false) { - // Capture what the hook needs in the closure rather than re-reading the - // cache when it fires. The cache entry is pruned at tool_result time, and a - // PostToolUse hook can fire after that, so closing over the name and bash - // command keeps the diff working without depending on (or pinning) the - // cache entry's lifetime. const toolName = chunk.name; const bashCommand = bashCommandFromToolUse(chunk); registerHookCallback(chunk.id, { @@ -344,10 +339,6 @@ function handleToolResultChunk( return []; } - // The tool_use is fully resolved now — drop it so a long-running session - // doesn't retain every tool call for its whole lifetime. Everything below uses - // the captured `toolUse` local, and the PostToolUse hook closes over the tool - // name/bash command, so pruning here is safe regardless of hook/result order. delete ctx.toolUseCache[chunk.tool_use_id]; if ( @@ -779,22 +770,12 @@ export async function handleSystemMessage( break; } case "mirror_error": - // The SDK failed to persist session history (append rejected/timed out - // after retry) — potential data loss on resume the user should know about - // rather than a silent gap. Log it; no user-facing chunk. 
logger.error( `Session ${sessionId}: failed to persist history: ${message.error}`, ); break; case "permission_denied": { - // A tool call was auto-denied (by a rule, the classifier, dontAsk mode, - // etc.) before running. The tool_use block was already emitted as a - // tool_call, so mark it failed with the rejection reason — otherwise the - // client shows a tool call that silently never resolves. const reason = message.decision_reason ?? message.message; - // Route by the ACP session id (context) — the original tool_call was - // emitted under it, so the failed update must match or the client drops it - // and the tool call hangs unresolved. await client.sessionUpdate({ sessionId, update: { @@ -1006,10 +987,6 @@ const LOCAL_COMMAND_MARKERS = [ "local-command-stderr", ].map((tag) => ({ open: `<${tag}>`, close: `` })); -// Single-pass scanner that removes each `` marker (matching the -// nearest closing tag of the same name, like a lazy regex would) without the -// catastrophic-backtracking risk of `[\s\S]*?` over pathological input. -// Exported for unit testing. export function stripMarkerTags(text: string): string { const dead = new Set(); let result = ""; @@ -1027,9 +1004,6 @@ export function stripMarkerTags(text: string): string { i = copiedUpTo = end + marker.close.length; continue; } - // No closing marker remains anywhere ahead, and `indexOf` only ever - // searches forward from here on, so stop treating this tag as an - // opener — that avoids rescanning the tail for it on every match. 
dead.add(marker.open); } } diff --git a/packages/agent/src/adapters/claude/session/models.test.ts b/packages/agent/src/adapters/claude/session/models.test.ts index bbb0d67869..39b812e40d 100644 --- a/packages/agent/src/adapters/claude/session/models.test.ts +++ b/packages/agent/src/adapters/claude/session/models.test.ts @@ -134,8 +134,6 @@ describe("resolveModelPreference", () => { }); it("does not inherit a cross-family match from the context hint alone", () => { - // `opus[1m]` must not resolve to a sonnet entry purely because both share - // the "1m" hint token, with no real family token matching (#731). const sonnetOnly = [ { value: "claude-sonnet-4-6", name: "Claude Sonnet 4.6 (1M context)" }, ]; @@ -143,8 +141,6 @@ describe("resolveModelPreference", () => { }); it("resolves a hinted alias to the right family when a family token matches", () => { - // Both entries carry the "1m" hint; the "opus" token must break the tie so - // the hint alone can't pull the match onto sonnet. const withHints = [ { value: "claude-opus-4-8", name: "Claude Opus 4.8 (1M context)" }, { value: "claude-sonnet-4-6", name: "Claude Sonnet 4.6 (1M context)" }, diff --git a/packages/agent/src/adapters/claude/session/models.ts b/packages/agent/src/adapters/claude/session/models.ts index 58bc797b4d..2d87eaa8d8 100644 --- a/packages/agent/src/adapters/claude/session/models.ts +++ b/packages/agent/src/adapters/claude/session/models.ts @@ -145,9 +145,6 @@ function scoreModelMatch( score += token === contextHint ? 3 : 1; } } - // A context hint alone (e.g. "1m") must not carry a match across model - // families: without a real family/name token also matching, "opus[1m]" would - // otherwise score against "sonnet[1m]" purely on the shared "1m" token. 
if (contextHint && !nonHintMatched) return 0; return score; } diff --git a/packages/agent/src/adapters/claude/types.ts b/packages/agent/src/adapters/claude/types.ts index 69407afdea..6246da57b8 100644 --- a/packages/agent/src/adapters/claude/types.ts +++ b/packages/agent/src/adapters/claude/types.ts @@ -70,15 +70,7 @@ export type Session = BaseSession & { /** Persists across prompt() calls so SDK-reported values survive turn boundaries */ lastContextWindowSize?: number; promptRunning: boolean; - /** Per-turn signal the active prompt loop races `query.next()` against. - * Aborted by the force-cancel backstop when the SDK wedges and never yields - * after `interrupt()` (issue #680), forcing the loop to return "cancelled" - * instead of hanging. Distinct from `abortController`: this only wakes the - * loop; it does not touch the SDK query/subprocess. Undefined when no prompt - * is actively consuming the query. */ cancelController?: AbortController; - /** Pending grace-period timer that aborts `cancelController`. Cleared when the - * loop returns normally so the backstop never fires after a clean cancel. */ forceCancelTimer?: ReturnType; pendingMessages: Map; nextPendingOrder: number; diff --git a/packages/agent/src/adapters/codex/models.ts b/packages/agent/src/adapters/codex/models.ts index 054db3932f..b0598ddfa7 100644 --- a/packages/agent/src/adapters/codex/models.ts +++ b/packages/agent/src/adapters/codex/models.ts @@ -25,9 +25,6 @@ export function formatCodexModelName(value: string): string { return value.toLowerCase(); } -/** Derive the current model id from the "model" config option's currentValue. - * Replaces the legacy `response.models.currentModelId` lookup that ACP SDK - * 0.25.0 removed (model selection moved entirely into config options). */ export function modelIdFromConfigOptions( configOptions: SessionConfigOption[] | null | undefined, ): string | undefined {