From 679db4069d182adc20df12367c27630cd2ecaf62 Mon Sep 17 00:00:00 2001 From: cyning Date: Wed, 10 Jun 2026 20:42:37 +0800 Subject: [PATCH] fix(agent-core): classify user-cancelled tool telemetry correctly Use cancelledByUser on tool results instead of guessing outcome from output text, fixing Bash user interrupts misreported as errors. Fixes #583 Co-authored-by: Cursor --- .changeset/fix-583-telemetry-bash-cancel.md | 6 + packages/agent-core/src/agent/turn/index.ts | 27 +-- .../src/agent/turn/tool-telemetry.ts | 37 ++++ packages/agent-core/src/loop/tool-call.ts | 39 +++- packages/agent-core/src/loop/types.ts | 5 + .../src/tools/builtin/shell/bash.ts | 13 +- packages/agent-core/test/agent/turn.test.ts | 180 +++++++++++++++++- 7 files changed, 270 insertions(+), 37 deletions(-) create mode 100644 .changeset/fix-583-telemetry-bash-cancel.md create mode 100644 packages/agent-core/src/agent/turn/tool-telemetry.ts diff --git a/.changeset/fix-583-telemetry-bash-cancel.md b/.changeset/fix-583-telemetry-bash-cancel.md new file mode 100644 index 000000000..3ced40396 --- /dev/null +++ b/.changeset/fix-583-telemetry-bash-cancel.md @@ -0,0 +1,6 @@ +--- +"@moonshot-ai/agent-core": patch +"@moonshot-ai/kimi-code": patch +--- + +Fix tool_call telemetry misclassifying user-cancelled Bash runs as errors. diff --git a/packages/agent-core/src/agent/turn/index.ts b/packages/agent-core/src/agent/turn/index.ts index 3fe647fb7..1715472c6 100644 --- a/packages/agent-core/src/agent/turn/index.ts +++ b/packages/agent-core/src/agent/turn/index.ts @@ -40,6 +40,7 @@ import { USER_PROMPT_ORIGIN, type PromptOrigin } from '../context'; import { renderUserPromptHookBlockResult, renderUserPromptHookResult } from '../../session/hooks'; import { canonicalTelemetryArgs, isPlainRecord } from './canonical-args'; import { ToolCallDeduplicator } from './tool-dedup'; +import { telemetryToolErrorType, telemetryToolOutcome } from './tool-telemetry'; interface ActiveTurn { readonly turnId: number; @@ -1101,29 +1102,3 @@ function currentTurnInputTokens(usage: TokenUsage | undefined): number | undefin if (usage === undefined) return undefined; return inputTotal(usage); } - -type ToolTelemetryResult = Extract['result']; - -function telemetryToolOutcome(result: ToolTelemetryResult): 'success' | 'error' | 'cancelled' { - if (result.isError !== true) return 'success'; - const text = toolResultText(result).toLowerCase(); - return text.includes('aborted') || - text.includes('cancelled') || - text.includes('manually interrupted') - ? 'cancelled' - : 'error'; -} - -function telemetryToolErrorType(result: ToolTelemetryResult): string { - const text = toolResultText(result); - if (text.startsWith('Tool "') && text.includes('" not found')) return 'ToolNotFound'; - if (text.startsWith('Invalid args for tool "')) return 'ToolInputError'; - if (text.includes('prepareToolExecution hook failed')) return 'HookError'; - if (text.includes('finalizeToolResult hook failed')) return 'HookError'; - if (text.includes('blocked')) return 'ToolBlocked'; - return 'ToolError'; -} - -function toolResultText(result: ToolTelemetryResult): string { - return toolOutputText(result.output); -} diff --git a/packages/agent-core/src/agent/turn/tool-telemetry.ts b/packages/agent-core/src/agent/turn/tool-telemetry.ts new file mode 100644 index 000000000..63d3295a4 --- /dev/null +++ b/packages/agent-core/src/agent/turn/tool-telemetry.ts @@ -0,0 +1,37 @@ +import type { ContentPart } from '@moonshot-ai/kosong'; + +import type { ExecutableToolResult } from '../../loop/types'; + +export type ToolTelemetryResult = ExecutableToolResult; + +function toolOutputText(output: ExecutableToolResult['output']): string { + if (typeof output === 'string') return output; + return output + .filter((part): part is Extract => { + return typeof part === 'object' && part !== null && part.type === 'text'; + }) + .map((part) => part.text) + .join(''); +} + +function toolResultText(result: ToolTelemetryResult): string { + return toolOutputText(result.output); +} + +export function telemetryToolOutcome( + result: ToolTelemetryResult, +): 'success' | 'error' | 'cancelled' { + if (result.isError !== true) return 'success'; + if (result.cancelledByUser === true) return 'cancelled'; + return 'error'; +} + +export function telemetryToolErrorType(result: ToolTelemetryResult): string { + const text = toolResultText(result); + if (text.startsWith('Tool "') && text.includes('" not found')) return 'ToolNotFound'; + if (text.startsWith('Invalid args for tool "')) return 'ToolInputError'; + if (text.includes('prepareToolExecution hook failed')) return 'HookError'; + if (text.includes('finalizeToolResult hook failed')) return 'HookError'; + if (text.includes('blocked')) return 'ToolBlocked'; + return 'ToolError'; +} diff --git a/packages/agent-core/src/loop/tool-call.ts b/packages/agent-core/src/loop/tool-call.ts index 311594bc3..6174844c7 100644 --- a/packages/agent-core/src/loop/tool-call.ts +++ b/packages/agent-core/src/loop/tool-call.ts @@ -33,6 +33,7 @@ import { ToolScheduler, type ToolCallTask } from './tool-scheduler'; import type { AuthorizeToolExecutionResult, ExecutableTool, + ExecutableToolErrorResult, LoopHooks, ToolCall, PrepareToolExecutionResult, @@ -242,9 +243,12 @@ async function prepareToolCall( args: unknown, output: string, displayFields?: ToolCallDisplayFields, + cancelledByUser?: true, ): Promise => { await dispatchToolCall(step, call, args, displayFields); - return { task: makeResolvedToolCallTask(makeErrorToolResult(call, args, output)) }; + return { + task: makeResolvedToolCallTask(makeErrorToolResult(call, args, output, cancelledByUser)), + }; }; const settleSynthetic = async ( @@ -299,7 +303,12 @@ async function prepareToolCall( const displayFields = toolCallDisplayFieldsFromExecution(execution); const settleAborted = (): Promise => - settleError(effectiveArgs, abortedToolOutput(call.toolName, step.signal), displayFields); + settleError( + effectiveArgs, + abortedToolOutput(call.toolName, step.signal), + displayFields, + isUserCancellation(step.signal.reason) ? true : undefined, + ); if (step.signal.aborted) return settleAborted(); @@ -467,7 +476,12 @@ async function runRunnableToolCall( const { toolCall, toolName } = call; if (signal.aborted) { - return makeErrorToolResult(call, effectiveArgs, abortedToolOutput(toolName, signal)); + return makeErrorToolResult( + call, + effectiveArgs, + abortedToolOutput(toolName, signal), + isUserCancellation(signal.reason) ? true : undefined, + ); } let toolResult: ExecutableToolResult; @@ -486,7 +500,12 @@ async function runRunnableToolCall( const output = aborted ? abortedToolOutput(toolName, signal) : `Tool "${toolName}" failed: ${errorMessage(error)}`; - return makeErrorToolResult(call, effectiveArgs, output); + return makeErrorToolResult( + call, + effectiveArgs, + output, + aborted && isUserCancellation(signal.reason) ? true : undefined, + ); } return makeToolResult(call, effectiveArgs, toolResult); @@ -663,7 +682,14 @@ function normalizeToolResult(r: ExecutableToolResult): ExecutableToolResult { output = textJoined.length > 0 ? textJoined : TOOL_OUTPUT_EMPTY; } } - return r.isError === true ? { output, isError: true } : { output }; + if (r.isError === true) { + const errorResult: ExecutableToolErrorResult = + r.cancelledByUser === true + ? { output, isError: true, cancelledByUser: true } + : { output, isError: true }; + return errorResult; + } + return { output }; } function makeToolResult( @@ -688,8 +714,9 @@ function makeErrorToolResult( call: PreflightedToolCall, args: unknown, output: string, + cancelledByUser?: true, ): PendingToolResult { - return makeToolResult(call, args, { output, isError: true }); + return makeToolResult(call, args, { output, isError: true, cancelledByUser }); } /** diff --git a/packages/agent-core/src/loop/types.ts b/packages/agent-core/src/loop/types.ts index 5c59c988f..63f19cc4e 100644 --- a/packages/agent-core/src/loop/types.ts +++ b/packages/agent-core/src/loop/types.ts @@ -85,6 +85,11 @@ export interface ExecutableToolErrorResult { readonly isError: true; /** See {@link ExecutableToolSuccessResult.message}. */ readonly message?: string | undefined; + /** + * Internal telemetry-only hint. Tool result events may carry this field; + * it is not projected into model-facing tool messages. + */ + readonly cancelledByUser?: true | undefined; /** See {@link ExecutableToolSuccessResult.stopTurn}. */ readonly stopTurn?: boolean | undefined; } diff --git a/packages/agent-core/src/tools/builtin/shell/bash.ts b/packages/agent-core/src/tools/builtin/shell/bash.ts index 3c4ffafee..53e0fbefc 100644 --- a/packages/agent-core/src/tools/builtin/shell/bash.ts +++ b/packages/agent-core/src/tools/builtin/shell/bash.ts @@ -31,8 +31,9 @@ import { z } from 'zod'; import { ProcessBackgroundTask, type BackgroundManager } from '../../../agent/background'; import type { BuiltinTool } from '../../../agent/tool'; -import type { ExecutableToolResult, ToolExecution } from '../../../loop/types'; +import type { ExecutableToolErrorResult, ExecutableToolResult, ToolExecution } from '../../../loop/types'; import { renderPrompt } from '../../../utils/render-prompt'; +import { isUserCancellation } from '../../../utils/abort'; import { toInputJsonSchema } from '../../support/input-schema'; import { literalRulePattern, matchesGlobRuleSubject } from '../../support/rule-match'; import { ToolResultBuilder } from '../../support/result-builder'; @@ -325,7 +326,15 @@ export class BashTool implements BuiltinTool { }); } if (aborted) { - return builder.error('Interrupted by user', { brief: 'Interrupted by user' }); + const built = builder.error('Interrupted by user', { brief: 'Interrupted by user' }); + const errorResult: ExecutableToolErrorResult = { + output: built.output, + isError: true, + message: built.message, + }; + return isUserCancellation(signal.reason) + ? { ...errorResult, cancelledByUser: true } + : errorResult; } const isError = exitCode !== 0; diff --git a/packages/agent-core/test/agent/turn.test.ts b/packages/agent-core/test/agent/turn.test.ts index 0d3873fa8..61b427e7f 100644 --- a/packages/agent-core/test/agent/turn.test.ts +++ b/packages/agent-core/test/agent/turn.test.ts @@ -1,9 +1,10 @@ import { existsSync, mkdtempSync } from 'node:fs'; import { tmpdir } from 'node:os'; +import { Readable, type Writable } from 'node:stream'; import { join } from 'pathe'; import { setTimeout as delay } from 'node:timers/promises'; -import type { Kaos } from '@moonshot-ai/kaos'; +import type { Kaos, KaosProcess } from '@moonshot-ai/kaos'; import { APIConnectionError, APIEmptyResponseError, @@ -25,6 +26,10 @@ import type { SessionSubagentHost, } from '../../src/session/subagent-host'; import { recordingTelemetry, type TelemetryRecord } from '../fixtures/telemetry'; +import { + telemetryToolErrorType, + telemetryToolOutcome, +} from '../../src/agent/turn/tool-telemetry'; import { createFakeKaos } from '../tools/fixtures/fake-kaos'; import { createCommandKaos, testAgent, type TestAgentOptions } from './harness/agent'; import { executeTool } from '../tools/fixtures/execute-tool'; @@ -53,6 +58,63 @@ function captureLogs(): { logger: Logger; entries: CapturedLogEntry[] } { return { logger, entries }; } +describe('telemetryToolOutcome', () => { + it('returns cancelled when cancelledByUser is set', () => { + expect( + telemetryToolOutcome({ + output: 'Interrupted by user', + isError: true, + cancelledByUser: true, + }), + ).toBe('cancelled'); + }); + + it('returns error when output contains cancelled/aborted without user flag', () => { + expect( + telemetryToolOutcome({ + output: 'Upstream request was cancelled by the server', + isError: true, + }), + ).toBe('error'); + expect( + telemetryToolErrorType({ + output: 'Upstream request was cancelled by the server', + isError: true, + }), + ).toBe('ToolError'); + }); + + it('returns error for non-user abort output', () => { + expect( + telemetryToolOutcome({ + output: 'Tool "Lookup" was aborted', + isError: true, + }), + ).toBe('error'); + expect( + telemetryToolErrorType({ + output: 'Tool "Lookup" was aborted', + isError: true, + }), + ).toBe('ToolError'); + }); + + it('returns error for grace timeout output', () => { + expect( + telemetryToolOutcome({ + output: 'Tool "Bash" aborted by grace timeout (2000ms)', + isError: true, + }), + ).toBe('error'); + expect( + telemetryToolErrorType({ + output: 'Tool "Bash" aborted by grace timeout (2000ms)', + isError: true, + }), + ).toBe('ToolError'); + }); +}); + describe('Agent turn flow', () => { it('tracks turn_started and turn_interrupted telemetry', async () => { const records: TelemetryRecord[] = []; @@ -227,6 +289,79 @@ describe('Agent turn flow', () => { }); }); + it('tracks cancelled telemetry when Bash is interrupted by the user during execution', async () => { + const records: TelemetryRecord[] = []; + const kaos = createSlowCommandKaos(); + const ctx = testAgent({ kaos, telemetry: recordingTelemetry(records) }); + ctx.configure({ tools: ['Bash'] }); + await ctx.rpc.setPermission({ mode: 'yolo' }); + records.length = 0; + + ctx.mockNextResponse( + { type: 'text', text: 'Running a long command.' }, + bashCallWithId('call_bash', 'sleep 60'), + ); + + const turnEnd = ctx.untilTurnEnd(); + await ctx.rpc.prompt({ input: [{ type: 'text', text: 'Run sleep' }] }); + await vi.waitFor(() => { + expect(kaos.execWithEnv).toHaveBeenCalled(); + }); + await ctx.rpc.cancel({ turnId: 0 }); + await turnEnd; + + expect(records).toContainEqual({ + event: 'tool_call', + properties: expect.objectContaining({ + tool_name: 'Bash', + outcome: 'cancelled', + dup_type: 'normal', + duration_ms: expect.any(Number), + }), + }); + expect( + records.some( + (record) => + record.event === 'tool_call' && + 'error_type' in (record.properties as Record), + ), + ).toBe(false); + }); + + it('tracks Bash timeout as error telemetry', async () => { + const records: TelemetryRecord[] = []; + const kaos = createSlowCommandKaos(); + const ctx = testAgent({ kaos, telemetry: recordingTelemetry(records) }); + ctx.configure({ tools: ['Bash'] }); + await ctx.rpc.setPermission({ mode: 'yolo' }); + records.length = 0; + + ctx.mockNextResponse( + { type: 'text', text: 'Running a timed command.' }, + { + type: 'function', + id: 'call_bash_timeout', + name: 'Bash', + arguments: JSON.stringify({ command: 'sleep 60', timeout: 1 }), + }, + ); + ctx.mockNextResponse({ type: 'text', text: 'done' }); + + await ctx.rpc.prompt({ input: [{ type: 'text', text: 'Run with timeout' }] }); + await ctx.untilTurnEnd(); + + expect(records).toContainEqual({ + event: 'tool_call', + properties: expect.objectContaining({ + tool_name: 'Bash', + outcome: 'error', + error_type: 'ToolError', + dup_type: 'normal', + duration_ms: expect.any(Number), + }), + }); + }); + it('emits a failed turn and error when generation fails', async () => { const ctx = testAgent(); ctx.configure(); @@ -841,6 +976,7 @@ describe('Agent turn flow', () => { }); it('does not fire Interrupt for a non-user (programmatic) abort', async () => { + const records: TelemetryRecord[] = []; const triggered: Array<[string, string, number]> = []; const hookEngine = new HookEngine( [ @@ -858,12 +994,14 @@ describe('Agent turn flow', () => { const ctx = testAgent({ hookEngine, kaos: createCommandKaos('should-not-run'), + telemetry: recordingTelemetry(records), }); ctx.configure({ tools: ['Bash'] }); ctx.mockNextResponse({ type: 'text', text: 'I will run Bash.' }, bashCall()); await ctx.rpc.prompt({ input: [{ type: 'text', text: 'Run a command' }] }); await ctx.untilApprovalRequest(); + records.length = 0; // A programmatic abort (e.g. a subagent deadline timeout) carries a plain // AbortError as its reason, not a UserCancellationError, so it must not be @@ -872,6 +1010,16 @@ describe('Agent turn flow', () => { await ctx.untilTurnEnd(); expect(triggered).toEqual([]); + expect(records).toContainEqual({ + event: 'tool_call', + properties: expect.objectContaining({ + tool_name: 'Bash', + outcome: 'error', + error_type: 'ToolError', + dup_type: 'normal', + duration_ms: expect.any(Number), + }), + }); }); it('resolves the latest request-scoped OAuth auth before each generation', async () => { @@ -1499,7 +1647,7 @@ describe('Agent turn flow', () => { [wire] turn.cancel { "turnId": 0, "time": "