Skip to content

Commit f5004ac

Browse files
authored
fix: prevent time-travel bug in parallel tool calling (#11046)
1 parent e7965d9 commit f5004ac

2 files changed

Lines changed: 146 additions & 1 deletion

File tree

src/core/task/Task.ts

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -355,6 +355,20 @@ export class Task extends EventEmitter<TaskEvents> implements TaskLike {
355355
userMessageContent: (Anthropic.TextBlockParam | Anthropic.ImageBlockParam | Anthropic.ToolResultBlockParam)[] = []
356356
userMessageContentReady = false
357357

358+
/**
359+
* Flag indicating whether the assistant message for the current streaming session
360+
* has been saved to API conversation history.
361+
*
362+
* This is critical for parallel tool calling: tools should NOT execute until
363+
* the assistant message is saved. Otherwise, if a tool like `new_task` triggers
364+
* `flushPendingToolResultsToHistory()`, the user message with tool_results would
365+
* appear BEFORE the assistant message with tool_uses, causing API errors.
366+
*
367+
* Reset to `false` at the start of each API request.
368+
* Set to `true` after the assistant message is saved in `recursivelyMakeClineRequests`.
369+
*/
370+
assistantMessageSavedToHistory = false
371+
358372
/**
359373
* Push a tool_result block to userMessageContent, preventing duplicates.
360374
* Duplicate tool_use_ids cause API errors.
@@ -1063,6 +1077,36 @@ export class Task extends EventEmitter<TaskEvents> implements TaskLike {
10631077
return
10641078
}
10651079

1080+
// CRITICAL: Wait for the assistant message to be saved to API history first.
1081+
// Without this, tool_result blocks would appear BEFORE tool_use blocks in the
1082+
// conversation history, causing API errors like:
1083+
// "unexpected `tool_use_id` found in `tool_result` blocks"
1084+
//
1085+
// This can happen when parallel tools are called (e.g., update_todo_list + new_task).
1086+
// Tools execute during streaming via presentAssistantMessage, BEFORE the assistant
1087+
// message is saved. When new_task triggers delegation, it calls this method to
1088+
// flush pending results - but the assistant message hasn't been saved yet.
1089+
//
1090+
// The assistantMessageSavedToHistory flag is:
1091+
// - Reset to false at the start of each API request
1092+
// - Set to true after the assistant message is saved in recursivelyMakeClineRequests
1093+
if (!this.assistantMessageSavedToHistory) {
1094+
await pWaitFor(() => this.assistantMessageSavedToHistory || this.abort, {
1095+
interval: 50,
1096+
timeout: 30_000, // 30 second timeout as safety net
1097+
}).catch(() => {
1098+
// If timeout or abort, log and proceed anyway to avoid hanging
1099+
console.warn(
1100+
`[Task#${this.taskId}] flushPendingToolResultsToHistory: timed out waiting for assistant message to be saved`,
1101+
)
1102+
})
1103+
}
1104+
1105+
// If task was aborted while waiting, don't flush
1106+
if (this.abort) {
1107+
return
1108+
}
1109+
10661110
// Save the user message with tool_result blocks
10671111
const userMessage: Anthropic.MessageParam = {
10681112
role: "user",
@@ -2707,6 +2751,7 @@ export class Task extends EventEmitter<TaskEvents> implements TaskLike {
27072751
this.userMessageContentReady = false
27082752
this.didRejectTool = false
27092753
this.didAlreadyUseTool = false
2754+
this.assistantMessageSavedToHistory = false
27102755
// Reset tool failure flag for each new assistant turn - this ensures that tool failures
27112756
// only prevent attempt_completion within the same assistant message, not across turns
27122757
// (e.g., if a tool fails, then user sends a message saying "just complete anyway")
@@ -3488,6 +3533,7 @@ export class Task extends EventEmitter<TaskEvents> implements TaskLike {
34883533
{ role: "assistant", content: assistantContent },
34893534
reasoningMessage || undefined,
34903535
)
3536+
this.assistantMessageSavedToHistory = true
34913537

34923538
TelemetryService.instance.captureConversationMessage(this.taskId, "assistant")
34933539
}

src/core/task/__tests__/flushPendingToolResultsToHistory.spec.ts

Lines changed: 100 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,8 +38,12 @@ vi.mock("fs/promises", async (importOriginal) => {
3838
}
3939
})
4040

41+
const { mockPWaitFor } = vi.hoisted(() => {
42+
return { mockPWaitFor: vi.fn().mockImplementation(async () => Promise.resolve()) }
43+
})
44+
4145
vi.mock("p-wait-for", () => ({
42-
default: vi.fn().mockImplementation(async () => Promise.resolve()),
46+
default: mockPWaitFor,
4347
}))
4448

4549
vi.mock("vscode", () => {
@@ -344,4 +348,99 @@ describe("flushPendingToolResultsToHistory", () => {
344348
expect((task.apiConversationHistory[0] as any).ts).toBeGreaterThanOrEqual(beforeTs)
345349
expect((task.apiConversationHistory[0] as any).ts).toBeLessThanOrEqual(afterTs)
346350
})
351+
352+
it("should skip waiting for assistantMessageSavedToHistory when flag is already true", async () => {
353+
const task = new Task({
354+
provider: mockProvider,
355+
apiConfiguration: mockApiConfig,
356+
task: "test task",
357+
startTask: false,
358+
})
359+
360+
// Set flag to true (assistant message already saved)
361+
task.assistantMessageSavedToHistory = true
362+
363+
// Set up pending tool result
364+
task.userMessageContent = [
365+
{
366+
type: "tool_result",
367+
tool_use_id: "tool-skip-wait",
368+
content: "Result when flag is true",
369+
},
370+
]
371+
372+
// Clear mock call history
373+
mockPWaitFor.mockClear()
374+
375+
await task.flushPendingToolResultsToHistory()
376+
377+
// Should not have called pWaitFor since flag was already true
378+
expect(mockPWaitFor).not.toHaveBeenCalled()
379+
380+
// Should still save the message
381+
expect(task.apiConversationHistory.length).toBe(1)
382+
expect((task.apiConversationHistory[0].content as any[])[0].tool_use_id).toBe("tool-skip-wait")
383+
})
384+
385+
it("should wait for assistantMessageSavedToHistory when flag is false", async () => {
386+
const task = new Task({
387+
provider: mockProvider,
388+
apiConfiguration: mockApiConfig,
389+
task: "test task",
390+
startTask: false,
391+
})
392+
393+
// Flag is false by default - assistant message not yet saved
394+
expect(task.assistantMessageSavedToHistory).toBe(false)
395+
396+
// Set up pending tool result
397+
task.userMessageContent = [
398+
{
399+
type: "tool_result",
400+
tool_use_id: "tool-wait",
401+
content: "Result when flag is false",
402+
},
403+
]
404+
405+
// Clear mock call history
406+
mockPWaitFor.mockClear()
407+
408+
await task.flushPendingToolResultsToHistory()
409+
410+
// Should have called pWaitFor since flag was false
411+
expect(mockPWaitFor).toHaveBeenCalled()
412+
413+
// Should still save the message (mock resolves immediately)
414+
expect(task.apiConversationHistory.length).toBe(1)
415+
})
416+
417+
it("should not flush when task is aborted during wait", async () => {
418+
const task = new Task({
419+
provider: mockProvider,
420+
apiConfiguration: mockApiConfig,
421+
task: "test task",
422+
startTask: false,
423+
})
424+
425+
// Flag is false - will need to wait
426+
task.assistantMessageSavedToHistory = false
427+
428+
// Set up pending tool result
429+
task.userMessageContent = [
430+
{
431+
type: "tool_result",
432+
tool_use_id: "tool-aborted",
433+
content: "Should not be saved",
434+
},
435+
]
436+
437+
// Set abort flag - this will cause the condition in pWaitFor to return true
438+
// AND will cause early return after the wait
439+
task.abort = true
440+
441+
await task.flushPendingToolResultsToHistory()
442+
443+
// Should not have saved anything since task was aborted
444+
expect(task.apiConversationHistory.length).toBe(0)
445+
})
347446
})

0 commit comments

Comments
 (0)