fix: prevent time-travel bug in parallel tool calling (#11046)

daniel-lxs · web-flow · commit f5004ac40a66 · 2026-01-28T13:25:02.000-05:00
diff --git a/src/core/task/Task.ts b/src/core/task/Task.ts
@@ -355,6 +355,20 @@ export class Task extends EventEmitter<TaskEvents> implements TaskLike {
 	userMessageContent: (Anthropic.TextBlockParam | Anthropic.ImageBlockParam | Anthropic.ToolResultBlockParam)[] = []
 	userMessageContentReady = false
 
+	/**
+	 * Flag indicating whether the assistant message for the current streaming session
+	 * has been saved to API conversation history.
+	 *
+	 * This is critical for parallel tool calling: tools should NOT execute until
+	 * the assistant message is saved. Otherwise, if a tool like `new_task` triggers
+	 * `flushPendingToolResultsToHistory()`, the user message with tool_results would
+	 * appear BEFORE the assistant message with tool_uses, causing API errors.
+	 *
+	 * Reset to `false` at the start of each API request.
+	 * Set to `true` after the assistant message is saved in `recursivelyMakeClineRequests`.
+	 */
+	assistantMessageSavedToHistory = false
+
 	/**
 	 * Push a tool_result block to userMessageContent, preventing duplicates.
 	 * Duplicate tool_use_ids cause API errors.
@@ -1063,6 +1077,36 @@ export class Task extends EventEmitter<TaskEvents> implements TaskLike {
 			return
 		}
 
+		// CRITICAL: Wait for the assistant message to be saved to API history first.
+		// Without this, tool_result blocks would appear BEFORE tool_use blocks in the
+		// conversation history, causing API errors like:
+		// "unexpected `tool_use_id` found in `tool_result` blocks"
+		//
+		// This can happen when parallel tools are called (e.g., update_todo_list + new_task).
+		// Tools execute during streaming via presentAssistantMessage, BEFORE the assistant
+		// message is saved. When new_task triggers delegation, it calls this method to
+		// flush pending results - but the assistant message hasn't been saved yet.
+		//
+		// The assistantMessageSavedToHistory flag is:
+		// - Reset to false at the start of each API request
+		// - Set to true after the assistant message is saved in recursivelyMakeClineRequests
+		if (!this.assistantMessageSavedToHistory) {
+			await pWaitFor(() => this.assistantMessageSavedToHistory || this.abort, {
+				interval: 50,
+				timeout: 30_000, // 30 second timeout as safety net
+			}).catch(() => {
+				// If timeout or abort, log and proceed anyway to avoid hanging
+				console.warn(
+					`[Task#${this.taskId}] flushPendingToolResultsToHistory: timed out waiting for assistant message to be saved`,
+				)
+			})
+		}
+
+		// If task was aborted while waiting, don't flush
+		if (this.abort) {
+			return
+		}
+
 		// Save the user message with tool_result blocks
 		const userMessage: Anthropic.MessageParam = {
 			role: "user",
@@ -2707,6 +2751,7 @@ export class Task extends EventEmitter<TaskEvents> implements TaskLike {
 				this.userMessageContentReady = false
 				this.didRejectTool = false
 				this.didAlreadyUseTool = false
+				this.assistantMessageSavedToHistory = false
 				// Reset tool failure flag for each new assistant turn - this ensures that tool failures
 				// only prevent attempt_completion within the same assistant message, not across turns
 				// (e.g., if a tool fails, then user sends a message saying "just complete anyway")
@@ -3488,6 +3533,7 @@ export class Task extends EventEmitter<TaskEvents> implements TaskLike {
 						{ role: "assistant", content: assistantContent },
 						reasoningMessage || undefined,
 					)
+					this.assistantMessageSavedToHistory = true
 
 					TelemetryService.instance.captureConversationMessage(this.taskId, "assistant")
 				}
diff --git a/src/core/task/__tests__/flushPendingToolResultsToHistory.spec.ts b/src/core/task/__tests__/flushPendingToolResultsToHistory.spec.ts
@@ -38,8 +38,12 @@ vi.mock("fs/promises", async (importOriginal) => {
 	}
 })
 
+const { mockPWaitFor } = vi.hoisted(() => {
+	return { mockPWaitFor: vi.fn().mockImplementation(async () => Promise.resolve()) }
+})
+
 vi.mock("p-wait-for", () => ({
-	default: vi.fn().mockImplementation(async () => Promise.resolve()),
+	default: mockPWaitFor,
 }))
 
 vi.mock("vscode", () => {
@@ -344,4 +348,99 @@ describe("flushPendingToolResultsToHistory", () => {
 		expect((task.apiConversationHistory[0] as any).ts).toBeGreaterThanOrEqual(beforeTs)
 		expect((task.apiConversationHistory[0] as any).ts).toBeLessThanOrEqual(afterTs)
 	})
+
+	it("should skip waiting for assistantMessageSavedToHistory when flag is already true", async () => {
+		const task = new Task({
+			provider: mockProvider,
+			apiConfiguration: mockApiConfig,
+			task: "test task",
+			startTask: false,
+		})
+
+		// Set flag to true (assistant message already saved)
+		task.assistantMessageSavedToHistory = true
+
+		// Set up pending tool result
+		task.userMessageContent = [
+			{
+				type: "tool_result",
+				tool_use_id: "tool-skip-wait",
+				content: "Result when flag is true",
+			},
+		]
+
+		// Clear mock call history
+		mockPWaitFor.mockClear()
+
+		await task.flushPendingToolResultsToHistory()
+
+		// Should not have called pWaitFor since flag was already true
+		expect(mockPWaitFor).not.toHaveBeenCalled()
+
+		// Should still save the message
+		expect(task.apiConversationHistory.length).toBe(1)
+		expect((task.apiConversationHistory[0].content as any[])[0].tool_use_id).toBe("tool-skip-wait")
+	})
+
+	it("should wait for assistantMessageSavedToHistory when flag is false", async () => {
+		const task = new Task({
+			provider: mockProvider,
+			apiConfiguration: mockApiConfig,
+			task: "test task",
+			startTask: false,
+		})
+
+		// Flag is false by default - assistant message not yet saved
+		expect(task.assistantMessageSavedToHistory).toBe(false)
+
+		// Set up pending tool result
+		task.userMessageContent = [
+			{
+				type: "tool_result",
+				tool_use_id: "tool-wait",
+				content: "Result when flag is false",
+			},
+		]
+
+		// Clear mock call history
+		mockPWaitFor.mockClear()
+
+		await task.flushPendingToolResultsToHistory()
+
+		// Should have called pWaitFor since flag was false
+		expect(mockPWaitFor).toHaveBeenCalled()
+
+		// Should still save the message (mock resolves immediately)
+		expect(task.apiConversationHistory.length).toBe(1)
+	})
+
+	it("should not flush when task is aborted during wait", async () => {
+		const task = new Task({
+			provider: mockProvider,
+			apiConfiguration: mockApiConfig,
+			task: "test task",
+			startTask: false,
+		})
+
+		// Flag is false - will need to wait
+		task.assistantMessageSavedToHistory = false
+
+		// Set up pending tool result
+		task.userMessageContent = [
+			{
+				type: "tool_result",
+				tool_use_id: "tool-aborted",
+				content: "Should not be saved",
+			},
+		]
+
+		// Set abort flag - this will cause the condition in pWaitFor to return true
+		// AND will cause early return after the wait
+		task.abort = true
+
+		await task.flushPendingToolResultsToHistory()
+
+		// Should not have saved anything since task was aborted
+		expect(task.apiConversationHistory.length).toBe(0)
+	})
 })