fix: defer truncation when last message has pending tool_use

philipph-askui · philipph-askui · commit 47844c51f829 · 2026-04-08T17:05:13.000+02:00
diff --git a/src/askui/models/shared/truncation_strategies.py b/src/askui/models/shared/truncation_strategies.py
@@ -54,6 +54,27 @@ def _has_orphaned_tool_results(msg: MessageParam) -> bool:
     return any(isinstance(b, ToolResultBlockParam) for b in msg.content)
 
 
+def _has_pending_tool_use(messages: list[MessageParam]) -> bool:
+    """Check if the last message is an assistant message with tool_use blocks.
+
+    Truncation can fire when ``Conversation._get_next_message``
+    adds the assistant response to history *before* the matching
+    ``tool_result`` user message is appended by
+    ``_execute_tools_if_present``.  In that window the history
+    ends on an assistant ``tool_use``, and
+    ``_summarize_message_history`` would append a plain-text user
+    message (the summarization prompt), which violates the API
+    constraint that every ``tool_use`` must be followed by its
+    ``tool_result``.
+    """
+    if not messages:
+        return False
+    last = messages[-1]
+    if last.role != "assistant" or isinstance(last.content, str):
+        return False
+    return any(isinstance(b, ToolUseBlockParam) for b in last.content)
+
+
 def _summarize_message_history(
     vlm_provider: VlmProvider,
     messages: list[MessageParam],
@@ -379,6 +400,11 @@ def truncate(self) -> None:
             msg = "Cannot truncate: no vlm_provider available"
             logger.warning(msg)
             return
+        if _has_pending_tool_use(self._truncated_message_history):
+            logger.debug(
+                "Deferring truncation: last message has pending tool_use"
+            )
+            return
 
         logger.info("Summarizing message history")
         system, tools, provider_options = self._summarization_request_context()
@@ -702,6 +728,11 @@ def truncate(self) -> None:
             msg = "Cannot truncate: no vlm_provider available"
             logger.warning(msg)
             return
+        if _has_pending_tool_use(self._truncated_message_history):
+            logger.debug(
+                "Deferring truncation: last message has pending tool_use"
+            )
+            return
 
         logger.info("Summarizing message history")
         system, tools, provider_options = self._summarization_request_context()
diff --git a/tests/unit/models/test_truncation_strategies.py b/tests/unit/models/test_truncation_strategies.py
@@ -471,6 +471,41 @@ def test_auto_truncation_on_token_limit(self) -> None:
         # Should have been auto-truncated
         vlm.create_message.assert_called_once()
 
+    def test_truncate_deferred_when_last_message_has_tool_use(self) -> None:
+        """Truncation must not fire when the last message is an assistant
+        tool_use whose tool_result hasn't been appended yet."""
+        vlm = _make_vlm_provider()
+        strategy = _make_strategy(vlm_provider=vlm, n_messages_to_keep=2)
+        for i in range(4):
+            role = "user" if i % 2 == 0 else "assistant"
+            strategy.append_message(MessageParam(role=role, content=f"msg {i}"))
+        # Append an assistant message with tool_use (simulates the window
+        # between _get_next_message and _execute_tools_if_present)
+        strategy.append_message(
+            MessageParam(
+                role="assistant",
+                content=[
+                    ToolUseBlockParam(
+                        id="tu_1", input={}, name="tool_a", type="tool_use"
+                    ),
+                ],
+            )
+        )
+        # Truncation should be deferred — VLM must NOT be called
+        strategy.truncate()
+        vlm.create_message.assert_not_called()
+        # After appending the matching tool_result, truncation should proceed
+        strategy.append_message(
+            MessageParam(
+                role="user",
+                content=[
+                    ToolResultBlockParam(tool_use_id="tu_1", content="result"),
+                ],
+            )
+        )
+        strategy.truncate()
+        vlm.create_message.assert_called_once()
+
 
 # ---------------------------------------------------------------------------
 # Edge cases
@@ -743,6 +778,37 @@ def test_auto_truncation_on_token_limit(self) -> None:
         strategy.append_message(MessageParam(role="user", content="z" * 300))
         vlm.create_message.assert_called_once()
 
+    def test_truncate_deferred_when_last_message_has_tool_use(self) -> None:
+        """Truncation must not fire when the last message is an assistant
+        tool_use whose tool_result hasn't been appended yet."""
+        vlm = _make_vlm_provider()
+        strategy = _make_summarizing_strategy(vlm_provider=vlm, n_messages_to_keep=2)
+        for i in range(4):
+            role = "user" if i % 2 == 0 else "assistant"
+            strategy.append_message(MessageParam(role=role, content=f"msg {i}"))
+        strategy.append_message(
+            MessageParam(
+                role="assistant",
+                content=[
+                    ToolUseBlockParam(
+                        id="tu_1", input={}, name="tool_a", type="tool_use"
+                    ),
+                ],
+            )
+        )
+        strategy.truncate()
+        vlm.create_message.assert_not_called()
+        strategy.append_message(
+            MessageParam(
+                role="user",
+                content=[
+                    ToolResultBlockParam(tool_use_id="tu_1", content="result"),
+                ],
+            )
+        )
+        strategy.truncate()
+        vlm.create_message.assert_called_once()
+
 
 class TestReporterIntegration:
     def test_summarizing_strategy_reports_summary_response(self) -> None: