Skip to content

Commit ded3735

Browse files
committed
test(#2053): add coverage for tool_use/tool_result sequencing fixes
Add tests covering: - Reverse validation: orphan tool_results referencing non-existent tool_use in the preceding assistant message are detected. - Reverse validation: tool_results with no preceding assistant message at all are detected. - Repair merge: when partial tool_results exist in the next user message, missing results are merged into it (not inserted as a separate synthetic message that would break sequencing). - Repair insert: when there is no next user message at all, a synthetic user message is correctly inserted. - Beta converter: orphan tool_results (no preceding assistant tool_use) are dropped by the pendingAssistantToolUse guard. - Beta converter: normal tool_use/tool_result flow continues to work correctly with the guard in place. - Runtime event tests updated for SessionID field added to AgentChoiceEvent and AgentChoiceReasoningEvent. Assisted-By: docker-agent
1 parent 876e910 commit ded3735

1 file changed

Lines changed: 205 additions & 0 deletions

File tree

pkg/model/provider/anthropic/client_test.go

Lines changed: 205 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -600,3 +600,208 @@ func TestExtractSystemBlocksCacheControl(t *testing.T) {
600600
assert.Equal(t, "ephemeral", string(blocks[3].CacheControl.Type))
601601
assert.Empty(t, string(blocks[3].CacheControl.TTL))
602602
}
603+
604+
func TestValidateSequencing_ReverseOrphanToolResult(t *testing.T) {
605+
// A user message with tool_result that references a tool_use_id not present
606+
// in the preceding assistant message should be caught by reverse validation.
607+
msgs := []anthropic.MessageParam{
608+
anthropic.NewUserMessage(anthropic.NewTextBlock("start")),
609+
anthropic.NewAssistantMessage(
610+
anthropic.ContentBlockParamUnion{
611+
OfToolUse: &anthropic.ToolUseBlockParam{
612+
ID: "tool-A",
613+
Input: map[string]any{},
614+
Name: "read_file",
615+
},
616+
},
617+
),
618+
// User message with tool_results for A (matching) and B (orphan)
619+
anthropic.NewUserMessage(
620+
anthropic.NewToolResultBlock("tool-A", "result-A", false),
621+
anthropic.NewToolResultBlock("tool-B", "result-B", false),
622+
),
623+
}
624+
625+
err := validateAnthropicSequencing(msgs)
626+
require.Error(t, err)
627+
assert.Contains(t, err.Error(), "orphan tool_result")
628+
assert.Contains(t, err.Error(), "tool-B")
629+
}
630+
631+
func TestValidateSequencing_ReverseNoAssistantBeforeToolResult(t *testing.T) {
632+
// A user message with tool_result as the first message should fail.
633+
msgs := []anthropic.MessageParam{
634+
anthropic.NewUserMessage(
635+
anthropic.NewToolResultBlock("tool-A", "result-A", false),
636+
),
637+
}
638+
639+
err := validateAnthropicSequencing(msgs)
640+
require.Error(t, err)
641+
assert.Contains(t, err.Error(), "no preceding assistant message")
642+
}
643+
644+
func TestRepairSequencing_MergesIntoExistingUserMessage(t *testing.T) {
645+
// When an assistant has tool_use A and B, but the next user message only has
646+
// tool_result for A, repair should merge a synthetic tool_result for B into
647+
// the same user message rather than inserting a separate synthetic message.
648+
msgs := []anthropic.MessageParam{
649+
anthropic.NewUserMessage(anthropic.NewTextBlock("start")),
650+
anthropic.NewAssistantMessage(
651+
anthropic.ContentBlockParamUnion{
652+
OfToolUse: &anthropic.ToolUseBlockParam{
653+
ID: "tool-A",
654+
Input: map[string]any{},
655+
Name: "t1",
656+
},
657+
},
658+
anthropic.ContentBlockParamUnion{
659+
OfToolUse: &anthropic.ToolUseBlockParam{
660+
ID: "tool-B",
661+
Input: map[string]any{},
662+
Name: "t2",
663+
},
664+
},
665+
),
666+
// Next user message only has tool_result for A, missing B
667+
anthropic.NewUserMessage(
668+
anthropic.NewToolResultBlock("tool-A", "result-A", false),
669+
),
670+
anthropic.NewUserMessage(anthropic.NewTextBlock("continue")),
671+
}
672+
673+
// Should fail validation
674+
require.Error(t, validateAnthropicSequencing(msgs))
675+
676+
// Repair
677+
repaired := repairAnthropicSequencing(msgs)
678+
679+
// Should pass validation after repair
680+
require.NoError(t, validateAnthropicSequencing(repaired))
681+
682+
// The total message count should NOT increase (merged, not inserted)
683+
assert.Len(t, repaired, 4, "repair should merge into existing user message, not insert a new one")
684+
685+
// Verify the user message at index 2 now has both tool_results
686+
b, err := json.Marshal(repaired[2])
687+
require.NoError(t, err)
688+
var m map[string]any
689+
require.NoError(t, json.Unmarshal(b, &m))
690+
691+
content, ok := m["content"].([]any)
692+
require.True(t, ok)
693+
694+
toolResultIDs := make(map[string]struct{})
695+
for _, c := range content {
696+
if cb, ok := c.(map[string]any); ok {
697+
if cb["type"] == "tool_result" {
698+
if id, _ := cb["tool_use_id"].(string); id != "" {
699+
toolResultIDs[id] = struct{}{}
700+
}
701+
}
702+
}
703+
}
704+
assert.Contains(t, toolResultIDs, "tool-A")
705+
assert.Contains(t, toolResultIDs, "tool-B")
706+
}
707+
708+
func TestRepairSequencing_InsertsWhenNoNextUserMessage(t *testing.T) {
709+
// When an assistant has tool_use but there's no following user message at all,
710+
// repair should insert a synthetic user message.
711+
msgs := []anthropic.MessageParam{
712+
anthropic.NewUserMessage(anthropic.NewTextBlock("start")),
713+
anthropic.NewAssistantMessage(
714+
anthropic.ContentBlockParamUnion{
715+
OfToolUse: &anthropic.ToolUseBlockParam{
716+
ID: "tool-X",
717+
Input: map[string]any{},
718+
Name: "do_thing",
719+
},
720+
},
721+
),
722+
}
723+
724+
require.Error(t, validateAnthropicSequencing(msgs))
725+
726+
repaired := repairAnthropicSequencing(msgs)
727+
728+
require.NoError(t, validateAnthropicSequencing(repaired))
729+
assert.Len(t, repaired, 3, "should insert a synthetic user message")
730+
}
731+
732+
func TestConvertBetaMessages_DropsOrphanToolResults(t *testing.T) {
733+
// When a tool result message appears without a preceding assistant message
734+
// with tool_use, the beta converter should drop it.
735+
msgs := []chat.Message{
736+
{Role: chat.MessageRoleUser, Content: "start"},
737+
{Role: chat.MessageRoleAssistant, Content: "sure, let me help"},
738+
// Orphan tool result — previous assistant has no tool_use
739+
{Role: chat.MessageRoleTool, ToolCallID: "tool-orphan", Content: "orphan result"},
740+
{Role: chat.MessageRoleUser, Content: "continue"},
741+
}
742+
743+
converted, err := testClient().convertBetaMessages(t.Context(), msgs)
744+
require.NoError(t, err)
745+
746+
// Should have: user(start), assistant(text), user(continue)
747+
// The orphan tool result should be dropped
748+
require.Len(t, converted, 3)
749+
750+
for _, msg := range converted {
751+
b, err := json.Marshal(msg)
752+
require.NoError(t, err)
753+
var m map[string]any
754+
require.NoError(t, json.Unmarshal(b, &m))
755+
content, _ := m["content"].([]any)
756+
for _, c := range content {
757+
if cb, ok := c.(map[string]any); ok {
758+
assert.NotEqual(t, "tool_result", cb["type"],
759+
"orphan tool_result should not be included in beta messages")
760+
}
761+
}
762+
}
763+
}
764+
765+
func TestConvertBetaMessages_IncludesToolResultsAfterToolUse(t *testing.T) {
766+
// Normal case: assistant with tool_use followed by tool results should work.
767+
msgs := []chat.Message{
768+
{Role: chat.MessageRoleUser, Content: "start"},
769+
{
770+
Role: chat.MessageRoleAssistant,
771+
ToolCalls: []tools.ToolCall{
772+
{ID: "tool-1", Function: tools.FunctionCall{Name: "read_file", Arguments: "{}"}},
773+
{ID: "tool-2", Function: tools.FunctionCall{Name: "write_file", Arguments: "{}"}},
774+
},
775+
},
776+
{Role: chat.MessageRoleTool, ToolCallID: "tool-1", Content: "file content"},
777+
{Role: chat.MessageRoleTool, ToolCallID: "tool-2", Content: "ok"},
778+
{Role: chat.MessageRoleUser, Content: "done"},
779+
}
780+
781+
converted, err := testClient().convertBetaMessages(t.Context(), msgs)
782+
require.NoError(t, err)
783+
784+
// Should have: user(start), assistant(tool_use x2), user(tool_result x2), user(done)
785+
require.Len(t, converted, 4)
786+
787+
// Verify the tool results are in the third message
788+
b, err := json.Marshal(converted[2])
789+
require.NoError(t, err)
790+
var m map[string]any
791+
require.NoError(t, json.Unmarshal(b, &m))
792+
assert.Equal(t, "user", m["role"])
793+
content, ok := m["content"].([]any)
794+
require.True(t, ok)
795+
assert.Len(t, content, 2)
796+
797+
ids := make(map[string]struct{})
798+
for _, c := range content {
799+
if cb, ok := c.(map[string]any); ok && cb["type"] == "tool_result" {
800+
if id, _ := cb["tool_use_id"].(string); id != "" {
801+
ids[id] = struct{}{}
802+
}
803+
}
804+
}
805+
assert.Contains(t, ids, "tool-1")
806+
assert.Contains(t, ids, "tool-2")
807+
}

0 commit comments

Comments
 (0)