From 77ffb63e3af5770f568d81f84480bdacf93b5869 Mon Sep 17 00:00:00 2001 From: Ed Zynda Date: Thu, 25 Jun 2026 23:22:40 +0300 Subject: [PATCH 1/2] fix(agent): honor AddTools/RemoveTools mid-turn at next LLM step (#76) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Runtime AddTools/RemoveTools changes were not visible to an in-flight turn — they only took effect on the next PromptResult, contradicting the documented "next LLM step" contract. The whole agentic loop runs inside a single fantasy Stream call that captures the tool snapshot taken when Stream began, so swapping the rebuilt fantasy agent mid-turn had no effect on the running stream. - Always wire PrepareStep and set PrepareStepResult.Tools from the live tool set each step, so fantasy re-reads tools per step - Extract composeAllTools() shared with rebuildFantasyAgent so the per-step set matches the baked-in composition (steady state unchanged) - Guard extraTools with a RWMutex so the in-flight Stream can read the live set while a concurrent SetExtraTools mutates it - Fix contradictory AddTools doc ("next turn" -> "next LLM step") - Add regression tests for mid-turn tool addition and removal Fixes #76 --- internal/agent/agent.go | 142 ++++++++++------- internal/agent/agent_midturn_tools_test.go | 168 +++++++++++++++++++++ pkg/kit/kit.go | 6 +- 3 files changed, 262 insertions(+), 54 deletions(-) create mode 100644 internal/agent/agent_midturn_tools_test.go diff --git a/internal/agent/agent.go b/internal/agent/agent.go index 099e2b73..028a45ec 100644 --- a/internal/agent/agent.go +++ b/internal/agent/agent.go @@ -252,6 +252,12 @@ type Agent struct { // callers (e.g. Kit.applyComposedSystemPrompt invoked from multiple // goroutines) don't race on a.systemPrompt / a.fantasyAgent. promptMu sync.Mutex + + // toolsMu guards extraTools so the live tool set can be re-read each + // step (via composeAllTools) from inside an in-flight Stream while a + // concurrent SetExtraTools mutates it. Without this, mid-turn AddTools/ + // RemoveTools would race on the extraTools slice header. + toolsMu sync.RWMutex } // GenerateWithLoopResult contains the result and conversation history from an agent interaction. @@ -430,17 +436,7 @@ func (a *Agent) ensureMCPTools() { // tool set (core + MCP + extension tools). Used after MCP tools arrive // asynchronously and by SetModel. func (a *Agent) rebuildFantasyAgent() { - allTools := make([]fantasy.AgentTool, len(a.coreTools)) - copy(allTools, a.coreTools) - if a.toolManager != nil { - allTools = append(allTools, mcpToolsToAgentTools(a.toolManager.GetTools(), a.toolManager)...) - } - if len(a.extraTools) > 0 { - allTools = append(allTools, a.extraTools...) - } - if a.toolWrapper != nil { - allTools = a.toolWrapper(allTools) - } + allTools := a.composeAllTools() providerResult := &models.ProviderResult{ Model: a.model, @@ -456,6 +452,29 @@ func (a *Agent) rebuildFantasyAgent() { a.fantasyAgent = fantasy.NewAgent(a.model, agentOpts...) } +// composeAllTools builds the full live tool set (core + MCP + extra tools) +// with the tool wrapper applied, matching the tools baked into the fantasy +// agent by rebuildFantasyAgent. It re-reads the current extraTools under +// toolsMu so callers (notably the per-step PrepareStep callback) observe +// runtime AddTools/RemoveTools changes mid-turn. +func (a *Agent) composeAllTools() []fantasy.AgentTool { + a.toolsMu.RLock() + defer a.toolsMu.RUnlock() + + allTools := make([]fantasy.AgentTool, len(a.coreTools)) + copy(allTools, a.coreTools) + if a.toolManager != nil { + allTools = append(allTools, mcpToolsToAgentTools(a.toolManager.GetTools(), a.toolManager)...) + } + if len(a.extraTools) > 0 { + allTools = append(allTools, a.extraTools...) + } + if a.toolWrapper != nil { + allTools = a.toolWrapper(allTools) + } + return allTools +} + // buildAgentOptions constructs the fantasy.AgentOption slice from config, // provider result, and the combined tool list. Shared by NewAgent, // rebuildFantasyAgent, and SetModel. @@ -825,60 +844,71 @@ func (a *Agent) GenerateWithCallbacks(ctx context.Context, messages []fantasy.Me }, } - // Always wire up PrepareStep to handle both steering and the - // OnPrepareStep hook. Steering drains its channel first, then - // OnPrepareStep hooks run against the (possibly already steered) - // messages. + // Always wire up PrepareStep. It serves three purposes: + // 1. Re-read the live tool set each step so runtime AddTools/ + // RemoveTools (and MCP server changes) take effect at the next + // LLM step of the *current* turn, as documented. The entire + // multi-step loop runs inside a single fantasy Stream call that + // otherwise captures the tool snapshot taken when Stream began; + // populating PrepareStepResult.Tools makes fantasy re-read tools + // per step instead. + // 2. Steering: drain queued steer messages. + // 3. The OnPrepareStep hook. + // Steering drains its channel first, then OnPrepareStep hooks run + // against the (possibly already steered) messages. steerCh := steerChFromContext(ctx) onConsumed := steerConsumedFromContext(ctx) hasSteering := steerCh != nil hasPrepareStepHook := cb.OnPrepareStep != nil - if hasSteering || hasPrepareStepHook { - streamCall.PrepareStep = func( - stepCtx context.Context, - opts fantasy.PrepareStepFunctionOptions, - ) (context.Context, fantasy.PrepareStepResult, error) { - result := fantasy.PrepareStepResult{ - Model: opts.Model, - Messages: opts.Messages, - } + streamCall.PrepareStep = func( + stepCtx context.Context, + opts fantasy.PrepareStepFunctionOptions, + ) (context.Context, fantasy.PrepareStepResult, error) { + result := fantasy.PrepareStepResult{ + Model: opts.Model, + Messages: opts.Messages, + // Re-read the live tool set so mid-turn tool changes are + // honored. composeAllTools matches the composition baked + // into the fantasy agent, so in the steady state this is + // identical to the snapshot fantasy would have used. + Tools: a.composeAllTools(), + } - // Phase 1: Drain steering channel (if present). - if hasSteering { - var steered []SteerMessage - for { - select { - case msg := <-steerCh: - steered = append(steered, msg) - default: - goto done - } + // Phase 1: Drain steering channel (if present). + if hasSteering { + var steered []SteerMessage + for { + select { + case msg := <-steerCh: + steered = append(steered, msg) + default: + goto done + } + } + done: + if len(steered) > 0 { + for _, sm := range steered { + result.Messages = append(result.Messages, + fantasy.NewUserMessage(sm.Text, sm.Files...)) } - done: - if len(steered) > 0 { - for _, sm := range steered { - result.Messages = append(result.Messages, - fantasy.NewUserMessage(sm.Text, sm.Files...)) - } - if onConsumed != nil { - onConsumed(len(steered)) - } + if onConsumed != nil { + onConsumed(len(steered)) } } + } - // Phase 2: Run OnPrepareStep hook (if registered). - if hasPrepareStepHook { - if replacement := cb.OnPrepareStep(opts.StepNumber, result.Messages); replacement != nil { - result.Messages = replacement - } + // Phase 2: Run OnPrepareStep hook (if registered). + if hasPrepareStepHook { + if replacement := cb.OnPrepareStep(opts.StepNumber, result.Messages); replacement != nil { + result.Messages = replacement } + } - // Apply message-level cache control for Anthropic models. - result.Messages = applyCacheControlToMessages(result.Messages) + // Apply message-level cache control for Anthropic models. + result.Messages = applyCacheControlToMessages(result.Messages) - return stepCtx, result, nil - } + return stepCtx, result, nil } // Wire OnRetry callback if provided. @@ -1076,6 +1106,8 @@ func extractMCPContentText(result string) string { // GetTools returns the list of available tools loaded in the agent, // including core tools, MCP tools, and extension-registered tools. func (a *Agent) GetTools() []fantasy.AgentTool { + a.toolsMu.RLock() + defer a.toolsMu.RUnlock() allTools := make([]fantasy.AgentTool, len(a.coreTools)) copy(allTools, a.coreTools) if a.toolManager != nil { @@ -1102,6 +1134,8 @@ func (a *Agent) GetMCPToolCount() int { // GetExtensionToolCount returns the number of tools registered by extensions. func (a *Agent) GetExtensionToolCount() int { + a.toolsMu.RLock() + defer a.toolsMu.RUnlock() return len(a.extraTools) } @@ -1109,6 +1143,8 @@ func (a *Agent) GetExtensionToolCount() int { // extension-registered tools). The returned slice is a copy so callers can // snapshot and later restore it via SetExtraTools. func (a *Agent) GetExtraTools() []fantasy.AgentTool { + a.toolsMu.RLock() + defer a.toolsMu.RUnlock() if len(a.extraTools) == 0 { return nil } @@ -1121,7 +1157,9 @@ func (a *Agent) GetExtraTools() []fantasy.AgentTool { // tools) and rebuilds the internal agent with the updated tool list. The // model, system prompt, and all other configuration are preserved. func (a *Agent) SetExtraTools(extraTools []fantasy.AgentTool) { + a.toolsMu.Lock() a.extraTools = extraTools + a.toolsMu.Unlock() a.rebuildFantasyAgent() } diff --git a/internal/agent/agent_midturn_tools_test.go b/internal/agent/agent_midturn_tools_test.go new file mode 100644 index 00000000..8c87c743 --- /dev/null +++ b/internal/agent/agent_midturn_tools_test.go @@ -0,0 +1,168 @@ +package agent + +import ( + "context" + "testing" + + "charm.land/fantasy" +) + +// toolNamesFromStep extracts tool names from a PrepareStep result for assertions. +func toolNamesFromStep(tools []fantasy.AgentTool) map[string]struct{} { + out := make(map[string]struct{}, len(tools)) + for _, t := range tools { + out[t.Info().Name] = struct{}{} + } + return out +} + +// newTestTool builds a minimal AgentTool with the given name for use in tests. +func newTestTool(name string) fantasy.AgentTool { + type emptyInput struct{} + return fantasy.NewAgentTool(name, "test tool "+name, + func(_ context.Context, _ emptyInput, _ fantasy.ToolCall) (fantasy.ToolResponse, error) { + return fantasy.NewTextResponse("ok"), nil + }, + ) +} + +// midTurnToolAgent is a fake fantasy.Agent that simulates the multi-step +// agentic loop fantasy runs inside a single Stream call. Between step 0 and +// step 1 it invokes mutate(), emulating a tool handler that calls AddTools/ +// RemoveTools mid-turn. It records the tool list each step's PrepareStep +// callback yields so the test can assert that mid-turn changes are observed. +type midTurnToolAgent struct { + mutate func() + stepTools []map[string]struct{} + prepareErr error +} + +func (f *midTurnToolAgent) Generate(_ context.Context, _ fantasy.AgentCall) (*fantasy.AgentResult, error) { + return &fantasy.AgentResult{}, nil +} + +func (f *midTurnToolAgent) Stream(ctx context.Context, opts fantasy.AgentStreamCall) (*fantasy.AgentResult, error) { + // Step 0: capture the tools fantasy would use for the first step. + for step := range 2 { + if step == 1 && f.mutate != nil { + // Emulate a tool handler that mutates the live tool set + // mid-turn (e.g. enable_toolset calling host.AddTools). + f.mutate() + } + if opts.PrepareStep != nil { + _, prepared, err := opts.PrepareStep(ctx, fantasy.PrepareStepFunctionOptions{ + StepNumber: step, + Model: nil, + Messages: nil, + }) + if err != nil { + f.prepareErr = err + return nil, err + } + f.stepTools = append(f.stepTools, toolNamesFromStep(prepared.Tools)) + } + } + return &fantasy.AgentResult{}, nil +} + +// TestPrepareStepReflectsMidTurnTools is the regression test for #76. +// AddTools/RemoveTools must take effect at the next LLM step of the current +// turn. Because the whole agentic loop runs inside a single fantasy Stream +// call, this only works if Kit's PrepareStep callback re-reads the live tool +// set each step and populates PrepareStepResult.Tools. Before the fix the +// per-step tool list was the snapshot captured when Stream began, so a tool +// added mid-turn never appeared until the next turn. +func TestPrepareStepReflectsMidTurnTools(t *testing.T) { + t.Parallel() + + core := newTestTool("read") + loadMore := newTestTool("load_more") + foo := newTestTool("foo") + + fake := &midTurnToolAgent{} + + a := &Agent{ + fantasyAgent: fake, + streamingEnabled: true, + coreTools: []fantasy.AgentTool{core}, + extraTools: []fantasy.AgentTool{loadMore}, + } + + // Mid-turn, add a brand new tool "foo" (additive to load_more). + fake.mutate = func() { + a.SetExtraTools([]fantasy.AgentTool{loadMore, foo}) + } + + msgs := []fantasy.Message{fantasy.NewUserMessage("go")} + if _, err := a.GenerateWithCallbacks(context.Background(), msgs, GenerateCallbacks{}); err != nil { + t.Fatalf("GenerateWithCallbacks returned error: %v", err) + } + + if len(fake.stepTools) != 2 { + t.Fatalf("expected 2 prepared steps, got %d", len(fake.stepTools)) + } + + // Step 0: foo must NOT be present yet; load_more and read are. + step0 := fake.stepTools[0] + if _, ok := step0["read"]; !ok { + t.Errorf("step 0: expected core tool 'read' to be present") + } + if _, ok := step0["load_more"]; !ok { + t.Errorf("step 0: expected 'load_more' to be present") + } + if _, ok := step0["foo"]; ok { + t.Errorf("step 0: 'foo' should not be present before it was added") + } + + // Step 1: after the mid-turn AddTools, foo MUST be visible to the step. + step1 := fake.stepTools[1] + if _, ok := step1["foo"]; !ok { + t.Errorf("step 1: expected mid-turn-added 'foo' to be present (regression #76)") + } + if _, ok := step1["read"]; !ok { + t.Errorf("step 1: expected core tool 'read' to remain present") + } + if _, ok := step1["load_more"]; !ok { + t.Errorf("step 1: expected 'load_more' to remain present") + } +} + +// TestPrepareStepReflectsMidTurnToolRemoval verifies the RemoveTools side of +// the contract: a tool removed mid-turn disappears from the next step. +func TestPrepareStepReflectsMidTurnToolRemoval(t *testing.T) { + t.Parallel() + + core := newTestTool("read") + loadMore := newTestTool("load_more") + temp := newTestTool("temp") + + fake := &midTurnToolAgent{} + + a := &Agent{ + fantasyAgent: fake, + streamingEnabled: true, + coreTools: []fantasy.AgentTool{core}, + extraTools: []fantasy.AgentTool{loadMore, temp}, + } + + // Mid-turn, remove "temp". + fake.mutate = func() { + a.SetExtraTools([]fantasy.AgentTool{loadMore}) + } + + msgs := []fantasy.Message{fantasy.NewUserMessage("go")} + if _, err := a.GenerateWithCallbacks(context.Background(), msgs, GenerateCallbacks{}); err != nil { + t.Fatalf("GenerateWithCallbacks returned error: %v", err) + } + + if len(fake.stepTools) != 2 { + t.Fatalf("expected 2 prepared steps, got %d", len(fake.stepTools)) + } + + if _, ok := fake.stepTools[0]["temp"]; !ok { + t.Errorf("step 0: expected 'temp' to be present before removal") + } + if _, ok := fake.stepTools[1]["temp"]; ok { + t.Errorf("step 1: 'temp' should be gone after mid-turn RemoveTools (regression #76)") + } +} diff --git a/pkg/kit/kit.go b/pkg/kit/kit.go index a2353434..b0c2f38f 100644 --- a/pkg/kit/kit.go +++ b/pkg/kit/kit.go @@ -166,14 +166,16 @@ func (m *Kit) GetToolsForSubagent() []Tool { } // AddTools additively registers native Go tools on the live host. Added tools -// persist for the session and become visible to the model on the next turn. +// persist for the session and become visible to the model at the next LLM +// step (see below). // If a provided tool shares a name with a tool that is already in the // extra-tool set, the new tool replaces the previous one (last-write-wins). // Core tools and MCP tools are not affected. // // AddTools is safe to call while the agent is idle. If a turn is in progress // ([Kit.IsGenerating] returns true), the change takes effect starting from the -// next LLM step. +// next LLM step of the current turn — a tool added from within a tool handler +// is callable by the model on its next step without waiting for a new turn. func (m *Kit) AddTools(tools ...Tool) { m.promptOptsMu.Lock() defer m.promptOptsMu.Unlock() From a331588b6e42c2889d40bbd3767acc6d2d7bd413 Mon Sep 17 00:00:00 2001 From: Ed Zynda Date: Thu, 25 Jun 2026 23:41:38 +0300 Subject: [PATCH 2/2] fix(agent): harden SetExtraTools concurrency Address CodeRabbit review on #77: - Clone the incoming slice before storing it so later caller mutations cannot bypass toolsMu and race composeAllTools - Serialize the fantasyAgent rebuild under promptMu, consistent with the other rebuild paths (SetSystemPrompt, SetModel), so concurrent rebuilds can't last-writer-win a stale snapshot --- internal/agent/agent.go | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/internal/agent/agent.go b/internal/agent/agent.go index 028a45ec..6031cfb9 100644 --- a/internal/agent/agent.go +++ b/internal/agent/agent.go @@ -1155,11 +1155,17 @@ func (a *Agent) GetExtraTools() []fantasy.AgentTool { // SetExtraTools replaces the agent's extra tools (e.g. extension-registered // tools) and rebuilds the internal agent with the updated tool list. The -// model, system prompt, and all other configuration are preserved. +// model, system prompt, and all other configuration are preserved. The +// incoming slice is cloned so later caller mutations cannot bypass toolsMu +// and race composeAllTools. The rebuild is serialized under promptMu so it +// can't race other fantasyAgent rebuilds (SetSystemPrompt, SetModel, MCP). func (a *Agent) SetExtraTools(extraTools []fantasy.AgentTool) { a.toolsMu.Lock() - a.extraTools = extraTools + a.extraTools = append([]fantasy.AgentTool(nil), extraTools...) a.toolsMu.Unlock() + + a.promptMu.Lock() + defer a.promptMu.Unlock() a.rebuildFantasyAgent() }