Skip to content

Commit b64ba8d

Browse files
authored
Merge pull request #458 from krissetto/thinking-budgets-anthropic
Anthropic thinking budget + interleaved thinking support
2 parents ac57734 + e6a8d69 commit b64ba8d

11 files changed

Lines changed: 706 additions & 63 deletions

File tree

cagent-schema.json

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -174,23 +174,29 @@
174174
},
175175
"provider_opts": {
176176
"type": "object",
177-
"description": "Provider-specific options (currently used for dmr provider only)",
177+
"description": "Provider-specific options. dmr: runtime_flags. anthropic: interleaved_thinking (boolean, default false)",
178178
"additionalProperties": true
179179
},
180180
"track_usage": {
181181
"type": "boolean",
182182
"description": "Whether to track usage"
183183
},
184184
"thinking_budget": {
185-
"description": "Controls reasoning effort/budget. For OpenAI: string levels ('minimal', 'low', 'medium', 'high').",
185+
"description": "Controls reasoning effort/budget. For OpenAI: string levels ('minimal', 'low', 'medium', 'high'). For Anthropic: integer token budget (1024-32768)",
186186
"oneOf": [
187187
{
188188
"type": "string",
189189
"enum": ["minimal", "low", "medium", "high"],
190190
"description": "Reasoning effort level (OpenAI)"
191+
},
192+
{
193+
"type": "integer",
194+
"minimum": 1024,
195+
"maximum": 32768,
196+
"description": "Token budget for extended thinking (Anthropic)"
191197
}
192198
],
193-
"examples": ["minimal", "low", "medium", "high"]
199+
"examples": ["minimal", "low", "medium", "high", 1024, 32768]
194200
}
195201
},
196202
"additionalProperties": false

docs/USAGE.md

Lines changed: 48 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -139,17 +139,17 @@ cagent run ./agent.yaml --command ls
139139

140140
### Model Properties
141141

142-
| Property | Type | Description | Required |
143-
|---------------------|---------|-----------------------------------------------------------------------|----------|
144-
| `provider` | string | Provider: `openai`, `anthropic`, `dmr` | ✓ |
145-
| `model` | string | Model name (e.g., `gpt-4o`, `claude-sonnet-4-0`) | ✓ |
146-
| `temperature` | float | Randomness (0.0-1.0) | ✗ |
147-
| `max_tokens` | integer | Response length limit | ✗ |
148-
| `top_p` | float | Nucleus sampling (0.0-1.0) | ✗ |
149-
| `frequency_penalty` | float | Repetition penalty (0.0-2.0) | ✗ |
150-
| `presence_penalty` | float | Topic repetition penalty (0.0-2.0) | ✗ |
151-
| `base_url` | string | Custom API endpoint | ✗ |
152-
| `thinking_budget` | string | Reasoning effort — OpenAI: minimal/low/medium/high; Anthropic: tokens | ✗ |
142+
| Property | Type | Description | Required |
143+
|---------------------|------------|-----------------------------------------------------------------------|----------|
144+
| `provider` | string | Provider: `openai`, `anthropic`, `dmr` | ✓ |
145+
| `model` | string | Model name (e.g., `gpt-4o`, `claude-sonnet-4-0`) | ✓ |
146+
| `temperature` | float | Randomness (0.0-1.0) | ✗ |
147+
| `max_tokens` | integer | Response length limit | ✗ |
148+
| `top_p` | float | Nucleus sampling (0.0-1.0) | ✗ |
149+
| `frequency_penalty` | float | Repetition penalty (0.0-2.0) | ✗ |
150+
| `presence_penalty` | float | Topic repetition penalty (0.0-2.0) | ✗ |
151+
| `base_url` | string | Custom API endpoint | ✗ |
152+
| `thinking_budget` | string/int | Reasoning effort — OpenAI: effort string, Anthropic: token budget int | ✗ |
153153

154154
#### Example
155155

@@ -164,15 +164,15 @@ models:
164164
frequency_penalty: float # Repetition penalty (0.0-2.0)
165165
presence_penalty: float # Topic repetition penalty (0.0-2.0)
166166
parallel_tool_calls: boolean
167-
thinking_budget: string # How much the model should think. Currently only string supported for OpenAI models
167+
thinking_budget: string|integer # OpenAI: effort level string; Anthropic: integer token budget
168168
```
169169

170170
### Reasoning Effort (thinking_budget)
171171

172172
Determine how much the model should think by setting the `thinking_budget`
173173

174174
- **OpenAI**: use effort levels — `minimal`, `low`, `medium`, `high`
175-
- **Anthropic, Google (Gemini), DMR, others**: coming soon
175+
- **Anthropic**: set an integer token budget. Minimum is 1024; range is 1024–32768; must be strictly less than `max_tokens`. When set, cagent uses Anthropic's Beta Messages API with interleaved thinking enabled.
176176

177177
Examples (OpenAI):
178178

@@ -189,11 +189,44 @@ agents:
189189
instruction: you are a helpful assistant
190190
```
191191

192+
Examples (Anthropic):
193+
194+
```yaml
195+
models:
196+
claude:
197+
provider: anthropic
198+
model: claude-sonnet-4-5-20250929
199+
thinking_budget: 1024
200+
201+
agents:
202+
root:
203+
model: claude
204+
instruction: you are a helpful assistant that doesn't think very much
205+
```
206+
207+
#### Interleaved Thinking (Anthropic)
208+
209+
Anthropic's interleaved thinking feature uses the Beta Messages API to let the model call tools in between its reasoning steps. You can control this behavior with the `interleaved_thinking` provider option:
210+
211+
```yaml
212+
models:
213+
claude:
214+
provider: anthropic
215+
model: claude-sonnet-4-5-20250929
216+
thinking_budget: 8192 # Optional: defaults to 16384 when interleaved thinking is enabled
217+
provider_opts:
218+
interleaved_thinking: true # Enable interleaved thinking (default: false)
219+
```
220+
192221
Notes:
193222

194223
- If an invalid OpenAI effort value is set, the request will fail with a clear error
224+
- For Anthropic, a `thinking_budget` below 1024 or greater than or equal to `max_tokens` is ignored and a warning is logged
225+
- When `interleaved_thinking` is enabled, cagent uses Anthropic's Beta Messages API with a default thinking budget of 16384 tokens if not specified
195226
- For unsupported providers, `thinking_budget` has no effect
196-
- Debug logs include the applied effort (e.g., "OpenAI request using thinking_budget")
227+
- Debug logs include the applied effort (e.g., "OpenAI request using thinking_budget", "Anthropic Beta API using thinking_budget")
228+
229+
See `examples/thinking_budget.yaml` for a complete runnable demo.
197230

198231
#### Model Examples
199232

@@ -282,7 +315,7 @@ Requirements and notes:
282315
- Docker Model plugin must be available for auto-configure/auto-discovery
283316
- Verify with: `docker model status --json`
284317
- Configuration is best-effort; failures fall back to the default base URL
285-
- `provider_opts` is currently scoped to the `dmr` provider only
318+
- `provider_opts` currently apply to `dmr` and `anthropic` providers
286319
- `runtime_flags` are passed after `--` to the inference runtime (e.g., llama.cpp)
287320

288321
Parameter mapping and precedence (DMR):

examples/thinking_budget.yaml

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
#!/usr/bin/env cagent run
2+
3+
# Run the demo command with:
4+
# cagent run thinking_budget.yaml -c demo
5+
6+
version: "2"
7+
8+
agents:
9+
root:
10+
model: gpt-5-mini-min # <- try with gpt-5-mini-high
11+
# model: claude-4-5-sonnet-min # <- try with claude-4-5-sonnet-high
12+
description: a helpful assistant that thinks
13+
instruction: you are a helpful assistant
14+
commands:
15+
demo: "hey i need python code for a mandelbrot fractal"
16+
toolsets:
17+
- type: shell
18+
19+
models:
20+
gpt-5-mini-min:
21+
provider: openai
22+
model: gpt-5-mini
23+
thinking_budget: minimal # <- openai supports "minimal", "low", "medium", "high"
24+
25+
gpt-5-mini-high:
26+
provider: openai
27+
model: gpt-5-mini
28+
thinking_budget: high
29+
30+
claude-4-5-sonnet-min:
31+
provider: anthropic
32+
model: claude-sonnet-4-5-20250929
33+
thinking_budget: 1024 # <- tokens, 1024 is the minimum
34+
35+
claude-4-5-sonnet-high:
36+
provider: anthropic
37+
model: claude-sonnet-4-5-20250929
38+
thinking_budget: 32768 # <- tokens, 32768 is the suggested maximum without batching
39+
provider_opts:
40+
interleaved_thinking: true # <- enable interleaved thinking, aka tool calling during model reasoning

pkg/chat/chat.go

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,9 @@ type Message struct {
4949
// - https://api-docs.deepseek.com/api/create-chat-completion#responses
5050
ReasoningContent string `json:"reasoning_content,omitempty"`
5151

52+
// ThinkingSignature is used for Anthropic's extended thinking feature
53+
ThinkingSignature string `json:"thinking_signature,omitempty"`
54+
5255
FunctionCall *tools.FunctionCall `json:"function_call,omitempty"`
5356

5457
// For Role=assistant prompts this may be set to the tool calls generated by the model, such as function calls.
@@ -86,11 +89,12 @@ const (
8689

8790
// MessageDelta represents a delta/chunk in a streaming response
8891
type MessageDelta struct {
89-
Role string `json:"role,omitempty"`
90-
Content string `json:"content,omitempty"`
91-
ReasoningContent string `json:"reasoning_content,omitempty"`
92-
FunctionCall *tools.FunctionCall `json:"function_call,omitempty"`
93-
ToolCalls []tools.ToolCall `json:"tool_calls,omitempty"`
92+
Role string `json:"role,omitempty"`
93+
Content string `json:"content,omitempty"`
94+
ReasoningContent string `json:"reasoning_content,omitempty"`
95+
ThinkingSignature string `json:"thinking_signature,omitempty"`
96+
FunctionCall *tools.FunctionCall `json:"function_call,omitempty"`
97+
ToolCalls []tools.ToolCall `json:"tool_calls,omitempty"`
9498
}
9599

96100
// MessageStreamChoice represents a choice in a streaming response

pkg/config/v2/types.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -143,7 +143,7 @@ type Remote struct {
143143
// ThinkingBudget represents reasoning budget configuration.
144144
// It accepts either a string effort level or an integer token budget:
145145
// - String: "minimal", "low", "medium", "high" (for OpenAI)
146-
// - Integer: token count (for Anthropic, range 1024-32000)
146+
// - Integer: token count (for Anthropic, range 1024-32768)
147147
type ThinkingBudget struct {
148148
// Effort stores string-based reasoning effort levels
149149
Effort string `json:"effort,omitempty"`

pkg/model/provider/anthropic/adapter.go

Lines changed: 14 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -52,24 +52,35 @@ func (a *streamAdapter) Recv() (chat.MessageStreamResponse, error) {
5252
// Handle different event types
5353
switch eventVariant := event.AsAny().(type) {
5454
case anthropic.ContentBlockStartEvent:
55-
if contentBlock, ok := eventVariant.ContentBlock.AsAny().(anthropic.ToolUseBlock); ok {
56-
a.toolID = contentBlock.ID
55+
switch block := eventVariant.ContentBlock.AsAny().(type) {
56+
case anthropic.ToolUseBlock:
57+
a.toolID = block.ID
5758
a.toolCall = true
5859
toolCall := tools.ToolCall{
5960
ID: a.toolID,
6061
Type: "function",
6162
Function: tools.FunctionCall{
62-
Name: contentBlock.Name,
63+
Name: block.Name,
6364
},
6465
}
6566
response.Choices[0].Delta.ToolCalls = []tools.ToolCall{toolCall}
67+
case anthropic.ThinkingBlock:
68+
// Emit initial thinking content and signature
69+
if block.Thinking != "" {
70+
response.Choices[0].Delta.ReasoningContent = block.Thinking
71+
}
72+
if block.Signature != "" {
73+
response.Choices[0].Delta.ThinkingSignature = block.Signature
74+
}
6675
}
6776
case anthropic.ContentBlockDeltaEvent:
6877
switch deltaVariant := eventVariant.Delta.AsAny().(type) {
6978
case anthropic.TextDelta:
7079
response.Choices[0].Delta.Content = deltaVariant.Text
7180
case anthropic.ThinkingDelta:
7281
response.Choices[0].Delta.ReasoningContent = deltaVariant.Thinking
82+
case anthropic.SignatureDelta:
83+
response.Choices[0].Delta.ThinkingSignature = deltaVariant.Signature
7384
case anthropic.InputJSONDelta:
7485
inputBytes := deltaVariant.PartialJSON
7586
toolCall := tools.ToolCall{
Lines changed: 125 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,125 @@
1+
package anthropic
2+
3+
import (
4+
"fmt"
5+
"io"
6+
"log/slog"
7+
8+
"github.com/anthropics/anthropic-sdk-go"
9+
"github.com/anthropics/anthropic-sdk-go/packages/ssestream"
10+
11+
"github.com/docker/cagent/pkg/chat"
12+
"github.com/docker/cagent/pkg/tools"
13+
)
14+
15+
// betaStreamAdapter adapts the Anthropic Beta stream to our interface
16+
type betaStreamAdapter struct {
17+
stream *ssestream.Stream[anthropic.BetaRawMessageStreamEventUnion]
18+
toolCall bool
19+
toolID string
20+
}
21+
22+
// newBetaStreamAdapter creates a new Beta stream adapter
23+
func newBetaStreamAdapter(stream *ssestream.Stream[anthropic.BetaRawMessageStreamEventUnion]) *betaStreamAdapter {
24+
return &betaStreamAdapter{
25+
stream: stream,
26+
}
27+
}
28+
29+
// Recv gets the next completion chunk from the Beta stream.
//
// Each call consumes one raw Anthropic Beta stream event and maps it to one
// OpenAI-style "chat.completion.chunk" response. It returns io.EOF when the
// stream is exhausted, or the underlying stream error if one occurred.
func (a *betaStreamAdapter) Recv() (chat.MessageStreamResponse, error) {
	if !a.stream.Next() {
		if a.stream.Err() != nil {
			return chat.MessageStreamResponse{}, a.stream.Err()
		}
		// Stream finished cleanly.
		return chat.MessageStreamResponse{}, io.EOF
	}

	event := a.stream.Current()

	// Base chunk: every event yields an assistant-role delta; the switch
	// below fills in content, tool calls, usage, or the finish reason.
	response := chat.MessageStreamResponse{
		ID:     event.Message.ID,
		Object: "chat.completion.chunk",
		Model:  string(event.Message.Model),
		Choices: []chat.MessageStreamChoice{
			{
				Index: 0,
				Delta: chat.MessageDelta{
					Role: string(chat.MessageRoleAssistant),
				},
			},
		},
	}

	// Handle different event types
	switch eventVariant := event.AsAny().(type) {
	case anthropic.BetaRawContentBlockStartEvent:
		switch block := eventVariant.ContentBlock.AsAny().(type) {
		case anthropic.BetaToolUseBlock:
			// Remember the tool-use ID so subsequent input-JSON deltas are
			// attributed to this call, and mark that the finish reason
			// should become "tool_calls" at message stop.
			a.toolID = block.ID
			a.toolCall = true
			toolCall := tools.ToolCall{
				ID:   a.toolID,
				Type: "function",
				Function: tools.FunctionCall{
					Name: block.Name,
				},
			}
			response.Choices[0].Delta.ToolCalls = []tools.ToolCall{toolCall}
		case anthropic.BetaThinkingBlock:
			// Emit any thinking content/signature already present on the
			// block-start event (deltas may add more later).
			if block.Thinking != "" {
				response.Choices[0].Delta.ReasoningContent = block.Thinking
				slog.Debug("Received thinking", "thinking", block.Thinking)
			}
			if block.Signature != "" {
				response.Choices[0].Delta.ThinkingSignature = block.Signature
				slog.Debug("Received thinking signature (start)", "signature", block.Signature)
			}
		}
	case anthropic.BetaRawContentBlockDeltaEvent:
		switch deltaVariant := eventVariant.Delta.AsAny().(type) {
		case anthropic.BetaTextDelta:
			response.Choices[0].Delta.Content = deltaVariant.Text
		case anthropic.BetaThinkingDelta:
			response.Choices[0].Delta.ReasoningContent = deltaVariant.Thinking
		case anthropic.BetaInputJSONDelta:
			// Partial JSON fragment of the in-flight tool call's arguments.
			inputBytes := deltaVariant.PartialJSON
			toolCall := tools.ToolCall{
				ID:   a.toolID,
				Type: "function",
				Function: tools.FunctionCall{
					Arguments: inputBytes,
				},
			}
			response.Choices[0].Delta.ToolCalls = []tools.ToolCall{toolCall}
		case anthropic.BetaSignatureDelta:
			// Signature delta is for thinking blocks - capture it so we can replay thinking in history
			response.Choices[0].Delta.ThinkingSignature = deltaVariant.Signature
			slog.Debug("Received thinking signature", "signature", deltaVariant.Signature)
		default:
			return response, fmt.Errorf("unknown delta type: %T", deltaVariant)
		}
	case anthropic.BetaRawMessageDeltaEvent:
		// NOTE(review): CacheCreationInputTokens is an input-side count in
		// Anthropic's usage accounting; mapping it to CachedOutputTokens
		// looks questionable — confirm against how chat.Usage is consumed
		// and against the non-beta adapter's mapping.
		response.Usage = &chat.Usage{
			InputTokens:        int(eventVariant.Usage.InputTokens),
			OutputTokens:       int(eventVariant.Usage.OutputTokens),
			CachedInputTokens:  int(eventVariant.Usage.CacheReadInputTokens),
			CachedOutputTokens: int(eventVariant.Usage.CacheCreationInputTokens),
		}
	case anthropic.BetaRawMessageStopEvent:
		// Finish reason depends on whether any tool-use block was streamed.
		if a.toolCall {
			response.Choices[0].FinishReason = chat.FinishReasonToolCalls
		} else {
			response.Choices[0].FinishReason = chat.FinishReasonStop
		}
	}

	return response, nil
}
119+
120+
// Close closes the Beta stream
121+
func (a *betaStreamAdapter) Close() {
122+
if a.stream != nil {
123+
a.stream.Close()
124+
}
125+
}

0 commit comments

Comments
 (0)