Skip to content

Commit b64ba8d

Browse files
authored
Merge pull request #458 from krissetto/thinking-budgets-anthropic
Anthropic thinking budget + interleaved thinking support
2 parents ac57734 + e6a8d69 commit b64ba8d

11 files changed

Lines changed: 706 additions & 63 deletions

File tree

cagent-schema.json

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -174,23 +174,29 @@
174174
},
175175
"provider_opts": {
176176
"type": "object",
177-
"description": "Provider-specific options (currently used for dmr provider only)",
177+
"description": "Provider-specific options. dmr: runtime_flags. anthropic: interleaved_thinking (boolean, default false)",
178178
"additionalProperties": true
179179
},
180180
"track_usage": {
181181
"type": "boolean",
182182
"description": "Whether to track usage"
183183
},
184184
"thinking_budget": {
185-
"description": "Controls reasoning effort/budget. For OpenAI: string levels ('minimal', 'low', 'medium', 'high').",
185+
"description": "Controls reasoning effort/budget. For OpenAI: string levels ('minimal', 'low', 'medium', 'high'). For Anthropic: integer token budget (1024-32768)",
186186
"oneOf": [
187187
{
188188
"type": "string",
189189
"enum": ["minimal", "low", "medium", "high"],
190190
"description": "Reasoning effort level (OpenAI)"
191+
},
192+
{
193+
"type": "integer",
194+
"minimum": 1024,
195+
"maximum": 32768,
196+
"description": "Token budget for extended thinking (Anthropic)"
191197
}
192198
],
193-
"examples": ["minimal", "low", "medium", "high"]
199+
"examples": ["minimal", "low", "medium", "high", 1024, 32768]
194200
}
195201
},
196202
"additionalProperties": false

docs/USAGE.md

Lines changed: 48 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -139,17 +139,17 @@ cagent run ./agent.yaml --command ls
139139

140140
### Model Properties
141141

142-
| Property | Type | Description | Required |
143-
|---------------------|---------|-----------------------------------------------------------------------|----------|
144-
| `provider` | string | Provider: `openai`, `anthropic`, `dmr` | ✓ |
145-
| `model` | string | Model name (e.g., `gpt-4o`, `claude-sonnet-4-0`) | ✓ |
146-
| `temperature` | float | Randomness (0.0-1.0) | ✗ |
147-
| `max_tokens` | integer | Response length limit | ✗ |
148-
| `top_p` | float | Nucleus sampling (0.0-1.0) | ✗ |
149-
| `frequency_penalty` | float | Repetition penalty (0.0-2.0) | ✗ |
150-
| `presence_penalty` | float | Topic repetition penalty (0.0-2.0) | ✗ |
151-
| `base_url` | string | Custom API endpoint | ✗ |
152-
| `thinking_budget` | string | Reasoning effort — OpenAI: minimal/low/medium/high; Anthropic: tokens | ✗ |
142+
| Property | Type | Description | Required |
143+
|---------------------|------------|-----------------------------------------------------------------------|----------|
144+
| `provider` | string | Provider: `openai`, `anthropic`, `dmr` | ✓ |
145+
| `model` | string | Model name (e.g., `gpt-4o`, `claude-sonnet-4-0`) | ✓ |
146+
| `temperature` | float | Randomness (0.0-1.0) | ✗ |
147+
| `max_tokens` | integer | Response length limit | ✗ |
148+
| `top_p` | float | Nucleus sampling (0.0-1.0) | ✗ |
149+
| `frequency_penalty` | float | Repetition penalty (0.0-2.0) | ✗ |
150+
| `presence_penalty` | float | Topic repetition penalty (0.0-2.0) | ✗ |
151+
| `base_url` | string | Custom API endpoint | ✗ |
152+
| `thinking_budget` | string/int | Reasoning effort — OpenAI: effort string, Anthropic: token budget int | ✗ |
153153

154154
#### Example
155155

@@ -164,15 +164,15 @@ models:
164164
frequency_penalty: float # Repetition penalty (0.0-2.0)
165165
presence_penalty: float # Topic repetition penalty (0.0-2.0)
166166
parallel_tool_calls: boolean
167-
thinking_budget: string # How much the model should think. Currently only string supported for OpenAI models
167+
thinking_budget: string|integer # OpenAI: effort level string; Anthropic: integer token budget
168168
```
169169

170170
### Reasoning Effort (thinking_budget)
171171

172172
Determine how much the model should think by setting the `thinking_budget`
173173

174174
- **OpenAI**: use effort levels — `minimal`, `low`, `medium`, `high`
175-
- **Anthropic, Google (Gemini), DMR, others**: coming soon
175+
- **Anthropic**: set an integer token budget. Minimum is 1024; range is 1024–32768; must be strictly less than `max_tokens`. When set, cagent uses Anthropic's Beta Messages API with interleaved thinking enabled.
176176

177177
Examples (OpenAI):
178178

@@ -189,11 +189,44 @@ agents:
189189
instruction: you are a helpful assistant
190190
```
191191

192+
Examples (Anthropic):
193+
194+
```yaml
195+
models:
196+
claude:
197+
provider: anthropic
198+
model: claude-sonnet-4-5-20250929
199+
thinking_budget: 1024
200+
201+
agents:
202+
root:
203+
model: claude
204+
instruction: you are a helpful assistant that doesn't think very much
205+
```
206+
207+
#### Interleaved Thinking (Anthropic)
208+
209+
Anthropic's interleaved thinking feature uses the Beta Messages API to let the model call tools in between its reasoning steps. You can control this behavior with the `interleaved_thinking` provider option:
210+
211+
```yaml
212+
models:
213+
claude:
214+
provider: anthropic
215+
model: claude-sonnet-4-5-20250929
216+
thinking_budget: 8192 # Optional: defaults to 16384 when interleaved thinking is enabled
217+
provider_opts:
218+
interleaved_thinking: true # Enable interleaved thinking (default: false)
219+
```
220+
192221
Notes:
193222

194223
- If an invalid OpenAI effort value is set, the request will fail with a clear error
224+
- For Anthropic, a `thinking_budget` below 1024 or greater than or equal to `max_tokens` is ignored and a warning is logged
225+
- When `interleaved_thinking` is enabled, cagent uses Anthropic's Beta Messages API with a default thinking budget of 16384 tokens if not specified
195226
- For unsupported providers, `thinking_budget` has no effect
196-
- Debug logs include the applied effort (e.g., "OpenAI request using thinking_budget")
227+
- Debug logs include the applied effort (e.g., "OpenAI request using thinking_budget", "Anthropic Beta API using thinking_budget")
228+
229+
See `examples/thinking_budget.yaml` for a complete runnable demo.
197230

198231
#### Model Examples
199232

@@ -282,7 +315,7 @@ Requirements and notes:
282315
- Docker Model plugin must be available for auto-configure/auto-discovery
283316
- Verify with: `docker model status --json`
284317
- Configuration is best-effort; failures fall back to the default base URL
285-
- `provider_opts` is currently scoped to the `dmr` provider only
318+
- `provider_opts` currently apply to `dmr` and `anthropic` providers
286319
- `runtime_flags` are passed after `--` to the inference runtime (e.g., llama.cpp)
287320

288321
Parameter mapping and precedence (DMR):

examples/thinking_budget.yaml

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
#!/usr/bin/env cagent run
2+
3+
# Run the demo command with:
4+
# cagent run thinking_budget.yaml -c demo
5+
6+
version: "2"
7+
8+
agents:
9+
root:
10+
model: gpt-5-mini-min # <- try with gpt-5-mini-high
11+
# model: claude-4-5-sonnet-min # <- try with claude-4-5-sonnet-high
12+
description: a helpful assistant that thinks
13+
instruction: you are a helpful assistant
14+
commands:
15+
demo: "hey i need python code for a mandelbrot fractal"
16+
toolsets:
17+
- type: shell
18+
19+
models:
20+
gpt-5-mini-min:
21+
provider: openai
22+
model: gpt-5-mini
23+
thinking_budget: minimal # <- openai supports "minimal", "low", "medium", "high"
24+
25+
gpt-5-mini-high:
26+
provider: openai
27+
model: gpt-5-mini
28+
thinking_budget: high
29+
30+
claude-4-5-sonnet-min:
31+
provider: anthropic
32+
model: claude-sonnet-4-5-20250929
33+
thinking_budget: 1024 # <- tokens, 1024 is the minimum
34+
35+
claude-4-5-sonnet-high:
36+
provider: anthropic
37+
model: claude-sonnet-4-5-20250929
38+
thinking_budget: 32768 # <- tokens, 32768 is the suggested maximum without batching
39+
provider_opts:
40+
interleaved_thinking: true # <- enable interleaved thinking, aka tool calling during model reasoning

pkg/chat/chat.go

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,9 @@ type Message struct {
4949
// - https://api-docs.deepseek.com/api/create-chat-completion#responses
5050
ReasoningContent string `json:"reasoning_content,omitempty"`
5151

52+
// ThinkingSignature is used for Anthropic's extended thinking feature
53+
ThinkingSignature string `json:"thinking_signature,omitempty"`
54+
5255
FunctionCall *tools.FunctionCall `json:"function_call,omitempty"`
5356

5457
// For Role=assistant prompts this may be set to the tool calls generated by the model, such as function calls.
@@ -86,11 +89,12 @@ const (
8689

8790
// MessageDelta represents a delta/chunk in a streaming response
8891
type MessageDelta struct {
89-
Role string `json:"role,omitempty"`
90-
Content string `json:"content,omitempty"`
91-
ReasoningContent string `json:"reasoning_content,omitempty"`
92-
FunctionCall *tools.FunctionCall `json:"function_call,omitempty"`
93-
ToolCalls []tools.ToolCall `json:"tool_calls,omitempty"`
92+
Role string `json:"role,omitempty"`
93+
Content string `json:"content,omitempty"`
94+
ReasoningContent string `json:"reasoning_content,omitempty"`
95+
ThinkingSignature string `json:"thinking_signature,omitempty"`
96+
FunctionCall *tools.FunctionCall `json:"function_call,omitempty"`
97+
ToolCalls []tools.ToolCall `json:"tool_calls,omitempty"`
9498
}
9599

96100
// MessageStreamChoice represents a choice in a streaming response

pkg/config/v2/types.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -143,7 +143,7 @@ type Remote struct {
143143
// ThinkingBudget represents reasoning budget configuration.
144144
// It accepts either a string effort level or an integer token budget:
145145
// - String: "minimal", "low", "medium", "high" (for OpenAI)
146-
// - Integer: token count (for Anthropic, range 1024-32000)
146+
// - Integer: token count (for Anthropic, range 1024-32768)
147147
type ThinkingBudget struct {
148148
// Effort stores string-based reasoning effort levels
149149
Effort string `json:"effort,omitempty"`

pkg/model/provider/anthropic/adapter.go

Lines changed: 14 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -52,24 +52,35 @@ func (a *streamAdapter) Recv() (chat.MessageStreamResponse, error) {
5252
// Handle different event types
5353
switch eventVariant := event.AsAny().(type) {
5454
case anthropic.ContentBlockStartEvent:
55-
if contentBlock, ok := eventVariant.ContentBlock.AsAny().(anthropic.ToolUseBlock); ok {
56-
a.toolID = contentBlock.ID
55+
switch block := eventVariant.ContentBlock.AsAny().(type) {
56+
case anthropic.ToolUseBlock:
57+
a.toolID = block.ID
5758
a.toolCall = true
5859
toolCall := tools.ToolCall{
5960
ID: a.toolID,
6061
Type: "function",
6162
Function: tools.FunctionCall{
62-
Name: contentBlock.Name,
63+
Name: block.Name,
6364
},
6465
}
6566
response.Choices[0].Delta.ToolCalls = []tools.ToolCall{toolCall}
67+
case anthropic.ThinkingBlock:
68+
// Emit initial thinking content and signature
69+
if block.Thinking != "" {
70+
response.Choices[0].Delta.ReasoningContent = block.Thinking
71+
}
72+
if block.Signature != "" {
73+
response.Choices[0].Delta.ThinkingSignature = block.Signature
74+
}
6675
}
6776
case anthropic.ContentBlockDeltaEvent:
6877
switch deltaVariant := eventVariant.Delta.AsAny().(type) {
6978
case anthropic.TextDelta:
7079
response.Choices[0].Delta.Content = deltaVariant.Text
7180
case anthropic.ThinkingDelta:
7281
response.Choices[0].Delta.ReasoningContent = deltaVariant.Thinking
82+
case anthropic.SignatureDelta:
83+
response.Choices[0].Delta.ThinkingSignature = deltaVariant.Signature
7384
case anthropic.InputJSONDelta:
7485
inputBytes := deltaVariant.PartialJSON
7586
toolCall := tools.ToolCall{
Lines changed: 125 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,125 @@
1+
package anthropic
2+
3+
import (
4+
"fmt"
5+
"io"
6+
"log/slog"
7+
8+
"github.com/anthropics/anthropic-sdk-go"
9+
"github.com/anthropics/anthropic-sdk-go/packages/ssestream"
10+
11+
"github.com/docker/cagent/pkg/chat"
12+
"github.com/docker/cagent/pkg/tools"
13+
)
14+
15+
// betaStreamAdapter adapts the Anthropic Beta stream to our interface
16+
type betaStreamAdapter struct {
17+
stream *ssestream.Stream[anthropic.BetaRawMessageStreamEventUnion]
18+
toolCall bool
19+
toolID string
20+
}
21+
22+
// newBetaStreamAdapter creates a new Beta stream adapter
23+
func newBetaStreamAdapter(stream *ssestream.Stream[anthropic.BetaRawMessageStreamEventUnion]) *betaStreamAdapter {
24+
return &betaStreamAdapter{
25+
stream: stream,
26+
}
27+
}
28+
29+
// Recv gets the next completion chunk from the Beta stream.
//
// Each call consumes one raw Anthropic Beta stream event and maps it to one
// OpenAI-style "chat.completion.chunk" response. It returns io.EOF when the
// stream is exhausted, or the underlying stream error if one occurred.
func (a *betaStreamAdapter) Recv() (chat.MessageStreamResponse, error) {
	if !a.stream.Next() {
		if a.stream.Err() != nil {
			return chat.MessageStreamResponse{}, a.stream.Err()
		}
		// Stream finished cleanly.
		return chat.MessageStreamResponse{}, io.EOF
	}

	event := a.stream.Current()

	// Base chunk: every event yields an assistant-role delta; the switch
	// below fills in content, tool calls, usage, or the finish reason.
	response := chat.MessageStreamResponse{
		ID:     event.Message.ID,
		Object: "chat.completion.chunk",
		Model:  string(event.Message.Model),
		Choices: []chat.MessageStreamChoice{
			{
				Index: 0,
				Delta: chat.MessageDelta{
					Role: string(chat.MessageRoleAssistant),
				},
			},
		},
	}

	// Handle different event types
	switch eventVariant := event.AsAny().(type) {
	case anthropic.BetaRawContentBlockStartEvent:
		switch block := eventVariant.ContentBlock.AsAny().(type) {
		case anthropic.BetaToolUseBlock:
			// Remember the tool-use ID so subsequent input-JSON deltas are
			// attributed to this call, and mark that the finish reason
			// should become "tool_calls" at message stop.
			a.toolID = block.ID
			a.toolCall = true
			toolCall := tools.ToolCall{
				ID:   a.toolID,
				Type: "function",
				Function: tools.FunctionCall{
					Name: block.Name,
				},
			}
			response.Choices[0].Delta.ToolCalls = []tools.ToolCall{toolCall}
		case anthropic.BetaThinkingBlock:
			// Emit any thinking content/signature already present on the
			// block-start event (deltas may add more later).
			if block.Thinking != "" {
				response.Choices[0].Delta.ReasoningContent = block.Thinking
				slog.Debug("Received thinking", "thinking", block.Thinking)
			}
			if block.Signature != "" {
				response.Choices[0].Delta.ThinkingSignature = block.Signature
				slog.Debug("Received thinking signature (start)", "signature", block.Signature)
			}
		}
	case anthropic.BetaRawContentBlockDeltaEvent:
		switch deltaVariant := eventVariant.Delta.AsAny().(type) {
		case anthropic.BetaTextDelta:
			response.Choices[0].Delta.Content = deltaVariant.Text
		case anthropic.BetaThinkingDelta:
			response.Choices[0].Delta.ReasoningContent = deltaVariant.Thinking
		case anthropic.BetaInputJSONDelta:
			// Partial JSON fragment of the in-flight tool call's arguments.
			inputBytes := deltaVariant.PartialJSON
			toolCall := tools.ToolCall{
				ID:   a.toolID,
				Type: "function",
				Function: tools.FunctionCall{
					Arguments: inputBytes,
				},
			}
			response.Choices[0].Delta.ToolCalls = []tools.ToolCall{toolCall}
		case anthropic.BetaSignatureDelta:
			// Signature delta is for thinking blocks - capture it so we can replay thinking in history
			response.Choices[0].Delta.ThinkingSignature = deltaVariant.Signature
			slog.Debug("Received thinking signature", "signature", deltaVariant.Signature)
		default:
			return response, fmt.Errorf("unknown delta type: %T", deltaVariant)
		}
	case anthropic.BetaRawMessageDeltaEvent:
		// NOTE(review): CacheCreationInputTokens is an input-side count in
		// Anthropic's usage accounting; mapping it to CachedOutputTokens
		// looks questionable — confirm against how chat.Usage is consumed
		// and against the non-beta adapter's mapping.
		response.Usage = &chat.Usage{
			InputTokens:        int(eventVariant.Usage.InputTokens),
			OutputTokens:       int(eventVariant.Usage.OutputTokens),
			CachedInputTokens:  int(eventVariant.Usage.CacheReadInputTokens),
			CachedOutputTokens: int(eventVariant.Usage.CacheCreationInputTokens),
		}
	case anthropic.BetaRawMessageStopEvent:
		// Finish reason depends on whether any tool-use block was streamed.
		if a.toolCall {
			response.Choices[0].FinishReason = chat.FinishReasonToolCalls
		} else {
			response.Choices[0].FinishReason = chat.FinishReasonStop
		}
	}

	return response, nil
}
119+
120+
// Close closes the Beta stream
121+
func (a *betaStreamAdapter) Close() {
122+
if a.stream != nil {
123+
a.stream.Close()
124+
}
125+
}

0 commit comments

Comments
 (0)