Skip to content

Commit b5035ca

Browse files
committed
Implement structured outputs
Signed-off-by: Djordje Lukic <djordje.lukic@docker.com>
1 parent 6706c1e commit b5035ca

8 files changed

Lines changed: 277 additions & 38 deletions

File tree

cagent-schema.json

Lines changed: 128 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -8,8 +8,18 @@
88
"version": {
99
"type": "string",
1010
"description": "Configuration version",
11-
"enum": ["1", "2", "v1", "v2"],
12-
"examples": ["1", "2", "v1", "v2"]
11+
"enum": [
12+
"1",
13+
"2",
14+
"v1",
15+
"v2"
16+
],
17+
"examples": [
18+
"1",
19+
"2",
20+
"v1",
21+
"v2"
22+
]
1323
},
1424
"agents": {
1525
"type": "object",
@@ -103,16 +113,79 @@
103113
"oneOf": [
104114
{
105115
"type": "object",
106-
"additionalProperties": { "type": "string" }
116+
"additionalProperties": {
117+
"type": "string"
118+
}
107119
},
108120
{
109121
"type": "array",
110122
"items": {
111123
"type": "object",
112-
"additionalProperties": { "type": "string" }
124+
"additionalProperties": {
125+
"type": "string"
126+
}
113127
}
114128
}
115129
]
130+
},
131+
"structured_output": {
132+
"type": "object",
133+
"description": "Structured output configuration for constraining model responses to a specific JSON schema. Supported by OpenAI (native) and Google Gemini (native). Anthropic requires prompt engineering or tool-based approaches.",
134+
"properties": {
135+
"name": {
136+
"type": "string",
137+
"description": "Name of the response format schema"
138+
},
139+
"description": {
140+
"type": "string",
141+
"description": "Optional description of what the schema represents"
142+
},
143+
"strict": {
144+
"type": "boolean",
145+
"description": "Enable strict schema adherence (OpenAI only). When true, all properties must be listed in the required array.",
146+
"default": false
147+
},
148+
"schema": {
149+
"type": "object",
150+
"description": "JSON Schema object defining the structure of the response. Must include type, properties, and required fields.",
151+
"required": [
152+
"type",
153+
"properties"
154+
],
155+
"properties": {
156+
"type": {
157+
"type": "string",
158+
"enum": [
159+
"object"
160+
],
161+
"description": "Schema type, must be 'object' for structured outputs"
162+
},
163+
"properties": {
164+
"type": "object",
165+
"description": "Object properties with their schemas",
166+
"additionalProperties": true
167+
},
168+
"required": {
169+
"type": "array",
170+
"description": "List of required property names",
171+
"items": {
172+
"type": "string"
173+
}
174+
},
175+
"additionalProperties": {
176+
"type": "boolean",
177+
"description": "Whether additional properties are allowed",
178+
"default": false
179+
}
180+
},
181+
"additionalProperties": true
182+
}
183+
},
184+
"required": [
185+
"name",
186+
"schema"
187+
],
188+
"additionalProperties": false
116189
}
117190
},
118191
"additionalProperties": false
@@ -124,7 +197,12 @@
124197
"provider": {
125198
"type": "string",
126199
"description": "Model provider (e.g., openai, anthropic, dmr)",
127-
"examples": ["openai", "anthropic", "dmr", "ollama"]
200+
"examples": [
201+
"openai",
202+
"anthropic",
203+
"dmr",
204+
"ollama"
205+
]
128206
},
129207
"model": {
130208
"type": "string",
@@ -186,7 +264,12 @@
186264
"oneOf": [
187265
{
188266
"type": "string",
189-
"enum": ["minimal", "low", "medium", "high"],
267+
"enum": [
268+
"minimal",
269+
"low",
270+
"medium",
271+
"high"
272+
],
190273
"description": "Reasoning effort level (OpenAI)"
191274
},
192275
{
@@ -196,7 +279,14 @@
196279
"description": "Token budget for extended thinking (Anthropic, Google)"
197280
}
198281
],
199-
"examples": ["minimal", "low", "medium", "high", -1, 0, 1024, 24576, 32768]
282+
"examples": [
283+
"minimal",
284+
"low",
285+
"medium",
286+
"high",
287+
1024,
288+
32768
289+
]
200290
}
201291
},
202292
"additionalProperties": false
@@ -294,7 +384,7 @@
294384
"^[A-Za-z_][A-Za-z0-9_\\-]*$": {
295385
"$ref": "#/definitions/ScriptShellToolConfig"
296386
}
297-
},
387+
},
298388
"additionalProperties": false
299389
},
300390
"post_edit": {
@@ -309,12 +399,30 @@
309399
"anyOf": [
310400
{
311401
"allOf": [
312-
{ "properties": { "type": { "const": "mcp" } } },
402+
{
403+
"properties": {
404+
"type": {
405+
"const": "mcp"
406+
}
407+
}
408+
},
313409
{
314410
"anyOf": [
315-
{ "required": ["command"] },
316-
{ "required": ["remote"] },
317-
{ "required": ["ref"] }
411+
{
412+
"required": [
413+
"command"
414+
]
415+
},
416+
{
417+
"required": [
418+
"remote"
419+
]
420+
},
421+
{
422+
"required": [
423+
"ref"
424+
]
425+
}
318426
]
319427
}
320428
]
@@ -357,7 +465,9 @@
357465
}
358466
}
359467
},
360-
"required": ["url"],
468+
"required": [
469+
"url"
470+
],
361471
"additionalProperties": false
362472
},
363473
"ScriptShellToolConfig": {
@@ -411,8 +521,11 @@
411521
"description": "Command to execute after edit"
412522
}
413523
},
414-
"required": ["path", "cmd"],
524+
"required": [
525+
"path",
526+
"cmd"
527+
],
415528
"additionalProperties": false
416529
}
417530
}
418-
}
531+
}

examples/structured-output.yaml

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
version: "2"
2+
3+
models:
4+
gpt4_structured:
5+
provider: openai
6+
model: gpt-4o-2024-08-06
7+
temperature: 0.7
8+
max_tokens: 1000
9+
10+
agents:
11+
root:
12+
model: gpt4_structured
13+
description: "Agent that extracts structured information about people"
14+
instruction: |
15+
You are an assistant that extracts structured information about people from conversations.
16+
Always respond with valid JSON matching the specified schema.
17+
commands:
18+
demo: My name is John Doe, I am 30 years old, my email is john.doe@example.com, and I enjoy hiking and photography.
19+
structured_output:
20+
name: "person_info"
21+
description: "Information about a person"
22+
strict: true
23+
schema:
24+
type: object
25+
properties:
26+
name:
27+
type: string
28+
description: "The person's full name"
29+
age:
30+
type: integer
31+
description: "The person's age"
32+
email:
33+
type: string
34+
description: "The person's email address"
35+
hobbies:
36+
type: array
37+
items:
38+
type: string
39+
description: "List of the person's hobbies"
40+
required:
41+
- name
42+
- age
43+
- email
44+
- hobbies
45+
additionalProperties: false

pkg/config/v2/types.go

Lines changed: 25 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -14,18 +14,19 @@ type Config struct {
1414

1515
// AgentConfig represents a single agent configuration
1616
type AgentConfig struct {
17-
Model string `json:"model,omitempty"`
18-
Description string `json:"description,omitempty"`
19-
Toolsets []Toolset `json:"toolsets,omitempty"`
20-
Instruction string `json:"instruction,omitempty"`
21-
SubAgents []string `json:"sub_agents,omitempty"`
22-
AddDate bool `json:"add_date,omitempty"`
23-
AddEnvironmentInfo bool `json:"add_environment_info,omitempty"`
24-
CodeModeTools bool `json:"code_mode_tools,omitempty"`
25-
MaxIterations int `json:"max_iterations,omitempty"`
26-
NumHistoryItems int `json:"num_history_items,omitempty"`
27-
AddPromptFiles []string `json:"add_prompt_files,omitempty" yaml:"add_prompt_files,omitempty"`
28-
Commands types.Commands `json:"commands,omitempty"`
17+
Model string `json:"model,omitempty"`
18+
Description string `json:"description,omitempty"`
19+
Toolsets []Toolset `json:"toolsets,omitempty"`
20+
Instruction string `json:"instruction,omitempty"`
21+
SubAgents []string `json:"sub_agents,omitempty"`
22+
AddDate bool `json:"add_date,omitempty"`
23+
AddEnvironmentInfo bool `json:"add_environment_info,omitempty"`
24+
CodeModeTools bool `json:"code_mode_tools,omitempty"`
25+
MaxIterations int `json:"max_iterations,omitempty"`
26+
NumHistoryItems int `json:"num_history_items,omitempty"`
27+
AddPromptFiles []string `json:"add_prompt_files,omitempty" yaml:"add_prompt_files,omitempty"`
28+
Commands types.Commands `json:"commands,omitempty"`
29+
StructuredOutput *StructuredOutput `json:"structured_output,omitempty"`
2930
}
3031

3132
// ModelConfig represents the configuration for a model
@@ -168,3 +169,15 @@ func (t *ThinkingBudget) UnmarshalYAML(unmarshal func(any) error) error {
168169

169170
return nil
170171
}
172+
173+
// StructuredOutput defines a JSON schema for structured output
174+
type StructuredOutput struct {
175+
// Name is the name of the response format
176+
Name string `json:"name"`
177+
// Description is an optional description of the response format
178+
Description string `json:"description,omitempty"`
179+
// Schema is a JSON schema object defining the structure
180+
Schema map[string]any `json:"schema"`
181+
// Strict enables strict schema adherence (OpenAI only)
182+
Strict bool `json:"strict,omitempty"`
183+
}

pkg/model/provider/anthropic/client.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -107,6 +107,10 @@ func NewClient(ctx context.Context, cfg *latest.ModelConfig, env environment.Pro
107107
client := anthropic.NewClient(requestOptions...)
108108
slog.Debug("Anthropic client created successfully", "model", cfg.Model)
109109

110+
if globalOptions.StructuredOutput != nil {
111+
return &Client{}, errors.New("anthropic does not support native structured_output")
112+
}
113+
110114
return &Client{
111115
client: client,
112116
config: cfg,

pkg/model/provider/dmr/client.go

Lines changed: 27 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -25,9 +25,10 @@ import (
2525
// Client represents a DMR client wrapper
2626
// It implements the provider.Provider interface
2727
type Client struct {
28-
client *openai.Client
29-
config *latest.ModelConfig
30-
baseURL string
28+
client *openai.Client
29+
config *latest.ModelConfig
30+
modelOptions options.ModelOptions
31+
baseURL string
3132
}
3233

3334
// NewClient creates a new DMR client from the provided configuration
@@ -93,9 +94,10 @@ func NewClient(ctx context.Context, cfg *latest.ModelConfig, opts ...options.Opt
9394
slog.Debug("DMR client created successfully", "model", cfg.Model, "base_url", clientConfig.BaseURL)
9495

9596
return &Client{
96-
client: openai.NewClientWithConfig(clientConfig),
97-
config: cfg,
98-
baseURL: clientConfig.BaseURL,
97+
client: openai.NewClientWithConfig(clientConfig),
98+
config: cfg,
99+
baseURL: clientConfig.BaseURL,
100+
modelOptions: globalOptions,
99101
}, nil
100102
}
101103

@@ -361,6 +363,17 @@ func (c *Client) CreateChatCompletionStream(ctx context.Context, messages []chat
361363
} else {
362364
slog.Error("Failed to marshal DMR request to JSON", "error", err)
363365
}
366+
if c.modelOptions.StructuredOutput != nil {
367+
request.ResponseFormat = &openai.ChatCompletionResponseFormat{
368+
Type: openai.ChatCompletionResponseFormatTypeJSONSchema,
369+
JSONSchema: &openai.ChatCompletionResponseFormatJSONSchema{
370+
Name: c.modelOptions.StructuredOutput.Name,
371+
Description: c.modelOptions.StructuredOutput.Description,
372+
Schema: jsonSchema(c.modelOptions.StructuredOutput.Schema),
373+
Strict: c.modelOptions.StructuredOutput.Strict,
374+
},
375+
}
376+
}
364377

365378
stream, err := c.client.CreateChatCompletionStream(ctx, request)
366379
if err != nil {
@@ -560,3 +573,11 @@ func buildRuntimeFlagsFromModelConfig(engine string, cfg *latest.ModelConfig) []
560573
}
561574
return flags
562575
}
576+
577+
// jsonSchema is a helper type that implements json.Marshaler for map[string]any
578+
// This allows us to pass schema maps to the OpenAI library which expects json.Marshaler
579+
type jsonSchema map[string]any
580+
581+
func (j jsonSchema) MarshalJSON() ([]byte, error) {
582+
return json.Marshal(map[string]any(j))
583+
}

0 commit comments

Comments
 (0)