Skip to content

Commit f1a1026

Browse files
Merge branch 'main' into fix/tab-auto-complete
2 parents b36c89c + dc8062b commit f1a1026

6 files changed

Lines changed: 201 additions & 19 deletions

File tree

pkg/a2a/adapter.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,7 @@ func runDockerAgent(ctx agent.InvocationContext, t *team.Team, agentName string,
6868
var contentBuilder strings.Builder
6969

7070
// Convert docker agent events to ADK events and yield them
71+
7172
for event := range eventsChan {
7273
if ctx.Ended() {
7374
slog.Debug("Invocation ended, stopping agent", "agent", agentName)
@@ -95,6 +96,7 @@ func runDockerAgent(ctx agent.InvocationContext, t *team.Team, agentName string,
9596

9697
case *runtime.ErrorEvent:
9798
// Yield error and stop
99+
98100
yield(nil, fmt.Errorf("%s", e.Error))
99101
return
100102

pkg/runtime/loop.go

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ import (
2222
"github.com/docker/docker-agent/pkg/telemetry"
2323
"github.com/docker/docker-agent/pkg/tools"
2424
"github.com/docker/docker-agent/pkg/tools/builtin"
25+
bgagent "github.com/docker/docker-agent/pkg/tools/builtin/agent"
2526
)
2627

2728
// registerDefaultTools wires up the built-in tool handlers (delegation,
@@ -123,11 +124,19 @@ func (r *LocalRuntime) RunStream(ctx context.Context, sess *session.Session) <-c
123124
runtimeMaxIterations := sess.MaxIterations
124125

125126
// Initialize consecutive duplicate tool call detector
127+
//
128+
// Polling tools (view_background_agent, view_background_job) are
129+
// expected to be called repeatedly with identical arguments while a
130+
// background task is in progress. Exempt them so they never trigger
131+
// the loop-termination path.
126132
loopThreshold := sess.MaxConsecutiveToolCalls
127133
if loopThreshold == 0 {
128134
loopThreshold = 5 // default: always active
129135
}
130-
loopDetector := newToolLoopDetector(loopThreshold)
136+
loopDetector := newToolLoopDetector(loopThreshold,
137+
bgagent.ToolNameViewBackgroundAgent,
138+
builtin.ToolNameViewBackgroundJob,
139+
)
131140

132141
// overflowCompactions counts how many consecutive context-overflow
133142
// auto-compactions have been attempted without a successful model

pkg/runtime/tool_loop_detector.go

Lines changed: 36 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -16,12 +16,20 @@ type toolLoopDetector struct {
1616
lastSignature string
1717
consecutive int
1818
threshold int
19+
exemptTools map[string]struct{}
1920
}
2021

2122
// newToolLoopDetector creates a detector that triggers after threshold
22-
// consecutive identical call batches.
23-
func newToolLoopDetector(threshold int) *toolLoopDetector {
24-
return &toolLoopDetector{threshold: threshold}
23+
// consecutive identical call batches. Tool names passed in exemptTools
24+
// are polling-safe: batches composed entirely of exempt tools (e.g.
25+
// view_background_agent, view_background_job) never count toward the
26+
// consecutive-duplicate limit.
27+
func newToolLoopDetector(threshold int, exemptTools ...string) *toolLoopDetector {
28+
exempt := make(map[string]struct{}, len(exemptTools))
29+
for _, name := range exemptTools {
30+
exempt[name] = struct{}{}
31+
}
32+
return &toolLoopDetector{threshold: threshold, exemptTools: exempt}
2533
}
2634

2735
// reset clears the detector state so it can be reused after recovery.
@@ -32,11 +40,22 @@ func (d *toolLoopDetector) reset() {
3240

3341
// record updates the detector with the latest tool call batch and returns
3442
// true if the consecutive-duplicate threshold has been reached.
43+
// Batches composed entirely of exempt (polling) tools are silently
44+
// skipped so that expected polling patterns are not flagged.
3545
func (d *toolLoopDetector) record(calls []tools.ToolCall) bool {
3646
if len(calls) == 0 {
3747
return false
3848
}
3949

50+
// Polling tools are expected to be called repeatedly with identical
51+
// arguments while waiting for a background task to finish. Exempt batches
52+
// are completely invisible to the detector: they neither increment the
53+
// consecutive counter nor reset it, so a looping model cannot evade
54+
// detection by interleaving a single polling call.
55+
if d.isExemptBatch(calls) {
56+
return false
57+
}
58+
4059
sig := callSignature(calls)
4160
if sig == d.lastSignature {
4261
d.consecutive++
@@ -48,6 +67,20 @@ func (d *toolLoopDetector) record(calls []tools.ToolCall) bool {
4867
return d.consecutive >= d.threshold
4968
}
5069

70+
// isExemptBatch returns true when every call in the batch targets a
71+
// polling-exempt tool.
72+
func (d *toolLoopDetector) isExemptBatch(calls []tools.ToolCall) bool {
73+
if len(d.exemptTools) == 0 {
74+
return false
75+
}
76+
for _, c := range calls {
77+
if _, ok := d.exemptTools[c.Function.Name]; !ok {
78+
return false
79+
}
80+
}
81+
return true
82+
}
83+
5184
// callSignature builds a composite key from the name and arguments of every
5285
// tool call in the batch. Arguments are canonicalized (sorted keys) so that
5386
// semantically identical JSON with different key ordering produces the same

pkg/runtime/tool_loop_detector_test.go

Lines changed: 78 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,8 @@ import (
44
"testing"
55

66
"github.com/docker/docker-agent/pkg/tools"
7+
"github.com/docker/docker-agent/pkg/tools/builtin"
8+
bgagent "github.com/docker/docker-agent/pkg/tools/builtin/agent"
79
)
810

911
func TestToolLoopDetector(t *testing.T) {
@@ -21,10 +23,12 @@ func TestToolLoopDetector(t *testing.T) {
2123
}
2224

2325
tests := []struct {
24-
name string
25-
threshold int
26-
batches [][]tools.ToolCall
27-
wantTrip bool // whether any record call returns true
26+
name string
27+
threshold int
28+
exemptTools []string
29+
batches [][]tools.ToolCall
30+
wantTrip bool // whether any record call returns true
31+
wantCount int
2832
}{
2933
{
3034
name: "no loop with varied calls",
@@ -34,7 +38,8 @@ func TestToolLoopDetector(t *testing.T) {
3438
makeCalls("read_file", `{"path":"b.txt"}`),
3539
makeCalls("write_file", `{"path":"c.txt"}`),
3640
},
37-
wantTrip: false,
41+
wantTrip: false,
42+
wantCount: 1,
3843
},
3944
{
4045
name: "loop detected at exact threshold",
@@ -44,7 +49,8 @@ func TestToolLoopDetector(t *testing.T) {
4449
makeCalls("read_file", `{"path":"a.txt"}`),
4550
makeCalls("read_file", `{"path":"a.txt"}`),
4651
},
47-
wantTrip: true,
52+
wantTrip: true,
53+
wantCount: 3,
4854
},
4955
{
5056
name: "counter resets when calls change",
@@ -55,7 +61,8 @@ func TestToolLoopDetector(t *testing.T) {
5561
makeCalls("read_file", `{"path":"b.txt"}`), // reset
5662
makeCalls("read_file", `{"path":"b.txt"}`),
5763
},
58-
wantTrip: false,
64+
wantTrip: false,
65+
wantCount: 2,
5966
},
6067
{
6168
name: "empty calls never trigger",
@@ -65,7 +72,8 @@ func TestToolLoopDetector(t *testing.T) {
6572
{},
6673
{},
6774
},
68-
wantTrip: false,
75+
wantTrip: false,
76+
wantCount: 0,
6977
},
7078
{
7179
name: "multi-tool batches compared correctly",
@@ -74,7 +82,8 @@ func TestToolLoopDetector(t *testing.T) {
7482
makeCalls("read_file", `{"path":"a"}`, "write_file", `{"path":"b"}`),
7583
makeCalls("read_file", `{"path":"a"}`, "write_file", `{"path":"b"}`),
7684
},
77-
wantTrip: true,
85+
wantTrip: true,
86+
wantCount: 2,
7887
},
7988
{
8089
name: "multi-tool batches differ by one argument",
@@ -83,7 +92,8 @@ func TestToolLoopDetector(t *testing.T) {
8392
makeCalls("read_file", `{"path":"a"}`, "write_file", `{"path":"b"}`),
8493
makeCalls("read_file", `{"path":"a"}`, "write_file", `{"path":"c"}`),
8594
},
86-
wantTrip: false,
95+
wantTrip: false,
96+
wantCount: 1,
8797
},
8898
{
8999
name: "reordered JSON keys are treated as identical",
@@ -92,7 +102,8 @@ func TestToolLoopDetector(t *testing.T) {
92102
makeCalls("run", `{"cmd":"ls","cwd":"/tmp"}`),
93103
makeCalls("run", `{"cwd":"/tmp","cmd":"ls"}`),
94104
},
95-
wantTrip: true,
105+
wantTrip: true,
106+
wantCount: 2,
96107
},
97108
{
98109
name: "nested JSON key reordering is normalized",
@@ -101,13 +112,64 @@ func TestToolLoopDetector(t *testing.T) {
101112
makeCalls("call", `{"a":{"y":2,"x":1},"b":1}`),
102113
makeCalls("call", `{"b":1,"a":{"x":1,"y":2}}`),
103114
},
104-
wantTrip: true,
115+
wantTrip: true,
116+
wantCount: 2,
117+
},
118+
{
119+
name: "exempt background agent polling does not count as a loop",
120+
threshold: 2,
121+
exemptTools: []string{bgagent.ToolNameViewBackgroundAgent},
122+
batches: [][]tools.ToolCall{
123+
makeCalls(bgagent.ToolNameViewBackgroundAgent, `{"task_id":"agent_task_123"}`),
124+
makeCalls(bgagent.ToolNameViewBackgroundAgent, `{"task_id":"agent_task_123"}`),
125+
makeCalls(bgagent.ToolNameViewBackgroundAgent, `{"task_id":"agent_task_123"}`),
126+
},
127+
wantTrip: false,
128+
wantCount: 0,
129+
},
130+
{
131+
name: "mixed batch with exempt and non exempt tools still counts",
132+
threshold: 2,
133+
exemptTools: []string{bgagent.ToolNameViewBackgroundAgent, builtin.ToolNameViewBackgroundJob},
134+
batches: [][]tools.ToolCall{
135+
makeCalls(bgagent.ToolNameViewBackgroundAgent, `{"task_id":"agent_task_123"}`, "read_file", `{"path":"a.txt"}`),
136+
makeCalls(bgagent.ToolNameViewBackgroundAgent, `{"task_id":"agent_task_123"}`, "read_file", `{"path":"a.txt"}`),
137+
},
138+
wantTrip: true,
139+
wantCount: 2,
140+
},
141+
{
142+
name: "exempt shell background job polling does not count as a loop",
143+
threshold: 2,
144+
exemptTools: []string{builtin.ToolNameViewBackgroundJob},
145+
batches: [][]tools.ToolCall{
146+
makeCalls(builtin.ToolNameViewBackgroundJob, `{"job_id":"job_1"}`),
147+
makeCalls(builtin.ToolNameViewBackgroundJob, `{"job_id":"job_1"}`),
148+
},
149+
wantTrip: false,
150+
wantCount: 0,
151+
},
152+
{
153+
// A looping model cannot evade detection by interleaving a single
154+
// polling call between identical non-exempt calls. Exempt calls are
155+
// completely invisible to the detector and do NOT reset the counter.
156+
name: "interleaved polling does not evade loop detection",
157+
threshold: 3,
158+
exemptTools: []string{bgagent.ToolNameViewBackgroundAgent},
159+
batches: [][]tools.ToolCall{
160+
makeCalls("read_file", `{"path":"a.txt"}`),
161+
makeCalls("read_file", `{"path":"a.txt"}`),
162+
makeCalls(bgagent.ToolNameViewBackgroundAgent, `{"task_id":"t1"}`), // exempt — counter stays at 2
163+
makeCalls("read_file", `{"path":"a.txt"}`), // consecutive=3 → trips
164+
},
165+
wantTrip: true,
166+
wantCount: 3,
105167
},
106168
}
107169

108170
for _, tt := range tests {
109171
t.Run(tt.name, func(t *testing.T) {
110-
d := newToolLoopDetector(tt.threshold)
172+
d := newToolLoopDetector(tt.threshold, tt.exemptTools...)
111173
var tripped bool
112174
for _, batch := range tt.batches {
113175
if d.record(batch) {
@@ -117,6 +179,9 @@ func TestToolLoopDetector(t *testing.T) {
117179
if tripped != tt.wantTrip {
118180
t.Errorf("tripped = %v, want %v", tripped, tt.wantTrip)
119181
}
182+
if d.consecutive != tt.wantCount {
183+
t.Errorf("consecutive = %d, want %d", d.consecutive, tt.wantCount)
184+
}
120185
})
121186
}
122187
}

pkg/tools/builtin/agent/agent.go

Lines changed: 22 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -112,6 +112,12 @@ type task struct {
112112
status atomic.Int32
113113
result string
114114
errMsg string
115+
116+
// viewCount tracks how many consecutive HandleView calls observed no
117+
// new output. It is reset whenever the buffered output grows.
118+
// Protected by outputMu.
119+
viewCount int
120+
lastViewedOutputBytes int
115121
}
116122

117123
func (t *task) loadStatus() taskStatus {
@@ -331,10 +337,21 @@ func (h *Handler) HandleView(_ context.Context, _ *session.Session, toolCall too
331337
case taskStopped:
332338
out.WriteString("<task was stopped>")
333339
default:
334-
t.outputMu.RLock()
340+
t.outputMu.Lock()
335341
progress := t.output.String()
336342
truncated := t.outputBytes >= maxOutputBytes
337-
t.outputMu.RUnlock()
343+
currentBytes := t.outputBytes
344+
345+
// Track whether output has changed since the last view.
346+
if currentBytes == t.lastViewedOutputBytes {
347+
t.viewCount++
348+
} else {
349+
t.viewCount = 1
350+
t.lastViewedOutputBytes = currentBytes
351+
}
352+
viewCount := t.viewCount
353+
t.outputMu.Unlock()
354+
338355
if progress != "" {
339356
out.WriteString(progress)
340357
if truncated {
@@ -345,6 +362,9 @@ func (h *Handler) HandleView(_ context.Context, _ *session.Session, toolCall too
345362
} else {
346363
out.WriteString("<no output yet — still running>")
347364
}
365+
if viewCount > 1 {
366+
fmt.Fprintf(&out, "\n\n[No new output since last check — poll #%d]", viewCount)
367+
}
348368
}
349369

350370
return tools.ResultSuccess(out.String()), nil

pkg/tools/builtin/agent/agent_test.go

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -267,6 +267,59 @@ func TestHandleView_InvalidJSON(t *testing.T) {
267267
require.Error(t, err, "invalid JSON should return an error")
268268
}
269269

270+
func TestHandleView_RepeatedPolling_NoNewOutput(t *testing.T) {
271+
h := newTestHandler()
272+
insertTask(h, "t1", "researcher", taskRunning)
273+
274+
tc := makeToolCall(t, ViewBackgroundAgentArgs{TaskID: "t1"})
275+
276+
// First view should not include poll marker.
277+
result1, err := h.HandleView(t.Context(), nil, tc)
278+
require.NoError(t, err)
279+
assert.NotContains(t, result1.Output, "poll #")
280+
281+
// Second view with no new output should include poll marker.
282+
result2, err := h.HandleView(t.Context(), nil, tc)
283+
require.NoError(t, err)
284+
assert.Contains(t, result2.Output, "poll #2")
285+
286+
// Third view should show poll #3.
287+
result3, err := h.HandleView(t.Context(), nil, tc)
288+
require.NoError(t, err)
289+
assert.Contains(t, result3.Output, "poll #3")
290+
291+
// Responses should be non-identical.
292+
assert.NotEqual(t, result2.Output, result3.Output)
293+
}
294+
295+
func TestHandleView_RepeatedPolling_OutputGrows(t *testing.T) {
296+
h := newTestHandler()
297+
tk := insertTask(h, "t1", "researcher", taskRunning)
298+
299+
tc := makeToolCall(t, ViewBackgroundAgentArgs{TaskID: "t1"})
300+
301+
// First view.
302+
_, err := h.HandleView(t.Context(), nil, tc)
303+
require.NoError(t, err)
304+
305+
// Second view with no change → poll #2.
306+
result2, err := h.HandleView(t.Context(), nil, tc)
307+
require.NoError(t, err)
308+
assert.Contains(t, result2.Output, "poll #2")
309+
310+
// Simulate new output arriving.
311+
tk.outputMu.Lock()
312+
tk.output.WriteString("new progress")
313+
tk.outputBytes += len("new progress")
314+
tk.outputMu.Unlock()
315+
316+
// Third view should reset the poll counter since output changed.
317+
result3, err := h.HandleView(t.Context(), nil, tc)
318+
require.NoError(t, err)
319+
assert.NotContains(t, result3.Output, "poll #", "poll marker should reset after new output")
320+
assert.Contains(t, result3.Output, "new progress")
321+
}
322+
270323
// --- HandleStop ---
271324

272325
func TestHandleStop_NotFound(t *testing.T) {

0 commit comments

Comments
 (0)