Skip to content

Commit 9c1cab4

Browse files
author
Gordon
committed
fix: improve todo tool reliability by reminding LLM of incomplete items
LLMs frequently create todos but fail to mark all of them as completed, leaving the todo sidebar in a partially-done state. This happens because the instruction to complete todos is far back in the system prompt by the time the LLM finishes its work. Add an incomplete-todo reminder to update_todos and list_todos tool output so the LLM sees unfinished items directly in its immediate context. Also strengthen the system instructions to emphasize that every todo must be completed before responding.
1 parent 27ee4cd commit 9c1cab4

2 files changed

Lines changed: 74 additions & 8 deletions

File tree

pkg/tools/builtin/todo.go

Lines changed: 46 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -4,6 +4,7 @@ import (
44
"context"
55
"encoding/json"
66
"fmt"
7+
"strings"
78
"sync"
89
"sync/atomic"
910

@@ -60,10 +61,12 @@ type CreateTodosOutput struct {
6061
// UpdateTodosOutput is the result payload that updateTodos serializes to JSON.
// Every field carries omitempty, so empty fields are left out of the output.
type UpdateTodosOutput struct {
6162
// Updated holds the todos whose update was applied.
Updated []TodoUpdate `json:"updated,omitempty" jsonschema:"List of successfully updated todos"`
6263
// NotFound holds the requested IDs that matched no stored todo.
NotFound []string `json:"not_found,omitempty" jsonschema:"IDs of todos that were not found"`
64+
// Reminder is filled by updateTodos from incompleteReminder when not every
// todo is completed; it stays empty (and is omitted) once all are completed.
Reminder string `json:"reminder,omitempty" jsonschema:"Reminder about incomplete todos that still need to be completed"`
6365
}
6466

6567
// ListTodosOutput is the result payload that listTodos serializes to JSON.
// In this diff the "-" Todos line is the pre-commit field and the "+" lines
// are the post-commit fields.
type ListTodosOutput struct {
66-
Todos []Todo `json:"todos" jsonschema:"List of all current todo items"`
68+
// Todos has no omitempty, so the "todos" key is always present in the output.
Todos []Todo `json:"todos" jsonschema:"List of all current todo items"`
69+
// Reminder is filled by listTodos from incompleteReminder; it is empty (and
// omitted) when no todo is pending or in-progress.
Reminder string `json:"reminder,omitempty" jsonschema:"Reminder about incomplete todos that still need to be completed"`
6770
}
6871

6972
// TodoStorage defines the storage layer for todo items.
@@ -157,17 +160,20 @@ func (t *TodoTool) Instructions() string {
157160
IMPORTANT: You MUST use these tools to track the progress of your tasks:
158161
159162
1. Before starting any complex task:
160-
- Create a todo for each major step using create_todo
163+
- Create a todo for each major step using create_todos (prefer batch creation)
161164
- Break down complex steps into smaller todos
162165
163166
2. While working:
167+
- Update todo status to "in-progress" BEFORE starting each task
168+
- Mark todos as "completed" IMMEDIATELY after finishing each task
164169
- Use list_todos frequently to keep track of remaining work
165-
- Mark todos as "completed" when finished
166170
167-
3. Task Management Rules:
168-
- Never start a new task without creating a todo for it
169-
- Always check list_todos before responding to ensure no steps are missed
170-
- Update todo status to reflect current progress
171+
3. Task Completion Rules:
172+
- EVERY todo you create MUST eventually be marked "completed"
173+
- Before sending your final response, call list_todos to verify ALL todos are completed
174+
- If any todos remain pending or in-progress, complete them or mark them completed before responding
175+
- Never leave todos in a pending or in-progress state when you are done working
176+
- When updating multiple todos, batch them in a single update_todos call
171177
172178
This toolset is REQUIRED for maintaining task state and ensuring all steps are completed.`
173179
}
@@ -235,6 +241,8 @@ func (h *todoHandler) updateTodos(_ context.Context, params UpdateTodosArgs) (*t
235241

236242
if h.allCompleted() {
237243
h.storage.Clear()
244+
} else {
245+
result.Reminder = h.incompleteReminder()
238246
}
239247

240248
return h.jsonResult(result)
@@ -253,12 +261,42 @@ func (h *todoHandler) allCompleted() bool {
253261
return true
254262
}
255263

264+
// incompleteReminder returns a reminder string listing any non-completed todos,
265+
// or an empty string if all are completed (or storage is empty).
266+
func (h *todoHandler) incompleteReminder() string {
267+
all := h.storage.All()
268+
var pending, inProgress []string
269+
for _, todo := range all {
270+
switch todo.Status {
271+
case "pending":
272+
pending = append(pending, fmt.Sprintf("[%s] %s", todo.ID, todo.Description))
273+
case "in-progress":
274+
inProgress = append(inProgress, fmt.Sprintf("[%s] %s", todo.ID, todo.Description))
275+
}
276+
}
277+
if len(pending) == 0 && len(inProgress) == 0 {
278+
return ""
279+
}
280+
281+
var b strings.Builder
282+
b.WriteString("The following todos are still incomplete and MUST be completed:")
283+
for _, s := range inProgress {
284+
b.WriteString(" (in-progress) " + s)
285+
}
286+
for _, s := range pending {
287+
b.WriteString(" (pending) " + s)
288+
}
289+
return b.String()
290+
}
291+
256292
// listTodos returns all stored todos wrapped in a ListTodosOutput result.
// In this diff the single return marked "-" is the pre-commit body; the lines
// marked "+" replace it and additionally attach the incomplete-todo reminder.
func (h *todoHandler) listTodos(_ context.Context, _ tools.ToolCall) (*tools.ToolCallResult, error) {
257293
todos := h.storage.All()
258294
// Use an empty slice instead of nil; presumably so the "todos" field encodes
// as [] rather than null (a nil slice marshals to null in encoding/json).
if todos == nil {
259295
todos = []Todo{}
260296
}
261-
return h.jsonResult(ListTodosOutput{Todos: todos})
297+
out := ListTodosOutput{Todos: todos}
298+
// incompleteReminder yields "" when nothing is pending or in-progress, and the
// omitempty tag then drops the field from the output.
out.Reminder = h.incompleteReminder()
299+
return h.jsonResult(out)
262300
}
263301

264302
func (t *TodoTool) Tools(context.Context) ([]tools.Tool, error) {

pkg/tools/builtin/todo_test.go

Lines changed: 28 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -95,9 +95,28 @@ func TestTodoTool_ListTodos(t *testing.T) {
9595
assert.Equal(t, "pending", output.Todos[i].Status)
9696
}
9797

98+
// All pending, so reminder should list all of them
99+
assert.Contains(t, output.Reminder, "todo_1")
100+
assert.Contains(t, output.Reminder, "todo_2")
101+
assert.Contains(t, output.Reminder, "todo_3")
102+
98103
requireMeta(t, result, 3)
99104
}
100105

106+
func TestTodoTool_ListTodos_Empty(t *testing.T) {
107+
tool := NewTodoTool()
108+
109+
result, err := tool.handler.listTodos(t.Context(), tools.ToolCall{})
110+
require.NoError(t, err)
111+
112+
var output ListTodosOutput
113+
require.NoError(t, json.Unmarshal([]byte(result.Output), &output))
114+
assert.Empty(t, output.Todos)
115+
assert.Empty(t, output.Reminder)
116+
117+
requireMeta(t, result, 0)
118+
}
119+
101120
func TestTodoTool_UpdateTodos(t *testing.T) {
102121
storage := NewMemoryTodoStorage()
103122
tool := NewTodoTool(WithStorage(storage))
@@ -125,6 +144,11 @@ func TestTodoTool_UpdateTodos(t *testing.T) {
125144
assert.Equal(t, "in-progress", output.Updated[1].Status)
126145
assert.Empty(t, output.NotFound)
127146

147+
// Reminder should list incomplete todos
148+
assert.Contains(t, output.Reminder, "todo_2")
149+
assert.Contains(t, output.Reminder, "todo_3")
150+
assert.NotContains(t, output.Reminder, "todo_1") // completed, should not appear
151+
128152
todos := storage.All()
129153
require.Len(t, todos, 3)
130154
assert.Equal(t, "completed", todos[0].Status)
@@ -159,6 +183,9 @@ func TestTodoTool_UpdateTodos_PartialFailure(t *testing.T) {
159183
require.Len(t, output.NotFound, 1)
160184
assert.Equal(t, "nonexistent", output.NotFound[0])
161185

186+
// Reminder should mention the still-pending todo
187+
assert.Contains(t, output.Reminder, "todo_2")
188+
162189
todos := storage.All()
163190
require.Len(t, todos, 2)
164191
assert.Equal(t, "completed", todos[0].Status)
@@ -205,6 +232,7 @@ func TestTodoTool_UpdateTodos_ClearsWhenAllCompleted(t *testing.T) {
205232
var output UpdateTodosOutput
206233
require.NoError(t, json.Unmarshal([]byte(result.Output), &output))
207234
require.Len(t, output.Updated, 2)
235+
assert.Empty(t, output.Reminder) // no reminder when all completed
208236

209237
assert.Empty(t, storage.All())
210238
requireMeta(t, result, 0)

0 commit comments

Comments
 (0)