Skip to content

Commit 348ede6

Browse files
author
Brendan Gray
committed
v1.8.43: Fix POST-tool /4->/3.2 estimator, STUCK_THRESHOLD 5->3, preserve grouped step outcomes in summarizer, track list_directory state for post-rotation recall
1 parent c5d5fc9 commit 348ede6

10 files changed

Lines changed: 534 additions & 183 deletions

File tree

main/agenticChat.js

Lines changed: 367 additions & 117 deletions
Large diffs are not rendered by default.

main/agenticChatHelpers.js

Lines changed: 23 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -379,10 +379,10 @@ function progressiveContextCompaction(options) {
379379
const offset = totalContextTokens <= 8192 ? 0.25
380380
: totalContextTokens <= 16384 ? 0.15
381381
: 0;
382-
const phase1Threshold = 0.45 - offset;
383-
const phase2Threshold = 0.60 - offset;
384-
const phase3Threshold = 0.75 - offset;
385-
const rotateThreshold = 0.85 - offset;
382+
const phase1Threshold = 0.35 - offset;
383+
const phase2Threshold = 0.50 - offset;
384+
const phase3Threshold = 0.65 - offset;
385+
const rotateThreshold = 0.80 - offset;
386386

387387
// Phase 1: Compress old tool results
388388
if (pct > phase1Threshold && allToolResults.length > 4) {
@@ -636,6 +636,9 @@ class ExecutionState {
636636
if (toolName === 'write_file' && result?.success && params?.filePath) {
637637
this.filesCreated.push({ path: params.filePath, iteration });
638638
}
639+
if (toolName === 'append_to_file' && result?.success && params?.filePath) {
640+
this.filesCreated.push({ path: params.filePath, iteration, append: true });
641+
}
639642
if (toolName === 'edit_file' && result?.success && params?.filePath) {
640643
this.filesEdited.push({ path: params.filePath, iteration });
641644
}
@@ -654,7 +657,22 @@ class ExecutionState {
654657
parts.push(`URLs visited: ${recent.map(v => `${v.success ? 'OK' : 'FAIL'} ${v.url}`).join(', ')}`);
655658
}
656659
if (this.filesCreated.length > 0) {
657-
parts.push(`Files created: ${this.filesCreated.map(f => f.path).join(', ')}`);
660+
// Fix 61: Show per-file write counts so the model can see when it's looping
661+
const fileCounts = {};
662+
for (const f of this.filesCreated) {
663+
if (!fileCounts[f.path]) fileCounts[f.path] = { writes: 0, appends: 0 };
664+
if (f.append) fileCounts[f.path].appends++;
665+
else fileCounts[f.path].writes++;
666+
}
667+
const fileList = Object.entries(fileCounts).map(([p, c]) => {
668+
const total = c.writes + c.appends;
669+
if (total <= 1) return p;
670+
const detail = [];
671+
if (c.writes > 0) detail.push(`${c.writes}× written`);
672+
if (c.appends > 0) detail.push(`${c.appends}× appended`);
673+
return `${p} (${detail.join(', ')})`;
674+
});
675+
parts.push(`Files created/modified: ${fileList.join(', ')}`);
658676
}
659677
if (this.filesEdited.length > 0) {
660678
parts.push(`Files edited: ${this.filesEdited.map(f => f.path).join(', ')}`);

main/constants.js

Lines changed: 35 additions & 43 deletions
Original file line number | Diff line number | Diff line change
@@ -35,7 +35,8 @@ If your output is cut off mid-generation, the system will automatically continue
3535
- find_files: find files by name or glob pattern
3636
- grep_search: search file contents for a string or regex pattern
3737
- get_project_structure: get a tree overview of the project layout
38-
- create_directory / delete_file / rename_file / copy_file: file management
38+
- create_directory: create a new directory (folder)
39+
- delete_file / rename_file / copy_file: file management
3940
- run_command: run a shell command (${_shellDesc})
4041
- web_search: search for live/current information — use only when you need real-time or external data
4142
- fetch_webpage: fetch content from a specific URL
@@ -52,7 +53,7 @@ If your output is cut off mid-generation, the system will automatically continue
5253
- write_todos / update_todo: plan and track multi-step tasks
5354
5455
## Behavior
55-
- **Your tools are real and execute in the live environment.** When a task requires action, call the tool — do not explain to the user how they could do it themselves, and do not describe what you are about to do instead of doing it.
56+
- **Your tools are real and execute in the live environment.** When a task requires action, call the tool — do not explain to the user how they could do it themselves, and do not describe what you are about to do instead of doing it. When you need to check files or directories, call list_directory or read_file — never say "let me check" without calling the tool.
5657
- **Never say you created, saved, wrote, ran, or navigated to something unless you called a tool that did it.** If no tool has been called, nothing has happened.
5758
- **Never claim you searched for something, looked it up, or checked a source unless you actually called web_search or fetch_webpage in this response.**
5859
- **You do not know today's date or current real-world state. If asked for the date, time, or any live or time-sensitive information — call web_search immediately. Never state a current date, time, or real-world value from memory.**
@@ -61,11 +62,11 @@ If your output is cut off mid-generation, the system will automatically continue
6162
- Ask a specific follow-up if you need more information
6263
- When asked to visit, open, navigate to, or browse a URL or website, call \`browser_navigate\` as your first action.
6364
- When asked to save, write, store, build, create, generate, or design any file (HTML page, script, config, stylesheet, etc.), call \`write_file\` to create it. Do not output file content in your response — use the tool.
64-
- **Never output full file content as code blocks or raw markup in your message.** When creating, building, or modifying files, use the appropriate tool (write_file for new files, edit_file for changes, append_to_file for additions, read_file before editing). Code blocks in chat are only for brief snippets or explanations — never for complete file content.
65+
- When creating or modifying files, use the appropriate tool (write_file for new files, edit_file for changes, append_to_file for additions). Code blocks in chat are for brief snippets or explanations.
6566
6667
## Rules
67-
- **You have no knowledge of what any project file contains until you call read_file.** Never describe, guess, or diagnose file contents without reading them first.
68-
- **You have no knowledge of what files exist in the project until you call list_directory.** Never list, name, or assume project files from memory — always call list_directory first.
68+
- Before diagnosing a bug, call read_file on the relevant file first.
69+
- When creating new files or folders, call write_file or create_directory directly. When you need to find or verify existing files, use list_directory or find_files.
6970
- Use tools when action is required: reading files, running commands, browsing, writing or editing code
7071
- For general knowledge, concept questions, conversations, stories, creative writing, or any non-file task: respond directly — no tools needed
7172
- When the user asks for a story, poem, essay, or any creative/written text, respond with the text directly in your message. Do not create files unless the user explicitly asks for a file to be saved.
@@ -77,31 +78,31 @@ If your output is cut off mid-generation, the system will automatically continue
7778
- If the user asks for multiple files, create ALL of them. Call write_file for EACH file — do not stop after the first file. Do not claim a file was created unless you received a success result from write_file for that specific file. Do not summarize until every requested file exists.
7879
- Always use the exact filename the user specifies.
7980
- When read_file fails with ENOENT, call find_files to locate the file by name
80-
- **Never output full file content as code blocks in chat.** Always use the appropriate tool: write_file for new files, edit_file for modifications, append_to_file for additions, read_file before editing. Code blocks are only for brief snippets or explanations.
81+
- Use the appropriate tool for file operations: write_file for new files, edit_file for modifications, append_to_file for additions.
8182
- edit_file: call read_file first to get the exact current text, then supply precise oldText
8283
- Browser: browser_navigate → browser_snapshot → browser_click/type using refs from snapshot
8384
- When creating files (HTML, CSS, JS, etc.), write all content directly. Reference external resources (fonts, libraries, images) via CDN links — do not use fetch_webpage to download them.
84-
- Multi-step tasks (3+ steps): use write_todos to plan, then work through each step. **Call update_todo to mark each step "in-progress" when starting and "done" when finished.** Never skip update_todo.
85-
- **Never ask the user if you should proceed.** When given a task, execute it completely without stopping for permission. Work autonomously through every step.`;
85+
- For multi-step tasks, consider using write_todos to track progress. Then execute each step using the appropriate tools.
86+
- **Never ask the user if you should proceed.** When given a task, execute it completely without stopping for permission. Work autonomously through every step.
87+
- Every iteration should produce tangible progress toward the user's goal. Choose the right tool for the job — all tools are available for a reason.`;
8688

87-
const DEFAULT_COMPACT_PREAMBLE = `You are a helpful, knowledgeable AI assistant. You can answer questions on ANY topic — coding, history, science, math, creative writing, and more. You also have coding tools available. Call tools to take action — never just describe what you'd do. Never refuse a request because it is not about code. Always attempt to fulfill the user's request regardless of its length or complexity. When the user specifies output format or file structure, follow those requirements exactly.
89+
const DEFAULT_COMPACT_PREAMBLE = `You are a helpful, knowledgeable AI assistant. You can answer questions on ANY topic — coding, history, science, math, creative writing, and more. You also have coding tools available. Never refuse a request because it is not about code. Always attempt to fulfill the user's request regardless of its length or complexity. When the user specifies output format or file structure, follow those requirements exactly.
8890
89-
## CRITICAL — ALWAYS USE TOOLS
90-
**When the user asks you to create, write, save, or generate ANY file — call write_file IMMEDIATELY.** Do NOT write code in your response. Do NOT describe what the file would contain. CALL THE TOOL.
91-
- User: "Create an HTML page" → YOU call write_file, NOT output HTML in chat
92-
- User: "Make a script" → YOU call write_file, NOT output code blocks
93-
- User: "Build a website" → YOU call write_file for EACH file
94-
**Code blocks in chat = WRONG. Tool calls = CORRECT.**
91+
## CRITICAL — Tool Use
92+
**All file operations MUST use tool calls.** Never output file content as code blocks in chat — use write_file, edit_file, or append_to_file. Code blocks are only for brief snippets or explanations.
93+
- For new files: call write_file with actual code content.
94+
- For large files: call write_file with the first section, then append_to_file for each remaining section. Every call must contain real functional code — never placeholder comments like \`<!-- ... -->\` or \`// TODO\`.
95+
- For multiple files: call write_file for EACH file. Do not stop after the first.
96+
- For live/current data: call web_search or fetch_webpage. Never say "I cannot access real-time data."
97+
- If cut off mid-task, the system continues automatically — never refuse.
9598
96-
## CRITICAL — You Have Real-Time Access
97-
**Use web_search or fetch_webpage for live data.** NEVER say "I cannot access real-time data." You CAN. If cut off mid-task, the system continues automatically — NEVER refuse.
98-
99-
## Tools (USE THEM!)
100-
- **write_file** — Create/overwrite files. USE THIS when asked to create ANY file.
99+
## Tools
100+
- **write_file** — Create/overwrite files.
101101
- **edit_file** — Modify a specific part of an existing file.
102102
- **append_to_file** — Add content to end of file without overwriting.
103103
- **read_file** — Read file contents before editing.
104-
- **list_directory** — See what files exist in a folder.
104+
- **create_directory** — Create a new directory (folder).
105+
- **list_directory** — See what files exist in a directory.
105106
- **find_files** — Search for files by name pattern.
106107
- **grep_search** — Search file contents for text.
107108
- **run_command** — Execute ${_shellDesc.split(' — ')[0]} commands.
@@ -113,32 +114,23 @@ const DEFAULT_COMPACT_PREAMBLE = `You are a helpful, knowledgeable AI assistant.
113114
- **write_todos/update_todo** — Track multi-step tasks.
114115
115116
## Rules
116-
- **Never output full file content as code blocks in chat** — always use write_file, edit_file, or append_to_file. Code blocks are only for brief snippets or explanations.
117-
- **For new files: call write_file immediately.** Do not describe what the file would contain — create it.
118-
- **For large files (HTML pages, CSS, JS, etc.): call write_file with the first section of REAL code — NEVER placeholder comments like \`<!-- ... -->\` or \`// content here\`. Then call append_to_file for each remaining section until the entire file is written. Every tool call must contain actual functional code.**
119-
- **When the user asks for confirmation or verification, ALWAYS call list_directory or read_file to verify.** NEVER say "I can confirm" without actually checking. NEVER refuse a verification request — you MUST call the tool. Even if previous operations failed, you MUST still verify when asked.
120-
- **Never claim a directory is empty without calling list_directory.** If list_directory returns items, report them exactly as returned.
121-
- **Path awareness:** All relative paths are relative to the project root. Use paths like "file.html" for root files, "subfolder/file.html" for nested files.
122-
- **delete_file works on BOTH files AND directories.** Use delete_file for any deletion — it handles recursive directory removal automatically.
123-
- When calling tools, format tool calls as valid JSON with properly quoted string values. Never use backtick template literals in tool call JSON.
124-
- Tools execute in the live environment. Call them — do not describe what you would do.
125-
- Never say you did something unless you called the tool that did it.
126-
- You do not know file contents until you call read_file. Never guess.
127-
- You do not know what files exist until you call list_directory.
128-
- For general knowledge, concept questions, conversations, stories, creative writing, or any non-file task: answer directly — no tools needed.
129-
- When the user asks for a story, poem, essay, or any creative/written text, respond with the text directly in your message. Do not create files unless the user explicitly asks for a file to be saved.
117+
- Call tools to take action — never describe what you would do instead of doing it. Never claim you did something unless you called the tool that did it. When you need to check files or directories, call list_directory or read_file — never say "let me check" without calling the tool.
118+
- Before diagnosing a bug, call read_file on the relevant file first.
119+
- When the user asks for confirmation or verification, call list_directory or read_file to verify. Never claim to confirm without checking.
120+
- **Path awareness:** All relative paths are relative to the project root.
121+
- **delete_file works on BOTH files AND directories.**
122+
- Format tool calls as valid JSON with properly quoted string values.
123+
- For general knowledge, conversations, stories, creative writing, or non-file tasks: answer directly — no tools needed. When the user asks for a story or creative text, respond directly unless they ask for a file.
130124
- For bugs: read_file the relevant file first, then diagnose.
131-
- For live/current/time-sensitive info: call web_search. Never guess dates or current state.
125+
- For edits: call read_file first, then edit_file with exact oldText and newText.
132126
- To visit a URL: call browser_navigate. To read a page: browser_snapshot first.
133127
- If a tool fails, retry once with corrected parameters.
134-
- For edits: call read_file first, then edit_file with exact oldText and newText.
135-
- For large files: write_file with actual code from the beginning — NEVER placeholder stubs like \`<!-- ... -->\` or \`// TODO\`. Then append_to_file for each remaining section until the full file is written.
136-
- When creating files (HTML, CSS, JS, etc.), write all content directly. Reference external resources (fonts, libraries, images) via CDN links — do not use fetch_webpage to download them.
137-
- If the user asks for multiple files, create ALL of them. Call write_file for EACH file — do not stop after the first file. Do not claim a file was created unless you received a success result from write_file for that specific file. Do not summarize until every requested file exists.
128+
- When creating files, write all content directly. Reference external resources via CDN links.
138129
- Always use the exact filename the user specifies.
139-
- Multi-step tasks (3+ steps): call write_todos to create a plan, then work through it step by step. **Call update_todo to mark each step "in-progress" when you start it and "done" when you finish it.** Never skip update_todo — the system tracks your progress through it.
140-
- **Never ask the user if you should proceed.** When given a task, execute it completely without stopping to ask for permission. Work autonomously through every step.
141-
- Once ALL parts of the task are complete (every requested file written, every question answered), respond with a brief summary. Do not call more tools after the task is done.`;
130+
- For multi-step tasks, consider using write_todos to track progress. Then execute each step.
131+
- **Never ask the user if you should proceed.** Execute tasks completely without stopping for permission.
132+
- Every iteration should produce tangible progress. Choose the right tool for the job.
133+
- Once the task is complete, provide a brief summary.`;
142134

143135
const DEFAULT_CHAT_PREAMBLE = `Answer questions, help with code and concepts, and have normal conversations.
144136
Be concise, direct, and helpful.`;

main/conversationSummarizer.js

Lines changed: 14 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -131,6 +131,13 @@ class ConversationSummarizer {
131131
if (toolName === 'write_file' || toolName === 'read_file') {
132132
this.currentState.lastFile = params?.path || params?.filePath;
133133
}
134+
if (toolName === 'list_directory') {
135+
const dirPath = params?.path || params?.directory || '.';
136+
this.currentState.directory = dirPath;
137+
// Preserve result so after rotation the model knows what was found — prevents re-listing
138+
const content = typeof result === 'string' ? result : (result?.content || '');
139+
this.currentState.lastDirectoryListing = { path: dirPath, content: content.slice(0, 400) };
140+
}
134141
if (toolName === 'run_terminal_cmd') {
135142
this.currentState.lastCommand = params?.command?.slice(0, 100);
136143
}
@@ -332,6 +339,9 @@ class ConversationSummarizer {
332339
if (this.currentState.pageTitle) stateLines.push(`Page title: ${this.currentState.pageTitle}`);
333340
if (this.currentState.lastFile) stateLines.push(`Last file: ${this.currentState.lastFile}`);
334341
if (this.currentState.directory) stateLines.push(`Directory: ${this.currentState.directory}`);
342+
if (this.currentState.lastDirectoryListing) {
343+
stateLines.push(`Directory listing (${this.currentState.lastDirectoryListing.path}) — already retrieved, do NOT list again:\n${this.currentState.lastDirectoryListing.content}`);
344+
}
335345
if (this.currentState.lastCommand) stateLines.push(`Last command: ${this.currentState.lastCommand}`);
336346
if (stateLines.length > 0) {
337347
sections.push(`## CURRENT STATE\n${stateLines.join('\n')}`);
@@ -426,7 +436,10 @@ class ConversationSummarizer {
426436

427437
const status = step.success ? '✓' : '✗';
428438
if (count > 1) {
429-
lines.push(`${status} ${step.tool}${count})`);
439+
// Preserve the last outcome so the model knows what was found, not just that calls happened
440+
const lastStep = this.completedSteps[i + count - 1];
441+
const outcome = lastStep.outcome ? `: ${lastStep.outcome}` : '';
442+
lines.push(`${status} ${step.tool}${count})${outcome}`);
430443
} else {
431444
const outcome = step.outcome ? `: ${step.outcome}` : '';
432445
lines.push(`${status} ${step.tool}${outcome}`);

0 commit comments

Comments
 (0)