Commit 8b084a6

Author: Brendan Gray
feat: Phase 1 context management overhaul

- Rolling summary module for continuous task tracking in every prompt
- Standardize token estimation to /4 across all systems
- Raise rotation threshold 72% -> 85% (rolling summary handles pressure)
- Guard _compactHistory with logging and tracking
Parent: d8f3303

4 files changed: 358 additions & 8 deletions
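The "/4" in the commit message is the character-per-token heuristic used for budgeting: roughly 4 characters per token, a common rule of thumb for English text and code. As a worked illustration of why the switch from /3.5 matters (numbers are illustrative, not from the commit):

    const estimateTokens = (text) => Math.ceil((text || '').length / 4);

    // For a 28,000-character prompt:
    //   /3.5 heuristic -> 8,000 estimated tokens
    //   /4   heuristic -> 7,000 estimated tokens (12.5% lower)
    // The /4 estimate assumes denser text, so compaction and rotation
    // trigger later for the same prompt size, relying on the rolling
    // summary to keep the task in view.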

main/agenticChat.js

Lines changed: 27 additions & 5 deletions
@@ -33,6 +33,7 @@ const {
   ExecutionState,
 } = require('./agenticChatHelpers');
 const { LLMEngine } = require('./llmEngine');
+const { RollingSummary } = require('./rollingSummary');
 const { repairToolCalls: repairToolCallsFn } = require('./tools/toolParser');
 
 /**
@@ -637,7 +638,7 @@ function register(ctx) {
 
   const hwContextSize = modelStatus.modelInfo?.contextSize || 32768;
 
-  const estimateTokens = (text) => Math.ceil((text || '').length / 3.5);
+  const estimateTokens = (text) => Math.ceil((text || '').length / 4);
 
   // ModelProfile-driven budgeting
   const modelTier = llmEngine.getModelTier();
@@ -859,6 +860,9 @@ function register(ctx) {
   const summarizer = new ConversationSummarizer();
   summarizer.setGoal(message);
 
+  const rollingSummary = new RollingSummary();
+  rollingSummary.setGoal(message);
+
   // Auto-create todos for large/incremental tasks (helps model track progress across rotations)
   const autoTodoResult = autoCreateLargeTaskTodos(message, mcpToolServer);
   if (autoTodoResult?.success) {
@@ -1704,16 +1708,18 @@ function register(ctx) {
     if (isStale()) break;
 
     // Record plan
-    if (responseText.length > 50) summarizer.recordPlan(responseText);
+    if (responseText.length > 50) {
+      summarizer.recordPlan(responseText);
+      rollingSummary.recordPlanFromResponse(responseText);
+    }
 
     // ── Progressive Context Compaction ──
     try {
       let contextUsed = 0;
       try { if (llmEngine.sequence?.nextTokenIndex) contextUsed = llmEngine.sequence.nextTokenIndex; } catch (_) {}
       if (!contextUsed) {
         const pLen = typeof currentPrompt === 'string' ? currentPrompt.length : ((currentPrompt.systemContext || '').length + (currentPrompt.userMessage || '').length);
-        // Use /3.5 instead of /4 for more conservative token estimation
-        contextUsed = Math.ceil((pLen + fullResponseText.length) / 3.5);
+        contextUsed = Math.ceil((pLen + fullResponseText.length) / 4);
       }
       const compaction = progressiveContextCompaction({
         contextUsedTokens: contextUsed, totalContextTokens: totalCtx,
@@ -1896,6 +1902,7 @@ function register(ctx) {
       summarizer.recordToolCall(tr.tool, tr.params, tr.result);
       summarizer.markPlanStepCompleted(tr.tool, tr.params);
       executionState.update(tr.tool, tr.params, tr.result, iteration);
+      rollingSummary.recordToolCall(tr.tool, tr.params, tr.result, iteration);
     }
 
     // UI events — send only non-deferred results to prevent duplicate bubbles
@@ -1953,6 +1960,21 @@ function register(ctx) {
     const iterContext = executionBlock + stepDirective + taskReminder;
     const allFeedback = toolFeedback + snapFeedback;
 
+    // ── Rolling Summary Injection ──
+    // Generate context-proportional summary for the next prompt.
+    // This ensures the model always has task awareness, not just post-rotation.
+    let rollingSummaryBlock = '';
+    {
+      let _rsCtxUsed = 0;
+      try { if (llmEngine.sequence?.nextTokenIndex) _rsCtxUsed = llmEngine.sequence.nextTokenIndex; } catch (_) {}
+      if (!_rsCtxUsed) _rsCtxUsed = Math.ceil((fullResponseText.length + (iterContext + allFeedback).length) / 4);
+      const _rsCtxPct = _rsCtxUsed / totalCtx;
+      if (rollingSummary.shouldInjectSummary(iteration, _rsCtxPct)) {
+        const summaryBudget = rollingSummary.getSummaryBudget(totalCtx, _rsCtxPct);
+        rollingSummaryBlock = rollingSummary.generateSummary(summaryBudget);
+      }
+    }
+
     if (sessionJustRotated) {
       sessionJustRotated = false;
       currentPrompt = {
@@ -1962,7 +1984,7 @@
     } else {
       currentPrompt = {
         systemContext: buildStaticPrompt(),
-        userMessage: iterContext + buildDynamicContext() + '\n' + allFeedback + continueInstruction,
+        userMessage: iterContext + buildDynamicContext() + (rollingSummaryBlock ? '\n' + rollingSummaryBlock : '') + '\n' + allFeedback + continueInstruction,
       };
     }
   }
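
The rolling summary module itself is not among the diffs shown here; it is presumably the fourth changed file (main/rollingSummary.js, given the require above) and accounts for most of the 358 added lines. The sketch below reconstructs its apparent surface from the call sites in this diff; every method body and threshold is an illustrative assumption, not the committed implementation.

    // Hypothetical sketch of main/rollingSummary.js, inferred from call sites.
    class RollingSummary {
      constructor() {
        this.goal = '';
        this.plan = '';
        this.toolCalls = [];
      }

      setGoal(message) {
        this.goal = message;
      }

      // agenticChat.js calls this when a response looks like a plan (length > 50).
      recordPlanFromResponse(responseText) {
        this.plan = responseText;
      }

      recordToolCall(tool, params, result, iteration) {
        this.toolCalls.push({ tool, params, result, iteration });
      }

      // Assumed policy: inject once real work has happened, or whenever
      // context pressure is already nontrivial.
      shouldInjectSummary(iteration, ctxPct) {
        return iteration > 1 || ctxPct > 0.3;
      }

      // Assumed "context-proportional" budget: a small slice of the window
      // that grows as pressure rises.
      getSummaryBudget(totalCtx, ctxPct) {
        return Math.floor(totalCtx * 0.02 * (1 + ctxPct));
      }

      generateSummary(budgetTokens) {
        const recent = this.toolCalls
          .slice(-10)
          .map((c) => `- ${c.tool} (iteration ${c.iteration})`)
          .join('\n');
        const text = `## Task Summary\nGoal: ${this.goal}\nRecent actions:\n${recent}`;
        // Trim to the budget with the same chars/4 heuristic used elsewhere.
        return text.slice(0, budgetTokens * 4);
      }
    }

    module.exports = { RollingSummary };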

main/agenticChatHelpers.js

Lines changed: 5 additions & 3 deletions
@@ -375,9 +375,11 @@ function progressiveContextCompaction(options) {
     if (chatHistory) pruned += pruneVerboseHistory(chatHistory, 2);
   }
 
-  // Proactive rotation at 72% to prevent context stalls (lowered from 78%)
-  // This gives more headroom for large file generation before hitting overflow
-  const shouldRotate = pct > 0.72;
+  // Proactive rotation — raised from 72% to 85% because rolling summary +
+  // progressive compression now handle context growth more gracefully.
+  // The old 72% threshold was too aggressive, causing premature rotations that
+  // destroyed conversation context unnecessarily.
+  const shouldRotate = pct > 0.85;
 
   if (pruned > 0) {
     console.log(`[Context Compaction] Phase ${pct > 0.75 ? 3 : pct > 0.60 ? 2 : 1}: compacted ${pruned} items at ${Math.round(pct * 100)}% usage`);
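
To make the threshold change concrete: assuming the default 32768-token context from agenticChat.js, rotation now fires only above roughly 27,852 used tokens instead of roughly 23,592. A minimal sketch of the arithmetic (the constant is the default fallback, not a guaranteed runtime value):

    const totalCtx = 32768; // default hwContextSize fallback in agenticChat.js

    const rotateAt = (threshold) => Math.floor(totalCtx * threshold);
    console.log(rotateAt(0.72)); // 23592 (old trigger point)
    console.log(rotateAt(0.85)); // 27852 (new trigger point)
    // About 4,260 extra tokens of headroom per session before a rotation
    // discards the in-context conversation state.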

main/llmEngine.js

Lines changed: 3 additions & 0 deletions
@@ -148,8 +148,11 @@ class LLMEngine extends EventEmitter {
     if (this.chatHistory.length <= MAX_HISTORY_ENTRIES) return;
     const sysMsg = this.chatHistory[0];
     const keepCount = Math.ceil(this.chatHistory.length * 0.8);
+    const droppedCount = this.chatHistory.length - 1 - keepCount;
+    console.log(`[LLMEngine] _compactHistory: dropping ${droppedCount} of ${this.chatHistory.length} entries (keeping ${keepCount})`);
     this.chatHistory = [sysMsg, ...this.chatHistory.slice(-keepCount)];
     this.lastEvaluation = null;
+    this._lastCompactDropped = droppedCount;
   }
 
   _sanitizeResponse(text) {
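
The new log line makes the compaction arithmetic easy to check by hand. A worked example (entry count assumed; MAX_HISTORY_ENTRIES is not shown in this excerpt):

    // Suppose chatHistory holds 100 entries (index 0 is the system message).
    const length = 100;
    const keepCount = Math.ceil(length * 0.8);   // 80
    const droppedCount = length - 1 - keepCount; // 19
    // Logged as: [LLMEngine] _compactHistory: dropping 19 of 100 entries (keeping 80)
    // _lastCompactDropped = 19 is kept on the instance so callers can
    // observe how much history each compaction discards.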
