Commit 8b084a6

Author: Brendan Gray
feat: Phase 1 context management overhaul

- Rolling summary module for continuous task tracking in every prompt
- Standardize token estimation to /4 across all systems
- Raise rotation threshold 72% -> 85% (rolling summary handles pressure)
- Guard _compactHistory with logging and tracking
Parent: d8f3303

4 files changed: 358 additions & 8 deletions
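The "/4" in the commit message is the character-per-token heuristic used for budgeting: roughly 4 characters per token, a common rule of thumb for English text and code. As a worked illustration of why the switch from /3.5 matters (numbers are illustrative, not from the commit):

    const estimateTokens = (text) => Math.ceil((text || '').length / 4);

    // For a 28,000-character prompt:
    //   /3.5 heuristic -> 8,000 estimated tokens
    //   /4   heuristic -> 7,000 estimated tokens (12.5% lower)
    // The /4 estimate assumes denser text, so compaction and rotation
    // trigger later for the same prompt size, relying on the rolling
    // summary to keep the task in view.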

main/agenticChat.js

Lines changed: 27 additions & 5 deletions
@@ -33,6 +33,7 @@ const {
   ExecutionState,
 } = require('./agenticChatHelpers');
 const { LLMEngine } = require('./llmEngine');
+const { RollingSummary } = require('./rollingSummary');
 const { repairToolCalls: repairToolCallsFn } = require('./tools/toolParser');
 
 /**
@@ -637,7 +638,7 @@ function register(ctx) {
 
   const hwContextSize = modelStatus.modelInfo?.contextSize || 32768;
 
-  const estimateTokens = (text) => Math.ceil((text || '').length / 3.5);
+  const estimateTokens = (text) => Math.ceil((text || '').length / 4);
 
   // ModelProfile-driven budgeting
   const modelTier = llmEngine.getModelTier();
@@ -859,6 +860,9 @@ function register(ctx) {
   const summarizer = new ConversationSummarizer();
   summarizer.setGoal(message);
 
+  const rollingSummary = new RollingSummary();
+  rollingSummary.setGoal(message);
+
   // Auto-create todos for large/incremental tasks (helps model track progress across rotations)
   const autoTodoResult = autoCreateLargeTaskTodos(message, mcpToolServer);
   if (autoTodoResult?.success) {
@@ -1704,16 +1708,18 @@ function register(ctx) {
     if (isStale()) break;
 
     // Record plan
-    if (responseText.length > 50) summarizer.recordPlan(responseText);
+    if (responseText.length > 50) {
+      summarizer.recordPlan(responseText);
+      rollingSummary.recordPlanFromResponse(responseText);
+    }
 
     // ── Progressive Context Compaction ──
     try {
       let contextUsed = 0;
       try { if (llmEngine.sequence?.nextTokenIndex) contextUsed = llmEngine.sequence.nextTokenIndex; } catch (_) {}
       if (!contextUsed) {
         const pLen = typeof currentPrompt === 'string' ? currentPrompt.length : ((currentPrompt.systemContext || '').length + (currentPrompt.userMessage || '').length);
-        // Use /3.5 instead of /4 for more conservative token estimation
-        contextUsed = Math.ceil((pLen + fullResponseText.length) / 3.5);
+        contextUsed = Math.ceil((pLen + fullResponseText.length) / 4);
       }
       const compaction = progressiveContextCompaction({
         contextUsedTokens: contextUsed, totalContextTokens: totalCtx,
@@ -1896,6 +1902,7 @@ function register(ctx) {
       summarizer.recordToolCall(tr.tool, tr.params, tr.result);
       summarizer.markPlanStepCompleted(tr.tool, tr.params);
       executionState.update(tr.tool, tr.params, tr.result, iteration);
+      rollingSummary.recordToolCall(tr.tool, tr.params, tr.result, iteration);
     }
 
     // UI events — send only non-deferred results to prevent duplicate bubbles
@@ -1953,6 +1960,21 @@ function register(ctx) {
     const iterContext = executionBlock + stepDirective + taskReminder;
     const allFeedback = toolFeedback + snapFeedback;
 
+    // ── Rolling Summary Injection ──
+    // Generate context-proportional summary for the next prompt.
+    // This ensures the model always has task awareness, not just post-rotation.
+    let rollingSummaryBlock = '';
+    {
+      let _rsCtxUsed = 0;
+      try { if (llmEngine.sequence?.nextTokenIndex) _rsCtxUsed = llmEngine.sequence.nextTokenIndex; } catch (_) {}
+      if (!_rsCtxUsed) _rsCtxUsed = Math.ceil((fullResponseText.length + (iterContext + allFeedback).length) / 4);
+      const _rsCtxPct = _rsCtxUsed / totalCtx;
+      if (rollingSummary.shouldInjectSummary(iteration, _rsCtxPct)) {
+        const summaryBudget = rollingSummary.getSummaryBudget(totalCtx, _rsCtxPct);
+        rollingSummaryBlock = rollingSummary.generateSummary(summaryBudget);
+      }
+    }
+
     if (sessionJustRotated) {
       sessionJustRotated = false;
       currentPrompt = {
@@ -1962,7 +1984,7 @@
     } else {
       currentPrompt = {
         systemContext: buildStaticPrompt(),
-        userMessage: iterContext + buildDynamicContext() + '\n' + allFeedback + continueInstruction,
+        userMessage: iterContext + buildDynamicContext() + (rollingSummaryBlock ? '\n' + rollingSummaryBlock : '') + '\n' + allFeedback + continueInstruction,
       };
     }
   }
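
The rolling summary module itself is not among the diffs shown here; it is presumably the fourth changed file (main/rollingSummary.js, given the require above) and accounts for most of the 358 added lines. The sketch below reconstructs its apparent surface from the call sites in this diff; every method body and threshold is an illustrative assumption, not the committed implementation.

    // Hypothetical sketch of main/rollingSummary.js, inferred from call sites.
    class RollingSummary {
      constructor() {
        this.goal = '';
        this.plan = '';
        this.toolCalls = [];
      }

      setGoal(message) {
        this.goal = message;
      }

      // agenticChat.js calls this when a response looks like a plan (length > 50).
      recordPlanFromResponse(responseText) {
        this.plan = responseText;
      }

      recordToolCall(tool, params, result, iteration) {
        this.toolCalls.push({ tool, params, result, iteration });
      }

      // Assumed policy: inject once real work has happened, or whenever
      // context pressure is already nontrivial.
      shouldInjectSummary(iteration, ctxPct) {
        return iteration > 1 || ctxPct > 0.3;
      }

      // Assumed "context-proportional" budget: a small slice of the window
      // that grows as pressure rises.
      getSummaryBudget(totalCtx, ctxPct) {
        return Math.floor(totalCtx * 0.02 * (1 + ctxPct));
      }

      generateSummary(budgetTokens) {
        const recent = this.toolCalls
          .slice(-10)
          .map((c) => `- ${c.tool} (iteration ${c.iteration})`)
          .join('\n');
        const text = `## Task Summary\nGoal: ${this.goal}\nRecent actions:\n${recent}`;
        // Trim to the budget with the same chars/4 heuristic used elsewhere.
        return text.slice(0, budgetTokens * 4);
      }
    }

    module.exports = { RollingSummary };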

main/agenticChatHelpers.js

Lines changed: 5 additions & 3 deletions
@@ -375,9 +375,11 @@ function progressiveContextCompaction(options) {
     if (chatHistory) pruned += pruneVerboseHistory(chatHistory, 2);
   }
 
-  // Proactive rotation at 72% to prevent context stalls (lowered from 78%)
-  // This gives more headroom for large file generation before hitting overflow
-  const shouldRotate = pct > 0.72;
+  // Proactive rotation — raised from 72% to 85% because rolling summary +
+  // progressive compression now handle context growth more gracefully.
+  // The old 72% threshold was too aggressive, causing premature rotations that
+  // destroyed conversation context unnecessarily.
+  const shouldRotate = pct > 0.85;
 
   if (pruned > 0) {
     console.log(`[Context Compaction] Phase ${pct > 0.75 ? 3 : pct > 0.60 ? 2 : 1}: compacted ${pruned} items at ${Math.round(pct * 100)}% usage`);
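
To make the threshold change concrete: assuming the default 32768-token context from agenticChat.js, rotation now fires only above roughly 27,852 used tokens instead of roughly 23,592. A minimal sketch of the arithmetic (the constant is the default fallback, not a guaranteed runtime value):

    const totalCtx = 32768; // default hwContextSize fallback in agenticChat.js

    const rotateAt = (threshold) => Math.floor(totalCtx * threshold);
    console.log(rotateAt(0.72)); // 23592 (old trigger point)
    console.log(rotateAt(0.85)); // 27852 (new trigger point)
    // About 4,260 extra tokens of headroom per session before a rotation
    // discards the in-context conversation state.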

main/llmEngine.js

Lines changed: 3 additions & 0 deletions
@@ -148,8 +148,11 @@ class LLMEngine extends EventEmitter {
     if (this.chatHistory.length <= MAX_HISTORY_ENTRIES) return;
     const sysMsg = this.chatHistory[0];
     const keepCount = Math.ceil(this.chatHistory.length * 0.8);
+    const droppedCount = this.chatHistory.length - 1 - keepCount;
+    console.log(`[LLMEngine] _compactHistory: dropping ${droppedCount} of ${this.chatHistory.length} entries (keeping ${keepCount})`);
     this.chatHistory = [sysMsg, ...this.chatHistory.slice(-keepCount)];
     this.lastEvaluation = null;
+    this._lastCompactDropped = droppedCount;
   }
 
   _sanitizeResponse(text) {
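
The new log line makes the compaction arithmetic easy to check by hand. A worked example (entry count assumed; MAX_HISTORY_ENTRIES is not shown in this excerpt):

    // Suppose chatHistory holds 100 entries (index 0 is the system message).
    const length = 100;
    const keepCount = Math.ceil(length * 0.8);   // 80
    const droppedCount = length - 1 - keepCount; // 19
    // Logged as: [LLMEngine] _compactHistory: dropping 19 of 100 entries (keeping 80)
    // _lastCompactDropped = 19 is kept on the instance so callers can
    // observe how much history each compaction discards.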
