Commit d071816 (1 parent: 88a0928)
Author: Mateusz

feat: auto-continue/proceed removal - strip mechanical re-enablement messages from remote LLM submissions

When sessions are interrupted (connectivity issues), users typically type "continue" or "proceed" to re-enter the agent loop. These messages pollute context without semantic value.

This feature detects when the last user message is exactly "continue" or "proceed" (trimmed, case-insensitive) and tags it as NEVER_FORWARD, so the proxy omits it from backend transmissions while preserving it in the local agent context window.

Default: enabled. Disable via --disable-auto-continue-removal, the AUTO_CONTINUE_REMOVAL_ENABLED=false environment variable, or session.auto_continue_removal_enabled: false in config. Uses the existing non-forwardable tagging and enforcement infrastructure. Fail-open: errors during tagging are logged and the request proceeds.
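The exact-match rule described in the commit message can be sketched as follows. This is a simplified illustration, not the proxy's actual code; the function name is hypothetical:

```python
def is_auto_continue(message: dict) -> bool:
    """Return True when a message is a bare re-enablement prompt.

    Matches only when the role is "user" and the content, after
    trimming whitespace and lowercasing, is exactly "continue" or
    "proceed" -- phrases such as "please continue" do not match.
    """
    content = message.get("content")
    return (
        message.get("role") == "user"
        and isinstance(content, str)
        and content.strip().lower() in {"continue", "proceed"}
    )

# Only the last user message of a request is ever checked:
assert is_auto_continue({"role": "user", "content": "  Continue \n"})
assert not is_auto_continue({"role": "user", "content": "please continue"})
assert not is_auto_continue({"role": "assistant", "content": "proceed"})
```

Because the match is exact after trimming and lowercasing, intentional instructions that merely contain the word "continue" are always forwarded.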

14 files changed: 685 additions & 68 deletions
config/config.example.yaml

Lines changed: 23 additions & 0 deletions

@@ -112,6 +112,9 @@ session:
   # Fix improperly formatted <think> tags in model responses
   fix_think_tags_enabled: false # Set to true to enable think tags correction
+
+  # When the last user message is exactly "continue" or "proceed",
+  # tag it as non-forwardable so it is excluded from remote LLM submissions.
+  auto_continue_removal_enabled: true

   # Planning phase: Route initial requests to a strong model for better planning
   planning_phase:
     enabled: false # Set to true to enable planning phase

@@ -471,6 +474,26 @@ resilience:
   # Force shared scoping for selected backends (optional override).
   shared_backend_types: []

+# Scheduled provider warm-up for sliding usage windows.
+# Sends lightweight prompts at fixed local server times to intentionally start
+# request windows at more favorable times of day.
+# Only explicit backend:model routes are allowed. Aliases, model-only selectors,
+# and composite selectors using ^ or | are rejected.
+usage_window_warmup:
+  enabled: false
+  entries: []
+  # Example:
+  # entries:
+  #   - model: "openai-codex:gpt-5.4-mini"
+  #     time: "08:00"
+  #     execute_on_weekend: false
+  #   - model: "gemini.2:google/gemini-2.5-flash"
+  #     time: "13:30"
+  #     execute_on_weekend: false
+  #   - model: "gemini.2:google/gemini-2.5-flash"
+  #     time: "18:45"
+  #     execute_on_weekend: true
+
 # Model name rewrite rules (optional)
 # These rules allow you to dynamically rewrite model names before they are processed
 # Rules are processed in order, and the first matching rule is applied

config/schemas/app_config.schema.yaml

Lines changed: 18 additions & 0 deletions

@@ -230,6 +230,7 @@ properties:
     fix_think_tags_streaming_buffer_size: { type: integer, minimum: 1024 }
     droid_path_fix_enabled: { type: boolean }
     double_ampersand_fixes_for_windows_enabled: { type: boolean }
+    auto_continue_removal_enabled: { type: boolean }
     max_per_session_backends: { type: integer, minimum: 1 }
     session_continuity:
       type: object

@@ -554,6 +555,23 @@ properties:
         method: { type: string, enum: [GET, HEAD] }
         path: { type: string }
         accept_any_response: { type: boolean }
+  usage_window_warmup:
+    type: object
+    additionalProperties: false
+    properties:
+      enabled: { type: boolean }
+      entries:
+        type: array
+        items:
+          type: object
+          additionalProperties: false
+          required: [model, time]
+          properties:
+            model: { type: string }
+            time:
+              type: string
+              pattern: "^(?:[01]\\d|2[0-3]):[0-5]\\d$"
+            execute_on_weekend: { type: boolean }
   failure_handling:
     type: object
     additionalProperties: false
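The `time` pattern in the schema enforces 24-hour `HH:MM` values (hours 00-23, minutes 00-59). Its behavior can be verified directly with the regex copied verbatim from the schema:

```python
import re

# Same pattern as the schema (YAML's "\\d" becomes "\d" in the raw string).
TIME_PATTERN = re.compile(r"^(?:[01]\d|2[0-3]):[0-5]\d$")

# Valid warm-up times: two-digit hours and minutes, within range.
for value in ["08:00", "13:30", "18:45", "23:59"]:
    assert TIME_PATTERN.match(value), value

# Rejected: out-of-range hour/minute, single-digit hour, missing colon.
for value in ["24:00", "8:00", "12:60", "1230"]:
    assert not TIME_PATTERN.match(value), value
```

Note that single-digit hours such as `8:00` are rejected; entries must be zero-padded.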

docs/user_guide/cli-parameters.md

Lines changed: 1 addition & 0 deletions

@@ -441,6 +441,7 @@ Prevent duplicate requests from exhausting rate limits. See [Request Deduplicati
 | CLI Argument | Environment Variable | Description |
 | :--- | :--- | :--- |
 | `--fix-think-tags` | `FIX_THINK_TAGS_ENABLED=true` | Enable correction of `<think>` tags. |
+| `--disable-auto-continue-removal` | `AUTO_CONTINUE_REMOVAL_ENABLED=false` | Disable automatic removal of trailing "continue"/"proceed" user messages. |
 | `--disable-binary-file-edit-steering` | N/A | Disable binary file edit steering (overrides config). |
 | `--disable-dangerous-git-commands-protection` | `DANGEROUS_COMMAND_PREVENTION_ENABLED=false` | Disable dangerous command protection. |
 | N/A | `DANGEROUS_COMMAND_STEERING_MESSAGE` | Custom message for dangerous commands. |

docs/user_guide/configuration.md

Lines changed: 50 additions & 6 deletions

@@ -248,6 +248,11 @@ session:
   # Fixes
   fix_think_tags_enabled: false
   fix_think_tags_streaming_buffer_size: 4096
+
+  # Auto continue/proceed removal
+  # When the last user message is exactly "continue" or "proceed",
+  # tag it as non-forwardable so it is excluded from remote LLM submissions.
+  auto_continue_removal_enabled: true

   # Quality Verifier
   quality_verifier_model: null # "backend:model"

@@ -521,12 +526,51 @@ health_check:
 | `ping.interval_seconds` | float | `30.0` | Seconds between ping checks |
 | `ping.timeout_seconds` | float | `5.0` | Ping timeout |
 | `ping.failure_threshold` | int | `3` | Failures before unhealthy |
 | `http.enabled` | bool | `true` | Enable HTTP probe checks |
 | `http.interval_seconds` | float | `60.0` | Seconds between HTTP checks |
 | `http.timeout_seconds` | float | `10.0` | HTTP request timeout |
 | `http.failure_threshold` | int | `2` | Failures before unhealthy |

+### Usage Window Warm-up (`usage_window_warmup`)
+
+Schedules lightweight background prompts at fixed local server times to intentionally start
+sliding provider request windows at more favorable times of day.
+
+- Runs automatically while the server is up.
+- Accepts explicit `backend:model` routes, including numbered backends such as
+  `gemini.2:google/gemini-2.5-flash`.
+- Rejects aliases (`alias:` / `auto:`), model-only selectors, and composite routing
+  expressions using `^` or `|`.
+- Adds random jitter between 5 and 35 seconds before each scheduled request.
+- Sends prompts like `Hi, how much is it 1234 times 567 plus 8901` and retries once
+  when a temporary error prevents a valid response.
+- For `openai-codex:<model>` entries, warm-up fans out across all currently eligible
+  managed OAuth accounts, so each account's window is warmed independently.
+
+```yaml
+usage_window_warmup:
+  enabled: true
+  entries:
+    - model: "openai-codex:gpt-5.4-mini"
+      time: "08:00"
+      execute_on_weekend: false
+    - model: "gemini.2:google/gemini-2.5-flash"
+      time: "13:30"
+      execute_on_weekend: false
+    - model: "gemini.2:google/gemini-2.5-flash"
+      time: "18:45"
+      execute_on_weekend: true
+```
+
+| Option | Type | Default | Description |
+|--------|------|---------|-------------|
+| `enabled` | bool | `false` | Enable the background warm-up scheduler |
+| `entries` | list | `[]` | Scheduled warm-up entries |
+| `entries[].model` | str | required | Explicit `backend:model` route; numbered backends allowed |
+| `entries[].time` | str | required | Local server time in `HH:MM` 24-hour format |
+| `entries[].execute_on_weekend` | bool | `false` | Allow this entry to run on Saturday and Sunday |
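The scheduling rules documented for `usage_window_warmup` (a fixed local `HH:MM` slot, an optional weekend skip, and 5-35 seconds of random jitter) can be sketched roughly as below. This is an illustration of the documented behavior, not the actual scheduler; `next_run_at` is a hypothetical name:

```python
import random
from datetime import datetime, timedelta

def next_run_at(entry: dict, now: datetime) -> datetime:
    """Compute the next warm-up moment for one config entry.

    `entry` mirrors the config shape:
    {"model": ..., "time": "HH:MM", "execute_on_weekend": bool}.
    Local server time is assumed throughout.
    """
    hour, minute = map(int, entry["time"].split(":"))
    candidate = now.replace(hour=hour, minute=minute, second=0, microsecond=0)
    if candidate <= now:
        candidate += timedelta(days=1)  # today's slot has already passed
    # Skip Saturday (weekday 5) and Sunday (6) unless the entry opts in.
    while not entry.get("execute_on_weekend", False) and candidate.weekday() >= 5:
        candidate += timedelta(days=1)
    # Random jitter of 5-35 seconds before the actual request.
    return candidate + timedelta(seconds=random.uniform(5, 35))
```

For example, an `08:00` entry with `execute_on_weekend: false` evaluated on a Friday at 09:00 would next fire shortly after 08:00 on Monday.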
 ### ProxyMem (Cross-Session Memory)

 ProxyMem provides persistent context across sessions by capturing interactions, generating LLM summaries, and injecting relevant history into new sessions.

Lines changed: 72 additions & 0 deletions

@@ -0,0 +1,72 @@
+# Auto Continue/Proceed Removal
+
+Automatically detect and exclude mechanical "continue" / "proceed" user messages from backend submissions after connectivity interruptions, keeping context windows clean.
+
+## Overview
+
+When a coding agent session is interrupted (network drop, timeout, etc.), users commonly type `continue` or `proceed` to resume. These messages serve the purely mechanical purpose of re-enabling the agent loop and provide no semantic value to the remote LLM. Without this feature, they pollute the context window and are sent to every backend on every subsequent turn.
+
+The Auto Continue/Proceed Removal feature detects when the very last user message is exactly `continue` or `proceed` (trimmed, case-insensitive) and tags it as non-forwardable. The existing non-forwardable enforcement layer then silently excludes it from outbound payloads to remote LLMs. The message remains in the agent's local context history, so the coding agent continues building a complete window; only the transmission to the remote model is affected.
+
+## Key Features
+
+- **Exact match only**: Only pure `continue` or `proceed` strings are matched (case-insensitive, trimmed). Phrases like `please continue` or `continue working` are **not** affected.
+- **Last-message scope**: Only the final user message in the request is checked. Earlier occurrences are left untouched.
+- **Non-forwardable tagging**: Uses the existing `NEVER_FORWARD` mechanism, so the proxy keeps the message in local history but excludes it from all backend transmissions.
+- **Default enabled**: Active by default; disable explicitly when needed.
+- **Fail-open**: If the non-forwardable registry or identity service is unavailable, the feature degrades gracefully without breaking requests.
+
+## How It Works
+
+1. During the request transform pipeline, the proxy inspects the last message.
+2. If the message role is `user` and its content (trimmed, lowercased) is exactly `continue` or `proceed`, the proxy computes a deterministic identity for it and tags it with `NEVER_FORWARD` and reason `auto_continue_removal`.
+3. Later, just before the backend call, the non-forwardable message enforcer filters tagged messages out of the outbound payload.
+4. On subsequent turns, the coding agent resubmits the same context window; the tag persists for the session lifetime, so the message continues to be excluded.
+
+## Configuration
+
+The feature is **enabled by default**. Configuration follows the usual precedence: CLI > Environment > Config File.
+
+### CLI Flag
+
+```bash
+# Disable the feature
+python -m src.core.cli --disable-auto-continue-removal
+```
+
+### Environment Variable
+
+```bash
+# Disable the feature
+export AUTO_CONTINUE_REMOVAL_ENABLED=false
+```
+
+### Config File
+
+```yaml
+# config.yaml
+session:
+  auto_continue_removal_enabled: false
+```
+
+## When to Disable
+
+- You want the remote LLM to see literal `continue` / `proceed` prompts (e.g. for debugging agent behavior).
+- Your workflow uses custom continue-like keywords that should reach the model.
+- You are testing context window behavior and need every message forwarded verbatim.
+
+## Logging
+
+When a message is tagged, the proxy logs at INFO level:
+
+```
+Auto continue removal: tagged last user message for session abc-123, reason=auto_continue_removal
+```
+
+Debug-level logging shows when messages are checked but not matched.
+
+## Related Features
+
+- [Non-Forwardable Message Tagging](../features/non-forwardable-message-tagging.md) - Underlying tagging and enforcement mechanism
+- [Quality Verifier System](quality-verifier.md) - Verifies individual responses for quality
+- [Context Window Enforcement](context-window-enforcement.md) - Enforces per-model context limits
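The tag-then-filter flow described in "How It Works" can be sketched end to end as below. All names here (`tag_auto_continue`, `outbound_payload`, the set-based registry, the identity scheme) are hypothetical simplifications of the proxy's real registry and enforcer:

```python
import hashlib

NEVER_FORWARD: set[str] = set()  # registry of tagged message identities

def identity(msg: dict) -> str:
    # Deterministic identity: the same message resubmitted on a later
    # turn maps to the same value, so exclusion persists (step 4).
    raw = f'{msg["role"]}|{msg["content"].strip().lower()}'
    return hashlib.sha256(raw.encode("utf-8")).hexdigest()

def tag_auto_continue(messages: list[dict]) -> None:
    """Step 2: tag a bare trailing 'continue'/'proceed' user message."""
    if not messages:
        return
    last = messages[-1]
    content = last.get("content")
    if (
        last.get("role") == "user"
        and isinstance(content, str)
        and content.strip().lower() in {"continue", "proceed"}
    ):
        NEVER_FORWARD.add(identity(last))

def outbound_payload(messages: list[dict]) -> list[dict]:
    """Step 3: drop tagged messages from the backend-bound payload."""
    return [m for m in messages if identity(m) not in NEVER_FORWARD]

history = [
    {"role": "user", "content": "Refactor the parser"},
    {"role": "assistant", "content": "Done."},
    {"role": "user", "content": "continue"},
]
tag_auto_continue(history)
assert len(outbound_payload(history)) == 2  # "continue" is not forwarded
assert len(history) == 3                    # but it stays in local history
```

Note the asymmetry the feature relies on: the local history list is never mutated; only the outbound copy is filtered.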

docs/user_guide/index.md

Lines changed: 1 addition & 0 deletions

@@ -46,6 +46,7 @@ Advanced features that enhance the proxy's capabilities:
 ### Response Processing

 - **[Think Tags Fix](features/think-tags-fix.md)** - Correct improperly formatted thinking tags in model responses
+- **[Auto Continue/Proceed Removal](features/auto-continue-removal.md)** - Strip mechanical "continue"/"proceed" messages from remote LLM submissions after interruptions
 - **[Edit Precision Tuning](features/edit-precision.md)** - Automatically adjust temperature and top_p for code editing tasks

 ### Session Memory

src/core/cli_support/applicators/session_applicator.py

Lines changed: 20 additions & 8 deletions

@@ -621,20 +621,32 @@ def _apply_session_flags(
             origin="--disable-dangerous-git-commands-protection",
         )

         if (
             getattr(args, "disable_double_ampersand_fixes_for_windows", None)
             is not None
         ):
             session = overrides.setdefault("session", {})
             session["double_ampersand_fixes_for_windows_enabled"] = (
                 not args.disable_double_ampersand_fixes_for_windows
             )
             resolution.record(
                 "session.double_ampersand_fixes_for_windows_enabled",
                 not args.disable_double_ampersand_fixes_for_windows,
                 ParameterSource.CLI,
                 origin="--disable-double-ampersand-fixes-for-windows",
             )

+        if getattr(args, "disable_auto_continue_removal", None) is not None:
+            session = overrides.setdefault("session", {})
+            session["auto_continue_removal_enabled"] = (
+                not args.disable_auto_continue_removal
+            )
+            resolution.record(
+                "session.auto_continue_removal_enabled",
+                not args.disable_auto_continue_removal,
+                ParameterSource.CLI,
+                origin="--disable-auto-continue-removal",
+            )

     def _apply_strict_command_detection(
         self,

src/core/config/env/from_env_part1b.py

Lines changed: 15 additions & 8 deletions

@@ -195,14 +195,21 @@ def _optional_int(value: str) -> int | None:
         path="session.fix_think_tags_streaming_buffer_size",
         resolution=resolution,
     ),
     "double_ampersand_fixes_for_windows_enabled": _env_to_bool(
         "DOUBLE_AMPERSAND_FIXES_FOR_WINDOWS_ENABLED",
         True,
         env,
         path="session.double_ampersand_fixes_for_windows_enabled",
         resolution=resolution,
     ),
+    "auto_continue_removal_enabled": _env_to_bool(
+        "AUTO_CONTINUE_REMOVAL_ENABLED",
+        True,
+        env,
+        path="session.auto_continue_removal_enabled",
+        resolution=resolution,
+    ),
     "planning_phase": {
         "enabled": _env_to_bool(
             "PLANNING_PHASE_ENABLED",
             False,
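For context, `_env_to_bool` evidently resolves a boolean from the environment with a per-key default (`True` for `AUTO_CONTINUE_REMOVAL_ENABLED`). A minimal sketch of that behavior, assuming conventional truthy/falsy string handling; the real helper also records the resolution source via `path`/`resolution`, omitted here:

```python
def env_to_bool(name: str, default: bool, env: dict[str, str]) -> bool:
    """Parse a boolean env var; unset or unrecognized values fall back
    to the supplied default."""
    raw = env.get(name)
    if raw is None:
        return default
    value = raw.strip().lower()
    if value in {"1", "true", "yes", "on"}:
        return True
    if value in {"0", "false", "no", "off"}:
        return False
    return default

assert env_to_bool("AUTO_CONTINUE_REMOVAL_ENABLED", True, {}) is True
assert env_to_bool(
    "AUTO_CONTINUE_REMOVAL_ENABLED", True,
    {"AUTO_CONTINUE_REMOVAL_ENABLED": "false"},
) is False
```

With a default of `True`, the feature stays enabled unless the variable is explicitly set to a falsy value.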

src/core/config/models/session.py

Lines changed: 6 additions & 5 deletions

@@ -273,11 +273,12 @@ class SessionConfig(DomainModel):
     test_execution_reminder_enabled: bool | None = None
     test_execution_reminder_message: str | None = None
     droid_path_fix_enabled: bool = False
     fix_think_tags_enabled: bool = False
     fix_think_tags_streaming_buffer_size: int = 4096
     double_ampersand_fixes_for_windows_enabled: bool = True
     """Whether automatic && to ; replacement is enabled for Windows clients."""
+    auto_continue_removal_enabled: bool = True
     planning_phase: PlanningPhaseConfig = Field(default_factory=PlanningPhaseConfig)
     max_per_session_backends: int = 32
     session_continuity: SessionContinuityConfig = Field(
         default_factory=SessionContinuityConfig

Comments (0)