diff --git a/Dockerfile b/Dockerfile
index fae7bc7..7056e3f 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -13,6 +13,15 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
 # Install Himalaya CLI (pre-built Rust binary for email management)
 RUN curl -sSL https://raw.githubusercontent.com/pimalaya/himalaya/master/install.sh | sh
 
+# Install GitHub CLI (gh) from the official apt repository (Tools tab: gh)
+RUN curl -fsSL https://cli.github.com/packages/githubcli-archive-keyring.gpg \
+      -o /usr/share/keyrings/githubcli-archive-keyring.gpg \
+    && chmod go+r /usr/share/keyrings/githubcli-archive-keyring.gpg \
+    && echo "deb [arch=$(dpkg --print-architecture) signed-by=/usr/share/keyrings/githubcli-archive-keyring.gpg] https://cli.github.com/packages stable main" \
+      > /etc/apt/sources.list.d/github-cli.list \
+    && apt-get update && apt-get install -y --no-install-recommends gh \
+    && rm -rf /var/lib/apt/lists/*
+
 # Install uv
 COPY --from=ghcr.io/astral-sh/uv:latest /uv /usr/local/bin/uv
 
diff --git a/api/admin.py b/api/admin.py
index 675e631..cfdd7e9 100644
--- a/api/admin.py
+++ b/api/admin.py
@@ -8,6 +8,7 @@
 
 from __future__ import annotations
 
+import asyncio
 import collections
 import hashlib
 import json
@@ -36,6 +37,8 @@
     DEFAULT_TOOL_USAGE_BLOCK,
     build_prompt_sections,
 )
+from core.tools import registry as tool_registry
+from core.tools import tool_env
 from core.wacli import WacliManager
 
 if TYPE_CHECKING:
@@ -545,6 +548,8 @@ async def _lifespan(app: FastAPI):  # noqa: ANN001
     _HISTORY_PREFIX = "history."
     _EMAIL_PREFIX = "email."
     _PROMPT_PREFIX = "prompt."
+    _TOOLS_PREFIX = "tools."
+    _COMPACTION_PREFIX = "compaction."
 
     def _is_managed_key(key: str) -> bool:
         """Return True if this key is managed by a dedicated tab (not Config)."""
@@ -562,6 +567,8 @@ def _is_managed_key(key: str) -> bool:
             _HISTORY_PREFIX,
             _EMAIL_PREFIX,
             _PROMPT_PREFIX,
+            _TOOLS_PREFIX,
+            _COMPACTION_PREFIX,
         ):
             if key.startswith(prefix):
                 return True
@@ -801,6 +808,8 @@ async def partial_llm() -> HTMLResponse:
         tr_enabled = tr_enabled if tr_enabled is not None else "true"
         tr_provider = await config_store.get("task_reflection.provider") or "anthropic"
         tr_model = await config_store.get("task_reflection.model") or "claude-haiku-4-5"
+        compaction_provider = await config_store.get("compaction.provider") or "anthropic"
+        compaction_model = await config_store.get("compaction.model") or "claude-haiku-4-5"
         prompt_tool_usage_override = await config_store.get("prompt.tool_usage_override") or ""
         prompt_history_override = await config_store.get("prompt.history_handling_override") or ""
         prompt_capture_enabled = await config_store.get("admin.capture_prompts")
@@ -830,6 +839,8 @@ async def partial_llm() -> HTMLResponse:
             tr_enabled=tr_enabled,
             tr_provider=tr_provider,
             tr_model=tr_model,
+            compaction_provider=compaction_provider,
+            compaction_model=compaction_model,
             prompt_tool_usage_override=prompt_tool_usage_override,
             prompt_history_override=prompt_history_override,
             default_tool_usage=DEFAULT_TOOL_USAGE_BLOCK,
@@ -837,6 +848,49 @@ async def partial_llm() -> HTMLResponse:
             prompt_capture_enabled=prompt_capture_enabled,
         )
 
+    @app.get("/partials/tools", dependencies=[Depends(auth)])
+    async def partial_tools() -> HTMLResponse:
+        """Tools tab partial — manage optional external CLI tools (e.g. gh)."""
+        gh_enabled = await config_store.get("tools.gh.enabled")
+        gh_enabled = gh_enabled if gh_enabled is not None else "false"
+        gh_token = await config_store.get("tools.gh.token") or ""
+        return _render_partial(
+            "partials/tools.html",
+            tools=tool_registry(),
+            gh_enabled=gh_enabled,
+            gh_token=gh_token,
+        )
+
+    @app.post("/tools/gh/test", dependencies=[Depends(auth)])
+    async def test_gh_tool(request: Request) -> dict:
+        """Verify a GitHub token by calling the GitHub API as that token."""
+        body = await request.json()
+        token = str(body.get("token", "")).strip()
+        if not token:
+            return {"ok": False, "error": "Token is required."}
+        try:
+            resp = await asyncio.to_thread(
+                http_requests.get,
+                "https://api.github.com/user",
+                headers={
+                    "Authorization": f"Bearer {token}",
+                    "Accept": "application/vnd.github+json",
+                    "X-GitHub-Api-Version": "2022-11-28",
+                },
+                timeout=10,
+            )
+        except Exception as exc:  # noqa: BLE001 — surface any network error to the UI
+            return {"ok": False, "error": str(exc)}
+        if resp.status_code == 200:
+            login = resp.json().get("login", "")
+            return {"ok": True, "login": login}
+        if resp.status_code in (401, 403):
+            return {
+                "ok": False,
+                "error": "Token rejected by GitHub (invalid or insufficient scope).",
+            }
+        return {"ok": False, "error": f"GitHub returned HTTP {resp.status_code}."}
+
     @app.get("/partials/search", dependencies=[Depends(auth)])
     async def partial_search() -> HTMLResponse:
         """Search tab partial."""
@@ -890,10 +944,23 @@ async def partial_history() -> HTMLResponse:
         """History tab partial."""
         mode = await config_store.get("history.mode") or "injection"
         max_turns = await config_store.get("history.max_turns") or "10"
+        c_enabled = await config_store.get("compaction.enabled")
+        c_enabled = c_enabled if c_enabled is not None else "true"
+        c_threshold_type = await config_store.get("compaction.threshold_type") or "percent"
+        c_threshold_percent = await config_store.get("compaction.threshold_percent") or "80"
+        c_threshold_tokens = await config_store.get("compaction.threshold_tokens") or "150000"
+        c_context_window = await config_store.get("compaction.context_window") or "200000"
+        c_keep_recent_turns = await config_store.get("compaction.keep_recent_turns") or "4"
         return _render_partial(
             "partials/history.html",
             mode=mode,
             max_turns=max_turns,
+            compaction_enabled=c_enabled,
+            compaction_threshold_type=c_threshold_type,
+            compaction_threshold_percent=c_threshold_percent,
+            compaction_threshold_tokens=c_threshold_tokens,
+            compaction_context_window=c_context_window,
+            compaction_keep_recent_turns=c_keep_recent_turns,
         )
 
     @app.get("/partials/logs", dependencies=[Depends(auth)])
@@ -1174,6 +1241,7 @@ async def patch_config(body: ConfigPatchIn) -> dict:
                 new_config = await config_store.export_to_config()
                 agent.config = new_config
                 agent.llm = LLMClient.from_agent_config(new_config.agent)
+                agent.executor.tool_env = tool_env(new_config)
                 agent.history_mode = new_config.history.mode
                 agent.memory.long_term_limit = new_config.memory.long_term_limit
                 agent.reflections.max_reflections = new_config.task_reflection.max_reflections
diff --git a/api/templates/base.html b/api/templates/base.html
index f5aeb32..587183c 100644
--- a/api/templates/base.html
+++ b/api/templates/base.html
@@ -42,7 +42,7 @@
     }
     window.showToast = showToast;
 
-    function llmTab(provider, apiKey, model, openaiKey, openaiBaseUrl, googleKey, googleBaseUrl, grokKey, grokBaseUrl, deepseekKey, deepseekBaseUrl, extractionProvider, extractionModel, consolidationProvider, consolidationModel, gdEnabled, gdProvider, gdModel, trEnabled, trProvider, trModel, promptToolUsageOverride, promptHistoryOverride, defaultToolUsage, defaultHistoryHandling, promptCaptureEnabled) {
+    function llmTab(provider, apiKey, model, openaiKey, openaiBaseUrl, googleKey, googleBaseUrl, grokKey, grokBaseUrl, deepseekKey, deepseekBaseUrl, extractionProvider, extractionModel, consolidationProvider, consolidationModel, gdEnabled, gdProvider, gdModel, trEnabled, trProvider, trModel, promptToolUsageOverride, promptHistoryOverride, defaultToolUsage, defaultHistoryHandling, promptCaptureEnabled, compactionProvider, compactionModel) {
       const currentProvider = provider || 'anthropic';
       return {
         providerOptions: [
@@ -97,6 +97,10 @@
         gdResultOk: false,
         trResult: '',
         trResultOk: false,
+        compactionProvider: compactionProvider || 'anthropic',
+        compactionModel: compactionModel || 'claude-haiku-4-5',
+        compactionResult: '',
+        compactionResultOk: false,
         result: '',
         resultOk: false,
         tests: {
diff --git a/api/templates/dashboard.html b/api/templates/dashboard.html
index aed07a3..49a33fb 100644
--- a/api/templates/dashboard.html
+++ b/api/templates/dashboard.html
@@ -80,6 +80,10 @@ <h2 class="text-base text-accent">Agent Control</h2>
             @click="select('search')">
       Search
     </button>
+    <button class="tab-link" :class="tab === 'tools' && 'tab-link-active'"
+            @click="select('tools')">
+      Tools
+    </button>
     <button class="tab-link" :class="tab === 'permissions' && 'tab-link-active'"
             @click="select('permissions')">
       Permissions
@@ -675,6 +679,7 @@ <h2 class="text-base text-accent">Agent Control</h2>
       history: '/partials/history',
       channels: '/partials/channels',
       search: '/partials/search',
+      tools: '/partials/tools',
       permissions: '/partials/permissions',
       skills: '/partials/skills',
       jobs: '/partials/jobs',
diff --git a/api/templates/partials/history.html b/api/templates/partials/history.html
index b591134..d63897f 100644
--- a/api/templates/partials/history.html
+++ b/api/templates/partials/history.html
@@ -2,8 +2,16 @@
 <div class="space-y-6" x-data="{
   mode: {{ mode|default('injection', true)|tojson|forceescape }},
   maxTurns: {{ max_turns|default('10', true)|tojson|forceescape }},
+  compactionEnabled: {{ compaction_enabled|default('true', true)|tojson|forceescape }},
+  thresholdType: {{ compaction_threshold_type|default('percent', true)|tojson|forceescape }},
+  thresholdPercent: {{ compaction_threshold_percent|default('80', true)|tojson|forceescape }},
+  thresholdTokens: {{ compaction_threshold_tokens|default('150000', true)|tojson|forceescape }},
+  contextWindow: {{ compaction_context_window|default('200000', true)|tojson|forceescape }},
+  keepRecentTurns: {{ compaction_keep_recent_turns|default('4', true)|tojson|forceescape }},
   result: '',
-  resultOk: false
+  resultOk: false,
+  cResult: '',
+  cResultOk: false
 }">
   <div class="card">
     <h2 class="text-base mb-1">Conversation History</h2>
@@ -66,4 +74,104 @@ <h2 class="text-base mb-1">Conversation History</h2>
       <div :class="resultOk ? 'alert-success' : 'alert-error'" class="mt-2" x-text="result"></div>
     </template>
   </div>
+
+  {# Compaction — only relevant in session mode #}
+  <div class="card">
+    <div class="flex items-center justify-between gap-3 mb-1">
+      <h2 class="text-base">Context compaction</h2>
+      <div class="flex items-center gap-2">
+        <label class="label mb-0">Enabled</label>
+        <input type="checkbox" x-model="compactionEnabled"
+               :checked="compactionEnabled === 'true' || compactionEnabled === true">
+      </div>
+    </div>
+    <p class="text-muted text-xs mb-4">
+      In <strong>session</strong> mode, when the conversation approaches the model's
+      context window, the oldest turns are summarized by a small model (configured in
+      the <strong>LLM</strong> tab) while recent turns are kept verbatim. The user is
+      notified with a system message when this happens. Has no effect in injection mode.
+    </p>
+
+    <div class="space-y-3" :class="(compactionEnabled === 'true' || compactionEnabled === true) ? '' : 'opacity-50'">
+      <div>
+        <label class="label">Trigger threshold</label>
+        <div class="flex gap-4 mt-1">
+          <label class="flex items-center gap-2 cursor-pointer">
+            <input type="radio" name="threshold_type" value="percent"
+                   x-model="thresholdType" class="accent-primary">
+            <span class="text-sm">% of context window</span>
+          </label>
+          <label class="flex items-center gap-2 cursor-pointer">
+            <input type="radio" name="threshold_type" value="tokens"
+                   x-model="thresholdType" class="accent-primary">
+            <span class="text-sm">Absolute tokens</span>
+          </label>
+        </div>
+      </div>
+
+      <div x-show="thresholdType === 'percent'" x-transition class="flex gap-4 flex-wrap">
+        <div>
+          <label class="label">Percent</label>
+          <input type="number" class="input-sm" style="max-width:120px"
+                 x-model="thresholdPercent" min="10" max="99">
+          <p class="text-muted text-xs mt-1">Compact at this % of the window.</p>
+        </div>
+        <div>
+          <label class="label">Context window (fallback)</label>
+          <input type="number" class="input-sm" style="max-width:160px"
+                 x-model="contextWindow" min="8000" step="1000">
+          <p class="text-muted text-xs mt-1">Used when the model's window isn't known (e.g. 200000, 1000000).</p>
+        </div>
+      </div>
+
+      <div x-show="thresholdType === 'tokens'" x-transition>
+        <label class="label">Tokens</label>
+        <input type="number" class="input-sm" style="max-width:160px"
+               x-model="thresholdTokens" min="8000" step="1000">
+        <p class="text-muted text-xs mt-1">Compact once the context reaches this many tokens (e.g. 150000).</p>
+      </div>
+
+      <div>
+        <label class="label">Keep recent turns</label>
+        <input type="number" class="input-sm" style="max-width:120px"
+               x-model="keepRecentTurns" min="1" max="20">
+        <p class="text-muted text-xs mt-1">Most-recent user turns kept verbatim; everything older is summarized.</p>
+      </div>
+    </div>
+
+    <div class="flex items-center gap-2 mt-4">
+      <button class="btn-primary btn-sm"
+              @click="
+                const vals = {
+                  'compaction.enabled': String(compactionEnabled === true || compactionEnabled === 'true'),
+                  'compaction.threshold_type': thresholdType,
+                  'compaction.threshold_percent': String(thresholdPercent),
+                  'compaction.threshold_tokens': String(thresholdTokens),
+                  'compaction.context_window': String(contextWindow),
+                  'compaction.keep_recent_turns': String(keepRecentTurns)
+                };
+                fetch('/config', {
+                  method: 'PATCH',
+                  headers: {
+                    'Content-Type': 'application/json',
+                    'Authorization': 'Bearer ' + (localStorage.getItem('admin_api_key') || '')
+                  },
+                  body: JSON.stringify({values: vals})
+                })
+                .then(r => { cResultOk = r.ok; return r.json(); })
+                .then(d => {
+                  cResult = cResultOk ? 'Compaction settings saved' : (d.detail || 'Error');
+                  if (cResultOk && window.showToast) { window.showToast('Compaction settings saved'); }
+                })
+                .catch(e => { cResultOk = false; cResult = e.message; })
+              ">
+        Save
+      </button>
+      <span class="text-muted text-xs">Compaction model is set in the LLM tab.</span>
+    </div>
+
+    <template x-if="cResult">
+      <div :class="cResultOk ? 'alert-success' : 'alert-error'" class="mt-2" x-text="cResult"></div>
+    </template>
+  </div>
 </div>
diff --git a/api/templates/partials/llm.html b/api/templates/partials/llm.html
index 5697478..27d33fa 100644
--- a/api/templates/partials/llm.html
+++ b/api/templates/partials/llm.html
@@ -25,7 +25,9 @@
   {{ prompt_history_override|default('', true)|tojson|forceescape }},
   {{ default_tool_usage|default('', true)|tojson|forceescape }},
   {{ default_history_handling|default('', true)|tojson|forceescape }},
-  {{ prompt_capture_enabled|default(false, true)|tojson|forceescape }}
+  {{ prompt_capture_enabled|default(false, true)|tojson|forceescape }},
+  {{ compaction_provider|default('anthropic', true)|tojson|forceescape }},
+  {{ compaction_model|default('claude-haiku-4-5', true)|tojson|forceescape }}
 )">
   <div class="card">
     <h2 class="text-base mb-1">System Prompt Controls</h2>
@@ -415,6 +417,61 @@ <h2 class="text-base mb-1">Task Reflection</h2>
     </template>
   </div>
 
+  <div class="card">
+    <h2 class="text-base mb-1">History Compaction</h2>
+    <p class="text-muted text-xs mb-4">Model used to summarize older conversation turns when a session's context grows large. Enable compaction and set the trigger threshold in the <strong>History</strong> tab.</p>
+
+    <form class="space-y-4" @submit.prevent>
+      <div>
+        <label class="label">Provider</label>
+        <select class="input-sm" style="max-width:500px" x-model="compactionProvider" x-effect="$el.value = compactionProvider">
+          <template x-for="item in providerOptions" :key="'cp-' + item.value">
+            <option :value="item.value" :selected="item.value === compactionProvider" x-text="item.label"></option>
+          </template>
+        </select>
+      </div>
+      <div>
+        <label class="label">Model</label>
+        <input type="text" class="input-sm" style="max-width:500px" x-model="compactionModel"
+               list="dl-model-compaction" placeholder="Type or pick a model">
+        <datalist id="dl-model-compaction">
+          <template x-for="item in modelOptions(compactionProvider)" :key="'cp-m-' + item.value">
+            <option :value="item.value"></option>
+          </template>
+        </datalist>
+        <p class="text-muted text-xs mt-1">A small, fast model is recommended (summarization is cheap).</p>
+      </div>
+    </form>
+
+    <div class="flex items-center gap-2 mt-4">
+      <button class="btn-primary btn-sm"
+              @click="
+                fetch('/config', {
+                  method: 'PATCH',
+                  headers: {
+                    'Content-Type': 'application/json',
+                    'Authorization': 'Bearer ' + (localStorage.getItem('admin_api_key') || '')
+                  },
+                  body: JSON.stringify({values: {
+                    'compaction.provider': compactionProvider,
+                    'compaction.model': compactionModel
+                  }})
+                })
+                .then(r => { compactionResultOk = r.ok; return r.json(); })
+                .then(d => {
+                  compactionResult = compactionResultOk ? 'Compaction model saved' : (d.detail || 'Error');
+                  if (compactionResultOk && window.showToast) { window.showToast('Compaction model saved'); }
+                })
+                .catch(e => { compactionResultOk = false; compactionResult = e.message; })
+              ">
+        Save compaction model
+      </button>
+    </div>
+    <template x-if="compactionResult">
+      <div :class="compactionResultOk ? 'alert-success' : 'alert-error'" class="mt-2" x-text="compactionResult"></div>
+    </template>
+  </div>
+
   <div class="card">
     <h2 class="text-base mb-1">Anthropic</h2>
     <p class="text-muted text-xs mb-4">Configure the Anthropic API key and test the connection.</p>
diff --git a/api/templates/partials/tools.html b/api/templates/partials/tools.html
new file mode 100644
index 0000000..91ba92e
--- /dev/null
+++ b/api/templates/partials/tools.html
@@ -0,0 +1,115 @@
+{# Tools tab partial — manage optional external CLI tools #}
+<div class="space-y-6" x-data="toolsTab(
+  {{ gh_enabled|default('false', true)|tojson|forceescape }},
+  {{ gh_token|default('', true)|tojson|forceescape }}
+)">
+  <div class="card">
+    <h2 class="text-base mb-1">Tools</h2>
+    <p class="text-muted text-xs mb-4">
+      Optional external CLI tools the agent can use. When a tool is enabled it is
+      authenticated and advertised to the assistant in its system prompt; when
+      disabled it stays hidden. In session history mode, newly enabled tools are
+      advertised starting from the next <code>/new</code> conversation.
+    </p>
+  </div>
+
+  {# GitHub CLI (gh) #}
+  <div class="card">
+    <div class="flex items-center justify-between gap-3 mb-1">
+      <h3 class="text-sm text-accent">GitHub CLI (gh)</h3>
+      <div class="flex items-center gap-2">
+        <label class="label mb-0">Enabled</label>
+        <input type="checkbox" x-model="ghEnabled"
+               :checked="ghEnabled === 'true' || ghEnabled === true">
+      </div>
+    </div>
+    <p class="text-muted text-xs mb-4">
+      Let the agent query and act on GitHub (issues, PRs, repos, <code>gh api</code>,
+      search). Read operations run without asking; creating issues, PRs or releases
+      ask for confirmation first.
+    </p>
+
+    <div class="space-y-3">
+      <div>
+        <label class="label">Personal Access Token</label>
+        <input type="text" class="input-sm" style="max-width:500px; display:none"
+               autocomplete="username" aria-hidden="true" tabindex="-1">
+        <input type="password" class="input-sm" style="max-width:500px"
+               autocomplete="new-password"
+               x-model="ghToken" placeholder="ghp_... or github_pat_...">
+        <p class="text-muted text-xs mt-1">
+          A GitHub
+          <a class="link" href="https://github.com/settings/tokens" target="_blank" rel="noreferrer">Personal Access Token</a>.
+          Injected as <code>GH_TOKEN</code> so <code>gh</code> is authenticated
+          non-interactively. Grant only the scopes you want the agent to have
+          (e.g. <code>repo</code>, <code>read:org</code>).
+        </p>
+      </div>
+    </div>
+
+    <div class="flex items-center gap-2 mt-4">
+      <button class="btn-primary btn-sm"
+              @click="
+                const vals = {
+                  'tools.gh.enabled': String(ghEnabled === true || ghEnabled === 'true'),
+                  'tools.gh.token': ghToken
+                };
+                fetch('/config', {
+                  method: 'PATCH',
+                  headers: {
+                    'Content-Type': 'application/json',
+                    'Authorization': 'Bearer ' + (localStorage.getItem('admin_api_key') || '')
+                  },
+                  body: JSON.stringify({values: vals})
+                })
+                .then(r => { resultOk = r.ok; return r.json(); })
+                .then(d => {
+                  result = resultOk ? 'GitHub CLI settings saved' : (d.detail || 'Error');
+                  if (resultOk && window.showToast) { window.showToast('GitHub CLI settings saved'); }
+                })
+                .catch(e => { resultOk = false; result = e.message; })
+              ">
+        Save
+      </button>
+      <button class="btn btn-sm" :disabled="testing || !ghToken"
+              @click="
+                testing = true; testResult = '';
+                fetch('/tools/gh/test', {
+                  method: 'POST',
+                  headers: {
+                    'Content-Type': 'application/json',
+                    'Authorization': 'Bearer ' + (localStorage.getItem('admin_api_key') || '')
+                  },
+                  body: JSON.stringify({token: ghToken})
+                })
+                .then(r => r.json())
+                .then(d => { testResult = d.ok ? ('Authenticated as ' + (d.login || 'user')) : ('Failed: ' + (d.error || 'unknown')); })
+                .catch(e => { testResult = 'Error: ' + e.message; })
+                .finally(() => { testing = false; })
+              ">
+        <span x-show="!testing">Test token</span>
+        <span x-show="testing">Testing...</span>
+      </button>
+    </div>
+
+    <template x-if="result">
+      <div :class="resultOk ? 'alert-success' : 'alert-error'" class="mt-2" x-text="result"></div>
+    </template>
+    <template x-if="testResult">
+      <div :class="testResult.startsWith('Authenticated') ? 'alert-success' : 'alert-error'" class="mt-2" x-text="testResult"></div>
+    </template>
+  </div>
+</div>
+
+<script>
+  function toolsTab(ghEnabled, ghToken) {
+    return {
+      ghEnabled: ghEnabled,
+      ghToken: ghToken,
+      result: '',
+      resultOk: false,
+      testing: false,
+      testResult: ''
+    };
+  }
+</script>
diff --git a/channels/telegram.py b/channels/telegram.py
index 60185bf..32e1b63 100644
--- a/channels/telegram.py
+++ b/channels/telegram.py
@@ -372,6 +372,9 @@ async def _send_response(self, chat_id: int, response) -> None:
             await self.send(chat_id, response.text)
         else:
             log.warning("Skipping empty response for chat_id=%s", chat_id)
+        # Out-of-band system notice (e.g. context compaction), sent separately.
+        if getattr(response, "system_notice", None):
+            await self.send(chat_id, response.system_notice)
 
     async def _finalize_approval_response(
         self, query: CallbackQuery, resolved: bool, label: str
diff --git a/channels/whatsapp.py b/channels/whatsapp.py
index 074a601..1435370 100644
--- a/channels/whatsapp.py
+++ b/channels/whatsapp.py
@@ -122,6 +122,8 @@ async def handle_webhook(self, payload: dict) -> dict:
             chat_id=chat_id,
         )
         await self.send(sender, response.text)
+        if getattr(response, "system_notice", None):
+            await self.send(sender, response.system_notice)
         return {"ok": True}
 
     def _is_allowed(self, sender: str) -> bool:
diff --git a/core/agent.py b/core/agent.py
index 024a97c..9175d72 100644
--- a/core/agent.py
+++ b/core/agent.py
@@ -15,6 +15,7 @@
 
 from tavily import TavilyClient
 
+from core.compaction import compact_messages, should_compact
 from core.config import Config
 from core.executor import ToolExecutor
 from core.goal_decomposition import DecomposedGoal, classify_complexity, decompose_goal
@@ -28,6 +29,7 @@
 from core.scheduler import AgentScheduler
 from core.skills import SkillsEngine
 from core.task_reflection import ReflectionStore
+from core.tools import tool_env
 from voice.pipeline import VoicePipeline
 
 log = logging.getLogger(__name__)
@@ -246,7 +248,7 @@ def __init__(self, config: Config):
             db_path=config.agent.skills_db_path,
             seed_dir=config.agent.skills_dir,
         )
-        self.executor = ToolExecutor()
+        self.executor = ToolExecutor(tool_env=tool_env(config))
         self.history = ConversationHistory(
             db_path=config.history.db_path,
             max_turns=config.history.max_turns,
@@ -294,8 +296,8 @@ async def process(
         preventing context leakage across chats.
         """
 
-        # Handle /new command — clear conversational context.
-        if message.strip().lower() == "/new":
+        # Handle /new (alias /clear) command — clear conversational context.
+        if message.strip().lower() in ("/new", "/clear"):
             if self.history_mode == "session":
                 await self.history.clear_session(channel, user_id, chat_id)
             else:
@@ -308,12 +310,25 @@ async def process(
             )
             return AgentResponse(text="Conversation cleared.")
 
-        # Goal decomposition — classify and (if complex) decompose the request
+        # Goal decomposition — classify and (if complex) decompose the request.
+        # The resulting plan is request-specific, so it is injected per turn
+        # (in the user-message preamble), not baked into the static prompt.
         decomposed_goal: DecomposedGoal | None = None
         if self.config.goal_decomposition.enabled and channel != "system":
             decomposed_goal = await self._maybe_decompose(message)
 
-        system = await self._build_system_prompt(decomposed_goal=decomposed_goal)
+        # Per-turn preamble: live date/time + (optional) execution plan.
+        preamble = self._turn_preamble(decomposed_goal)
+
+        # Static system prompt. In session mode it is snapshotted once at the
+        # start of the session and reused for every turn (so the static content
+        # is only built once, not rebuilt and re-sent each turn). In injection
+        # mode the prompt is windowed/stateless, so it is rebuilt per call.
+        if self.history_mode == "session":
+            system = await self._session_system_prompt(channel, user_id, chat_id)
+        else:
+            system = await self._build_system_prompt()
+
         if self.config.admin.capture_prompts:
             self._record_system_prompt(
                 channel=channel,
@@ -324,34 +339,116 @@ async def process(
 
         if self.history_mode == "session":
             return await self._process_session(
-                system, message, channel, user_id, attachments, chat_id
+                system, preamble, message, channel, user_id, attachments, chat_id
             )
         return await self._process_injection(
-            system, message, channel, user_id, attachments, chat_id
+            system, preamble, message, channel, user_id, attachments, chat_id
+        )
+
+    def _turn_preamble(self, decomposed_goal: DecomposedGoal | None) -> str:
+        """Build the per-turn preamble prepended to the current user message.
+
+        Always carries the live date/time (so the agent knows 'now' every turn);
+        also carries the execution plan when the request was decomposed.
+        """
+        now = datetime.now(ZoneInfo(self.config.agent.timezone))
+        stamp = now.strftime("%A, %B %d, %Y %H:%M %Z")
+        preamble = f"[Current date & time: {stamp}]"
+        if decomposed_goal:
+            preamble += (
+                "\n\n<execution_plan>\n"
+                "Your request has been analysed and broken into the following sub-goals.\n"
+                "Follow this plan step-by-step, completing each sub-goal in order "
+                "(respecting dependencies). Report progress as you go.\n\n"
+                f"{decomposed_goal.format_for_prompt()}\n"
+                "</execution_plan>"
+            )
+        return preamble
+
+    async def _session_system_prompt(self, channel: str, user_id: str, chat_id: str) -> str:
+        """Return the session's static system prompt, building it once if needed.
+
+        Built fresh after a ``/new`` (when no snapshot exists), then reused for
+        the lifetime of the session so the static content is sent only once.
+        """
+        cached = await self.history.get_session_system(channel, user_id, chat_id)
+        if cached is not None:
+            return cached
+        system = await self._build_system_prompt()
+        await self.history.set_session_system(channel, user_id, system, chat_id)
+        return system
+
+    async def _maybe_compact(
+        self, channel: str, user_id: str, chat_id: str, response: Any
+    ) -> str | None:
+        """Compact the session if the context exceeds the configured threshold.
+
+        Returns a user-facing notice when compaction happened, else ``None``.
+        Failures are logged and swallowed — compaction must never break a turn.
+        """
+        cfg = self.config.compaction
+        if self.history_mode != "session" or not cfg.enabled:
+            return None
+        usage = getattr(response, "usage", None) or {}
+        context_tokens = int(usage.get("context_tokens") or 0)
+        if not should_compact(cfg, context_tokens, self.config.agent.model):
+            return None
+
+        session = await self.history.get_session(channel, user_id, chat_id)
+        try:
+            llm = self._background_llm(cfg.provider)
+            result = await compact_messages(llm, cfg.model, session, cfg.keep_recent_turns)
+        except Exception:
+            log.exception("Conversation compaction failed")
+            return None
+        if not result:
+            return None
+
+        new_messages, _summary = result
+        await self.history.replace_session(channel, user_id, new_messages, chat_id)
+        log.info(
+            "Compacted session %s/%s/%s: %d → %d messages (~%d ctx tokens)",
+            channel,
+            user_id,
+            chat_id,
+            len(session),
+            len(new_messages),
+            context_tokens,
+        )
+        return (
+            f"🗜️ Our conversation was getting large (~{context_tokens:,} tokens). "
+            "I summarized the earlier part to free up space; recent messages are kept as-is."
         )
 
     def _build_user_message(
         self,
         message: str,
         attachments: list[Attachment] | None = None,
+        preamble: str = "",
     ) -> dict:
-        """Build the user message dict, handling multimodal content."""
+        """Build the user message dict, handling multimodal content.
+
+        ``preamble`` (live date/time + optional execution plan) is prepended to
+        the message text so the agent always knows 'now' for the current turn.
+        """
+        text = f"{preamble}\n\n{message}" if preamble else message
         image_attachments = [a for a in (attachments or []) if a.is_image]
         if image_attachments:
             content_blocks: list[dict] = []
-            if message:
-                content_blocks.append({"type": "text", "text": message})
+            if text:
+                content_blocks.append({"type": "text", "text": text})
             for att in image_attachments:
                 if self.llm.provider == "anthropic":
                     content_blocks.append(att.to_anthropic_block())
                 else:
                     content_blocks.append(att.to_openai_block())
             return {"role": "user", "content": content_blocks}
-        return {"role": "user", "content": message}
+        return {"role": "user", "content": text}
 
     async def _process_injection(
         self,
         system: str,
+        preamble: str,
         message: str,
         channel: str,
         user_id: str,
@@ -368,7 +465,7 @@ async def _process_injection(
                 messages.append({"role": turn["role"], "content": turn["content"]})
 
         # The actual current request — always the last user message.
-        messages.append(self._build_user_message(message, attachments))
+        messages.append(self._build_user_message(message, attachments, preamble))
 
         log.info(
             "Processing message (injection) from %s/%s/%s: %s",
@@ -445,6 +542,7 @@ async def _process_injection(
     async def _process_session(
         self,
         system: str,
+        preamble: str,
         message: str,
         channel: str,
         user_id: str,
@@ -460,8 +558,8 @@ async def _process_session(
         # Load existing session (from memory cache or DB)
         session = await self.history.get_session(channel, user_id, chat_id)
 
-        # Append the new user message
-        user_msg = self._build_user_message(message, attachments)
+        # Append the new user message (with the live date/time preamble)
+        user_msg = self._build_user_message(message, attachments, preamble)
         await self.history.append_session_message(channel, user_id, user_msg, chat_id)
 
         log.info(
@@ -526,6 +624,11 @@ async def _process_session(
         final_text = response.text
         log.info("Response: %s", final_text[:200])
 
+        # Compaction — if the context has grown past the configured threshold,
+        # summarise the oldest turns. ``response.usage`` reflects the full
+        # session that was just sent, so it's the authoritative context size.
+        system_notice = await self._maybe_compact(channel, user_id, chat_id, response)
+
         # Check if the LLM wants to respond with voice
         voice_bytes = await self._maybe_synthesize_voice(final_text)
         if voice_bytes:
@@ -545,7 +648,7 @@ async def _process_session(
                 name=f"task-reflect-{user_id}",
             )
 
-        return AgentResponse(text=final_text, voice=voice_bytes)
+        return AgentResponse(text=final_text, voice=voice_bytes, system_notice=system_notice)
 
     @staticmethod
     def _history_message_text(message: str, attachments: list[Attachment] | None = None) -> str:
diff --git a/core/compaction.py b/core/compaction.py
new file mode 100644
index 0000000..da1d32e
--- /dev/null
+++ b/core/compaction.py
@@ -0,0 +1,181 @@
+"""Conversation compaction for session history mode.
+
+When a sticky session grows close to the model's context window, the oldest
+turns are summarised by a (cheap) LLM into a single synthetic exchange, while
+the most recent turns are kept verbatim. This keeps long conversations going
+without blowing the context window, and — unlike provider-specific server-side
+compaction — works across every provider MPA supports.
+
+The trigger is the *real* token usage reported by the provider after the turn
+(see ``LLMResponse.usage``), so no local tokenizer is needed.
+"""
+
+from __future__ import annotations
+
+import logging
+from typing import Any
+
+from core.config import CompactionConfig
+from core.llm import LLMClient
+
+log = logging.getLogger(__name__)
+
+# Known context-window sizes (tokens). Used for percent-mode thresholds.
+# Unknown models fall back to CompactionConfig.context_window.
+CONTEXT_WINDOWS: dict[str, int] = {
+    "claude-opus-4-8": 1_000_000,
+    "claude-opus-4-7": 1_000_000,
+    "claude-opus-4-6": 1_000_000,
+    "claude-opus-4-5": 200_000,
+    "claude-opus-4-1": 200_000,
+    "claude-sonnet-4-6": 1_000_000,
+    "claude-4-6-sonnet": 1_000_000,
+    "claude-sonnet-4-5": 200_000,
+    "claude-haiku-4-5": 200_000,
+    "claude-4-5-haiku": 200_000,
+}
+
+_SUMMARY_PROMPT = """\
+You are compacting the earlier part of a conversation between a user and their
+personal AI assistant so it fits in a smaller context window. Write a dense,
+factual summary that preserves everything needed to continue the conversation
+seamlessly. Include:
+- The user's goals, requests, and any decisions made.
+- Concrete facts, names, identifiers, numbers, file paths, and preferences.
+- Results of actions already taken (emails sent, events created, lookups done).
+- Open threads or pending follow-ups.
+
+Do NOT include pleasantries or restate this instruction. Be concise but complete.
+Write the summary as plain prose / bullet points.
+
+<conversation>
+{transcript}
+</conversation>
+
+Summary:"""
+
+
+def effective_window(config: CompactionConfig, model: str) -> int:
+    """Return the context window (tokens) to use for percent-mode thresholds."""
+    key = (model or "").strip().lower()
+    return CONTEXT_WINDOWS.get(key, config.context_window)
+
+
+def compaction_threshold_tokens(config: CompactionConfig, model: str) -> int:
+    """Return the absolute token count at which compaction should trigger."""
+    if config.threshold_type == "tokens":
+        return config.threshold_tokens
+    window = effective_window(config, model)
+    return int(window * config.threshold_percent / 100)
+
+
+def should_compact(config: CompactionConfig, context_tokens: int, model: str) -> bool:
+    """Return True if the current context size warrants compaction."""
+    if not config.enabled or not context_tokens:
+        return False
+    return context_tokens >= compaction_threshold_tokens(config, model)
+
+
+def _is_real_user_turn(message: dict[str, Any]) -> bool:
+    """True if this is a genuine user turn (not a tool_result carrier message)."""
+    if message.get("role") != "user":
+        return False
+    content = message.get("content")
+    if isinstance(content, str):
+        return True
+    if isinstance(content, list):
+        # Tool results are delivered as user messages whose blocks are all
+        # tool_result; a real user turn has at least one text/image block.
+        return any(isinstance(b, dict) and b.get("type") != "tool_result" for b in content)
+    return True
+
+
+def _block_text(content: Any) -> str:
+    """Flatten a message's content into plain text for summarisation."""
+    if isinstance(content, str):
+        return content
+    if not isinstance(content, list):
+        return str(content)
+    parts: list[str] = []
+    for block in content:
+        if not isinstance(block, dict):
+            parts.append(str(block))
+            continue
+        btype = block.get("type")
+        if btype == "text":
+            parts.append(str(block.get("text", "")))
+        elif btype == "tool_use":
+            parts.append(f"[tool_use {block.get('name', '')}: {block.get('input', {})}]")
+        elif btype == "tool_result":
+            parts.append(f"[tool_result: {block.get('content', '')}]")
+        elif btype == "image":
+            parts.append("[image]")
+    return "\n".join(p for p in parts if p)
+
+
+def _render_transcript(messages: list[dict[str, Any]]) -> str:
+    lines: list[str] = []
+    for msg in messages:
+        role = msg.get("role", "?")
+        text = _block_text(msg.get("content")).strip()
+        if text:
+            lines.append(f"{role.upper()}: {text}")
+    return "\n\n".join(lines)
+
+
+async def compact_messages(
+    llm: LLMClient,
+    model: str,
+    messages: list[dict[str, Any]],
+    keep_recent_turns: int,
+) -> tuple[list[dict[str, Any]], str] | None:
+    """Summarise the oldest turns of a session, keeping the recent ones verbatim.
+
+    Returns ``(new_messages, summary)`` or ``None`` if there is nothing worth
+    compacting (too few turns). The rebuilt session is:
+
+        [user(<summary>), assistant(<ack>), *recent_turns_verbatim]
+
+    The cut is made at a real user-turn boundary so a ``tool_use`` block is
+    never split from its ``tool_result``.
+    """
+    boundaries = [i for i, m in enumerate(messages) if _is_real_user_turn(m)]
+    # Need more turns than we intend to keep, otherwise there's nothing to fold.
+    if len(boundaries) <= keep_recent_turns:
+        return None
+
+    cut = boundaries[len(boundaries) - keep_recent_turns]
+    prefix = messages[:cut]
+    tail = messages[cut:]
+    if not prefix:
+        return None
+
+    transcript = _render_transcript(prefix)
+    if not transcript.strip():
+        return None
+
+    summary = await llm.generate_text(
+        model=model,
+        prompt=_SUMMARY_PROMPT.format(transcript=transcript),
+        max_tokens=2048,
+    )
+    summary = (summary or "").strip()
+    if not summary:
+        return None
+
+    new_messages: list[dict[str, Any]] = [
+        {
+            "role": "user",
+            "content": (
+                "Here is a summary of the earlier part of our conversation "
+                "(it was compacted to save space):\n\n"
+                f"<conversation_summary>\n{summary}\n</conversation_summary>"
+            ),
+        },
+        {
+            "role": "assistant",
+            "content": "Understood — I'll continue with that summarized context in mind.",
+        },
+        *tail,
+    ]
+    return new_messages, summary
diff --git a/core/config.py b/core/config.py
index 47bfdfb..870813c 100644
--- a/core/config.py
+++ b/core/config.py
@@ -155,6 +155,23 @@ class TaskReflectionConfig(BaseModel):
     max_reflections: int = 50  # max reflections to keep for prompt injection
 
 
+class CompactionConfig(BaseModel):
+    """Conversation compaction — summarise old turns when the context grows.
+
+    Only applies in session history mode. The threshold is evaluated against
+    the real token usage reported by the provider after each turn.
+    """
+
+    enabled: bool = True
+    provider: str = "anthropic"
+    model: str = "claude-haiku-4-5"
+    threshold_type: str = "percent"  # "percent" (of context window) or "tokens" (absolute)
+    threshold_percent: int = 80  # trigger at this % of the model's context window
+    threshold_tokens: int = 150000  # absolute trigger when threshold_type == "tokens"
+    context_window: int = 200000  # fallback window for % mode when the model is unknown
+    keep_recent_turns: int = 4  # most-recent user turns kept verbatim after compaction
+
+
 class SearchConfig(BaseModel):
     enabled: bool = False
     provider: str = "tavily"
@@ -171,6 +188,19 @@ class PromptConfig(BaseModel):
     history_handling_override: str = ""
 
 
+class GhToolConfig(BaseModel):
+    """GitHub CLI (`gh`) tool — auth via a Personal Access Token."""
+
+    enabled: bool = False
+    token: str = ""  # GitHub PAT, injected as GH_TOKEN when running `gh`
+
+
+class ToolsConfig(BaseModel):
+    """Optional external CLI tools the agent can use (see core/tools.py)."""
+
+    gh: GhToolConfig = GhToolConfig()
+
+
 class Config(BaseModel):
     agent: AgentConfig = AgentConfig()
     channels: ChannelsConfig = ChannelsConfig()
@@ -182,9 +212,11 @@ class Config(BaseModel):
     memory: MemoryConfig = MemoryConfig()
     goal_decomposition: GoalDecompositionConfig = GoalDecompositionConfig()
     task_reflection: TaskReflectionConfig = TaskReflectionConfig()
+    compaction: CompactionConfig = CompactionConfig()
     search: SearchConfig = SearchConfig()
     you: YouConfig = YouConfig()
     prompt: PromptConfig = PromptConfig()
+    tools: ToolsConfig = ToolsConfig()
 
 
 def load_config(path: str | Path = "config.yml") -> Config:
diff --git a/core/config_store.py b/core/config_store.py
index 4fd91fc..6897a31 100644
--- a/core/config_store.py
+++ b/core/config_store.py
@@ -55,6 +55,7 @@
         "admin.password_hash",
         "admin.password_salt",
         "search.api_key",
+        "tools.gh.token",
         "calendar.providers",
         "calendar.google_oauth_client_id",
         "calendar.google_oauth_client_secret",
diff --git a/core/executor.py b/core/executor.py
index 5b11669..185e15b 100644
--- a/core/executor.py
+++ b/core/executor.py
@@ -33,6 +33,11 @@ def _find_wacli_bin() -> str:
 class ToolExecutor:
     """Executes CLI commands on behalf of the LLM."""
 
+    def __init__(self, tool_env: dict[str, str] | None = None) -> None:
+        # Extra environment for optional tools (e.g. GH_TOKEN for `gh`).
+        # Injected into every spawned subprocess; updated on config reload.
+        self.tool_env: dict[str, str] = dict(tool_env or {})
+
     ALLOWED_PREFIXES = [
         "curl",
         "himalaya",
@@ -90,9 +95,12 @@ async def run_command_trusted(self, command: str, timeout: int = 30) -> dict:
     async def _exec(self, command: str, timeout: int) -> dict:
         """Run a shell command and capture output."""
         env = None
-        if "himalaya" in command:
+        if "himalaya" in command or self.tool_env:
             env = os.environ.copy()
-            env.update(himalaya_env())
+            if "himalaya" in command:
+                env.update(himalaya_env())
+            # Tool auth (e.g. GH_TOKEN) — only set when a tool is enabled.
+            env.update(self.tool_env)
         proc = await asyncio.create_subprocess_shell(
             command,
             stdout=asyncio.subprocess.PIPE,
diff --git a/core/history.py b/core/history.py
index 4da6599..7156658 100644
--- a/core/history.py
+++ b/core/history.py
@@ -42,6 +42,14 @@
 );
 CREATE INDEX IF NOT EXISTS idx_session_lookup
     ON session_messages(channel, user_id, chat_id, id);
+CREATE TABLE IF NOT EXISTS session_system (
+    channel TEXT NOT NULL,
+    user_id TEXT NOT NULL,
+    chat_id TEXT NOT NULL DEFAULT '',
+    system TEXT NOT NULL,
+    created_at DATETIME DEFAULT (datetime('now')),
+    PRIMARY KEY (channel, user_id, chat_id)
+);
 """
 
 # Migrations applied after initial schema creation.
@@ -77,6 +85,8 @@ def __init__(self, db_path: str = "data/history.db", max_turns: int = 20):
         self._ready = False
         # In-memory cache for sticky sessions: {(channel, user_id, chat_id): [message_dicts]}
         self._sessions: dict[tuple[str, str, str], list[dict[str, Any]]] = {}
+        # In-memory cache for the static system prompt snapshot per session.
+        self._session_system: dict[tuple[str, str, str], str] = {}
 
     async def _ensure_schema(self) -> None:
         if self._ready:
@@ -159,9 +169,14 @@ async def clear(self, channel: str, user_id: str, chat_id: str = "") -> None:
                 "DELETE FROM session_messages WHERE channel = ? AND user_id = ? AND chat_id = ?",
                 (channel, user_id, chat_id),
             )
+            await db.execute(
+                "DELETE FROM session_system WHERE channel = ? AND user_id = ? AND chat_id = ?",
+                (channel, user_id, chat_id),
+            )
             await db.commit()
         # Clear in-memory session cache
         self._sessions.pop((channel, user_id, chat_id), None)
+        self._session_system.pop((channel, user_id, chat_id), None)
 
     # -------------------------------------------------------------------
     # Session mode — sticky session per (channel, user_id, chat_id)
@@ -230,6 +245,33 @@ async def append_session_messages(
             )
             await db.commit()
 
+    async def replace_session(
+        self,
+        channel: str,
+        user_id: str,
+        messages: list[dict[str, Any]],
+        chat_id: str = "",
+    ) -> None:
+        """Atomically replace a session's messages (used by compaction).
+
+        Rewrites both the in-memory cache and the persisted ``session_messages``
+        rows. The system-prompt snapshot is left untouched.
+        """
+        await self._ensure_schema()
+        key = (channel, user_id, chat_id)
+        self._sessions[key] = list(messages)
+        async with aiosqlite.connect(self.db_path) as db:
+            await db.execute(
+                "DELETE FROM session_messages WHERE channel = ? AND user_id = ? AND chat_id = ?",
+                (channel, user_id, chat_id),
+            )
+            await db.executemany(
+                "INSERT INTO session_messages (channel, user_id, chat_id, message) "
+                "VALUES (?, ?, ?, ?)",
+                [(channel, user_id, chat_id, json.dumps(m)) for m in messages],
+            )
+            await db.commit()
+
     async def clear_session(self, channel: str, user_id: str, chat_id: str = "") -> None:
         """Clear just the sticky session for a (channel, user_id, chat_id) triple."""
         await self._ensure_schema()
@@ -238,5 +280,52 @@ async def clear_session(self, channel: str, user_id: str, chat_id: str = "") ->
                 "DELETE FROM session_messages WHERE channel = ? AND user_id = ? AND chat_id = ?",
                 (channel, user_id, chat_id),
             )
+            await db.execute(
+                "DELETE FROM session_system WHERE channel = ? AND user_id = ? AND chat_id = ?",
+                (channel, user_id, chat_id),
+            )
             await db.commit()
         self._sessions.pop((channel, user_id, chat_id), None)
+        self._session_system.pop((channel, user_id, chat_id), None)
+
+    # -------------------------------------------------------------------
+    # Session mode — static system prompt snapshot
+    # -------------------------------------------------------------------
+
+    async def get_session_system(self, channel: str, user_id: str, chat_id: str = "") -> str | None:
+        """Return the cached static system prompt for a session, or None if unset.
+
+        The system prompt is snapshotted once at the start of a session (after a
+        ``/new``) and reused for every subsequent turn, so the static content is
+        only built/sent once instead of being rebuilt each turn.
+        """
+        await self._ensure_schema()
+        key = (channel, user_id, chat_id)
+        if key in self._session_system:
+            return self._session_system[key]
+        async with aiosqlite.connect(self.db_path) as db:
+            cursor = await db.execute(
+                "SELECT system FROM session_system "
+                "WHERE channel = ? AND user_id = ? AND chat_id = ?",
+                (channel, user_id, chat_id),
+            )
+            row = await cursor.fetchone()
+        if row is None:
+            return None
+        self._session_system[key] = row[0]
+        return row[0]
+
+    async def set_session_system(
+        self, channel: str, user_id: str, system: str, chat_id: str = ""
+    ) -> None:
+        """Persist the static system prompt snapshot for a session."""
+        await self._ensure_schema()
+        self._session_system[(channel, user_id, chat_id)] = system
+        async with aiosqlite.connect(self.db_path) as db:
+            await db.execute(
+                "INSERT INTO session_system (channel, user_id, chat_id, system) "
+                "VALUES (?, ?, ?, ?) "
+                "ON CONFLICT(channel, user_id, chat_id) DO UPDATE SET system = excluded.system",
+                (channel, user_id, chat_id, system),
+            )
+            await db.commit()
diff --git a/core/llm.py b/core/llm.py
index b0f3bab..43ae6d4 100644
--- a/core/llm.py
+++ b/core/llm.py
@@ -32,6 +32,44 @@ class LLMResponse:
     text: str
     tool_calls: list[LLMToolCall]
     raw: object | None = None
+    # Token usage for the request, when the provider reports it. Keys:
+    # input_tokens, output_tokens, cache_read_input_tokens,
+    # cache_creation_input_tokens, context_tokens (= full prompt size).
+    usage: dict[str, int] | None = None
+
+
+def _anthropic_usage(response: Any) -> dict[str, int] | None:
+    u = getattr(response, "usage", None)
+    if u is None:
+        return None
+    inp = getattr(u, "input_tokens", 0) or 0
+    out = getattr(u, "output_tokens", 0) or 0
+    cache_read = getattr(u, "cache_read_input_tokens", 0) or 0
+    cache_creation = getattr(u, "cache_creation_input_tokens", 0) or 0
+    # The true context size is the uncached input plus everything served
+    # from / written to the cache this request.
+    return {
+        "input_tokens": inp,
+        "output_tokens": out,
+        "cache_read_input_tokens": cache_read,
+        "cache_creation_input_tokens": cache_creation,
+        "context_tokens": inp + cache_read + cache_creation,
+    }
+
+
+def _openai_usage(response: Any) -> dict[str, int] | None:
+    u = getattr(response, "usage", None)
+    if u is None:
+        return None
+    prompt = getattr(u, "prompt_tokens", 0) or 0
+    completion = getattr(u, "completion_tokens", 0) or 0
+    return {
+        "input_tokens": prompt,
+        "output_tokens": completion,
+        "cache_read_input_tokens": 0,
+        "cache_creation_input_tokens": 0,
+        "context_tokens": prompt,
+    }
 
 
 def _openai_tools(tools: list[dict[str, Any]]) -> list[dict[str, Any]]:
@@ -125,10 +163,21 @@ async def generate(
         if self.provider == "anthropic":
             client_any = cast(Any, self._client)
             messages_client = cast(Any, getattr(client_any, "messages"))  # type: ignore[attr-defined]
+            # Mark the (static) system prompt as a cache breakpoint so the
+            # tools + system prefix is cached and not reprocessed every turn.
+            system_param: Any = system
+            if system:
+                system_param = [
+                    {
+                        "type": "text",
+                        "text": system,
+                        "cache_control": {"type": "ephemeral"},
+                    }
+                ]
             response = await messages_client.create(
                 model=resolved_model,
                 max_tokens=max_tokens,
-                system=system,
+                system=cast(Any, system_param),
                 messages=cast(Any, messages),
                 tools=cast(Any, tools),
             )
@@ -150,6 +199,7 @@ async def generate(
                 text="\n".join(text_parts).strip(),
                 tool_calls=tool_calls,
                 raw=response.content,
+                usage=_anthropic_usage(response),
             )
 
         openai_tools = _openai_tools(tools)
@@ -174,6 +224,7 @@ async def generate(
             text=(message.content or "").strip(),
             tool_calls=tool_calls,
             raw=message.model_dump(exclude_none=True),
+            usage=_openai_usage(response),
         )
 
     def assistant_message(self, response: LLMResponse) -> dict[str, Any]:
diff --git a/core/models.py b/core/models.py
index 97f3abf..dedae63 100644
--- a/core/models.py
+++ b/core/models.py
@@ -58,3 +58,6 @@ class AgentResponse:
     text: str
     voice: bytes | None = None
     attachments: list[Attachment] = field(default_factory=list)
+    # Optional out-of-band system message (e.g. "context was compacted"),
+    # delivered by the channel as a separate follow-up message.
+    system_notice: str | None = None
diff --git a/core/prompt_builder.py b/core/prompt_builder.py
index bcac689..13900f4 100644
--- a/core/prompt_builder.py
+++ b/core/prompt_builder.py
@@ -3,11 +3,10 @@
 from __future__ import annotations
 
 from dataclasses import dataclass
-from datetime import datetime
-from zoneinfo import ZoneInfo
 
 from core.config import Config
 from core.goal_decomposition import DecomposedGoal
+from core.tools import active_tool_prompts
 
 DEFAULT_TOOL_USAGE_BLOCK = """For write actions
 (sending emails, replying to emails, sending messages, creating calendar events,
@@ -51,6 +50,7 @@ class PromptSections:
     character: str
     about_user: str
     tool_usage: str
+    tools: str
     memory_instruction: str
     history_handling: str
     memories: str
@@ -66,8 +66,10 @@ def full_prompt(self) -> str:
             self.character,
             self.about_user,
             self.tool_usage,
-            self.memory_instruction,
         ]
+        if self.tools:
+            parts.append(self.tools)
+        parts.append(self.memory_instruction)
         if self.history_handling:
             parts.append(self.history_handling)
         if self.memories:
@@ -87,6 +89,7 @@ def as_dict(self) -> dict[str, str]:
             "character": self.character,
             "about_user": self.about_user,
             "tool_usage": self.tool_usage,
+            "tools": self.tools,
             "memory_instruction": self.memory_instruction,
             "history_handling": self.history_handling,
             "memories": self.memories,
@@ -107,11 +110,13 @@ def build_prompt_sections(
     include_memories: bool = True,
     include_reflections: bool = True,
 ) -> PromptSections:
-    """Build all prompt sections with current config and dynamic context."""
+    """Build all prompt sections with current config and dynamic context.
+
+    The prompt is intentionally **static** (no current date/time): it forms the
+    cacheable prefix sent to the LLM. The live date/time is injected per turn at
+    the start of each user message instead (see ``AgentCore._turn_preamble``).
+    """
     cfg = config.agent
-    now = datetime.now(ZoneInfo(cfg.timezone))
-    date_str = now.strftime("%A, %B %d, %Y")
-    time_str = now.strftime("%H:%M")
 
     about_user_block = config.you.personalia.strip()
     tool_usage_text = resolve_prompt_block(
@@ -125,13 +130,19 @@ def build_prompt_sections(
 
     intro = (
         f"You are {cfg.name}, a personal AI assistant for {cfg.owner_name}.\n\n"
-        f"Today is {date_str}. Current time: {time_str}. Timezone: {cfg.timezone}."
+        f"Your timezone is {cfg.timezone}. The current date and time is provided at the "
+        f"start of each user message — always use that as 'now'."
     )
 
     personalia = f"<personalia>\n{cfg.personalia}\n</personalia>"
     character = f"<character>\n{cfg.character}\n</character>"
     about_user = f"<about_user>\n{about_user_block}\n</about_user>" if about_user_block else ""
     tool_usage = f"<tool_usage>\n{tool_usage_text}\n</tool_usage>"
+
+    tool_blocks = active_tool_prompts(config)
+    tools_section = ""
+    if tool_blocks:
+        tools_section = "<tools>\n" + "\n\n".join(tool_blocks) + "\n</tools>"
     memory_instruction = (
         "You can store and recall memories using the sqlite3 CLI (see the memory skill).\n"
         "Proactively remember important facts about the user and their contacts.\n"
@@ -171,6 +182,7 @@ def build_prompt_sections(
         character=character,
         about_user=about_user,
         tool_usage=tool_usage,
+        tools=tools_section,
         memory_instruction=memory_instruction,
         history_handling=history_handling,
         memories=memory_section,
diff --git a/core/tools.py b/core/tools.py
new file mode 100644
index 0000000..594e5ce
--- /dev/null
+++ b/core/tools.py
@@ -0,0 +1,100 @@
+"""Optional external CLI tools the agent can use (e.g. the GitHub `gh` CLI).
+
+Tools are configured under ``config.tools.*``.  When a tool is *enabled*, two
+things happen:
+
+1. Its authentication is wired into the executor environment (via :func:`tool_env`),
+   so the underlying CLI is authenticated when the agent runs it.
+2. It is advertised to the LLM in the system prompt (via :func:`active_tool_prompts`),
+   so the model knows the capability exists and how to use it.
+
+A tool that is *not* enabled is neither authenticated nor advertised, keeping the
+prompt lean and the capability hidden.
+
+Adding a new tool means: add a config sub-model in ``core/config.py``, then add an
+entry to ``_REGISTRY`` below describing its env + prompt advertisement.
+"""
+
+from __future__ import annotations
+
+from collections.abc import Callable
+from dataclasses import dataclass
+
+from core.config import Config
+
+# Advertisement injected into the system prompt when `gh` is active.
+_GH_PROMPT = """<tool name="gh">
+The GitHub CLI `gh` is installed and authenticated. Run it with the `run_command`
+tool for GitHub operations. Read operations run without asking; creating issues,
+PRs or releases ask for confirmation first.
+Examples:
+  gh issue list --repo owner/name
+  gh pr view 123 --repo owner/name
+  gh pr list --repo owner/name --state open
+  gh api repos/owner/name/commits
+  gh search issues "is:open label:bug" --repo owner/name
+Always pass `--repo owner/name` unless the working directory is a checkout of the
+target repository. Use `-o json` / `gh api` and parse JSON when you need fields.
+</tool>"""
+
+
+@dataclass(frozen=True)
+class ToolSpec:
+    """Describes an optional external tool the agent can use."""
+
+    key: str  # config sub-key under `tools.` (e.g. "gh")
+    label: str  # human-friendly name for the admin UI
+    summary: str  # one-line description for the admin UI
+    # Returns env vars to inject when the tool is enabled (auth, etc.).
+    env: Callable[[Config], dict[str, str]]
+    # Returns the system-prompt advertisement block for the tool.
+    prompt: Callable[[Config], str]
+
+
+def _gh_env(config: Config) -> dict[str, str]:
+    gh = config.tools.gh
+    if gh.enabled and gh.token:
+        # `gh` reads GH_TOKEN (preferred) / GITHUB_TOKEN for non-interactive auth.
+        return {"GH_TOKEN": gh.token}
+    return {}
+
+
+_REGISTRY: tuple[ToolSpec, ...] = (
+    ToolSpec(
+        key="gh",
+        label="GitHub CLI (gh)",
+        summary="Let the agent query and act on GitHub (issues, PRs, repos, API).",
+        env=_gh_env,
+        prompt=lambda _cfg: _GH_PROMPT,
+    ),
+)
+
+
+def registry() -> tuple[ToolSpec, ...]:
+    """Return all known optional tools."""
+    return _REGISTRY
+
+
+def _is_enabled(config: Config, key: str) -> bool:
+    sub = getattr(config.tools, key, None)
+    return bool(getattr(sub, "enabled", False))
+
+
+def active_tool_prompts(config: Config) -> list[str]:
+    """Return system-prompt advertisement blocks for every *enabled* tool."""
+    blocks: list[str] = []
+    for spec in _REGISTRY:
+        if _is_enabled(config, spec.key):
+            block = spec.prompt(config).strip()
+            if block:
+                blocks.append(block)
+    return blocks
+
+
+def tool_env(config: Config) -> dict[str, str]:
+    """Return the merged environment for every *enabled* tool (auth tokens, etc.)."""
+    env: dict[str, str] = {}
+    for spec in _REGISTRY:
+        if _is_enabled(config, spec.key):
+            env.update(spec.env(config))
+    return env
diff --git a/docs/content/docs/admin-ui.mdx b/docs/content/docs/admin-ui.mdx
index 57e3cc2..66d7df1 100644
--- a/docs/content/docs/admin-ui.mdx
+++ b/docs/content/docs/admin-ui.mdx
@@ -28,14 +28,21 @@ Overview of agent status, active channels, and quick stats.
 Edit agent settings without touching config files:
 
 - **Identity** — agent name and owner name
-- **LLM** — provider, model, and API key configuration. The model field is free-text (enter any model id the provider supports); a **Fetch models** button on each provider card loads the live model list from the provider API into the field's autocomplete, and **Copy list** copies all available ids
+- **LLM** — provider, model, and API key configuration. The model field is free-text (enter any model id the provider supports); a **Fetch models** button on each provider card loads the live model list from the provider API into the field's autocomplete, and **Copy list** copies all available ids. Also configures the background models for memory, goal decomposition, task reflection, and **history compaction** (the model used to summarize old conversation turns — see the History tab for the trigger threshold)
 - **Channels** — enable/disable Telegram and WhatsApp
 - **Calendar** — manage CalDAV providers
 - **Contacts** — manage contact providers
 - **Email** — Himalaya configuration
 - **Search** — Tavily API configuration
+- **Tools** — enable optional external CLI tools the agent can use
 - **Voice** — STT/TTS settings
 
+### Tools
+
+Enable optional external CLI tools. When a tool is enabled it is authenticated and advertised to the assistant in its system prompt; when disabled it stays hidden, keeping the prompt lean.
+
+- **GitHub CLI (`gh`)** — let the agent query and act on GitHub (issues, PRs, repos, `gh api`, search). Provide a [Personal Access Token](https://github.com/settings/tokens); it is injected as `GH_TOKEN` so `gh` is authenticated non-interactively. Read operations (`list`/`view`/`status`/`api`/`search`) run without asking; creating issues, PRs, or releases ask for confirmation first. Use **Test token** to verify the token against the GitHub API. In session history mode, a newly enabled tool is advertised starting from the next `/new` conversation.
+
 ### Skills
 
 Built-in skill editor for creating, editing, and deleting skill files. Changes take effect immediately — no restart needed.
@@ -65,7 +72,10 @@ Manage scheduled tasks:
 
 ### History
 
-Browse conversation history across channels. View individual conversation turns with tool call details.
+Configure conversation history and browse stored turns:
+
+- **Mode** — `injection` (replay the last N user/assistant pairs each request) or `session` (sticky per-chat session, full context, cache-friendly). Reset a conversation any time by sending `/new` (or its alias `/clear`).
+- **Context compaction** (session mode) — when the conversation nears the model's context window, the oldest turns are summarized by a small model while recent turns are kept verbatim, and the user gets a system message saying it happened. Set the trigger threshold here as either a **percent of the context window** or an **absolute token count** (e.g. 200k), plus how many recent turns to keep. The compaction model is configured in the **LLM** tab (under *History Compaction*). Has no effect in injection mode (which is already windowed).
 
 ### Logs
 
diff --git a/docs/content/docs/architecture.mdx b/docs/content/docs/architecture.mdx
index 384741d..93eb837 100644
--- a/docs/content/docs/architecture.mdx
+++ b/docs/content/docs/architecture.mdx
@@ -70,12 +70,14 @@ The agent becomes a **thin orchestrator**: it reads skill files, passes them to
 The brain of MPA. Implements the LLM tool-use loop:
 
 1. Load conversation history
-2. Build system prompt (skills, character, personalia, memories)
-3. Call the LLM
-4. Handle tool calls with permission checks
-5. Save conversation turn and extract memories
-
-The agent uses a single `run_command` meta-tool that executes any whitelisted CLI command, plus structured tools for safety-critical write operations (`send_email`, `send_message`, `create_calendar_event`).
+2. Build the static system prompt (skills, character, personalia, memories, active tools). In session mode this is snapshotted once per conversation (rebuilt after `/new`) and reused every turn, so the cacheable prefix stays stable; on Anthropic it is sent with a `cache_control` breakpoint so the tools + system prefix is not reprocessed each turn
+3. Inject the live date/time (and any per-request execution plan) at the start of the current user message — so the agent always knows "now" without mutating the cached prefix
+4. Call the LLM
+5. Handle tool calls with permission checks
+6. Save conversation turn and extract memories
+7. In session mode, if the provider-reported context size crosses the configured threshold, compact: summarize the oldest turns with a small model, keep recent turns verbatim, and notify the user with a system message (see `core/compaction.py`)
+
+The agent uses a single `run_command` meta-tool that executes any whitelisted CLI command, plus structured tools for safety-critical write operations (`send_email`, `send_message`, `create_calendar_event`). Optional external tools (e.g. the GitHub `gh` CLI, configured under the **Tools** tab) are authenticated and advertised to the model only when enabled — see `core/tools.py`.
 
 ### LLM Provider (`core/llm.py`)
 
diff --git a/tests/test_compaction.py b/tests/test_compaction.py
new file mode 100644
index 0000000..6107e4c
--- /dev/null
+++ b/tests/test_compaction.py
@@ -0,0 +1,226 @@
+"""Tests for conversation compaction, token-usage capture, and the /clear alias."""
+
+from __future__ import annotations
+
+from types import SimpleNamespace
+
+import pytest
+
+from core.compaction import (
+    compact_messages,
+    compaction_threshold_tokens,
+    effective_window,
+    should_compact,
+)
+from core.config import CompactionConfig, Config
+from core.history import ConversationHistory
+from core.llm import _anthropic_usage, _openai_usage
+
+
+class FakeLLM:
+    """Minimal stand-in exposing the async generate_text used by compaction."""
+
+    def __init__(self, summary: str = "SUMMARY") -> None:
+        self.summary = summary
+        self.calls = 0
+
+    async def generate_text(self, *, model: str, prompt: str, max_tokens: int = 2048) -> str:
+        self.calls += 1
+        return self.summary
+
+
+# ---------------------------------------------------------------------------
+# Threshold logic
+# ---------------------------------------------------------------------------
+
+
+def test_effective_window_known_and_fallback() -> None:
+    cfg = CompactionConfig(context_window=12345)
+    assert effective_window(cfg, "claude-haiku-4-5") == 200_000
+    assert effective_window(cfg, "claude-opus-4-8") == 1_000_000
+    assert effective_window(cfg, "some-unknown-model") == 12345
+
+
+def test_threshold_percent_vs_tokens() -> None:
+    pct = CompactionConfig(threshold_type="percent", threshold_percent=80)
+    assert compaction_threshold_tokens(pct, "claude-haiku-4-5") == 160_000
+    tok = CompactionConfig(threshold_type="tokens", threshold_tokens=150_000)
+    assert compaction_threshold_tokens(tok, "claude-haiku-4-5") == 150_000
+
+
+def test_should_compact_gates() -> None:
+    cfg = CompactionConfig(enabled=True, threshold_type="tokens", threshold_tokens=1000)
+    assert should_compact(cfg, 1000, "m") is True
+    assert should_compact(cfg, 999, "m") is False
+    assert should_compact(cfg, 0, "m") is False  # no usage info
+    disabled = CompactionConfig(enabled=False, threshold_type="tokens", threshold_tokens=1000)
+    assert should_compact(disabled, 5000, "m") is False
+
+
+# ---------------------------------------------------------------------------
+# Usage capture
+# ---------------------------------------------------------------------------
+
+
+def test_anthropic_usage_sums_cache_into_context() -> None:
+    resp = SimpleNamespace(
+        usage=SimpleNamespace(
+            input_tokens=100,
+            output_tokens=20,
+            cache_read_input_tokens=300,
+            cache_creation_input_tokens=50,
+        )
+    )
+    u = _anthropic_usage(resp)
+    assert u["context_tokens"] == 450  # 100 + 300 + 50
+    assert u["output_tokens"] == 20
+
+
+def test_openai_usage_uses_prompt_tokens() -> None:
+    resp = SimpleNamespace(usage=SimpleNamespace(prompt_tokens=777, completion_tokens=33))
+    u = _openai_usage(resp)
+    assert u["context_tokens"] == 777
+    assert u["output_tokens"] == 33
+
+
+def test_usage_none_when_absent() -> None:
+    assert _anthropic_usage(SimpleNamespace(usage=None)) is None
+    assert _openai_usage(SimpleNamespace(usage=None)) is None
+
+
+# ---------------------------------------------------------------------------
+# compact_messages
+# ---------------------------------------------------------------------------
+
+
+def _session_with_tool_pair() -> list[dict]:
+    return [
+        {"role": "user", "content": "u1"},
+        {"role": "assistant", "content": "a1"},
+        {"role": "user", "content": "u2"},
+        {
+            "role": "assistant",
+            "content": [{"type": "tool_use", "id": "t", "name": "x", "input": {}}],
+        },
+        {"role": "user", "content": [{"type": "tool_result", "tool_use_id": "t", "content": "r"}]},
+        {"role": "assistant", "content": "a2"},
+        {"role": "user", "content": "u3"},
+        {"role": "assistant", "content": "a3"},
+    ]
+
+
+@pytest.mark.asyncio
+async def test_compact_keeps_recent_and_summarizes_rest() -> None:
+    llm = FakeLLM("THE SUMMARY")
+    msgs = _session_with_tool_pair()
+    result = await compact_messages(llm, "m", msgs, keep_recent_turns=1)
+    assert result is not None
+    new, summary = result
+    assert summary == "THE SUMMARY"
+    assert llm.calls == 1
+    # Rebuilt as: user(summary), assistant(ack), then the last real user turn verbatim.
+    assert new[0]["role"] == "user" and "THE SUMMARY" in new[0]["content"]
+    assert new[1]["role"] == "assistant"
+    assert new[2:] == [{"role": "user", "content": "u3"}, {"role": "assistant", "content": "a3"}]
+    # Valid alternation: never two same-role in a row.
+    roles = [m["role"] for m in new]
+    assert all(roles[i] != roles[i + 1] for i in range(len(roles) - 1))
+
+
+@pytest.mark.asyncio
+async def test_compact_does_not_split_tool_pair() -> None:
+    # Keeping 2 turns cuts before u2 — the tool_use/tool_result pair stays together
+    # in the summarized prefix, never straddling the boundary.
+    llm = FakeLLM()
+    msgs = _session_with_tool_pair()
+    new, _ = await compact_messages(llm, "m", msgs, keep_recent_turns=2)
+    tail = new[2:]
+    # The kept tail must start with a real user message (not a tool_result carrier).
+    first = tail[0]
+    assert first["role"] == "user" and isinstance(first["content"], str)
+
+
+@pytest.mark.asyncio
+async def test_compact_noop_when_too_few_turns() -> None:
+    llm = FakeLLM()
+    msgs = _session_with_tool_pair()  # 3 real user turns
+    assert await compact_messages(llm, "m", msgs, keep_recent_turns=3) is None
+    assert llm.calls == 0
+
+
+# ---------------------------------------------------------------------------
+# History.replace_session
+# ---------------------------------------------------------------------------
+
+
+@pytest.mark.asyncio
+async def test_replace_session_rewrites_messages(tmp_path) -> None:
+    h = ConversationHistory(db_path=str(tmp_path / "h.db"))
+    await h.append_session_message("telegram", "u", {"role": "user", "content": "old"})
+    await h.replace_session("telegram", "u", [{"role": "user", "content": "new"}])
+    assert await h.get_session("telegram", "u") == [{"role": "user", "content": "new"}]
+    # Cold instance reads the rewritten rows.
+    h2 = ConversationHistory(db_path=str(tmp_path / "h.db"))
+    assert await h2.get_session("telegram", "u") == [{"role": "user", "content": "new"}]
+
+
+# ---------------------------------------------------------------------------
+# Agent integration: _maybe_compact + /clear alias
+# ---------------------------------------------------------------------------
+
+
+@pytest.fixture
+def agent(tmp_path, monkeypatch):
+    monkeypatch.chdir(tmp_path)
+    from core.agent import AgentCore
+
+    a = AgentCore(Config())
+    a.history_mode = "session"
+    return a
+
+
+@pytest.mark.asyncio
+async def test_maybe_compact_replaces_session_and_notifies(agent, monkeypatch) -> None:
+    agent.config.compaction.enabled = True
+    agent.config.compaction.threshold_type = "tokens"
+    agent.config.compaction.threshold_tokens = 100
+    agent.config.compaction.keep_recent_turns = 1
+
+    for m in _session_with_tool_pair():
+        await agent.history.append_session_message("telegram", "u", m, "")
+
+    monkeypatch.setattr(agent, "_background_llm", lambda provider: FakeLLM("S"))
+    response = SimpleNamespace(usage={"context_tokens": 999})
+
+    notice = await agent._maybe_compact("telegram", "u", "", response)
+    assert notice is not None and "summarized" in notice.lower()
+    session = await agent.history.get_session("telegram", "u")
+    assert "S" in session[0]["content"]
+    assert session[-2:] == [
+        {"role": "user", "content": "u3"},
+        {"role": "assistant", "content": "a3"},
+    ]
+
+
+@pytest.mark.asyncio
+async def test_maybe_compact_below_threshold_noop(agent, monkeypatch) -> None:
+    agent.config.compaction.enabled = True
+    agent.config.compaction.threshold_type = "tokens"
+    agent.config.compaction.threshold_tokens = 100000
+    for m in _session_with_tool_pair():
+        await agent.history.append_session_message("telegram", "u", m, "")
+    monkeypatch.setattr(agent, "_background_llm", lambda provider: FakeLLM("S"))
+    response = SimpleNamespace(usage={"context_tokens": 50})
+    assert await agent._maybe_compact("telegram", "u", "", response) is None
+
+
+@pytest.mark.asyncio
+async def test_clear_alias_clears_conversation(agent) -> None:
+    resp = await agent.process("/clear", channel="telegram", user_id="u", chat_id="")
+    assert resp.text == "Conversation cleared."
+
+
+@pytest.mark.asyncio
+async def test_new_still_clears_conversation(agent) -> None:
+    resp = await agent.process("/new", channel="telegram", user_id="u", chat_id="")
+    assert resp.text == "Conversation cleared."
diff --git a/tests/test_tools.py b/tests/test_tools.py
new file mode 100644
index 0000000..587d760
--- /dev/null
+++ b/tests/test_tools.py
@@ -0,0 +1,168 @@
+"""Tests for optional tools, per-turn datetime injection, and prompt caching."""
+
+from __future__ import annotations
+
+import pytest
+
+from core.config import Config
+from core.history import ConversationHistory
+from core.prompt_builder import build_prompt_sections
+from core.tools import active_tool_prompts, tool_env
+
+# ---------------------------------------------------------------------------
+# Tools registry
+# ---------------------------------------------------------------------------
+
+
+def test_gh_tool_inactive_by_default() -> None:
+    cfg = Config()
+    assert active_tool_prompts(cfg) == []
+    assert tool_env(cfg) == {}
+
+
+def test_gh_tool_env_and_advert_when_enabled() -> None:
+    cfg = Config()
+    cfg.tools.gh.enabled = True
+    cfg.tools.gh.token = "ghp_secret"
+    assert tool_env(cfg) == {"GH_TOKEN": "ghp_secret"}
+    blocks = active_tool_prompts(cfg)
+    assert len(blocks) == 1
+    assert "gh" in blocks[0]
+
+
+def test_gh_enabled_without_token_has_no_env() -> None:
+    cfg = Config()
+    cfg.tools.gh.enabled = True  # no token
+    assert tool_env(cfg) == {}
+    # Still advertised so the agent knows the capability exists.
+    assert active_tool_prompts(cfg)
+
+
+# ---------------------------------------------------------------------------
+# Static system prompt: no datetime, tool advert gated on activation
+# ---------------------------------------------------------------------------
+
+
+def _sections(cfg: Config):
+    return build_prompt_sections(
+        config=cfg,
+        history_mode="session",
+        skills_index="",
+        memories="",
+        reflections="",
+        decomposed_goal=None,
+    )
+
+
+def test_static_prompt_has_no_datetime() -> None:
+    cfg = Config()
+    sections = _sections(cfg)
+    # The static prompt must not bake in a concrete date/time (it is injected
+    # per turn instead), so the prefix stays stable and cacheable.
+    assert "Today is" not in sections.full_prompt
+    assert "Current time:" not in sections.full_prompt
+    assert cfg.agent.timezone in sections.intro
+
+
+def test_tools_section_only_when_enabled() -> None:
+    cfg = Config()
+    assert _sections(cfg).tools == ""
+    cfg.tools.gh.enabled = True
+    cfg.tools.gh.token = "ghp_x"
+    sections = _sections(cfg)
+    assert "<tools>" in sections.tools
+    assert sections.tools in sections.full_prompt
+
+
+# ---------------------------------------------------------------------------
+# Session system snapshot
+# ---------------------------------------------------------------------------
+
+
+@pytest.mark.asyncio
+async def test_session_system_snapshot_roundtrip(tmp_path) -> None:
+    history = ConversationHistory(db_path=str(tmp_path / "h.db"))
+    assert await history.get_session_system("telegram", "u1") is None
+    await history.set_session_system("telegram", "u1", "SYSTEM-A")
+    assert await history.get_session_system("telegram", "u1") == "SYSTEM-A"
+
+
+@pytest.mark.asyncio
+async def test_session_system_survives_new_instance(tmp_path) -> None:
+    db = str(tmp_path / "h.db")
+    h1 = ConversationHistory(db_path=db)
+    await h1.set_session_system("telegram", "u1", "SYSTEM-A")
+    # Fresh instance (cold cache) must load the snapshot from disk.
+    h2 = ConversationHistory(db_path=db)
+    assert await h2.get_session_system("telegram", "u1") == "SYSTEM-A"
+
+
+@pytest.mark.asyncio
+async def test_clear_session_drops_system_snapshot(tmp_path) -> None:
+    history = ConversationHistory(db_path=str(tmp_path / "h.db"))
+    await history.set_session_system("telegram", "u1", "SYSTEM-A")
+    await history.clear_session("telegram", "u1")
+    assert await history.get_session_system("telegram", "u1") is None
+
+
+@pytest.mark.asyncio
+async def test_clear_drops_system_snapshot(tmp_path) -> None:
+    history = ConversationHistory(db_path=str(tmp_path / "h.db"))
+    await history.set_session_system("telegram", "u1", "SYSTEM-A")
+    await history.clear("telegram", "u1")
+    assert await history.get_session_system("telegram", "u1") is None
+
+
+# ---------------------------------------------------------------------------
+# Agent: per-turn preamble + user-message injection + session caching
+# ---------------------------------------------------------------------------
+
+
+@pytest.fixture
+def agent(tmp_path, monkeypatch):
+    monkeypatch.chdir(tmp_path)
+    from core.agent import AgentCore
+
+    return AgentCore(Config())
+
+
+def test_turn_preamble_carries_datetime(agent) -> None:
+    preamble = agent._turn_preamble(None)
+    assert "Current date & time" in preamble
+    # No execution plan when the goal was not decomposed.
+    assert "execution_plan" not in preamble
+
+
+def test_build_user_message_prepends_preamble(agent) -> None:
+    preamble = agent._turn_preamble(None)
+    msg = agent._build_user_message("hello", None, preamble)
+    assert msg["role"] == "user"
+    assert msg["content"].startswith(preamble)
+    assert msg["content"].endswith("hello")
+
+
+def test_build_user_message_no_preamble_is_plain(agent) -> None:
+    msg = agent._build_user_message("hello", None, "")
+    assert msg["content"] == "hello"
+
+
+@pytest.mark.asyncio
+async def test_session_system_built_once_and_reused(agent, monkeypatch) -> None:
+    calls = {"n": 0}
+
+    async def fake_build() -> str:
+        calls["n"] += 1
+        return f"SYSTEM-{calls['n']}"
+
+    monkeypatch.setattr(agent, "_build_system_prompt", fake_build)
+
+    first = await agent._session_system_prompt("telegram", "u1", "")
+    second = await agent._session_system_prompt("telegram", "u1", "")
+    assert first == second == "SYSTEM-1"
+    assert calls["n"] == 1  # built only once for the session
+
+    # After /new (clear), it rebuilds.
+    await agent.history.clear_session("telegram", "u1")
+    third = await agent._session_system_prompt("telegram", "u1", "")
+    assert third == "SYSTEM-2"
+    assert calls["n"] == 2
diff --git a/uv.lock b/uv.lock
index d5e54b1..a2b2401 100644
--- a/uv.lock
+++ b/uv.lock
@@ -764,7 +764,7 @@ wheels = [
 
 [[package]]
 name = "mpa"
-version = "0.10.0"
+version = "0.11.0"
 source = { virtual = "." }
 dependencies = [
     { name = "aiosqlite" },