Anthropic
Configure the Anthropic API key and test the connection.
diff --git a/api/templates/partials/tools.html b/api/templates/partials/tools.html
new file mode 100644
index 0000000..91ba92e
--- /dev/null
+++ b/api/templates/partials/tools.html
@@ -0,0 +1,115 @@
+{# Tools tab partial — manage optional external CLI tools #}
+
+
+
Tools
+
+ Optional external CLI tools the agent can use. When a tool is enabled it is
+ authenticated and advertised to the assistant in its system prompt; when
+ disabled it stays hidden. In session history mode, newly enabled tools are
+ advertised starting from the next /new conversation.
+
+
+
+ {# GitHub CLI (gh) #}
+
+
+
GitHub CLI (gh)
+
+
+
+
+
+
+ Let the agent query and act on GitHub (issues, PRs, repos, gh api,
+ search). Read operations run without asking; creating issues, PRs or releases
+ ask for confirmation first.
+
+
+
+
+
+
+
+
+ A GitHub
+ Personal Access Token.
+ Injected as GH_TOKEN so gh is authenticated
+ non-interactively. Grant only the scopes you want the agent to have
+ (e.g. repo, read:org).
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/channels/telegram.py b/channels/telegram.py
index 60185bf..32e1b63 100644
--- a/channels/telegram.py
+++ b/channels/telegram.py
@@ -372,6 +372,9 @@ async def _send_response(self, chat_id: int, response) -> None:
await self.send(chat_id, response.text)
else:
log.warning("Skipping empty response for chat_id=%s", chat_id)
+ # Out-of-band system notice (e.g. context compaction), sent separately.
+ if getattr(response, "system_notice", None):
+ await self.send(chat_id, response.system_notice)
async def _finalize_approval_response(
self, query: CallbackQuery, resolved: bool, label: str
diff --git a/channels/whatsapp.py b/channels/whatsapp.py
index 074a601..1435370 100644
--- a/channels/whatsapp.py
+++ b/channels/whatsapp.py
@@ -122,6 +122,8 @@ async def handle_webhook(self, payload: dict) -> dict:
chat_id=chat_id,
)
await self.send(sender, response.text)
+ if getattr(response, "system_notice", None):
+ await self.send(sender, response.system_notice)
return {"ok": True}
def _is_allowed(self, sender: str) -> bool:
diff --git a/core/agent.py b/core/agent.py
index 024a97c..9175d72 100644
--- a/core/agent.py
+++ b/core/agent.py
@@ -15,6 +15,7 @@
from tavily import TavilyClient
+from core.compaction import compact_messages, should_compact
from core.config import Config
from core.executor import ToolExecutor
from core.goal_decomposition import DecomposedGoal, classify_complexity, decompose_goal
@@ -28,6 +29,7 @@
from core.scheduler import AgentScheduler
from core.skills import SkillsEngine
from core.task_reflection import ReflectionStore
+from core.tools import tool_env
from voice.pipeline import VoicePipeline
log = logging.getLogger(__name__)
@@ -246,7 +248,7 @@ def __init__(self, config: Config):
db_path=config.agent.skills_db_path,
seed_dir=config.agent.skills_dir,
)
- self.executor = ToolExecutor()
+ self.executor = ToolExecutor(tool_env=tool_env(config))
self.history = ConversationHistory(
db_path=config.history.db_path,
max_turns=config.history.max_turns,
@@ -294,8 +296,8 @@ async def process(
preventing context leakage across chats.
"""
- # Handle /new command — clear conversational context.
- if message.strip().lower() == "/new":
+ # Handle /new (alias /clear) command — clear conversational context.
+ if message.strip().lower() in ("/new", "/clear"):
if self.history_mode == "session":
await self.history.clear_session(channel, user_id, chat_id)
else:
@@ -308,12 +310,25 @@ async def process(
)
return AgentResponse(text="Conversation cleared.")
- # Goal decomposition — classify and (if complex) decompose the request
+ # Goal decomposition — classify and (if complex) decompose the request.
+ # The resulting plan is request-specific, so it is injected per turn
+ # (in the user-message preamble), not baked into the static prompt.
decomposed_goal: DecomposedGoal | None = None
if self.config.goal_decomposition.enabled and channel != "system":
decomposed_goal = await self._maybe_decompose(message)
- system = await self._build_system_prompt(decomposed_goal=decomposed_goal)
+ # Per-turn preamble: live date/time + (optional) execution plan.
+ preamble = self._turn_preamble(decomposed_goal)
+
+ # Static system prompt. In session mode it is snapshotted once at the
+ # start of the session and reused for every turn (so the static content
+ # is only built once, not rebuilt and re-sent each turn). In injection
+ # mode the prompt is windowed/stateless, so it is rebuilt per call.
+ if self.history_mode == "session":
+ system = await self._session_system_prompt(channel, user_id, chat_id)
+ else:
+ system = await self._build_system_prompt()
+
if self.config.admin.capture_prompts:
self._record_system_prompt(
channel=channel,
@@ -324,34 +339,116 @@ async def process(
if self.history_mode == "session":
return await self._process_session(
- system, message, channel, user_id, attachments, chat_id
+ system, preamble, message, channel, user_id, attachments, chat_id
)
return await self._process_injection(
- system, message, channel, user_id, attachments, chat_id
+ system, preamble, message, channel, user_id, attachments, chat_id
+ )
+
+ def _turn_preamble(self, decomposed_goal: DecomposedGoal | None) -> str:
+ """Build the per-turn preamble prepended to the current user message.
+
+ Always carries the live date/time (so the agent knows 'now' every turn);
+ also carries the execution plan when the request was decomposed.
+ """
+ now = datetime.now(ZoneInfo(self.config.agent.timezone))
+ stamp = now.strftime("%A, %B %d, %Y %H:%M %Z")
+ preamble = f"[Current date & time: {stamp}]"
+ if decomposed_goal:
+ preamble += (
+ "\n\n
\n"
+ "Your request has been analysed and broken into the following sub-goals.\n"
+ "Follow this plan step-by-step, completing each sub-goal in order "
+ "(respecting dependencies). Report progress as you go.\n\n"
+ f"{decomposed_goal.format_for_prompt()}\n"
+ ""
+ )
+ return preamble
+
+ async def _session_system_prompt(self, channel: str, user_id: str, chat_id: str) -> str:
+ """Return the session's static system prompt, building it once if needed.
+
+ Built fresh after a ``/new`` (when no snapshot exists), then reused for
+ the lifetime of the session so the static content is sent only once.
+ """
+ cached = await self.history.get_session_system(channel, user_id, chat_id)
+ if cached is not None:
+ return cached
+ system = await self._build_system_prompt()
+ await self.history.set_session_system(channel, user_id, system, chat_id)
+ return system
+
+ async def _maybe_compact(
+ self, channel: str, user_id: str, chat_id: str, response: Any
+ ) -> str | None:
+ """Compact the session if the context exceeds the configured threshold.
+
+ Returns a user-facing notice when compaction happened, else ``None``.
+ Failures are logged and swallowed — compaction must never break a turn.
+ """
+ cfg = self.config.compaction
+ if self.history_mode != "session" or not cfg.enabled:
+ return None
+ usage = getattr(response, "usage", None) or {}
+ context_tokens = int(usage.get("context_tokens") or 0)
+ if not should_compact(cfg, context_tokens, self.config.agent.model):
+ return None
+
+ session = await self.history.get_session(channel, user_id, chat_id)
+ try:
+ llm = self._background_llm(cfg.provider)
+ result = await compact_messages(llm, cfg.model, session, cfg.keep_recent_turns)
+ except Exception:
+ log.exception("Conversation compaction failed")
+ return None
+ if not result:
+ return None
+
+ new_messages, _summary = result
+ await self.history.replace_session(channel, user_id, new_messages, chat_id)
+ log.info(
+ "Compacted session %s/%s/%s: %d → %d messages (~%d ctx tokens)",
+ channel,
+ user_id,
+ chat_id,
+ len(session),
+ len(new_messages),
+ context_tokens,
+ )
+ return (
+ f"🗜️ Our conversation was getting large (~{context_tokens:,} tokens). "
+ "I summarized the earlier part to free up space; recent messages are kept as-is."
)
def _build_user_message(
self,
message: str,
attachments: list[Attachment] | None = None,
+ preamble: str = "",
) -> dict:
- """Build the user message dict, handling multimodal content."""
+ """Build the user message dict, handling multimodal content.
+
+ ``preamble`` (live date/time + optional execution plan) is prepended to
+ the message text so the agent always knows 'now' for the current turn.
+ """
+ text = f"{preamble}\n\n{message}" if preamble else message
image_attachments = [a for a in (attachments or []) if a.is_image]
if image_attachments:
content_blocks: list[dict] = []
- if message:
- content_blocks.append({"type": "text", "text": message})
+ if text:
+ content_blocks.append({"type": "text", "text": text})
for att in image_attachments:
if self.llm.provider == "anthropic":
content_blocks.append(att.to_anthropic_block())
else:
content_blocks.append(att.to_openai_block())
return {"role": "user", "content": content_blocks}
- return {"role": "user", "content": message}
+ return {"role": "user", "content": text}
async def _process_injection(
self,
system: str,
+ preamble: str,
message: str,
channel: str,
user_id: str,
@@ -368,7 +465,7 @@ async def _process_injection(
messages.append({"role": turn["role"], "content": turn["content"]})
# The actual current request — always the last user message.
- messages.append(self._build_user_message(message, attachments))
+ messages.append(self._build_user_message(message, attachments, preamble))
log.info(
"Processing message (injection) from %s/%s/%s: %s",
@@ -445,6 +542,7 @@ async def _process_injection(
async def _process_session(
self,
system: str,
+ preamble: str,
message: str,
channel: str,
user_id: str,
@@ -460,8 +558,8 @@ async def _process_session(
# Load existing session (from memory cache or DB)
session = await self.history.get_session(channel, user_id, chat_id)
- # Append the new user message
- user_msg = self._build_user_message(message, attachments)
+ # Append the new user message (with the live date/time preamble)
+ user_msg = self._build_user_message(message, attachments, preamble)
await self.history.append_session_message(channel, user_id, user_msg, chat_id)
log.info(
@@ -526,6 +624,11 @@ async def _process_session(
final_text = response.text
log.info("Response: %s", final_text[:200])
+ # Compaction — if the context has grown past the configured threshold,
+ # summarise the oldest turns. ``response.usage`` reflects the full
+ # session that was just sent, so it's the authoritative context size.
+ system_notice = await self._maybe_compact(channel, user_id, chat_id, response)
+
# Check if the LLM wants to respond with voice
voice_bytes = await self._maybe_synthesize_voice(final_text)
if voice_bytes:
@@ -545,7 +648,7 @@ async def _process_session(
name=f"task-reflect-{user_id}",
)
- return AgentResponse(text=final_text, voice=voice_bytes)
+ return AgentResponse(text=final_text, voice=voice_bytes, system_notice=system_notice)
@staticmethod
def _history_message_text(message: str, attachments: list[Attachment] | None = None) -> str:
diff --git a/core/compaction.py b/core/compaction.py
new file mode 100644
index 0000000..da1d32e
--- /dev/null
+++ b/core/compaction.py
@@ -0,0 +1,181 @@
+"""Conversation compaction for session history mode.
+
+When a sticky session grows close to the model's context window, the oldest
+turns are summarised by a (cheap) LLM into a single synthetic exchange, while
+the most recent turns are kept verbatim. This keeps long conversations going
+without blowing the context window, and — unlike provider-specific server-side
+compaction — works across every provider MPA supports.
+
+The trigger is the *real* token usage reported by the provider after the turn
+(see ``LLMResponse.usage``), so no local tokenizer is needed.
+"""
+
+from __future__ import annotations
+
+import logging
+from typing import Any
+
+from core.config import CompactionConfig
+from core.llm import LLMClient
+
+log = logging.getLogger(__name__)
+
+# Known context-window sizes (tokens). Used for percent-mode thresholds.
+# Unknown models fall back to CompactionConfig.context_window.
+CONTEXT_WINDOWS: dict[str, int] = {
+ "claude-opus-4-8": 1_000_000,
+ "claude-opus-4-7": 1_000_000,
+ "claude-opus-4-6": 1_000_000,
+ "claude-opus-4-5": 200_000,
+ "claude-opus-4-1": 200_000,
+ "claude-sonnet-4-6": 1_000_000,
+ "claude-4-6-sonnet": 1_000_000,
+ "claude-sonnet-4-5": 200_000,
+ "claude-haiku-4-5": 200_000,
+ "claude-4-5-haiku": 200_000,
+}
+
+_SUMMARY_PROMPT = """\
+You are compacting the earlier part of a conversation between a user and their
+personal AI assistant so it fits in a smaller context window. Write a dense,
+factual summary that preserves everything needed to continue the conversation
+seamlessly. Include:
+- The user's goals, requests, and any decisions made.
+- Concrete facts, names, identifiers, numbers, file paths, and preferences.
+- Results of actions already taken (emails sent, events created, lookups done).
+- Open threads or pending follow-ups.
+
+Do NOT include pleasantries or restate this instruction. Be concise but complete.
+Write the summary as plain prose / bullet points.
+
+
+{transcript}
+
+
+Summary:"""
+
+
+def effective_window(config: CompactionConfig, model: str) -> int:
+ """Return the context window (tokens) to use for percent-mode thresholds."""
+ key = (model or "").strip().lower()
+ return CONTEXT_WINDOWS.get(key, config.context_window)
+
+
+def compaction_threshold_tokens(config: CompactionConfig, model: str) -> int:
+ """Return the absolute token count at which compaction should trigger."""
+ if config.threshold_type == "tokens":
+ return config.threshold_tokens
+ window = effective_window(config, model)
+ return int(window * config.threshold_percent / 100)
+
+
+def should_compact(config: CompactionConfig, context_tokens: int, model: str) -> bool:
+ """Return True if the current context size warrants compaction."""
+ if not config.enabled or not context_tokens:
+ return False
+ return context_tokens >= compaction_threshold_tokens(config, model)
+
+
+def _is_real_user_turn(message: dict[str, Any]) -> bool:
+ """True if this is a genuine user turn (not a tool_result carrier message)."""
+ if message.get("role") != "user":
+ return False
+ content = message.get("content")
+ if isinstance(content, str):
+ return True
+ if isinstance(content, list):
+ # Tool results are delivered as user messages whose blocks are all
+ # tool_result; a real user turn has at least one text/image block.
+ return any(isinstance(b, dict) and b.get("type") != "tool_result" for b in content)
+ return True
+
+
+def _block_text(content: Any) -> str:
+ """Flatten a message's content into plain text for summarisation."""
+ if isinstance(content, str):
+ return content
+ if not isinstance(content, list):
+ return str(content)
+ parts: list[str] = []
+ for block in content:
+ if not isinstance(block, dict):
+ parts.append(str(block))
+ continue
+ btype = block.get("type")
+ if btype == "text":
+ parts.append(str(block.get("text", "")))
+ elif btype == "tool_use":
+ parts.append(f"[tool_use {block.get('name', '')}: {block.get('input', {})}]")
+ elif btype == "tool_result":
+ parts.append(f"[tool_result: {block.get('content', '')}]")
+ elif btype == "image":
+ parts.append("[image]")
+ return "\n".join(p for p in parts if p)
+
+
+def _render_transcript(messages: list[dict[str, Any]]) -> str:
+ lines: list[str] = []
+ for msg in messages:
+ role = msg.get("role", "?")
+ text = _block_text(msg.get("content")).strip()
+ if text:
+ lines.append(f"{role.upper()}: {text}")
+ return "\n\n".join(lines)
+
+
+async def compact_messages(
+ llm: LLMClient,
+ model: str,
+ messages: list[dict[str, Any]],
+ keep_recent_turns: int,
+) -> tuple[list[dict[str, Any]], str] | None:
+ """Summarise the oldest turns of a session, keeping the recent ones verbatim.
+
+ Returns ``(new_messages, summary)`` or ``None`` if there is nothing worth
+ compacting (too few turns). The rebuilt session is:
+
+ [user(
), assistant(), *recent_turns_verbatim]
+
+ The cut is made at a real user-turn boundary so a ``tool_use`` block is
+ never split from its ``tool_result``.
+ """
+ boundaries = [i for i, m in enumerate(messages) if _is_real_user_turn(m)]
+ # Need more turns than we intend to keep, otherwise there's nothing to fold.
+ if len(boundaries) <= keep_recent_turns:
+ return None
+
+ cut = boundaries[len(boundaries) - keep_recent_turns]
+ prefix = messages[:cut]
+ tail = messages[cut:]
+ if not prefix:
+ return None
+
+ transcript = _render_transcript(prefix)
+ if not transcript.strip():
+ return None
+
+ summary = await llm.generate_text(
+ model=model,
+ prompt=_SUMMARY_PROMPT.format(transcript=transcript),
+ max_tokens=2048,
+ )
+ summary = (summary or "").strip()
+ if not summary:
+ return None
+
+ new_messages: list[dict[str, Any]] = [
+ {
+ "role": "user",
+ "content": (
+ "Here is a summary of the earlier part of our conversation "
+ "(it was compacted to save space):\n\n"
+ f"\n{summary}\n"
+ ),
+ },
+ {
+ "role": "assistant",
+ "content": "Understood — I'll continue with that summarized context in mind.",
+ },
+ *tail,
+ ]
+ return new_messages, summary
diff --git a/core/config.py b/core/config.py
index 47bfdfb..870813c 100644
--- a/core/config.py
+++ b/core/config.py
@@ -155,6 +155,23 @@ class TaskReflectionConfig(BaseModel):
max_reflections: int = 50 # max reflections to keep for prompt injection
+class CompactionConfig(BaseModel):
+ """Conversation compaction — summarise old turns when the context grows.
+
+ Only applies in session history mode. The threshold is evaluated against
+ the real token usage reported by the provider after each turn.
+ """
+
+ enabled: bool = True
+ provider: str = "anthropic"
+ model: str = "claude-haiku-4-5"
+ threshold_type: str = "percent" # "percent" (of context window) or "tokens" (absolute)
+ threshold_percent: int = 80 # trigger at this % of the model's context window
+ threshold_tokens: int = 150000 # absolute trigger when threshold_type == "tokens"
+ context_window: int = 200000 # fallback window for % mode when the model is unknown
+ keep_recent_turns: int = 4 # most-recent user turns kept verbatim after compaction
+
+
class SearchConfig(BaseModel):
enabled: bool = False
provider: str = "tavily"
@@ -171,6 +188,19 @@ class PromptConfig(BaseModel):
history_handling_override: str = ""
+class GhToolConfig(BaseModel):
+ """GitHub CLI (`gh`) tool — auth via a Personal Access Token."""
+
+ enabled: bool = False
+ token: str = "" # GitHub PAT, injected as GH_TOKEN when running `gh`
+
+
+class ToolsConfig(BaseModel):
+ """Optional external CLI tools the agent can use (see core/tools.py)."""
+
+ gh: GhToolConfig = GhToolConfig()
+
+
class Config(BaseModel):
agent: AgentConfig = AgentConfig()
channels: ChannelsConfig = ChannelsConfig()
@@ -182,9 +212,11 @@ class Config(BaseModel):
memory: MemoryConfig = MemoryConfig()
goal_decomposition: GoalDecompositionConfig = GoalDecompositionConfig()
task_reflection: TaskReflectionConfig = TaskReflectionConfig()
+ compaction: CompactionConfig = CompactionConfig()
search: SearchConfig = SearchConfig()
you: YouConfig = YouConfig()
prompt: PromptConfig = PromptConfig()
+ tools: ToolsConfig = ToolsConfig()
def load_config(path: str | Path = "config.yml") -> Config:
diff --git a/core/config_store.py b/core/config_store.py
index 4fd91fc..6897a31 100644
--- a/core/config_store.py
+++ b/core/config_store.py
@@ -55,6 +55,7 @@
"admin.password_hash",
"admin.password_salt",
"search.api_key",
+ "tools.gh.token",
"calendar.providers",
"calendar.google_oauth_client_id",
"calendar.google_oauth_client_secret",
diff --git a/core/executor.py b/core/executor.py
index 5b11669..185e15b 100644
--- a/core/executor.py
+++ b/core/executor.py
@@ -33,6 +33,11 @@ def _find_wacli_bin() -> str:
class ToolExecutor:
"""Executes CLI commands on behalf of the LLM."""
+ def __init__(self, tool_env: dict[str, str] | None = None) -> None:
+ # Extra environment for optional tools (e.g. GH_TOKEN for `gh`).
+ # Injected into every spawned subprocess; updated on config reload.
+ self.tool_env: dict[str, str] = dict(tool_env or {})
+
ALLOWED_PREFIXES = [
"curl",
"himalaya",
@@ -90,9 +95,12 @@ async def run_command_trusted(self, command: str, timeout: int = 30) -> dict:
async def _exec(self, command: str, timeout: int) -> dict:
"""Run a shell command and capture output."""
env = None
- if "himalaya" in command:
+ if "himalaya" in command or self.tool_env:
env = os.environ.copy()
- env.update(himalaya_env())
+ if "himalaya" in command:
+ env.update(himalaya_env())
+ # Tool auth (e.g. GH_TOKEN) — only set when a tool is enabled.
+ env.update(self.tool_env)
proc = await asyncio.create_subprocess_shell(
command,
stdout=asyncio.subprocess.PIPE,
diff --git a/core/history.py b/core/history.py
index 4da6599..7156658 100644
--- a/core/history.py
+++ b/core/history.py
@@ -42,6 +42,14 @@
);
CREATE INDEX IF NOT EXISTS idx_session_lookup
ON session_messages(channel, user_id, chat_id, id);
+CREATE TABLE IF NOT EXISTS session_system (
+ channel TEXT NOT NULL,
+ user_id TEXT NOT NULL,
+ chat_id TEXT NOT NULL DEFAULT '',
+ system TEXT NOT NULL,
+ created_at DATETIME DEFAULT (datetime('now')),
+ PRIMARY KEY (channel, user_id, chat_id)
+);
"""
# Migrations applied after initial schema creation.
@@ -77,6 +85,8 @@ def __init__(self, db_path: str = "data/history.db", max_turns: int = 20):
self._ready = False
# In-memory cache for sticky sessions: {(channel, user_id, chat_id): [message_dicts]}
self._sessions: dict[tuple[str, str, str], list[dict[str, Any]]] = {}
+ # In-memory cache for the static system prompt snapshot per session.
+ self._session_system: dict[tuple[str, str, str], str] = {}
async def _ensure_schema(self) -> None:
if self._ready:
@@ -159,9 +169,14 @@ async def clear(self, channel: str, user_id: str, chat_id: str = "") -> None:
"DELETE FROM session_messages WHERE channel = ? AND user_id = ? AND chat_id = ?",
(channel, user_id, chat_id),
)
+ await db.execute(
+ "DELETE FROM session_system WHERE channel = ? AND user_id = ? AND chat_id = ?",
+ (channel, user_id, chat_id),
+ )
await db.commit()
# Clear in-memory session cache
self._sessions.pop((channel, user_id, chat_id), None)
+ self._session_system.pop((channel, user_id, chat_id), None)
# -------------------------------------------------------------------
# Session mode — sticky session per (channel, user_id, chat_id)
@@ -230,6 +245,33 @@ async def append_session_messages(
)
await db.commit()
+ async def replace_session(
+ self,
+ channel: str,
+ user_id: str,
+ messages: list[dict[str, Any]],
+ chat_id: str = "",
+ ) -> None:
+ """Atomically replace a session's messages (used by compaction).
+
+ Rewrites both the in-memory cache and the persisted ``session_messages``
+ rows. The system-prompt snapshot is left untouched.
+ """
+ await self._ensure_schema()
+ key = (channel, user_id, chat_id)
+ self._sessions[key] = list(messages)
+ async with aiosqlite.connect(self.db_path) as db:
+ await db.execute(
+ "DELETE FROM session_messages WHERE channel = ? AND user_id = ? AND chat_id = ?",
+ (channel, user_id, chat_id),
+ )
+ await db.executemany(
+ "INSERT INTO session_messages (channel, user_id, chat_id, message) "
+ "VALUES (?, ?, ?, ?)",
+ [(channel, user_id, chat_id, json.dumps(m)) for m in messages],
+ )
+ await db.commit()
+
async def clear_session(self, channel: str, user_id: str, chat_id: str = "") -> None:
"""Clear just the sticky session for a (channel, user_id, chat_id) triple."""
await self._ensure_schema()
@@ -238,5 +280,52 @@ async def clear_session(self, channel: str, user_id: str, chat_id: str = "") ->
"DELETE FROM session_messages WHERE channel = ? AND user_id = ? AND chat_id = ?",
(channel, user_id, chat_id),
)
+ await db.execute(
+ "DELETE FROM session_system WHERE channel = ? AND user_id = ? AND chat_id = ?",
+ (channel, user_id, chat_id),
+ )
await db.commit()
self._sessions.pop((channel, user_id, chat_id), None)
+ self._session_system.pop((channel, user_id, chat_id), None)
+
+ # -------------------------------------------------------------------
+ # Session mode — static system prompt snapshot
+ # -------------------------------------------------------------------
+
+ async def get_session_system(self, channel: str, user_id: str, chat_id: str = "") -> str | None:
+ """Return the cached static system prompt for a session, or None if unset.
+
+ The system prompt is snapshotted once at the start of a session (after a
+ ``/new``) and reused for every subsequent turn, so the static content is
+ only built/sent once instead of being rebuilt each turn.
+ """
+ await self._ensure_schema()
+ key = (channel, user_id, chat_id)
+ if key in self._session_system:
+ return self._session_system[key]
+ async with aiosqlite.connect(self.db_path) as db:
+ cursor = await db.execute(
+ "SELECT system FROM session_system "
+ "WHERE channel = ? AND user_id = ? AND chat_id = ?",
+ (channel, user_id, chat_id),
+ )
+ row = await cursor.fetchone()
+ if row is None:
+ return None
+ self._session_system[key] = row[0]
+ return row[0]
+
+ async def set_session_system(
+ self, channel: str, user_id: str, system: str, chat_id: str = ""
+ ) -> None:
+ """Persist the static system prompt snapshot for a session."""
+ await self._ensure_schema()
+ self._session_system[(channel, user_id, chat_id)] = system
+ async with aiosqlite.connect(self.db_path) as db:
+ await db.execute(
+ "INSERT INTO session_system (channel, user_id, chat_id, system) "
+ "VALUES (?, ?, ?, ?) "
+ "ON CONFLICT(channel, user_id, chat_id) DO UPDATE SET system = excluded.system",
+ (channel, user_id, chat_id, system),
+ )
+ await db.commit()
diff --git a/core/llm.py b/core/llm.py
index b0f3bab..43ae6d4 100644
--- a/core/llm.py
+++ b/core/llm.py
@@ -32,6 +32,44 @@ class LLMResponse:
text: str
tool_calls: list[LLMToolCall]
raw: object | None = None
+ # Token usage for the request, when the provider reports it. Keys:
+ # input_tokens, output_tokens, cache_read_input_tokens,
+ # cache_creation_input_tokens, context_tokens (= full prompt size).
+ usage: dict[str, int] | None = None
+
+
+def _anthropic_usage(response: Any) -> dict[str, int] | None:
+ u = getattr(response, "usage", None)
+ if u is None:
+ return None
+ inp = getattr(u, "input_tokens", 0) or 0
+ out = getattr(u, "output_tokens", 0) or 0
+ cache_read = getattr(u, "cache_read_input_tokens", 0) or 0
+ cache_creation = getattr(u, "cache_creation_input_tokens", 0) or 0
+ # The true context size is the uncached input plus everything served
+ # from / written to the cache this request.
+ return {
+ "input_tokens": inp,
+ "output_tokens": out,
+ "cache_read_input_tokens": cache_read,
+ "cache_creation_input_tokens": cache_creation,
+ "context_tokens": inp + cache_read + cache_creation,
+ }
+
+
+def _openai_usage(response: Any) -> dict[str, int] | None:
+ u = getattr(response, "usage", None)
+ if u is None:
+ return None
+ prompt = getattr(u, "prompt_tokens", 0) or 0
+ completion = getattr(u, "completion_tokens", 0) or 0
+ return {
+ "input_tokens": prompt,
+ "output_tokens": completion,
+ "cache_read_input_tokens": 0,
+ "cache_creation_input_tokens": 0,
+ "context_tokens": prompt,
+ }
def _openai_tools(tools: list[dict[str, Any]]) -> list[dict[str, Any]]:
@@ -125,10 +163,21 @@ async def generate(
if self.provider == "anthropic":
client_any = cast(Any, self._client)
messages_client = cast(Any, getattr(client_any, "messages")) # type: ignore[attr-defined]
+ # Mark the (static) system prompt as a cache breakpoint so the
+ # tools + system prefix is cached and not reprocessed every turn.
+ system_param: Any = system
+ if system:
+ system_param = [
+ {
+ "type": "text",
+ "text": system,
+ "cache_control": {"type": "ephemeral"},
+ }
+ ]
response = await messages_client.create(
model=resolved_model,
max_tokens=max_tokens,
- system=system,
+ system=cast(Any, system_param),
messages=cast(Any, messages),
tools=cast(Any, tools),
)
@@ -150,6 +199,7 @@ async def generate(
text="\n".join(text_parts).strip(),
tool_calls=tool_calls,
raw=response.content,
+ usage=_anthropic_usage(response),
)
openai_tools = _openai_tools(tools)
@@ -174,6 +224,7 @@ async def generate(
text=(message.content or "").strip(),
tool_calls=tool_calls,
raw=message.model_dump(exclude_none=True),
+ usage=_openai_usage(response),
)
def assistant_message(self, response: LLMResponse) -> dict[str, Any]:
diff --git a/core/models.py b/core/models.py
index 97f3abf..dedae63 100644
--- a/core/models.py
+++ b/core/models.py
@@ -58,3 +58,6 @@ class AgentResponse:
text: str
voice: bytes | None = None
attachments: list[Attachment] = field(default_factory=list)
+ # Optional out-of-band system message (e.g. "context was compacted"),
+ # delivered by the channel as a separate follow-up message.
+ system_notice: str | None = None
diff --git a/core/prompt_builder.py b/core/prompt_builder.py
index bcac689..13900f4 100644
--- a/core/prompt_builder.py
+++ b/core/prompt_builder.py
@@ -3,11 +3,10 @@
from __future__ import annotations
from dataclasses import dataclass
-from datetime import datetime
-from zoneinfo import ZoneInfo
from core.config import Config
from core.goal_decomposition import DecomposedGoal
+from core.tools import active_tool_prompts
DEFAULT_TOOL_USAGE_BLOCK = """For write actions
(sending emails, replying to emails, sending messages, creating calendar events,
@@ -51,6 +50,7 @@ class PromptSections:
character: str
about_user: str
tool_usage: str
+ tools: str
memory_instruction: str
history_handling: str
memories: str
@@ -66,8 +66,10 @@ def full_prompt(self) -> str:
self.character,
self.about_user,
self.tool_usage,
- self.memory_instruction,
]
+ if self.tools:
+ parts.append(self.tools)
+ parts.append(self.memory_instruction)
if self.history_handling:
parts.append(self.history_handling)
if self.memories:
@@ -87,6 +89,7 @@ def as_dict(self) -> dict[str, str]:
"character": self.character,
"about_user": self.about_user,
"tool_usage": self.tool_usage,
+ "tools": self.tools,
"memory_instruction": self.memory_instruction,
"history_handling": self.history_handling,
"memories": self.memories,
@@ -107,11 +110,13 @@ def build_prompt_sections(
include_memories: bool = True,
include_reflections: bool = True,
) -> PromptSections:
- """Build all prompt sections with current config and dynamic context."""
+ """Build all prompt sections with current config and dynamic context.
+
+ The prompt is intentionally **static** (no current date/time): it forms the
+ cacheable prefix sent to the LLM. The live date/time is injected per turn at
+ the start of each user message instead (see ``AgentCore._turn_preamble``).
+ """
cfg = config.agent
- now = datetime.now(ZoneInfo(cfg.timezone))
- date_str = now.strftime("%A, %B %d, %Y")
- time_str = now.strftime("%H:%M")
about_user_block = config.you.personalia.strip()
tool_usage_text = resolve_prompt_block(
@@ -125,13 +130,19 @@ def build_prompt_sections(
intro = (
f"You are {cfg.name}, a personal AI assistant for {cfg.owner_name}.\n\n"
- f"Today is {date_str}. Current time: {time_str}. Timezone: {cfg.timezone}."
+ f"Your timezone is {cfg.timezone}. The current date and time is provided at the "
+ f"start of each user message — always use that as 'now'."
)
personalia = f"\n{cfg.personalia}\n"
character = f"\n{cfg.character}\n"
about_user = f"\n{about_user_block}\n" if about_user_block else ""
tool_usage = f"\n{tool_usage_text}\n"
+
+ tool_blocks = active_tool_prompts(config)
+ tools_section = ""
+ if tool_blocks:
+ tools_section = "\n" + "\n\n".join(tool_blocks) + "\n"
memory_instruction = (
"You can store and recall memories using the sqlite3 CLI (see the memory skill).\n"
"Proactively remember important facts about the user and their contacts.\n"
@@ -171,6 +182,7 @@ def build_prompt_sections(
character=character,
about_user=about_user,
tool_usage=tool_usage,
+ tools=tools_section,
memory_instruction=memory_instruction,
history_handling=history_handling,
memories=memory_section,
diff --git a/core/tools.py b/core/tools.py
new file mode 100644
index 0000000..594e5ce
--- /dev/null
+++ b/core/tools.py
@@ -0,0 +1,100 @@
+"""Optional external CLI tools the agent can use (e.g. the GitHub `gh` CLI).
+
+Tools are configured under ``config.tools.*``. When a tool is *enabled*, two
+things happen:
+
+1. Its authentication is wired into the executor environment (via :func:`tool_env`),
+ so the underlying CLI is authenticated when the agent runs it.
+2. It is advertised to the LLM in the system prompt (via :func:`active_tool_prompts`),
+ so the model knows the capability exists and how to use it.
+
+A tool that is *not* enabled is neither authenticated nor advertised, keeping the
+prompt lean and the capability hidden.
+
+Adding a new tool means: add a config sub-model in ``core/config.py``, then add an
+entry to ``_REGISTRY`` below describing its env + prompt advertisement.
+"""
+
+from __future__ import annotations
+
+from collections.abc import Callable
+from dataclasses import dataclass
+
+from core.config import Config
+
+# Advertisement injected into the system prompt when `gh` is active.
+_GH_PROMPT = """
+The GitHub CLI `gh` is installed and authenticated. Run it with the `run_command`
+tool for GitHub operations. Read operations run without asking; creating issues,
+PRs or releases ask for confirmation first.
+Examples:
+ gh issue list --repo owner/name
+ gh pr view 123 --repo owner/name
+ gh pr list --repo owner/name --state open
+ gh api repos/owner/name/commits
+ gh search issues "is:open label:bug" --repo owner/name
+Always pass `--repo owner/name` unless the working directory is a checkout of the
+target repository. Use `-o json` / `gh api` and parse JSON when you need fields.
+"""
+
+
+@dataclass(frozen=True)
+class ToolSpec:
+ """Describes an optional external tool the agent can use."""
+
+ key: str # config sub-key under `tools.` (e.g. "gh")
+ label: str # human-friendly name for the admin UI
+ summary: str # one-line description for the admin UI
+ # Returns env vars to inject when the tool is enabled (auth, etc.).
+ env: Callable[[Config], dict[str, str]]
+ # Returns the system-prompt advertisement block for the tool.
+ prompt: Callable[[Config], str]
+
+
+def _gh_env(config: Config) -> dict[str, str]:
+ gh = config.tools.gh
+ if gh.enabled and gh.token:
+ # `gh` reads GH_TOKEN (preferred) / GITHUB_TOKEN for non-interactive auth.
+ return {"GH_TOKEN": gh.token}
+ return {}
+
+
+_REGISTRY: tuple[ToolSpec, ...] = (
+ ToolSpec(
+ key="gh",
+ label="GitHub CLI (gh)",
+ summary="Let the agent query and act on GitHub (issues, PRs, repos, API).",
+ env=_gh_env,
+ prompt=lambda _cfg: _GH_PROMPT,
+ ),
+)
+
+
+def registry() -> tuple[ToolSpec, ...]:
+ """Return all known optional tools."""
+ return _REGISTRY
+
+
+def _is_enabled(config: Config, key: str) -> bool:
+ sub = getattr(config.tools, key, None)
+ return bool(getattr(sub, "enabled", False))
+
+
+def active_tool_prompts(config: Config) -> list[str]:
+ """Return system-prompt advertisement blocks for every *enabled* tool."""
+ blocks: list[str] = []
+ for spec in _REGISTRY:
+ if _is_enabled(config, spec.key):
+ block = spec.prompt(config).strip()
+ if block:
+ blocks.append(block)
+ return blocks
+
+
+def tool_env(config: Config) -> dict[str, str]:
+ """Return the merged environment for every *enabled* tool (auth tokens, etc.)."""
+ env: dict[str, str] = {}
+ for spec in _REGISTRY:
+ if _is_enabled(config, spec.key):
+ env.update(spec.env(config))
+ return env
diff --git a/docs/content/docs/admin-ui.mdx b/docs/content/docs/admin-ui.mdx
index 57e3cc2..66d7df1 100644
--- a/docs/content/docs/admin-ui.mdx
+++ b/docs/content/docs/admin-ui.mdx
@@ -28,14 +28,21 @@ Overview of agent status, active channels, and quick stats.
Edit agent settings without touching config files:
- **Identity** — agent name and owner name
-- **LLM** — provider, model, and API key configuration. The model field is free-text (enter any model id the provider supports); a **Fetch models** button on each provider card loads the live model list from the provider API into the field's autocomplete, and **Copy list** copies all available ids
+- **LLM** — provider, model, and API key configuration. The model field is free-text (enter any model id the provider supports); a **Fetch models** button on each provider card loads the live model list from the provider API into the field's autocomplete, and **Copy list** copies all available ids. Also configures the background models for memory, goal decomposition, task reflection, and **history compaction** (the model used to summarize old conversation turns — see the History tab for the trigger threshold)
- **Channels** — enable/disable Telegram and WhatsApp
- **Calendar** — manage CalDAV providers
- **Contacts** — manage contact providers
- **Email** — Himalaya configuration
- **Search** — Tavily API configuration
+- **Tools** — enable optional external CLI tools the agent can use
- **Voice** — STT/TTS settings
+### Tools
+
+Enable optional external CLI tools. When a tool is enabled it is authenticated and advertised to the assistant in its system prompt; when disabled it stays hidden, keeping the prompt lean.
+
+- **GitHub CLI (`gh`)** — let the agent query and act on GitHub (issues, PRs, repos, `gh api`, search). Provide a [Personal Access Token](https://github.com/settings/tokens); it is injected as `GH_TOKEN` so `gh` is authenticated non-interactively. Read operations (`list`/`view`/`status`/`api`/`search`) run without asking; creating issues, PRs, or releases ask for confirmation first. Use **Test token** to verify the token against the GitHub API. In session history mode, a newly enabled tool is advertised starting from the next `/new` conversation.
+
### Skills
Built-in skill editor for creating, editing, and deleting skill files. Changes take effect immediately — no restart needed.
@@ -65,7 +72,10 @@ Manage scheduled tasks:
### History
-Browse conversation history across channels. View individual conversation turns with tool call details.
+Configure conversation history and browse stored turns:
+
+- **Mode** — `injection` (replay the last N user/assistant pairs each request) or `session` (sticky per-chat session, full context, cache-friendly). Reset a conversation any time by sending `/new` (or its alias `/clear`).
+- **Context compaction** (session mode) — when the conversation nears the model's context window, the oldest turns are summarized by a small model while recent turns are kept verbatim, and the user gets a system message saying it happened. Set the trigger threshold here as either a **percent of the context window** or an **absolute token count** (e.g. 200k), plus how many recent turns to keep. The compaction model is configured in the **LLM** tab (under *History Compaction*). Has no effect in injection mode (which is already windowed).
### Logs
diff --git a/docs/content/docs/architecture.mdx b/docs/content/docs/architecture.mdx
index 384741d..93eb837 100644
--- a/docs/content/docs/architecture.mdx
+++ b/docs/content/docs/architecture.mdx
@@ -70,12 +70,14 @@ The agent becomes a **thin orchestrator**: it reads skill files, passes them to
The brain of MPA. Implements the LLM tool-use loop:
1. Load conversation history
-2. Build system prompt (skills, character, personalia, memories)
-3. Call the LLM
-4. Handle tool calls with permission checks
-5. Save conversation turn and extract memories
-
-The agent uses a single `run_command` meta-tool that executes any whitelisted CLI command, plus structured tools for safety-critical write operations (`send_email`, `send_message`, `create_calendar_event`).
+2. Build the static system prompt (skills, character, personalia, memories, active tools). In session mode this is snapshotted once per conversation (rebuilt after `/new`) and reused every turn, so the cacheable prefix stays stable; on Anthropic it is sent with a `cache_control` breakpoint so the tools + system prefix is not reprocessed each turn
+3. Inject the live date/time (and any per-request execution plan) at the start of the current user message — so the agent always knows "now" without mutating the cached prefix
+4. Call the LLM
+5. Handle tool calls with permission checks
+6. Save conversation turn and extract memories
+7. In session mode, if the provider-reported context size crosses the configured threshold, compact: summarize the oldest turns with a small model, keep recent turns verbatim, and notify the user with a system message (see `core/compaction.py`)
+
+The agent uses a single `run_command` meta-tool that executes any whitelisted CLI command, plus structured tools for safety-critical write operations (`send_email`, `send_message`, `create_calendar_event`). Optional external tools (e.g. the GitHub `gh` CLI, configured under the **Tools** tab) are authenticated and advertised to the model only when enabled — see `core/tools.py`.
### LLM Provider (`core/llm.py`)
diff --git a/tests/test_compaction.py b/tests/test_compaction.py
new file mode 100644
index 0000000..6107e4c
--- /dev/null
+++ b/tests/test_compaction.py
@@ -0,0 +1,226 @@
+"""Tests for conversation compaction, token-usage capture, and the /clear alias."""
+
+from __future__ import annotations
+
+from types import SimpleNamespace
+
+import pytest
+
+from core.compaction import (
+ compact_messages,
+ compaction_threshold_tokens,
+ effective_window,
+ should_compact,
+)
+from core.config import CompactionConfig, Config
+from core.history import ConversationHistory
+from core.llm import _anthropic_usage, _openai_usage
+
+
+class FakeLLM:
+ """Minimal stand-in exposing the async generate_text used by compaction."""
+
+ def __init__(self, summary: str = "SUMMARY") -> None:
+ self.summary = summary
+ self.calls = 0
+
+ async def generate_text(self, *, model: str, prompt: str, max_tokens: int = 2048) -> str:
+ self.calls += 1
+ return self.summary
+
+
+# ---------------------------------------------------------------------------
+# Threshold logic
+# ---------------------------------------------------------------------------
+
+
+def test_effective_window_known_and_fallback() -> None:
+ cfg = CompactionConfig(context_window=12345)
+ assert effective_window(cfg, "claude-haiku-4-5") == 200_000
+ assert effective_window(cfg, "claude-opus-4-8") == 1_000_000
+ assert effective_window(cfg, "some-unknown-model") == 12345
+
+
+def test_threshold_percent_vs_tokens() -> None:
+ pct = CompactionConfig(threshold_type="percent", threshold_percent=80)
+ assert compaction_threshold_tokens(pct, "claude-haiku-4-5") == 160_000
+ tok = CompactionConfig(threshold_type="tokens", threshold_tokens=150_000)
+ assert compaction_threshold_tokens(tok, "claude-haiku-4-5") == 150_000
+
+
+def test_should_compact_gates() -> None:
+ cfg = CompactionConfig(enabled=True, threshold_type="tokens", threshold_tokens=1000)
+ assert should_compact(cfg, 1000, "m") is True
+ assert should_compact(cfg, 999, "m") is False
+ assert should_compact(cfg, 0, "m") is False # no usage info
+ disabled = CompactionConfig(enabled=False, threshold_type="tokens", threshold_tokens=1000)
+ assert should_compact(disabled, 5000, "m") is False
+
+
+# ---------------------------------------------------------------------------
+# Usage capture
+# ---------------------------------------------------------------------------
+
+
+def test_anthropic_usage_sums_cache_into_context() -> None:
+ resp = SimpleNamespace(
+ usage=SimpleNamespace(
+ input_tokens=100,
+ output_tokens=20,
+ cache_read_input_tokens=300,
+ cache_creation_input_tokens=50,
+ )
+ )
+ u = _anthropic_usage(resp)
+ assert u["context_tokens"] == 450 # 100 + 300 + 50
+ assert u["output_tokens"] == 20
+
+
+def test_openai_usage_uses_prompt_tokens() -> None:
+ resp = SimpleNamespace(usage=SimpleNamespace(prompt_tokens=777, completion_tokens=33))
+ u = _openai_usage(resp)
+ assert u["context_tokens"] == 777
+ assert u["output_tokens"] == 33
+
+
+def test_usage_none_when_absent() -> None:
+ assert _anthropic_usage(SimpleNamespace(usage=None)) is None
+ assert _openai_usage(SimpleNamespace(usage=None)) is None
+
+
+# ---------------------------------------------------------------------------
+# compact_messages
+# ---------------------------------------------------------------------------
+
+
+def _session_with_tool_pair() -> list[dict]:
+ return [
+ {"role": "user", "content": "u1"},
+ {"role": "assistant", "content": "a1"},
+ {"role": "user", "content": "u2"},
+ {
+ "role": "assistant",
+ "content": [{"type": "tool_use", "id": "t", "name": "x", "input": {}}],
+ },
+ {"role": "user", "content": [{"type": "tool_result", "tool_use_id": "t", "content": "r"}]},
+ {"role": "assistant", "content": "a2"},
+ {"role": "user", "content": "u3"},
+ {"role": "assistant", "content": "a3"},
+ ]
+
+
+@pytest.mark.asyncio
+async def test_compact_keeps_recent_and_summarizes_rest() -> None:
+ llm = FakeLLM("THE SUMMARY")
+ msgs = _session_with_tool_pair()
+ result = await compact_messages(llm, "m", msgs, keep_recent_turns=1)
+ assert result is not None
+ new, summary = result
+ assert summary == "THE SUMMARY"
+ assert llm.calls == 1
+ # Rebuilt as: user(summary), assistant(ack), then the last real user turn verbatim.
+ assert new[0]["role"] == "user" and "THE SUMMARY" in new[0]["content"]
+ assert new[1]["role"] == "assistant"
+ assert new[2:] == [{"role": "user", "content": "u3"}, {"role": "assistant", "content": "a3"}]
+ # Valid alternation: never two same-role in a row.
+ roles = [m["role"] for m in new]
+ assert all(roles[i] != roles[i + 1] for i in range(len(roles) - 1))
+
+
+@pytest.mark.asyncio
+async def test_compact_does_not_split_tool_pair() -> None:
+ # Keeping 2 turns cuts before u2 — the tool_use/tool_result pair stays together
+ # in the summarized prefix, never straddling the boundary.
+ llm = FakeLLM()
+ msgs = _session_with_tool_pair()
+ new, _ = await compact_messages(llm, "m", msgs, keep_recent_turns=2)
+ tail = new[2:]
+ # The kept tail must start with a real user message (not a tool_result carrier).
+ first = tail[0]
+ assert first["role"] == "user" and isinstance(first["content"], str)
+
+
+@pytest.mark.asyncio
+async def test_compact_noop_when_too_few_turns() -> None:
+ llm = FakeLLM()
+ msgs = _session_with_tool_pair() # 3 real user turns
+ assert await compact_messages(llm, "m", msgs, keep_recent_turns=3) is None
+ assert llm.calls == 0
+
+
+# ---------------------------------------------------------------------------
+# History.replace_session
+# ---------------------------------------------------------------------------
+
+
+@pytest.mark.asyncio
+async def test_replace_session_rewrites_messages(tmp_path) -> None:
+ h = ConversationHistory(db_path=str(tmp_path / "h.db"))
+ await h.append_session_message("telegram", "u", {"role": "user", "content": "old"})
+ await h.replace_session("telegram", "u", [{"role": "user", "content": "new"}])
+ assert await h.get_session("telegram", "u") == [{"role": "user", "content": "new"}]
+ # Cold instance reads the rewritten rows.
+ h2 = ConversationHistory(db_path=str(tmp_path / "h.db"))
+ assert await h2.get_session("telegram", "u") == [{"role": "user", "content": "new"}]
+
+
+# ---------------------------------------------------------------------------
+# Agent integration: _maybe_compact + /clear alias
+# ---------------------------------------------------------------------------
+
+
+@pytest.fixture
+def agent(tmp_path, monkeypatch):
+ monkeypatch.chdir(tmp_path)
+ from core.agent import AgentCore
+
+ a = AgentCore(Config())
+ a.history_mode = "session"
+ return a
+
+
+@pytest.mark.asyncio
+async def test_maybe_compact_replaces_session_and_notifies(agent, monkeypatch) -> None:
+ agent.config.compaction.enabled = True
+ agent.config.compaction.threshold_type = "tokens"
+ agent.config.compaction.threshold_tokens = 100
+ agent.config.compaction.keep_recent_turns = 1
+
+ for m in _session_with_tool_pair():
+ await agent.history.append_session_message("telegram", "u", m, "")
+
+ monkeypatch.setattr(agent, "_background_llm", lambda provider: FakeLLM("S"))
+ response = SimpleNamespace(usage={"context_tokens": 999})
+
+ notice = await agent._maybe_compact("telegram", "u", "", response)
+ assert notice is not None and "summarized" in notice.lower()
+ session = await agent.history.get_session("telegram", "u")
+ assert "S" in session[0]["content"]
+ assert session[-2:] == [
+ {"role": "user", "content": "u3"},
+ {"role": "assistant", "content": "a3"},
+ ]
+
+
+@pytest.mark.asyncio
+async def test_maybe_compact_below_threshold_noop(agent, monkeypatch) -> None:
+ agent.config.compaction.enabled = True
+ agent.config.compaction.threshold_type = "tokens"
+ agent.config.compaction.threshold_tokens = 100000
+ for m in _session_with_tool_pair():
+ await agent.history.append_session_message("telegram", "u", m, "")
+ monkeypatch.setattr(agent, "_background_llm", lambda provider: FakeLLM("S"))
+ response = SimpleNamespace(usage={"context_tokens": 50})
+ assert await agent._maybe_compact("telegram", "u", "", response) is None
+
+
+@pytest.mark.asyncio
+async def test_clear_alias_clears_conversation(agent) -> None:
+ resp = await agent.process("/clear", channel="telegram", user_id="u", chat_id="")
+ assert resp.text == "Conversation cleared."
+
+
+@pytest.mark.asyncio
+async def test_new_still_clears_conversation(agent) -> None:
+ resp = await agent.process("/new", channel="telegram", user_id="u", chat_id="")
+ assert resp.text == "Conversation cleared."
diff --git a/tests/test_tools.py b/tests/test_tools.py
new file mode 100644
index 0000000..587d760
--- /dev/null
+++ b/tests/test_tools.py
@@ -0,0 +1,168 @@
+"""Tests for optional tools, per-turn datetime injection, and prompt caching."""
+
+from __future__ import annotations
+
+import pytest
+
+from core.config import Config
+from core.history import ConversationHistory
+from core.prompt_builder import build_prompt_sections
+from core.tools import active_tool_prompts, tool_env
+
+# ---------------------------------------------------------------------------
+# Tools registry
+# ---------------------------------------------------------------------------
+
+
+def test_gh_tool_inactive_by_default() -> None:
+ cfg = Config()
+ assert active_tool_prompts(cfg) == []
+ assert tool_env(cfg) == {}
+
+
+def test_gh_tool_env_and_advert_when_enabled() -> None:
+ cfg = Config()
+ cfg.tools.gh.enabled = True
+ cfg.tools.gh.token = "ghp_secret"
+ assert tool_env(cfg) == {"GH_TOKEN": "ghp_secret"}
+ blocks = active_tool_prompts(cfg)
+ assert len(blocks) == 1
+ assert "gh" in blocks[0]
+
+
+def test_gh_enabled_without_token_has_no_env() -> None:
+ cfg = Config()
+ cfg.tools.gh.enabled = True # no token
+ assert tool_env(cfg) == {}
+ # Still advertised so the agent knows the capability exists.
+ assert active_tool_prompts(cfg)
+
+
+# ---------------------------------------------------------------------------
+# Static system prompt: no datetime, tool advert gated on activation
+# ---------------------------------------------------------------------------
+
+
+def _sections(cfg: Config):
+ return build_prompt_sections(
+ config=cfg,
+ history_mode="session",
+ skills_index="",
+ memories="",
+ reflections="",
+ decomposed_goal=None,
+ )
+
+
+def test_static_prompt_has_no_datetime() -> None:
+ cfg = Config()
+ sections = _sections(cfg)
+ # The static prompt must not bake in a concrete date/time (it is injected
+ # per turn instead), so the prefix stays stable and cacheable.
+ assert "Today is" not in sections.full_prompt
+ assert "Current time:" not in sections.full_prompt
+ assert cfg.agent.timezone in sections.intro
+
+
+def test_tools_section_only_when_enabled() -> None:
+ cfg = Config()
+ assert _sections(cfg).tools == ""
+ cfg.tools.gh.enabled = True
+ cfg.tools.gh.token = "ghp_x"
+ sections = _sections(cfg)
+ assert "" in sections.tools
+ assert sections.tools in sections.full_prompt
+
+
+# ---------------------------------------------------------------------------
+# Session system snapshot
+# ---------------------------------------------------------------------------
+
+
+@pytest.mark.asyncio
+async def test_session_system_snapshot_roundtrip(tmp_path) -> None:
+ history = ConversationHistory(db_path=str(tmp_path / "h.db"))
+ assert await history.get_session_system("telegram", "u1") is None
+ await history.set_session_system("telegram", "u1", "SYSTEM-A")
+ assert await history.get_session_system("telegram", "u1") == "SYSTEM-A"
+
+
+@pytest.mark.asyncio
+async def test_session_system_survives_new_instance(tmp_path) -> None:
+ db = str(tmp_path / "h.db")
+ h1 = ConversationHistory(db_path=db)
+ await h1.set_session_system("telegram", "u1", "SYSTEM-A")
+ # Fresh instance (cold cache) must load the snapshot from disk.
+ h2 = ConversationHistory(db_path=db)
+ assert await h2.get_session_system("telegram", "u1") == "SYSTEM-A"
+
+
+@pytest.mark.asyncio
+async def test_clear_session_drops_system_snapshot(tmp_path) -> None:
+ history = ConversationHistory(db_path=str(tmp_path / "h.db"))
+ await history.set_session_system("telegram", "u1", "SYSTEM-A")
+ await history.clear_session("telegram", "u1")
+ assert await history.get_session_system("telegram", "u1") is None
+
+
+@pytest.mark.asyncio
+async def test_clear_drops_system_snapshot(tmp_path) -> None:
+ history = ConversationHistory(db_path=str(tmp_path / "h.db"))
+ await history.set_session_system("telegram", "u1", "SYSTEM-A")
+ await history.clear("telegram", "u1")
+ assert await history.get_session_system("telegram", "u1") is None
+
+
+# ---------------------------------------------------------------------------
+# Agent: per-turn preamble + user-message injection + session caching
+# ---------------------------------------------------------------------------
+
+
+@pytest.fixture
+def agent(tmp_path, monkeypatch):
+ monkeypatch.chdir(tmp_path)
+ from core.agent import AgentCore
+
+ return AgentCore(Config())
+
+
+def test_turn_preamble_carries_datetime(agent) -> None:
+ preamble = agent._turn_preamble(None)
+ assert "Current date & time" in preamble
+ # No execution plan when the goal was not decomposed.
+ assert "execution_plan" not in preamble
+
+
+def test_build_user_message_prepends_preamble(agent) -> None:
+ preamble = agent._turn_preamble(None)
+ msg = agent._build_user_message("hello", None, preamble)
+ assert msg["role"] == "user"
+ assert msg["content"].startswith(preamble)
+ assert msg["content"].endswith("hello")
+
+
+def test_build_user_message_no_preamble_is_plain(agent) -> None:
+ msg = agent._build_user_message("hello", None, "")
+ assert msg["content"] == "hello"
+
+
+@pytest.mark.asyncio
+async def test_session_system_built_once_and_reused(agent, monkeypatch) -> None:
+ calls = {"n": 0}
+
+ async def fake_build() -> str:
+ calls["n"] += 1
+ return f"SYSTEM-{calls['n']}"
+
+ monkeypatch.setattr(agent, "_build_system_prompt", fake_build)
+
+ first = await agent._session_system_prompt("telegram", "u1", "")
+ second = await agent._session_system_prompt("telegram", "u1", "")
+ assert first == second == "SYSTEM-1"
+ assert calls["n"] == 1 # built only once for the session
+
+ # After /new (clear), it rebuilds.
+ await agent.history.clear_session("telegram", "u1")
+ third = await agent._session_system_prompt("telegram", "u1", "")
+ assert third == "SYSTEM-2"
+ assert calls["n"] == 2
diff --git a/uv.lock b/uv.lock
index d5e54b1..a2b2401 100644
--- a/uv.lock
+++ b/uv.lock
@@ -764,7 +764,7 @@ wheels = [
[[package]]
name = "mpa"
-version = "0.10.0"
+version = "0.11.0"
source = { virtual = "." }
dependencies = [
{ name = "aiosqlite" },