diff --git a/Dockerfile b/Dockerfile index fae7bc7..7056e3f 100644 --- a/Dockerfile +++ b/Dockerfile @@ -13,6 +13,15 @@ RUN apt-get update && apt-get install -y --no-install-recommends \ # Install Himalaya CLI (pre-built Rust binary for email management) RUN curl -sSL https://raw.githubusercontent.com/pimalaya/himalaya/master/install.sh | sh +# Install GitHub CLI (gh) from the official apt repository (Tools tab: gh) +RUN curl -fsSL https://cli.github.com/packages/githubcli-archive-keyring.gpg \ + -o /usr/share/keyrings/githubcli-archive-keyring.gpg \ + && chmod go+r /usr/share/keyrings/githubcli-archive-keyring.gpg \ + && echo "deb [arch=$(dpkg --print-architecture) signed-by=/usr/share/keyrings/githubcli-archive-keyring.gpg] https://cli.github.com/packages stable main" \ + > /etc/apt/sources.list.d/github-cli.list \ + && apt-get update && apt-get install -y --no-install-recommends gh \ + && rm -rf /var/lib/apt/lists/* + # Install uv COPY --from=ghcr.io/astral-sh/uv:latest /uv /usr/local/bin/uv diff --git a/api/admin.py b/api/admin.py index 675e631..cfdd7e9 100644 --- a/api/admin.py +++ b/api/admin.py @@ -8,6 +8,7 @@ from __future__ import annotations +import asyncio import collections import hashlib import json @@ -36,6 +37,8 @@ DEFAULT_TOOL_USAGE_BLOCK, build_prompt_sections, ) +from core.tools import registry as tool_registry +from core.tools import tool_env from core.wacli import WacliManager if TYPE_CHECKING: @@ -545,6 +548,8 @@ async def _lifespan(app: FastAPI): # noqa: ANN001 _HISTORY_PREFIX = "history." _EMAIL_PREFIX = "email." _PROMPT_PREFIX = "prompt." + _TOOLS_PREFIX = "tools." + _COMPACTION_PREFIX = "compaction." def _is_managed_key(key: str) -> bool: """Return True if this key is managed by a dedicated tab (not Config).""" @@ -562,6 +567,8 @@ def _is_managed_key(key: str) -> bool: _HISTORY_PREFIX, _EMAIL_PREFIX, _PROMPT_PREFIX, + _TOOLS_PREFIX, + _COMPACTION_PREFIX, ): if key.startswith(prefix): return True @@ -801,6 +808,8 @@ async def partial_llm() -> HTMLResponse: tr_enabled = tr_enabled if tr_enabled is not None else "true" tr_provider = await config_store.get("task_reflection.provider") or "anthropic" tr_model = await config_store.get("task_reflection.model") or "claude-haiku-4-5" + compaction_provider = await config_store.get("compaction.provider") or "anthropic" + compaction_model = await config_store.get("compaction.model") or "claude-haiku-4-5" prompt_tool_usage_override = await config_store.get("prompt.tool_usage_override") or "" prompt_history_override = await config_store.get("prompt.history_handling_override") or "" prompt_capture_enabled = await config_store.get("admin.capture_prompts") @@ -830,6 +839,8 @@ async def partial_llm() -> HTMLResponse: tr_enabled=tr_enabled, tr_provider=tr_provider, tr_model=tr_model, + compaction_provider=compaction_provider, + compaction_model=compaction_model, prompt_tool_usage_override=prompt_tool_usage_override, prompt_history_override=prompt_history_override, default_tool_usage=DEFAULT_TOOL_USAGE_BLOCK, @@ -837,6 +848,49 @@ async def partial_llm() -> HTMLResponse: prompt_capture_enabled=prompt_capture_enabled, ) + @app.get("/partials/tools", dependencies=[Depends(auth)]) + async def partial_tools() -> HTMLResponse: + """Tools tab partial — manage optional external CLI tools (e.g. gh).""" + gh_enabled = await config_store.get("tools.gh.enabled") + gh_enabled = gh_enabled if gh_enabled is not None else "false" + gh_token = await config_store.get("tools.gh.token") or "" + return _render_partial( + "partials/tools.html", + tools=tool_registry(), + gh_enabled=gh_enabled, + gh_token=gh_token, + ) + + @app.post("/tools/gh/test", dependencies=[Depends(auth)]) + async def test_gh_tool(request: Request) -> dict: + """Verify a GitHub token by calling the GitHub API as that token.""" + body = await request.json() + token = str(body.get("token", "")).strip() + if not token: + return {"ok": False, "error": "Token is required."} + try: + resp = await asyncio.to_thread( + http_requests.get, + "https://api.github.com/user", + headers={ + "Authorization": f"Bearer {token}", + "Accept": "application/vnd.github+json", + "X-GitHub-Api-Version": "2022-11-28", + }, + timeout=10, + ) + except Exception as exc: # noqa: BLE001 — surface any network error to the UI + return {"ok": False, "error": str(exc)} + if resp.status_code == 200: + login = resp.json().get("login", "") + return {"ok": True, "login": login} + if resp.status_code in (401, 403): + return { + "ok": False, + "error": "Token rejected by GitHub (invalid or insufficient scope).", + } + return {"ok": False, "error": f"GitHub returned HTTP {resp.status_code}."} + @app.get("/partials/search", dependencies=[Depends(auth)]) async def partial_search() -> HTMLResponse: """Search tab partial.""" @@ -890,10 +944,23 @@ async def partial_history() -> HTMLResponse: """History tab partial.""" mode = await config_store.get("history.mode") or "injection" max_turns = await config_store.get("history.max_turns") or "10" + c_enabled = await config_store.get("compaction.enabled") + c_enabled = c_enabled if c_enabled is not None else "true" + c_threshold_type = await config_store.get("compaction.threshold_type") or "percent" + c_threshold_percent = await config_store.get("compaction.threshold_percent") or "80" + c_threshold_tokens = await config_store.get("compaction.threshold_tokens") or "150000" + c_context_window = await config_store.get("compaction.context_window") or "200000" + c_keep_recent_turns = await config_store.get("compaction.keep_recent_turns") or "4" return _render_partial( "partials/history.html", mode=mode, max_turns=max_turns, + compaction_enabled=c_enabled, + compaction_threshold_type=c_threshold_type, + compaction_threshold_percent=c_threshold_percent, + compaction_threshold_tokens=c_threshold_tokens, + compaction_context_window=c_context_window, + compaction_keep_recent_turns=c_keep_recent_turns, ) @app.get("/partials/logs", dependencies=[Depends(auth)]) @@ -1174,6 +1241,7 @@ async def patch_config(body: ConfigPatchIn) -> dict: new_config = await config_store.export_to_config() agent.config = new_config agent.llm = LLMClient.from_agent_config(new_config.agent) + agent.executor.tool_env = tool_env(new_config) agent.history_mode = new_config.history.mode agent.memory.long_term_limit = new_config.memory.long_term_limit agent.reflections.max_reflections = new_config.task_reflection.max_reflections diff --git a/api/templates/base.html b/api/templates/base.html index f5aeb32..587183c 100644 --- a/api/templates/base.html +++ b/api/templates/base.html @@ -42,7 +42,7 @@ } window.showToast = showToast; - function llmTab(provider, apiKey, model, openaiKey, openaiBaseUrl, googleKey, googleBaseUrl, grokKey, grokBaseUrl, deepseekKey, deepseekBaseUrl, extractionProvider, extractionModel, consolidationProvider, consolidationModel, gdEnabled, gdProvider, gdModel, trEnabled, trProvider, trModel, promptToolUsageOverride, promptHistoryOverride, defaultToolUsage, defaultHistoryHandling, promptCaptureEnabled) { + function llmTab(provider, apiKey, model, openaiKey, openaiBaseUrl, googleKey, googleBaseUrl, grokKey, grokBaseUrl, deepseekKey, deepseekBaseUrl, extractionProvider, extractionModel, consolidationProvider, consolidationModel, gdEnabled, gdProvider, gdModel, trEnabled, trProvider, trModel, promptToolUsageOverride, promptHistoryOverride, defaultToolUsage, defaultHistoryHandling, promptCaptureEnabled, compactionProvider, compactionModel) { const currentProvider = provider || 'anthropic'; return { providerOptions: [ @@ -97,6 +97,10 @@ gdResultOk: false, trResult: '', trResultOk: false, + compactionProvider: compactionProvider || 'anthropic', + compactionModel: compactionModel || 'claude-haiku-4-5', + compactionResult: '', + compactionResultOk: false, result: '', resultOk: false, tests: { diff --git a/api/templates/dashboard.html b/api/templates/dashboard.html index aed07a3..49a33fb 100644 --- a/api/templates/dashboard.html +++ b/api/templates/dashboard.html @@ -80,6 +80,10 @@

Agent Control

@click="select('search')"> Search + + Compaction model is set in the LLM tab. + + + + diff --git a/api/templates/partials/llm.html b/api/templates/partials/llm.html index 5697478..27d33fa 100644 --- a/api/templates/partials/llm.html +++ b/api/templates/partials/llm.html @@ -25,7 +25,9 @@ {{ prompt_history_override|default('', true)|tojson|forceescape }}, {{ default_tool_usage|default('', true)|tojson|forceescape }}, {{ default_history_handling|default('', true)|tojson|forceescape }}, - {{ prompt_capture_enabled|default(false, true)|tojson|forceescape }} + {{ prompt_capture_enabled|default(false, true)|tojson|forceescape }}, + {{ compaction_provider|default('anthropic', true)|tojson|forceescape }}, + {{ compaction_model|default('claude-haiku-4-5', true)|tojson|forceescape }} )">

System Prompt Controls

@@ -415,6 +417,61 @@

Task Reflection

+
+

History Compaction

+

Model used to summarize older conversation turns when a session's context grows large. Enable compaction and set the trigger threshold in the History tab.

+ +
+
+ + +
+
+ + + + + +

A small, fast model is recommended (summarization is cheap).

+
+
+ +
+ +
+ +
+

Anthropic

Configure the Anthropic API key and test the connection.

diff --git a/api/templates/partials/tools.html b/api/templates/partials/tools.html new file mode 100644 index 0000000..91ba92e --- /dev/null +++ b/api/templates/partials/tools.html @@ -0,0 +1,115 @@ +{# Tools tab partial — manage optional external CLI tools #} +
+
+

Tools

+

+ Optional external CLI tools the agent can use. When a tool is enabled it is + authenticated and advertised to the assistant in its system prompt; when + disabled it stays hidden. In session history mode, newly enabled tools are + advertised starting from the next /new conversation. +

+
+ + {# GitHub CLI (gh) #} +
+
+

GitHub CLI (gh)

+
+ + +
+
+

+ Let the agent query and act on GitHub (issues, PRs, repos, gh api, + search). Read operations run without asking; creating issues, PRs or releases + ask for confirmation first. +

+ +
+
+ + + +

+ A GitHub + Personal Access Token. + Injected as GH_TOKEN so gh is authenticated + non-interactively. Grant only the scopes you want the agent to have + (e.g. repo, read:org). +

+
+
+ +
+ + +
+ + + +
+
+ + diff --git a/channels/telegram.py b/channels/telegram.py index 60185bf..32e1b63 100644 --- a/channels/telegram.py +++ b/channels/telegram.py @@ -372,6 +372,9 @@ async def _send_response(self, chat_id: int, response) -> None: await self.send(chat_id, response.text) else: log.warning("Skipping empty response for chat_id=%s", chat_id) + # Out-of-band system notice (e.g. context compaction), sent separately. + if getattr(response, "system_notice", None): + await self.send(chat_id, response.system_notice) async def _finalize_approval_response( self, query: CallbackQuery, resolved: bool, label: str diff --git a/channels/whatsapp.py b/channels/whatsapp.py index 074a601..1435370 100644 --- a/channels/whatsapp.py +++ b/channels/whatsapp.py @@ -122,6 +122,8 @@ async def handle_webhook(self, payload: dict) -> dict: chat_id=chat_id, ) await self.send(sender, response.text) + if getattr(response, "system_notice", None): + await self.send(sender, response.system_notice) return {"ok": True} def _is_allowed(self, sender: str) -> bool: diff --git a/core/agent.py b/core/agent.py index 024a97c..9175d72 100644 --- a/core/agent.py +++ b/core/agent.py @@ -15,6 +15,7 @@ from tavily import TavilyClient +from core.compaction import compact_messages, should_compact from core.config import Config from core.executor import ToolExecutor from core.goal_decomposition import DecomposedGoal, classify_complexity, decompose_goal @@ -28,6 +29,7 @@ from core.scheduler import AgentScheduler from core.skills import SkillsEngine from core.task_reflection import ReflectionStore +from core.tools import tool_env from voice.pipeline import VoicePipeline log = logging.getLogger(__name__) @@ -246,7 +248,7 @@ def __init__(self, config: Config): db_path=config.agent.skills_db_path, seed_dir=config.agent.skills_dir, ) - self.executor = ToolExecutor() + self.executor = ToolExecutor(tool_env=tool_env(config)) self.history = ConversationHistory( db_path=config.history.db_path, max_turns=config.history.max_turns, @@ -294,8 +296,8 @@ async def process( preventing context leakage across chats. """ - # Handle /new command — clear conversational context. - if message.strip().lower() == "/new": + # Handle /new (alias /clear) command — clear conversational context. + if message.strip().lower() in ("/new", "/clear"): if self.history_mode == "session": await self.history.clear_session(channel, user_id, chat_id) else: @@ -308,12 +310,25 @@ async def process( ) return AgentResponse(text="Conversation cleared.") - # Goal decomposition — classify and (if complex) decompose the request + # Goal decomposition — classify and (if complex) decompose the request. + # The resulting plan is request-specific, so it is injected per turn + # (in the user-message preamble), not baked into the static prompt. decomposed_goal: DecomposedGoal | None = None if self.config.goal_decomposition.enabled and channel != "system": decomposed_goal = await self._maybe_decompose(message) - system = await self._build_system_prompt(decomposed_goal=decomposed_goal) + # Per-turn preamble: live date/time + (optional) execution plan. + preamble = self._turn_preamble(decomposed_goal) + + # Static system prompt. In session mode it is snapshotted once at the + # start of the session and reused for every turn (so the static content + # is only built once, not rebuilt and re-sent each turn). In injection + # mode the prompt is windowed/stateless, so it is rebuilt per call. + if self.history_mode == "session": + system = await self._session_system_prompt(channel, user_id, chat_id) + else: + system = await self._build_system_prompt() + if self.config.admin.capture_prompts: self._record_system_prompt( channel=channel, @@ -324,34 +339,116 @@ async def process( if self.history_mode == "session": return await self._process_session( - system, message, channel, user_id, attachments, chat_id + system, preamble, message, channel, user_id, attachments, chat_id ) return await self._process_injection( - system, message, channel, user_id, attachments, chat_id + system, preamble, message, channel, user_id, attachments, chat_id + ) + + def _turn_preamble(self, decomposed_goal: DecomposedGoal | None) -> str: + """Build the per-turn preamble prepended to the current user message. + + Always carries the live date/time (so the agent knows 'now' every turn); + also carries the execution plan when the request was decomposed. + """ + now = datetime.now(ZoneInfo(self.config.agent.timezone)) + stamp = now.strftime("%A, %B %d, %Y %H:%M %Z") + preamble = f"[Current date & time: {stamp}]" + if decomposed_goal: + preamble += ( + "\n\n\n" + "Your request has been analysed and broken into the following sub-goals.\n" + "Follow this plan step-by-step, completing each sub-goal in order " + "(respecting dependencies). Report progress as you go.\n\n" + f"{decomposed_goal.format_for_prompt()}\n" + "" + ) + return preamble + + async def _session_system_prompt(self, channel: str, user_id: str, chat_id: str) -> str: + """Return the session's static system prompt, building it once if needed. + + Built fresh after a ``/new`` (when no snapshot exists), then reused for + the lifetime of the session so the static content is sent only once. + """ + cached = await self.history.get_session_system(channel, user_id, chat_id) + if cached is not None: + return cached + system = await self._build_system_prompt() + await self.history.set_session_system(channel, user_id, system, chat_id) + return system + + async def _maybe_compact( + self, channel: str, user_id: str, chat_id: str, response: Any + ) -> str | None: + """Compact the session if the context exceeds the configured threshold. + + Returns a user-facing notice when compaction happened, else ``None``. + Failures are logged and swallowed — compaction must never break a turn. + """ + cfg = self.config.compaction + if self.history_mode != "session" or not cfg.enabled: + return None + usage = getattr(response, "usage", None) or {} + context_tokens = int(usage.get("context_tokens") or 0) + if not should_compact(cfg, context_tokens, self.config.agent.model): + return None + + session = await self.history.get_session(channel, user_id, chat_id) + try: + llm = self._background_llm(cfg.provider) + result = await compact_messages(llm, cfg.model, session, cfg.keep_recent_turns) + except Exception: + log.exception("Conversation compaction failed") + return None + if not result: + return None + + new_messages, _summary = result + await self.history.replace_session(channel, user_id, new_messages, chat_id) + log.info( + "Compacted session %s/%s/%s: %d → %d messages (~%d ctx tokens)", + channel, + user_id, + chat_id, + len(session), + len(new_messages), + context_tokens, + ) + return ( + f"🗜️ Our conversation was getting large (~{context_tokens:,} tokens). " + "I summarized the earlier part to free up space; recent messages are kept as-is." ) def _build_user_message( self, message: str, attachments: list[Attachment] | None = None, + preamble: str = "", ) -> dict: - """Build the user message dict, handling multimodal content.""" + """Build the user message dict, handling multimodal content. + + ``preamble`` (live date/time + optional execution plan) is prepended to + the message text so the agent always knows 'now' for the current turn. + """ + text = f"{preamble}\n\n{message}" if preamble else message image_attachments = [a for a in (attachments or []) if a.is_image] if image_attachments: content_blocks: list[dict] = [] - if message: - content_blocks.append({"type": "text", "text": message}) + if text: + content_blocks.append({"type": "text", "text": text}) for att in image_attachments: if self.llm.provider == "anthropic": content_blocks.append(att.to_anthropic_block()) else: content_blocks.append(att.to_openai_block()) return {"role": "user", "content": content_blocks} - return {"role": "user", "content": message} + return {"role": "user", "content": text} async def _process_injection( self, system: str, + preamble: str, message: str, channel: str, user_id: str, @@ -368,7 +465,7 @@ async def _process_injection( messages.append({"role": turn["role"], "content": turn["content"]}) # The actual current request — always the last user message. - messages.append(self._build_user_message(message, attachments)) + messages.append(self._build_user_message(message, attachments, preamble)) log.info( "Processing message (injection) from %s/%s/%s: %s", @@ -445,6 +542,7 @@ async def _process_injection( async def _process_session( self, system: str, + preamble: str, message: str, channel: str, user_id: str, @@ -460,8 +558,8 @@ async def _process_session( # Load existing session (from memory cache or DB) session = await self.history.get_session(channel, user_id, chat_id) - # Append the new user message - user_msg = self._build_user_message(message, attachments) + # Append the new user message (with the live date/time preamble) + user_msg = self._build_user_message(message, attachments, preamble) await self.history.append_session_message(channel, user_id, user_msg, chat_id) log.info( @@ -526,6 +624,11 @@ async def _process_session( final_text = response.text log.info("Response: %s", final_text[:200]) + # Compaction — if the context has grown past the configured threshold, + # summarise the oldest turns. ``response.usage`` reflects the full + # session that was just sent, so it's the authoritative context size. + system_notice = await self._maybe_compact(channel, user_id, chat_id, response) + # Check if the LLM wants to respond with voice voice_bytes = await self._maybe_synthesize_voice(final_text) if voice_bytes: @@ -545,7 +648,7 @@ async def _process_session( name=f"task-reflect-{user_id}", ) - return AgentResponse(text=final_text, voice=voice_bytes) + return AgentResponse(text=final_text, voice=voice_bytes, system_notice=system_notice) @staticmethod def _history_message_text(message: str, attachments: list[Attachment] | None = None) -> str: diff --git a/core/compaction.py b/core/compaction.py new file mode 100644 index 0000000..da1d32e --- /dev/null +++ b/core/compaction.py @@ -0,0 +1,181 @@ +"""Conversation compaction for session history mode. + +When a sticky session grows close to the model's context window, the oldest +turns are summarised by a (cheap) LLM into a single synthetic exchange, while +the most recent turns are kept verbatim. This keeps long conversations going +without blowing the context window, and — unlike provider-specific server-side +compaction — works across every provider MPA supports. + +The trigger is the *real* token usage reported by the provider after the turn +(see ``LLMResponse.usage``), so no local tokenizer is needed. +""" + +from __future__ import annotations + +import logging +from typing import Any + +from core.config import CompactionConfig +from core.llm import LLMClient + +log = logging.getLogger(__name__) + +# Known context-window sizes (tokens). Used for percent-mode thresholds. +# Unknown models fall back to CompactionConfig.context_window. +CONTEXT_WINDOWS: dict[str, int] = { + "claude-opus-4-8": 1_000_000, + "claude-opus-4-7": 1_000_000, + "claude-opus-4-6": 1_000_000, + "claude-opus-4-5": 200_000, + "claude-opus-4-1": 200_000, + "claude-sonnet-4-6": 1_000_000, + "claude-4-6-sonnet": 1_000_000, + "claude-sonnet-4-5": 200_000, + "claude-haiku-4-5": 200_000, + "claude-4-5-haiku": 200_000, +} + +_SUMMARY_PROMPT = """\ +You are compacting the earlier part of a conversation between a user and their +personal AI assistant so it fits in a smaller context window. Write a dense, +factual summary that preserves everything needed to continue the conversation +seamlessly. Include: +- The user's goals, requests, and any decisions made. +- Concrete facts, names, identifiers, numbers, file paths, and preferences. +- Results of actions already taken (emails sent, events created, lookups done). +- Open threads or pending follow-ups. + +Do NOT include pleasantries or restate this instruction. Be concise but complete. +Write the summary as plain prose / bullet points. + + +{transcript} + + +Summary:""" + + +def effective_window(config: CompactionConfig, model: str) -> int: + """Return the context window (tokens) to use for percent-mode thresholds.""" + key = (model or "").strip().lower() + return CONTEXT_WINDOWS.get(key, config.context_window) + + +def compaction_threshold_tokens(config: CompactionConfig, model: str) -> int: + """Return the absolute token count at which compaction should trigger.""" + if config.threshold_type == "tokens": + return config.threshold_tokens + window = effective_window(config, model) + return int(window * config.threshold_percent / 100) + + +def should_compact(config: CompactionConfig, context_tokens: int, model: str) -> bool: + """Return True if the current context size warrants compaction.""" + if not config.enabled or not context_tokens: + return False + return context_tokens >= compaction_threshold_tokens(config, model) + + +def _is_real_user_turn(message: dict[str, Any]) -> bool: + """True if this is a genuine user turn (not a tool_result carrier message).""" + if message.get("role") != "user": + return False + content = message.get("content") + if isinstance(content, str): + return True + if isinstance(content, list): + # Tool results are delivered as user messages whose blocks are all + # tool_result; a real user turn has at least one text/image block. + return any(isinstance(b, dict) and b.get("type") != "tool_result" for b in content) + return True + + +def _block_text(content: Any) -> str: + """Flatten a message's content into plain text for summarisation.""" + if isinstance(content, str): + return content + if not isinstance(content, list): + return str(content) + parts: list[str] = [] + for block in content: + if not isinstance(block, dict): + parts.append(str(block)) + continue + btype = block.get("type") + if btype == "text": + parts.append(str(block.get("text", ""))) + elif btype == "tool_use": + parts.append(f"[tool_use {block.get('name', '')}: {block.get('input', {})}]") + elif btype == "tool_result": + parts.append(f"[tool_result: {block.get('content', '')}]") + elif btype == "image": + parts.append("[image]") + return "\n".join(p for p in parts if p) + + +def _render_transcript(messages: list[dict[str, Any]]) -> str: + lines: list[str] = [] + for msg in messages: + role = msg.get("role", "?") + text = _block_text(msg.get("content")).strip() + if text: + lines.append(f"{role.upper()}: {text}") + return "\n\n".join(lines) + + +async def compact_messages( + llm: LLMClient, + model: str, + messages: list[dict[str, Any]], + keep_recent_turns: int, +) -> tuple[list[dict[str, Any]], str] | None: + """Summarise the oldest turns of a session, keeping the recent ones verbatim. + + Returns ``(new_messages, summary)`` or ``None`` if there is nothing worth + compacting (too few turns). The rebuilt session is: + + [user(), assistant(), *recent_turns_verbatim] + + The cut is made at a real user-turn boundary so a ``tool_use`` block is + never split from its ``tool_result``. + """ + boundaries = [i for i, m in enumerate(messages) if _is_real_user_turn(m)] + # Need more turns than we intend to keep, otherwise there's nothing to fold. + if len(boundaries) <= keep_recent_turns: + return None + + cut = boundaries[len(boundaries) - keep_recent_turns] + prefix = messages[:cut] + tail = messages[cut:] + if not prefix: + return None + + transcript = _render_transcript(prefix) + if not transcript.strip(): + return None + + summary = await llm.generate_text( + model=model, + prompt=_SUMMARY_PROMPT.format(transcript=transcript), + max_tokens=2048, + ) + summary = (summary or "").strip() + if not summary: + return None + + new_messages: list[dict[str, Any]] = [ + { + "role": "user", + "content": ( + "Here is a summary of the earlier part of our conversation " + "(it was compacted to save space):\n\n" + f"\n{summary}\n" + ), + }, + { + "role": "assistant", + "content": "Understood — I'll continue with that summarized context in mind.", + }, + *tail, + ] + return new_messages, summary diff --git a/core/config.py b/core/config.py index 47bfdfb..870813c 100644 --- a/core/config.py +++ b/core/config.py @@ -155,6 +155,23 @@ class TaskReflectionConfig(BaseModel): max_reflections: int = 50 # max reflections to keep for prompt injection +class CompactionConfig(BaseModel): + """Conversation compaction — summarise old turns when the context grows. + + Only applies in session history mode. The threshold is evaluated against + the real token usage reported by the provider after each turn. + """ + + enabled: bool = True + provider: str = "anthropic" + model: str = "claude-haiku-4-5" + threshold_type: str = "percent" # "percent" (of context window) or "tokens" (absolute) + threshold_percent: int = 80 # trigger at this % of the model's context window + threshold_tokens: int = 150000 # absolute trigger when threshold_type == "tokens" + context_window: int = 200000 # fallback window for % mode when the model is unknown + keep_recent_turns: int = 4 # most-recent user turns kept verbatim after compaction + + class SearchConfig(BaseModel): enabled: bool = False provider: str = "tavily" @@ -171,6 +188,19 @@ class PromptConfig(BaseModel): history_handling_override: str = "" +class GhToolConfig(BaseModel): + """GitHub CLI (`gh`) tool — auth via a Personal Access Token.""" + + enabled: bool = False + token: str = "" # GitHub PAT, injected as GH_TOKEN when running `gh` + + +class ToolsConfig(BaseModel): + """Optional external CLI tools the agent can use (see core/tools.py).""" + + gh: GhToolConfig = GhToolConfig() + + class Config(BaseModel): agent: AgentConfig = AgentConfig() channels: ChannelsConfig = ChannelsConfig() @@ -182,9 +212,11 @@ class Config(BaseModel): memory: MemoryConfig = MemoryConfig() goal_decomposition: GoalDecompositionConfig = GoalDecompositionConfig() task_reflection: TaskReflectionConfig = TaskReflectionConfig() + compaction: CompactionConfig = CompactionConfig() search: SearchConfig = SearchConfig() you: YouConfig = YouConfig() prompt: PromptConfig = PromptConfig() + tools: ToolsConfig = ToolsConfig() def load_config(path: str | Path = "config.yml") -> Config: diff --git a/core/config_store.py b/core/config_store.py index 4fd91fc..6897a31 100644 --- a/core/config_store.py +++ b/core/config_store.py @@ -55,6 +55,7 @@ "admin.password_hash", "admin.password_salt", "search.api_key", + "tools.gh.token", "calendar.providers", "calendar.google_oauth_client_id", "calendar.google_oauth_client_secret", diff --git a/core/executor.py b/core/executor.py index 5b11669..185e15b 100644 --- a/core/executor.py +++ b/core/executor.py @@ -33,6 +33,11 @@ def _find_wacli_bin() -> str: class ToolExecutor: """Executes CLI commands on behalf of the LLM.""" + def __init__(self, tool_env: dict[str, str] | None = None) -> None: + # Extra environment for optional tools (e.g. GH_TOKEN for `gh`). + # Injected into every spawned subprocess; updated on config reload. + self.tool_env: dict[str, str] = dict(tool_env or {}) + ALLOWED_PREFIXES = [ "curl", "himalaya", @@ -90,9 +95,12 @@ async def run_command_trusted(self, command: str, timeout: int = 30) -> dict: async def _exec(self, command: str, timeout: int) -> dict: """Run a shell command and capture output.""" env = None - if "himalaya" in command: + if "himalaya" in command or self.tool_env: env = os.environ.copy() - env.update(himalaya_env()) + if "himalaya" in command: + env.update(himalaya_env()) + # Tool auth (e.g. GH_TOKEN) — only set when a tool is enabled. + env.update(self.tool_env) proc = await asyncio.create_subprocess_shell( command, stdout=asyncio.subprocess.PIPE, diff --git a/core/history.py b/core/history.py index 4da6599..7156658 100644 --- a/core/history.py +++ b/core/history.py @@ -42,6 +42,14 @@ ); CREATE INDEX IF NOT EXISTS idx_session_lookup ON session_messages(channel, user_id, chat_id, id); +CREATE TABLE IF NOT EXISTS session_system ( + channel TEXT NOT NULL, + user_id TEXT NOT NULL, + chat_id TEXT NOT NULL DEFAULT '', + system TEXT NOT NULL, + created_at DATETIME DEFAULT (datetime('now')), + PRIMARY KEY (channel, user_id, chat_id) +); """ # Migrations applied after initial schema creation. @@ -77,6 +85,8 @@ def __init__(self, db_path: str = "data/history.db", max_turns: int = 20): self._ready = False # In-memory cache for sticky sessions: {(channel, user_id, chat_id): [message_dicts]} self._sessions: dict[tuple[str, str, str], list[dict[str, Any]]] = {} + # In-memory cache for the static system prompt snapshot per session. + self._session_system: dict[tuple[str, str, str], str] = {} async def _ensure_schema(self) -> None: if self._ready: @@ -159,9 +169,14 @@ async def clear(self, channel: str, user_id: str, chat_id: str = "") -> None: "DELETE FROM session_messages WHERE channel = ? AND user_id = ? AND chat_id = ?", (channel, user_id, chat_id), ) + await db.execute( + "DELETE FROM session_system WHERE channel = ? AND user_id = ? AND chat_id = ?", + (channel, user_id, chat_id), + ) await db.commit() # Clear in-memory session cache self._sessions.pop((channel, user_id, chat_id), None) + self._session_system.pop((channel, user_id, chat_id), None) # ------------------------------------------------------------------- # Session mode — sticky session per (channel, user_id, chat_id) @@ -230,6 +245,33 @@ async def append_session_messages( ) await db.commit() + async def replace_session( + self, + channel: str, + user_id: str, + messages: list[dict[str, Any]], + chat_id: str = "", + ) -> None: + """Atomically replace a session's messages (used by compaction). + + Rewrites both the in-memory cache and the persisted ``session_messages`` + rows. The system-prompt snapshot is left untouched. + """ + await self._ensure_schema() + key = (channel, user_id, chat_id) + self._sessions[key] = list(messages) + async with aiosqlite.connect(self.db_path) as db: + await db.execute( + "DELETE FROM session_messages WHERE channel = ? AND user_id = ? AND chat_id = ?", + (channel, user_id, chat_id), + ) + await db.executemany( + "INSERT INTO session_messages (channel, user_id, chat_id, message) " + "VALUES (?, ?, ?, ?)", + [(channel, user_id, chat_id, json.dumps(m)) for m in messages], + ) + await db.commit() + async def clear_session(self, channel: str, user_id: str, chat_id: str = "") -> None: """Clear just the sticky session for a (channel, user_id, chat_id) triple.""" await self._ensure_schema() @@ -238,5 +280,52 @@ async def clear_session(self, channel: str, user_id: str, chat_id: str = "") -> "DELETE FROM session_messages WHERE channel = ? AND user_id = ? AND chat_id = ?", (channel, user_id, chat_id), ) + await db.execute( + "DELETE FROM session_system WHERE channel = ? AND user_id = ? AND chat_id = ?", + (channel, user_id, chat_id), + ) await db.commit() self._sessions.pop((channel, user_id, chat_id), None) + self._session_system.pop((channel, user_id, chat_id), None) + + # ------------------------------------------------------------------- + # Session mode — static system prompt snapshot + # ------------------------------------------------------------------- + + async def get_session_system(self, channel: str, user_id: str, chat_id: str = "") -> str | None: + """Return the cached static system prompt for a session, or None if unset. + + The system prompt is snapshotted once at the start of a session (after a + ``/new``) and reused for every subsequent turn, so the static content is + only built/sent once instead of being rebuilt each turn. + """ + await self._ensure_schema() + key = (channel, user_id, chat_id) + if key in self._session_system: + return self._session_system[key] + async with aiosqlite.connect(self.db_path) as db: + cursor = await db.execute( + "SELECT system FROM session_system " + "WHERE channel = ? AND user_id = ? AND chat_id = ?", + (channel, user_id, chat_id), + ) + row = await cursor.fetchone() + if row is None: + return None + self._session_system[key] = row[0] + return row[0] + + async def set_session_system( + self, channel: str, user_id: str, system: str, chat_id: str = "" + ) -> None: + """Persist the static system prompt snapshot for a session.""" + await self._ensure_schema() + self._session_system[(channel, user_id, chat_id)] = system + async with aiosqlite.connect(self.db_path) as db: + await db.execute( + "INSERT INTO session_system (channel, user_id, chat_id, system) " + "VALUES (?, ?, ?, ?) " + "ON CONFLICT(channel, user_id, chat_id) DO UPDATE SET system = excluded.system", + (channel, user_id, chat_id, system), + ) + await db.commit() diff --git a/core/llm.py b/core/llm.py index b0f3bab..43ae6d4 100644 --- a/core/llm.py +++ b/core/llm.py @@ -32,6 +32,44 @@ class LLMResponse: text: str tool_calls: list[LLMToolCall] raw: object | None = None + # Token usage for the request, when the provider reports it. Keys: + # input_tokens, output_tokens, cache_read_input_tokens, + # cache_creation_input_tokens, context_tokens (= full prompt size). + usage: dict[str, int] | None = None + + +def _anthropic_usage(response: Any) -> dict[str, int] | None: + u = getattr(response, "usage", None) + if u is None: + return None + inp = getattr(u, "input_tokens", 0) or 0 + out = getattr(u, "output_tokens", 0) or 0 + cache_read = getattr(u, "cache_read_input_tokens", 0) or 0 + cache_creation = getattr(u, "cache_creation_input_tokens", 0) or 0 + # The true context size is the uncached input plus everything served + # from / written to the cache this request. + return { + "input_tokens": inp, + "output_tokens": out, + "cache_read_input_tokens": cache_read, + "cache_creation_input_tokens": cache_creation, + "context_tokens": inp + cache_read + cache_creation, + } + + +def _openai_usage(response: Any) -> dict[str, int] | None: + u = getattr(response, "usage", None) + if u is None: + return None + prompt = getattr(u, "prompt_tokens", 0) or 0 + completion = getattr(u, "completion_tokens", 0) or 0 + return { + "input_tokens": prompt, + "output_tokens": completion, + "cache_read_input_tokens": 0, + "cache_creation_input_tokens": 0, + "context_tokens": prompt, + } def _openai_tools(tools: list[dict[str, Any]]) -> list[dict[str, Any]]: @@ -125,10 +163,21 @@ async def generate( if self.provider == "anthropic": client_any = cast(Any, self._client) messages_client = cast(Any, getattr(client_any, "messages")) # type: ignore[attr-defined] + # Mark the (static) system prompt as a cache breakpoint so the + # tools + system prefix is cached and not reprocessed every turn. + system_param: Any = system + if system: + system_param = [ + { + "type": "text", + "text": system, + "cache_control": {"type": "ephemeral"}, + } + ] response = await messages_client.create( model=resolved_model, max_tokens=max_tokens, - system=system, + system=cast(Any, system_param), messages=cast(Any, messages), tools=cast(Any, tools), ) @@ -150,6 +199,7 @@ async def generate( text="\n".join(text_parts).strip(), tool_calls=tool_calls, raw=response.content, + usage=_anthropic_usage(response), ) openai_tools = _openai_tools(tools) @@ -174,6 +224,7 @@ async def generate( text=(message.content or "").strip(), tool_calls=tool_calls, raw=message.model_dump(exclude_none=True), + usage=_openai_usage(response), ) def assistant_message(self, response: LLMResponse) -> dict[str, Any]: diff --git a/core/models.py b/core/models.py index 97f3abf..dedae63 100644 --- a/core/models.py +++ b/core/models.py @@ -58,3 +58,6 @@ class AgentResponse: text: str voice: bytes | None = None attachments: list[Attachment] = field(default_factory=list) + # Optional out-of-band system message (e.g. "context was compacted"), + # delivered by the channel as a separate follow-up message. + system_notice: str | None = None diff --git a/core/prompt_builder.py b/core/prompt_builder.py index bcac689..13900f4 100644 --- a/core/prompt_builder.py +++ b/core/prompt_builder.py @@ -3,11 +3,10 @@ from __future__ import annotations from dataclasses import dataclass -from datetime import datetime -from zoneinfo import ZoneInfo from core.config import Config from core.goal_decomposition import DecomposedGoal +from core.tools import active_tool_prompts DEFAULT_TOOL_USAGE_BLOCK = """For write actions (sending emails, replying to emails, sending messages, creating calendar events, @@ -51,6 +50,7 @@ class PromptSections: character: str about_user: str tool_usage: str + tools: str memory_instruction: str history_handling: str memories: str @@ -66,8 +66,10 @@ def full_prompt(self) -> str: self.character, self.about_user, self.tool_usage, - self.memory_instruction, ] + if self.tools: + parts.append(self.tools) + parts.append(self.memory_instruction) if self.history_handling: parts.append(self.history_handling) if self.memories: @@ -87,6 +89,7 @@ def as_dict(self) -> dict[str, str]: "character": self.character, "about_user": self.about_user, "tool_usage": self.tool_usage, + "tools": self.tools, "memory_instruction": self.memory_instruction, "history_handling": self.history_handling, "memories": self.memories, @@ -107,11 +110,13 @@ def build_prompt_sections( include_memories: bool = True, include_reflections: bool = True, ) -> PromptSections: - """Build all prompt sections with current config and dynamic context.""" + """Build all prompt sections with current config and dynamic context. + + The prompt is intentionally **static** (no current date/time): it forms the + cacheable prefix sent to the LLM. The live date/time is injected per turn at + the start of each user message instead (see ``AgentCore._turn_preamble``). + """ cfg = config.agent - now = datetime.now(ZoneInfo(cfg.timezone)) - date_str = now.strftime("%A, %B %d, %Y") - time_str = now.strftime("%H:%M") about_user_block = config.you.personalia.strip() tool_usage_text = resolve_prompt_block( @@ -125,13 +130,19 @@ def build_prompt_sections( intro = ( f"You are {cfg.name}, a personal AI assistant for {cfg.owner_name}.\n\n" - f"Today is {date_str}. Current time: {time_str}. Timezone: {cfg.timezone}." + f"Your timezone is {cfg.timezone}. The current date and time is provided at the " + f"start of each user message — always use that as 'now'." ) personalia = f"\n{cfg.personalia}\n" character = f"\n{cfg.character}\n" about_user = f"\n{about_user_block}\n" if about_user_block else "" tool_usage = f"\n{tool_usage_text}\n" + + tool_blocks = active_tool_prompts(config) + tools_section = "" + if tool_blocks: + tools_section = "\n" + "\n\n".join(tool_blocks) + "\n" memory_instruction = ( "You can store and recall memories using the sqlite3 CLI (see the memory skill).\n" "Proactively remember important facts about the user and their contacts.\n" @@ -171,6 +182,7 @@ def build_prompt_sections( character=character, about_user=about_user, tool_usage=tool_usage, + tools=tools_section, memory_instruction=memory_instruction, history_handling=history_handling, memories=memory_section, diff --git a/core/tools.py b/core/tools.py new file mode 100644 index 0000000..594e5ce --- /dev/null +++ b/core/tools.py @@ -0,0 +1,100 @@ +"""Optional external CLI tools the agent can use (e.g. the GitHub `gh` CLI). + +Tools are configured under ``config.tools.*``. When a tool is *enabled*, two +things happen: + +1. Its authentication is wired into the executor environment (via :func:`tool_env`), + so the underlying CLI is authenticated when the agent runs it. +2. It is advertised to the LLM in the system prompt (via :func:`active_tool_prompts`), + so the model knows the capability exists and how to use it. + +A tool that is *not* enabled is neither authenticated nor advertised, keeping the +prompt lean and the capability hidden. + +Adding a new tool means: add a config sub-model in ``core/config.py``, then add an +entry to ``_REGISTRY`` below describing its env + prompt advertisement. +""" + +from __future__ import annotations + +from collections.abc import Callable +from dataclasses import dataclass + +from core.config import Config + +# Advertisement injected into the system prompt when `gh` is active. +_GH_PROMPT = """ +The GitHub CLI `gh` is installed and authenticated. Run it with the `run_command` +tool for GitHub operations. Read operations run without asking; creating issues, +PRs or releases ask for confirmation first. +Examples: + gh issue list --repo owner/name + gh pr view 123 --repo owner/name + gh pr list --repo owner/name --state open + gh api repos/owner/name/commits + gh search issues "is:open label:bug" --repo owner/name +Always pass `--repo owner/name` unless the working directory is a checkout of the +target repository. Use `-o json` / `gh api` and parse JSON when you need fields. +""" + + +@dataclass(frozen=True) +class ToolSpec: + """Describes an optional external tool the agent can use.""" + + key: str # config sub-key under `tools.` (e.g. "gh") + label: str # human-friendly name for the admin UI + summary: str # one-line description for the admin UI + # Returns env vars to inject when the tool is enabled (auth, etc.). + env: Callable[[Config], dict[str, str]] + # Returns the system-prompt advertisement block for the tool. + prompt: Callable[[Config], str] + + +def _gh_env(config: Config) -> dict[str, str]: + gh = config.tools.gh + if gh.enabled and gh.token: + # `gh` reads GH_TOKEN (preferred) / GITHUB_TOKEN for non-interactive auth. + return {"GH_TOKEN": gh.token} + return {} + + +_REGISTRY: tuple[ToolSpec, ...] = ( + ToolSpec( + key="gh", + label="GitHub CLI (gh)", + summary="Let the agent query and act on GitHub (issues, PRs, repos, API).", + env=_gh_env, + prompt=lambda _cfg: _GH_PROMPT, + ), +) + + +def registry() -> tuple[ToolSpec, ...]: + """Return all known optional tools.""" + return _REGISTRY + + +def _is_enabled(config: Config, key: str) -> bool: + sub = getattr(config.tools, key, None) + return bool(getattr(sub, "enabled", False)) + + +def active_tool_prompts(config: Config) -> list[str]: + """Return system-prompt advertisement blocks for every *enabled* tool.""" + blocks: list[str] = [] + for spec in _REGISTRY: + if _is_enabled(config, spec.key): + block = spec.prompt(config).strip() + if block: + blocks.append(block) + return blocks + + +def tool_env(config: Config) -> dict[str, str]: + """Return the merged environment for every *enabled* tool (auth tokens, etc.).""" + env: dict[str, str] = {} + for spec in _REGISTRY: + if _is_enabled(config, spec.key): + env.update(spec.env(config)) + return env diff --git a/docs/content/docs/admin-ui.mdx b/docs/content/docs/admin-ui.mdx index 57e3cc2..66d7df1 100644 --- a/docs/content/docs/admin-ui.mdx +++ b/docs/content/docs/admin-ui.mdx @@ -28,14 +28,21 @@ Overview of agent status, active channels, and quick stats. Edit agent settings without touching config files: - **Identity** — agent name and owner name -- **LLM** — provider, model, and API key configuration. The model field is free-text (enter any model id the provider supports); a **Fetch models** button on each provider card loads the live model list from the provider API into the field's autocomplete, and **Copy list** copies all available ids +- **LLM** — provider, model, and API key configuration. The model field is free-text (enter any model id the provider supports); a **Fetch models** button on each provider card loads the live model list from the provider API into the field's autocomplete, and **Copy list** copies all available ids. Also configures the background models for memory, goal decomposition, task reflection, and **history compaction** (the model used to summarize old conversation turns — see the History tab for the trigger threshold) - **Channels** — enable/disable Telegram and WhatsApp - **Calendar** — manage CalDAV providers - **Contacts** — manage contact providers - **Email** — Himalaya configuration - **Search** — Tavily API configuration +- **Tools** — enable optional external CLI tools the agent can use - **Voice** — STT/TTS settings +### Tools + +Enable optional external CLI tools. When a tool is enabled it is authenticated and advertised to the assistant in its system prompt; when disabled it stays hidden, keeping the prompt lean. + +- **GitHub CLI (`gh`)** — let the agent query and act on GitHub (issues, PRs, repos, `gh api`, search). Provide a [Personal Access Token](https://github.com/settings/tokens); it is injected as `GH_TOKEN` so `gh` is authenticated non-interactively. Read operations (`list`/`view`/`status`/`api`/`search`) run without asking; creating issues, PRs, or releases ask for confirmation first. Use **Test token** to verify the token against the GitHub API. In session history mode, a newly enabled tool is advertised starting from the next `/new` conversation. + ### Skills Built-in skill editor for creating, editing, and deleting skill files. Changes take effect immediately — no restart needed. @@ -65,7 +72,10 @@ Manage scheduled tasks: ### History -Browse conversation history across channels. View individual conversation turns with tool call details. +Configure conversation history and browse stored turns: + +- **Mode** — `injection` (replay the last N user/assistant pairs each request) or `session` (sticky per-chat session, full context, cache-friendly). Reset a conversation any time by sending `/new` (or its alias `/clear`). +- **Context compaction** (session mode) — when the conversation nears the model's context window, the oldest turns are summarized by a small model while recent turns are kept verbatim, and the user gets a system message saying it happened. Set the trigger threshold here as either a **percent of the context window** or an **absolute token count** (e.g. 200k), plus how many recent turns to keep. The compaction model is configured in the **LLM** tab (under *History Compaction*). Has no effect in injection mode (which is already windowed). ### Logs diff --git a/docs/content/docs/architecture.mdx b/docs/content/docs/architecture.mdx index 384741d..93eb837 100644 --- a/docs/content/docs/architecture.mdx +++ b/docs/content/docs/architecture.mdx @@ -70,12 +70,14 @@ The agent becomes a **thin orchestrator**: it reads skill files, passes them to The brain of MPA. Implements the LLM tool-use loop: 1. Load conversation history -2. Build system prompt (skills, character, personalia, memories) -3. Call the LLM -4. Handle tool calls with permission checks -5. Save conversation turn and extract memories - -The agent uses a single `run_command` meta-tool that executes any whitelisted CLI command, plus structured tools for safety-critical write operations (`send_email`, `send_message`, `create_calendar_event`). +2. Build the static system prompt (skills, character, personalia, memories, active tools). In session mode this is snapshotted once per conversation (rebuilt after `/new`) and reused every turn, so the cacheable prefix stays stable; on Anthropic it is sent with a `cache_control` breakpoint so the tools + system prefix is not reprocessed each turn +3. Inject the live date/time (and any per-request execution plan) at the start of the current user message — so the agent always knows "now" without mutating the cached prefix +4. Call the LLM +5. Handle tool calls with permission checks +6. Save conversation turn and extract memories +7. In session mode, if the provider-reported context size crosses the configured threshold, compact: summarize the oldest turns with a small model, keep recent turns verbatim, and notify the user with a system message (see `core/compaction.py`) + +The agent uses a single `run_command` meta-tool that executes any whitelisted CLI command, plus structured tools for safety-critical write operations (`send_email`, `send_message`, `create_calendar_event`). Optional external tools (e.g. the GitHub `gh` CLI, configured under the **Tools** tab) are authenticated and advertised to the model only when enabled — see `core/tools.py`. ### LLM Provider (`core/llm.py`) diff --git a/tests/test_compaction.py b/tests/test_compaction.py new file mode 100644 index 0000000..6107e4c --- /dev/null +++ b/tests/test_compaction.py @@ -0,0 +1,226 @@ +"""Tests for conversation compaction, token-usage capture, and the /clear alias.""" + +from __future__ import annotations + +from types import SimpleNamespace + +import pytest + +from core.compaction import ( + compact_messages, + compaction_threshold_tokens, + effective_window, + should_compact, +) +from core.config import CompactionConfig, Config +from core.history import ConversationHistory +from core.llm import _anthropic_usage, _openai_usage + + +class FakeLLM: + """Minimal stand-in exposing the async generate_text used by compaction.""" + + def __init__(self, summary: str = "SUMMARY") -> None: + self.summary = summary + self.calls = 0 + + async def generate_text(self, *, model: str, prompt: str, max_tokens: int = 2048) -> str: + self.calls += 1 + return self.summary + + +# --------------------------------------------------------------------------- +# Threshold logic +# --------------------------------------------------------------------------- + + +def test_effective_window_known_and_fallback() -> None: + cfg = CompactionConfig(context_window=12345) + assert effective_window(cfg, "claude-haiku-4-5") == 200_000 + assert effective_window(cfg, "claude-opus-4-8") == 1_000_000 + assert effective_window(cfg, "some-unknown-model") == 12345 + + +def test_threshold_percent_vs_tokens() -> None: + pct = CompactionConfig(threshold_type="percent", threshold_percent=80) + assert compaction_threshold_tokens(pct, "claude-haiku-4-5") == 160_000 + tok = CompactionConfig(threshold_type="tokens", threshold_tokens=150_000) + assert compaction_threshold_tokens(tok, "claude-haiku-4-5") == 150_000 + + +def test_should_compact_gates() -> None: + cfg = CompactionConfig(enabled=True, threshold_type="tokens", threshold_tokens=1000) + assert should_compact(cfg, 1000, "m") is True + assert should_compact(cfg, 999, "m") is False + assert should_compact(cfg, 0, "m") is False # no usage info + disabled = CompactionConfig(enabled=False, threshold_type="tokens", threshold_tokens=1000) + assert should_compact(disabled, 5000, "m") is False + + +# --------------------------------------------------------------------------- +# Usage capture +# --------------------------------------------------------------------------- + + +def test_anthropic_usage_sums_cache_into_context() -> None: + resp = SimpleNamespace( + usage=SimpleNamespace( + input_tokens=100, + output_tokens=20, + cache_read_input_tokens=300, + cache_creation_input_tokens=50, + ) + ) + u = _anthropic_usage(resp) + assert u["context_tokens"] == 450 # 100 + 300 + 50 + assert u["output_tokens"] == 20 + + +def test_openai_usage_uses_prompt_tokens() -> None: + resp = SimpleNamespace(usage=SimpleNamespace(prompt_tokens=777, completion_tokens=33)) + u = _openai_usage(resp) + assert u["context_tokens"] == 777 + assert u["output_tokens"] == 33 + + +def test_usage_none_when_absent() -> None: + assert _anthropic_usage(SimpleNamespace(usage=None)) is None + assert _openai_usage(SimpleNamespace(usage=None)) is None + + +# --------------------------------------------------------------------------- +# compact_messages +# --------------------------------------------------------------------------- + + +def _session_with_tool_pair() -> list[dict]: + return [ + {"role": "user", "content": "u1"}, + {"role": "assistant", "content": "a1"}, + {"role": "user", "content": "u2"}, + { + "role": "assistant", + "content": [{"type": "tool_use", "id": "t", "name": "x", "input": {}}], + }, + {"role": "user", "content": [{"type": "tool_result", "tool_use_id": "t", "content": "r"}]}, + {"role": "assistant", "content": "a2"}, + {"role": "user", "content": "u3"}, + {"role": "assistant", "content": "a3"}, + ] + + +@pytest.mark.asyncio +async def test_compact_keeps_recent_and_summarizes_rest() -> None: + llm = FakeLLM("THE SUMMARY") + msgs = _session_with_tool_pair() + result = await compact_messages(llm, "m", msgs, keep_recent_turns=1) + assert result is not None + new, summary = result + assert summary == "THE SUMMARY" + assert llm.calls == 1 + # Rebuilt as: user(summary), assistant(ack), then the last real user turn verbatim. + assert new[0]["role"] == "user" and "THE SUMMARY" in new[0]["content"] + assert new[1]["role"] == "assistant" + assert new[2:] == [{"role": "user", "content": "u3"}, {"role": "assistant", "content": "a3"}] + # Valid alternation: never two same-role in a row. + roles = [m["role"] for m in new] + assert all(roles[i] != roles[i + 1] for i in range(len(roles) - 1)) + + +@pytest.mark.asyncio +async def test_compact_does_not_split_tool_pair() -> None: + # Keeping 2 turns cuts before u2 — the tool_use/tool_result pair stays together + # in the summarized prefix, never straddling the boundary. + llm = FakeLLM() + msgs = _session_with_tool_pair() + new, _ = await compact_messages(llm, "m", msgs, keep_recent_turns=2) + tail = new[2:] + # The kept tail must start with a real user message (not a tool_result carrier). + first = tail[0] + assert first["role"] == "user" and isinstance(first["content"], str) + + +@pytest.mark.asyncio +async def test_compact_noop_when_too_few_turns() -> None: + llm = FakeLLM() + msgs = _session_with_tool_pair() # 3 real user turns + assert await compact_messages(llm, "m", msgs, keep_recent_turns=3) is None + assert llm.calls == 0 + + +# --------------------------------------------------------------------------- +# History.replace_session +# --------------------------------------------------------------------------- + + +@pytest.mark.asyncio +async def test_replace_session_rewrites_messages(tmp_path) -> None: + h = ConversationHistory(db_path=str(tmp_path / "h.db")) + await h.append_session_message("telegram", "u", {"role": "user", "content": "old"}) + await h.replace_session("telegram", "u", [{"role": "user", "content": "new"}]) + assert await h.get_session("telegram", "u") == [{"role": "user", "content": "new"}] + # Cold instance reads the rewritten rows. + h2 = ConversationHistory(db_path=str(tmp_path / "h.db")) + assert await h2.get_session("telegram", "u") == [{"role": "user", "content": "new"}] + + +# --------------------------------------------------------------------------- +# Agent integration: _maybe_compact + /clear alias +# --------------------------------------------------------------------------- + + +@pytest.fixture +def agent(tmp_path, monkeypatch): + monkeypatch.chdir(tmp_path) + from core.agent import AgentCore + + a = AgentCore(Config()) + a.history_mode = "session" + return a + + +@pytest.mark.asyncio +async def test_maybe_compact_replaces_session_and_notifies(agent, monkeypatch) -> None: + agent.config.compaction.enabled = True + agent.config.compaction.threshold_type = "tokens" + agent.config.compaction.threshold_tokens = 100 + agent.config.compaction.keep_recent_turns = 1 + + for m in _session_with_tool_pair(): + await agent.history.append_session_message("telegram", "u", m, "") + + monkeypatch.setattr(agent, "_background_llm", lambda provider: FakeLLM("S")) + response = SimpleNamespace(usage={"context_tokens": 999}) + + notice = await agent._maybe_compact("telegram", "u", "", response) + assert notice is not None and "summarized" in notice.lower() + session = await agent.history.get_session("telegram", "u") + assert "S" in session[0]["content"] + assert session[-2:] == [ + {"role": "user", "content": "u3"}, + {"role": "assistant", "content": "a3"}, + ] + + +@pytest.mark.asyncio +async def test_maybe_compact_below_threshold_noop(agent, monkeypatch) -> None: + agent.config.compaction.enabled = True + agent.config.compaction.threshold_type = "tokens" + agent.config.compaction.threshold_tokens = 100000 + for m in _session_with_tool_pair(): + await agent.history.append_session_message("telegram", "u", m, "") + monkeypatch.setattr(agent, "_background_llm", lambda provider: FakeLLM("S")) + response = SimpleNamespace(usage={"context_tokens": 50}) + assert await agent._maybe_compact("telegram", "u", "", response) is None + + +@pytest.mark.asyncio +async def test_clear_alias_clears_conversation(agent) -> None: + resp = await agent.process("/clear", channel="telegram", user_id="u", chat_id="") + assert resp.text == "Conversation cleared." + + +@pytest.mark.asyncio +async def test_new_still_clears_conversation(agent) -> None: + resp = await agent.process("/new", channel="telegram", user_id="u", chat_id="") + assert resp.text == "Conversation cleared." diff --git a/tests/test_tools.py b/tests/test_tools.py new file mode 100644 index 0000000..587d760 --- /dev/null +++ b/tests/test_tools.py @@ -0,0 +1,168 @@ +"""Tests for optional tools, per-turn datetime injection, and prompt caching.""" + +from __future__ import annotations + +import pytest + +from core.config import Config +from core.history import ConversationHistory +from core.prompt_builder import build_prompt_sections +from core.tools import active_tool_prompts, tool_env + +# --------------------------------------------------------------------------- +# Tools registry +# --------------------------------------------------------------------------- + + +def test_gh_tool_inactive_by_default() -> None: + cfg = Config() + assert active_tool_prompts(cfg) == [] + assert tool_env(cfg) == {} + + +def test_gh_tool_env_and_advert_when_enabled() -> None: + cfg = Config() + cfg.tools.gh.enabled = True + cfg.tools.gh.token = "ghp_secret" + assert tool_env(cfg) == {"GH_TOKEN": "ghp_secret"} + blocks = active_tool_prompts(cfg) + assert len(blocks) == 1 + assert "gh" in blocks[0] + + +def test_gh_enabled_without_token_has_no_env() -> None: + cfg = Config() + cfg.tools.gh.enabled = True # no token + assert tool_env(cfg) == {} + # Still advertised so the agent knows the capability exists. + assert active_tool_prompts(cfg) + + +# --------------------------------------------------------------------------- +# Static system prompt: no datetime, tool advert gated on activation +# --------------------------------------------------------------------------- + + +def _sections(cfg: Config): + return build_prompt_sections( + config=cfg, + history_mode="session", + skills_index="", + memories="", + reflections="", + decomposed_goal=None, + ) + + +def test_static_prompt_has_no_datetime() -> None: + cfg = Config() + sections = _sections(cfg) + # The static prompt must not bake in a concrete date/time (it is injected + # per turn instead), so the prefix stays stable and cacheable. + assert "Today is" not in sections.full_prompt + assert "Current time:" not in sections.full_prompt + assert cfg.agent.timezone in sections.intro + + +def test_tools_section_only_when_enabled() -> None: + cfg = Config() + assert _sections(cfg).tools == "" + cfg.tools.gh.enabled = True + cfg.tools.gh.token = "ghp_x" + sections = _sections(cfg) + assert "" in sections.tools + assert sections.tools in sections.full_prompt + + +# --------------------------------------------------------------------------- +# Session system snapshot +# --------------------------------------------------------------------------- + + +@pytest.mark.asyncio +async def test_session_system_snapshot_roundtrip(tmp_path) -> None: + history = ConversationHistory(db_path=str(tmp_path / "h.db")) + assert await history.get_session_system("telegram", "u1") is None + await history.set_session_system("telegram", "u1", "SYSTEM-A") + assert await history.get_session_system("telegram", "u1") == "SYSTEM-A" + + +@pytest.mark.asyncio +async def test_session_system_survives_new_instance(tmp_path) -> None: + db = str(tmp_path / "h.db") + h1 = ConversationHistory(db_path=db) + await h1.set_session_system("telegram", "u1", "SYSTEM-A") + # Fresh instance (cold cache) must load the snapshot from disk. + h2 = ConversationHistory(db_path=db) + assert await h2.get_session_system("telegram", "u1") == "SYSTEM-A" + + +@pytest.mark.asyncio +async def test_clear_session_drops_system_snapshot(tmp_path) -> None: + history = ConversationHistory(db_path=str(tmp_path / "h.db")) + await history.set_session_system("telegram", "u1", "SYSTEM-A") + await history.clear_session("telegram", "u1") + assert await history.get_session_system("telegram", "u1") is None + + +@pytest.mark.asyncio +async def test_clear_drops_system_snapshot(tmp_path) -> None: + history = ConversationHistory(db_path=str(tmp_path / "h.db")) + await history.set_session_system("telegram", "u1", "SYSTEM-A") + await history.clear("telegram", "u1") + assert await history.get_session_system("telegram", "u1") is None + + +# --------------------------------------------------------------------------- +# Agent: per-turn preamble + user-message injection + session caching +# --------------------------------------------------------------------------- + + +@pytest.fixture +def agent(tmp_path, monkeypatch): + monkeypatch.chdir(tmp_path) + from core.agent import AgentCore + + return AgentCore(Config()) + + +def test_turn_preamble_carries_datetime(agent) -> None: + preamble = agent._turn_preamble(None) + assert "Current date & time" in preamble + # No execution plan when the goal was not decomposed. + assert "execution_plan" not in preamble + + +def test_build_user_message_prepends_preamble(agent) -> None: + preamble = agent._turn_preamble(None) + msg = agent._build_user_message("hello", None, preamble) + assert msg["role"] == "user" + assert msg["content"].startswith(preamble) + assert msg["content"].endswith("hello") + + +def test_build_user_message_no_preamble_is_plain(agent) -> None: + msg = agent._build_user_message("hello", None, "") + assert msg["content"] == "hello" + + +@pytest.mark.asyncio +async def test_session_system_built_once_and_reused(agent, monkeypatch) -> None: + calls = {"n": 0} + + async def fake_build() -> str: + calls["n"] += 1 + return f"SYSTEM-{calls['n']}" + + monkeypatch.setattr(agent, "_build_system_prompt", fake_build) + + first = await agent._session_system_prompt("telegram", "u1", "") + second = await agent._session_system_prompt("telegram", "u1", "") + assert first == second == "SYSTEM-1" + assert calls["n"] == 1 # built only once for the session + + # After /new (clear), it rebuilds. + await agent.history.clear_session("telegram", "u1") + third = await agent._session_system_prompt("telegram", "u1", "") + assert third == "SYSTEM-2" + assert calls["n"] == 2 diff --git a/uv.lock b/uv.lock index d5e54b1..a2b2401 100644 --- a/uv.lock +++ b/uv.lock @@ -764,7 +764,7 @@ wheels = [ [[package]] name = "mpa" -version = "0.10.0" +version = "0.11.0" source = { virtual = "." } dependencies = [ { name = "aiosqlite" },