From 7a63ff3c38893d9a44705149a3482b42daa9fc13 Mon Sep 17 00:00:00 2001 From: Matteo Merola Date: Thu, 25 Jun 2026 14:03:27 +0200 Subject: [PATCH 1/7] feat(config): default to deepseek-v4-flash inference model Set llm_provider=deepseek and model=deepseek-v4-flash as the built-in defaults (config.py) and in config.yml.example. --- config.yml.example | 4 ++-- core/config.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/config.yml.example b/config.yml.example index 4991aa0..683a9bf 100644 --- a/config.yml.example +++ b/config.yml.example @@ -1,7 +1,7 @@ agent: name: "Clio" owner_name: "Matteo" - llm_provider: "anthropic" + llm_provider: "deepseek" anthropic_api_key: "${ANTHROPIC_API_KEY}" openai_api_key: "${OPENAI_API_KEY}" openai_base_url: "${OPENAI_BASE_URL}" @@ -11,7 +11,7 @@ agent: grok_base_url: "${GROK_BASE_URL}" deepseek_api_key: "${DEEPSEEK_API_KEY}" deepseek_base_url: "${DEEPSEEK_BASE_URL}" - model: "claude-sonnet-4-5-20250514" + model: "deepseek-v4-flash" timezone: "Europe/Zurich" skills_dir: "skills/" diff --git a/core/config.py b/core/config.py index 8d9b90a..2cf9ba2 100644 --- a/core/config.py +++ b/core/config.py @@ -37,7 +37,7 @@ def _resolve_env_vars(obj: object) -> object: class AgentConfig(BaseModel): name: str = "Clio" owner_name: str = "Matteo" - llm_provider: str = "anthropic" + llm_provider: str = "deepseek" anthropic_api_key: str = "" openai_api_key: str = "" openai_base_url: str = "" @@ -47,7 +47,7 @@ class AgentConfig(BaseModel): grok_base_url: str = "" deepseek_api_key: str = "" deepseek_base_url: str = "" - model: str = "claude-4-6-sonnet" + model: str = "deepseek-v4-flash" thinking_level: str = "" # "" (off) | "low" | "medium" | "high" — only for reasoning models timezone: str = "Europe/Zurich" skills_dir: str = "skills/" From f476ae47f835703602cad2c135426c8163e2b4d3 Mon Sep 17 00:00:00 2001 From: Matteo Merola Date: Thu, 25 Jun 2026 14:03:27 +0200 Subject: [PATCH 2/7] feat(repl): terminal REPL channel for local agent testing MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds core/repl.py and a 'make repl' target. Builds the agent from the shared config store, registers a 'repl' channel that routes permission approvals to a y/n stdin prompt, and loops on stdin — lets you exercise the agent end to end without Telegram. --- Makefile | 10 +++++-- core/repl.py | 76 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 84 insertions(+), 2 deletions(-) create mode 100644 core/repl.py diff --git a/Makefile b/Makefile index d83b7de..ebb9c95 100644 --- a/Makefile +++ b/Makefile @@ -1,5 +1,6 @@ -.PHONY: help setup setup-hooks install install-dev sync lock lint format test run dev dev-agent dev-css dev-wa clean release css docs docs-dev +.PHONY: help setup setup-hooks install install-dev sync lock lint format test run repl dev dev-agent dev-css dev-wa clean release css docs docs-dev +PORT := 8001 PYTHON := uv run python UV := uv TAILWIND := ./tailwindcss @@ -23,6 +24,7 @@ help: @echo " make lock Update lockfile after changing pyproject.toml" @echo "" @echo " Development:" + @echo " make repl Chat with the agent from the terminal (no Telegram)" @echo " make dev Show instructions for running dev services" @echo " make dev-agent Run agent with auto-reload" @echo " make dev-css Run Tailwind CSS watcher" @@ -86,6 +88,10 @@ test: run: $(UV) run python -m core.main +# Local REPL — chat with the agent from the terminal (no Telegram) +repl: + $(PYTHON) -m core.repl + # Run in dev mode: instructions for running services in separate shells dev: @echo "" @@ -107,7 +113,7 @@ dev: # Dev: admin API with auto-reload on code changes (agent managed via UI) dev-agent: PYTHONWARNINGS="ignore::UserWarning:multiprocessing.resource_tracker" \ - $(UV) run uvicorn core.main:app --reload --host 0.0.0.0 --port 8000 --log-level info \ + $(UV) run uvicorn core.main:app --reload --host 0.0.0.0 --port $(PORT) --log-level info \ --reload-dir api --reload-dir core --reload-dir channels --reload-dir schema \ --reload-dir skills --reload-dir tools --reload-dir voice diff --git a/core/repl.py b/core/repl.py new file mode 100644 index 0000000..860df54 --- /dev/null +++ b/core/repl.py @@ -0,0 +1,76 @@ +"""Local REPL channel — talk to the agent from the terminal, no Telegram. + +Run: make repl (or uv run python -m core.repl) + +Builds the agent from the same config store the server uses, registers itself +as the ``repl`` channel so permission approvals route to a y/n terminal prompt, +then loops on stdin. Ctrl-D or ``/exit`` quits. +""" + +from __future__ import annotations + +import asyncio +import logging + +from core.agent import AgentCore +from core.config_store import ConfigStore + +log = logging.getLogger(__name__) + +USER_ID = "repl" + + +class ReplChannel: + """Minimal channel: prints approval prompts and reads a y/n from stdin.""" + + def __init__(self, agent: AgentCore): + self.agent = agent + + async def send(self, chat_id, text: str) -> None: + print(f"\n{text}\n") + + async def send_approval_request(self, user_id: str, request_id: str, description: str) -> None: + ans = await asyncio.to_thread(input, f"\n[approval] {description}\nallow? [y/N] ") + self.agent.permissions.resolve_approval(request_id, ans.strip().lower() in ("y", "yes")) + + +async def main() -> None: + logging.basicConfig(level=logging.WARNING, format="%(levelname)s %(name)s — %(message)s") + + store = ConfigStore() + await store.seed_if_empty() + await store.ensure_admin_password() + config = await store.export_to_config() + + agent = AgentCore(config) + agent.channels["repl"] = ReplChannel(agent) + + print( + f"{config.agent.name} REPL — model={config.agent.model} " + f"provider={config.agent.llm_provider}. Ctrl-D or /exit to quit.\n" + ) + + while True: + try: + text = await asyncio.to_thread(input, "> ") + except EOFError: + break + text = text.strip() + if not text: + continue + if text in ("/exit", "/quit"): + break + response = await agent.process( + message=text, channel="repl", user_id=USER_ID, chat_id=USER_ID + ) + if response.text: + print(f"\n{response.text}\n") + if getattr(response, "system_notice", None): + print(f"[system] {response.system_notice}\n") + + +if __name__ == "__main__": + try: + asyncio.run(main()) + except KeyboardInterrupt: + pass From f3199368f342dbfe77fd04462cb5260329fc7d86 Mon Sep 17 00:00:00 2001 From: Matteo Merola Date: Thu, 25 Jun 2026 14:08:44 +0200 Subject: [PATCH 3/7] feat(repl): stream chain-of-thought + processing spinner Capture model reasoning (anthropic thinking blocks, deepseek reasoning_content) on LLMResponse and emit it via a dedicated core.llm.reasoning logger, silent by default. The REPL bumps that logger plus core.agent/core.executor to INFO and streams them live above a \r spinner, so you see the reasoning and every tool call as the agent works. --- core/llm.py | 21 ++++++++++++ core/repl.py | 94 ++++++++++++++++++++++++++++++++++++++++++++++++---- 2 files changed, 109 insertions(+), 6 deletions(-) diff --git a/core/llm.py b/core/llm.py index 5b5318f..e4c2763 100644 --- a/core/llm.py +++ b/core/llm.py @@ -4,11 +4,17 @@ import importlib import json +import logging from dataclasses import dataclass from typing import Any, cast from anthropic import AsyncAnthropic +# Dedicated logger for model chain-of-thought. Silent by default (WARNING); +# the REPL bumps it to INFO to stream reasoning live without spamming server logs. +reasoning_log = logging.getLogger("core.llm.reasoning") +reasoning_log.setLevel(logging.WARNING) + _DEFAULT_BASE_URLS = { "google": "https://generativelanguage.googleapis.com/v1beta/openai", "grok": "https://api.x.ai/v1", @@ -31,6 +37,7 @@ class LLMToolCall: class LLMResponse: text: str tool_calls: list[LLMToolCall] + reasoning: str = "" # model chain-of-thought, when the provider exposes it raw: object | None = None # Token usage for the request, when the provider reports it. Keys: # input_tokens, output_tokens, cache_read_input_tokens, @@ -209,6 +216,7 @@ async def generate( ) tool_calls = [] text_parts = [] + reasoning_parts = [] for block in response.content: block_any = cast(Any, block) if getattr(block_any, "type", None) == "tool_use": @@ -221,9 +229,15 @@ async def generate( ) if getattr(block_any, "type", None) == "text": text_parts.append(getattr(block_any, "text", "")) + if getattr(block_any, "type", None) == "thinking": + reasoning_parts.append(getattr(block_any, "thinking", "")) + reasoning = "\n".join(p for p in reasoning_parts if p).strip() + if reasoning: + reasoning_log.info("%s", reasoning) return LLMResponse( text="\n".join(text_parts).strip(), tool_calls=tool_calls, + reasoning=reasoning, raw=response.content, usage=_anthropic_usage(response), ) @@ -247,9 +261,16 @@ async def generate( except json.JSONDecodeError: args = {} tool_calls.append(LLMToolCall(id=call.id, name=call.function.name, arguments=args)) + # DeepSeek/others expose CoT as message.reasoning_content (or .reasoning). + reasoning = ( + getattr(message, "reasoning_content", None) or getattr(message, "reasoning", None) or "" + ).strip() + if reasoning: + reasoning_log.info("%s", reasoning) return LLMResponse( text=(message.content or "").strip(), tool_calls=tool_calls, + reasoning=reasoning, raw=message.model_dump(exclude_none=True), usage=_openai_usage(response), ) diff --git a/core/repl.py b/core/repl.py index 860df54..43cdcb4 100644 --- a/core/repl.py +++ b/core/repl.py @@ -5,12 +5,18 @@ Builds the agent from the same config store the server uses, registers itself as the ``repl`` channel so permission approvals route to a y/n terminal prompt, then loops on stdin. Ctrl-D or ``/exit`` quits. + +While the agent works, a spinner shows it's busy and the chain of thought +(model reasoning + each tool call) streams live above it. """ from __future__ import annotations import asyncio +import itertools import logging +import sys +import time from core.agent import AgentCore from core.config_store import ConfigStore @@ -19,23 +25,95 @@ USER_ID = "repl" +# Loggers whose INFO output is the agent's "chain of thought" / activity trail. +_THOUGHT_LOGGERS = ("core.agent", "core.executor", "core.llm.reasoning") +_NOISY_LOGGERS = ("httpx", "httpcore", "apscheduler", "telegram") + + +class _SpinnerHandler(logging.Handler): + """Prints log lines above the spinner, clearing its line first.""" + + def __init__(self, spinner: Spinner): + super().__init__() + self.spinner = spinner + self.setFormatter(logging.Formatter(" \033[2m· %(message)s\033[0m")) + + def emit(self, record: logging.LogRecord) -> None: + sys.stderr.write("\r\033[K" + self.format(record) + "\n") + sys.stderr.flush() + self.spinner.redraw() + + +class Spinner: + """Background \\r spinner on stderr. Start before a turn, stop after.""" + + _frames = itertools.cycle("⠋⠙⠹⠸⠼⠴⠦⠧⠇⠏") + + def __init__(self) -> None: + self._task: asyncio.Task | None = None + self._start = 0.0 + self._frame = "⠋" + + def redraw(self) -> None: + sys.stderr.write(f"\r\033[K\033[2m{self._frame} thinking… {self._elapsed():.0f}s\033[0m") + sys.stderr.flush() + + def _elapsed(self) -> float: + return time.monotonic() - self._start + + async def _run(self) -> None: + while True: + self._frame = next(self._frames) + self.redraw() + await asyncio.sleep(0.1) + + def start(self) -> None: + self._start = time.monotonic() + self._task = asyncio.create_task(self._run()) + + async def stop(self) -> None: + if self._task: + self._task.cancel() + try: + await self._task + except asyncio.CancelledError: + pass + self._task = None + sys.stderr.write("\r\033[K") + sys.stderr.flush() + class ReplChannel: """Minimal channel: prints approval prompts and reads a y/n from stdin.""" - def __init__(self, agent: AgentCore): + def __init__(self, agent: AgentCore, spinner: Spinner): self.agent = agent + self.spinner = spinner async def send(self, chat_id, text: str) -> None: print(f"\n{text}\n") async def send_approval_request(self, user_id: str, request_id: str, description: str) -> None: + await self.spinner.stop() # don't fight the prompt for the line ans = await asyncio.to_thread(input, f"\n[approval] {description}\nallow? [y/N] ") self.agent.permissions.resolve_approval(request_id, ans.strip().lower() in ("y", "yes")) + self.spinner.start() + + +def _setup_logging(spinner: Spinner) -> None: + handler = _SpinnerHandler(spinner) + root = logging.getLogger() + root.handlers = [handler] + root.setLevel(logging.WARNING) + for name in _THOUGHT_LOGGERS: + logging.getLogger(name).setLevel(logging.INFO) + for name in _NOISY_LOGGERS: + logging.getLogger(name).setLevel(logging.WARNING) async def main() -> None: - logging.basicConfig(level=logging.WARNING, format="%(levelname)s %(name)s — %(message)s") + spinner = Spinner() + _setup_logging(spinner) store = ConfigStore() await store.seed_if_empty() @@ -43,7 +121,7 @@ async def main() -> None: config = await store.export_to_config() agent = AgentCore(config) - agent.channels["repl"] = ReplChannel(agent) + agent.channels["repl"] = ReplChannel(agent, spinner) print( f"{config.agent.name} REPL — model={config.agent.model} " @@ -60,9 +138,13 @@ async def main() -> None: continue if text in ("/exit", "/quit"): break - response = await agent.process( - message=text, channel="repl", user_id=USER_ID, chat_id=USER_ID - ) + spinner.start() + try: + response = await agent.process( + message=text, channel="repl", user_id=USER_ID, chat_id=USER_ID + ) + finally: + await spinner.stop() if response.text: print(f"\n{response.text}\n") if getattr(response, "system_notice", None): From ba82fd5822d49516d5455b4c290d260bf0f6c5c9 Mon Sep 17 00:00:00 2001 From: Matteo Merola Date: Thu, 25 Jun 2026 14:14:20 +0200 Subject: [PATCH 4/7] fix(repl): no stray spinner on startup; debug config dump; color CoT MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Guard redraw() so startup/idle log records can't draw the spinner with a bogus elapsed time (the '…3128121s' line). - Print a debug config block at launch: active provider/model, thinking level, memory model, history mode, voice, timezone. - Tool-call/agent activity now renders cyan, reasoning stays dim, so the chain of thought reads at a glance. --- core/repl.py | 40 +++++++++++++++++++++++++++++++++------- 1 file changed, 33 insertions(+), 7 deletions(-) diff --git a/core/repl.py b/core/repl.py index 43cdcb4..a95ac43 100644 --- a/core/repl.py +++ b/core/repl.py @@ -30,16 +30,26 @@ _NOISY_LOGGERS = ("httpx", "httpcore", "apscheduler", "telegram") +_DIM = "\033[2m" # thinking / reasoning — low contrast +_CYAN = "\033[36m" # tool calls / agent activity — stands out +_RESET = "\033[0m" + + class _SpinnerHandler(logging.Handler): - """Prints log lines above the spinner, clearing its line first.""" + """Prints log lines above the spinner, clearing its line first. + + Reasoning (``core.llm.reasoning``) renders dim; everything else + (tool calls, agent activity) renders cyan so it stands out. + """ def __init__(self, spinner: Spinner): super().__init__() self.spinner = spinner - self.setFormatter(logging.Formatter(" \033[2m· %(message)s\033[0m")) def emit(self, record: logging.LogRecord) -> None: - sys.stderr.write("\r\033[K" + self.format(record) + "\n") + color = _DIM if record.name == "core.llm.reasoning" else _CYAN + line = f" {color}· {record.getMessage()}{_RESET}" + sys.stderr.write("\r\033[K" + line + "\n") sys.stderr.flush() self.spinner.redraw() @@ -55,6 +65,8 @@ def __init__(self) -> None: self._frame = "⠋" def redraw(self) -> None: + if self._task is None: # not running — startup/idle log records mustn't draw it + return sys.stderr.write(f"\r\033[K\033[2m{self._frame} thinking… {self._elapsed():.0f}s\033[0m") sys.stderr.flush() @@ -111,6 +123,23 @@ def _setup_logging(spinner: Spinner) -> None: logging.getLogger(name).setLevel(logging.WARNING) +def _print_debug_config(config) -> None: + a = config.agent + th = a.thinking_level or "off" + rows = [ + ("agent", f"{a.name} (owner {a.owner_name})"), + ("inference", f"{a.llm_provider} / {a.model} thinking={th}"), + ("memory", f"{config.memory.extraction_provider}/{config.memory.extraction_model}"), + ("history", config.history.mode), + ("voice", "on" if config.voice.tts_enabled else "off"), + ("timezone", a.timezone), + ] + print(f"\n{_CYAN}── REPL debug config ──{_RESET}") + for k, v in rows: + print(f" {_DIM}{k:>10}{_RESET} {v}") + print("\nCtrl-D or /exit to quit.\n") + + async def main() -> None: spinner = Spinner() _setup_logging(spinner) @@ -123,10 +152,7 @@ async def main() -> None: agent = AgentCore(config) agent.channels["repl"] = ReplChannel(agent, spinner) - print( - f"{config.agent.name} REPL — model={config.agent.model} " - f"provider={config.agent.llm_provider}. Ctrl-D or /exit to quit.\n" - ) + _print_debug_config(config) while True: try: From 368c3ef1c3f7138b9a67684f7bb891f4181a31ec Mon Sep 17 00:00:00 2001 From: Matteo Merola Date: Thu, 25 Jun 2026 14:28:49 +0200 Subject: [PATCH 5/7] feat(repl): ESC to interrupt a turn, /clear to reset context - ESC during a turn cancels the in-flight agent.process task (POSIX tty via cbreak + add_reader; lone ESC only, escape sequences ignored). - /clear wipes this session's conversation history. - Banner documents both. --- core/repl.py | 57 ++++++++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 49 insertions(+), 8 deletions(-) diff --git a/core/repl.py b/core/repl.py index a95ac43..188f531 100644 --- a/core/repl.py +++ b/core/repl.py @@ -15,12 +15,19 @@ import asyncio import itertools import logging +import os import sys import time from core.agent import AgentCore from core.config_store import ConfigStore +try: # POSIX-only: lets us watch for an ESC keypress mid-turn + import termios + import tty +except ImportError: # pragma: no cover - non-POSIX + termios = tty = None + log = logging.getLogger(__name__) USER_ID = "repl" @@ -137,7 +144,40 @@ def _print_debug_config(config) -> None: print(f"\n{_CYAN}── REPL debug config ──{_RESET}") for k, v in rows: print(f" {_DIM}{k:>10}{_RESET} {v}") - print("\nCtrl-D or /exit to quit.\n") + print("\nESC interrupts a turn · /clear resets context · Ctrl-D or /exit quits.\n") + + +async def _run_turn(agent: AgentCore, spinner: Spinner, text: str): + """Run one turn, cancellable by pressing ESC. Returns None if interrupted.""" + proc = asyncio.create_task( + agent.process(message=text, channel="repl", user_id=USER_ID, chat_id=USER_ID) + ) + fd = sys.stdin.fileno() + loop = asyncio.get_running_loop() + watch = termios is not None and sys.stdin.isatty() + old = termios.tcgetattr(fd) if watch else None + + def _on_key() -> None: + # A lone ESC (b"\x1b") interrupts; escape sequences (arrows) read longer → ignore. + try: + if os.read(fd, 16) == b"\x1b": + proc.cancel() + except OSError: + pass + + if watch: + tty.setcbreak(fd) + loop.add_reader(fd, _on_key) + spinner.start() + try: + return await proc + except asyncio.CancelledError: + return None + finally: + if watch: + loop.remove_reader(fd) + termios.tcsetattr(fd, termios.TCSADRAIN, old) + await spinner.stop() async def main() -> None: @@ -164,13 +204,14 @@ async def main() -> None: continue if text in ("/exit", "/quit"): break - spinner.start() - try: - response = await agent.process( - message=text, channel="repl", user_id=USER_ID, chat_id=USER_ID - ) - finally: - await spinner.stop() + if text == "/clear": + await agent.history.clear("repl", USER_ID, USER_ID) + print("[context cleared]\n") + continue + response = await _run_turn(agent, spinner, text) + if response is None: + print("\n[interrupted]\n") + continue if response.text: print(f"\n{response.text}\n") if getattr(response, "system_notice", None): From 718326b14c2cca222f4cd195012c4c35f5aa81fd Mon Sep 17 00:00:00 2001 From: Matteo Merola Date: Thu, 25 Jun 2026 14:32:16 +0200 Subject: [PATCH 6/7] fix(repl): drop redundant 'Processing message' log from the stream --- core/repl.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/core/repl.py b/core/repl.py index 188f531..e6efed6 100644 --- a/core/repl.py +++ b/core/repl.py @@ -54,6 +54,8 @@ def __init__(self, spinner: Spinner): self.spinner = spinner def emit(self, record: logging.LogRecord) -> None: + if record.getMessage().startswith("Processing message"): + return # redundant in a REPL — you just typed it (and it shows "repl/repl/repl") color = _DIM if record.name == "core.llm.reasoning" else _CYAN line = f" {color}· {record.getMessage()}{_RESET}" sys.stderr.write("\r\033[K" + line + "\n") From ece518a02d883284482f9ee07b3cbd32523591f2 Mon Sep 17 00:00:00 2001 From: Matteo Merola Date: Thu, 25 Jun 2026 14:38:34 +0200 Subject: [PATCH 7/7] docs(voice): harden speakable-text rules (ban more symbols, whole-response scope) --- skills/voice.md | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/skills/voice.md b/skills/voice.md index 6b0c909..9a4f738 100644 --- a/skills/voice.md +++ b/skills/voice.md @@ -20,17 +20,13 @@ Do NOT use voice responses when: ## Writing for voice -When you add `[respond_with_voice]`, write the whole message to be *spoken*, not -read. The medium changed, so the style changes with it. Before deciding on voice, -ask: does this content even work aloud? If it only makes sense on screen, reply -with text instead. - -A voice reply must contain only plain, speakable words: -- No emojis, no symbols (`*`, `#`, `~`, `>`, etc.) — say the meaning instead. -- No URLs — describe the link ("I sent the booking page") or send it as text. +When you add `[respond_with_voice]`, the *entire response* (text preamble included) must be written to be spoken, not read. The medium changes with the tag, so the whole message changes with it. Before deciding on voice, ask: does this content even work aloud? If it only makes sense on screen, reply with text instead. + +A voice response must contain only plain, speakable words, from start to finish: +- No emojis, no symbols (`*`, `#`, `~`, `>`, `:`, `;`, `-`, etc.) — say the meaning instead. +- No URLs — describe the link ("I sent the booking page") or send it as text separately without the voice tag. - No code snippets, tables, or structured/markdown formatting. -- No bullet points or dashes as list markers — speak it as flowing sentences - ("First… then… finally…"). +- No bullet points or dashes as list markers — speak it as flowing sentences ("First... then... finally..."). - Spell awkward things out: say "version one point two", not "v1.2". Keep it short and conversational, the way you'd actually say it out loud.