mattmezza · mattmezza · Jun 25, 2026 · Jun 25, 2026 · Jun 25, 2026 · Jun 25, 2026
diff --git a/Makefile b/Makefile
@@ -1,5 +1,6 @@
-.PHONY: help setup setup-hooks install install-dev sync lock lint format test run dev dev-agent dev-css dev-wa clean release css docs docs-dev
+.PHONY: help setup setup-hooks install install-dev sync lock lint format test run repl dev dev-agent dev-css dev-wa clean release css docs docs-dev
 
+PORT := 8001
 PYTHON := uv run python
 UV := uv
 TAILWIND := ./tailwindcss
@@ -23,6 +24,7 @@ help:
 	@echo "    make lock         Update lockfile after changing pyproject.toml"
 	@echo ""
 	@echo "  Development:"
+	@echo "    make repl         Chat with the agent from the terminal (no Telegram)"
 	@echo "    make dev          Show instructions for running dev services"
 	@echo "    make dev-agent    Run agent with auto-reload"
 	@echo "    make dev-css      Run Tailwind CSS watcher"
@@ -86,6 +88,10 @@ test:
 run:
 	$(UV) run python -m core.main
 
+# Local REPL — chat with the agent from the terminal (no Telegram)
+repl:
+	$(PYTHON) -m core.repl
+
 # Run in dev mode: instructions for running services in separate shells
 dev:
 	@echo ""
@@ -107,7 +113,7 @@ dev:
 # Dev: admin API with auto-reload on code changes (agent managed via UI)
 dev-agent:
 	PYTHONWARNINGS="ignore::UserWarning:multiprocessing.resource_tracker" \
-	$(UV) run uvicorn core.main:app --reload --host 0.0.0.0 --port 8000 --log-level info \
+	$(UV) run uvicorn core.main:app --reload --host 0.0.0.0 --port $(PORT) --log-level info \
 		--reload-dir api --reload-dir core --reload-dir channels --reload-dir schema \
 		--reload-dir skills --reload-dir tools --reload-dir voice
 

diff --git a/config.yml.example b/config.yml.example
@@ -1,7 +1,7 @@
 agent:
   name: "Clio"
   owner_name: "Matteo"
-  llm_provider: "anthropic"
+  llm_provider: "deepseek"
   anthropic_api_key: "${ANTHROPIC_API_KEY}"
   openai_api_key: "${OPENAI_API_KEY}"
   openai_base_url: "${OPENAI_BASE_URL}"
@@ -11,7 +11,7 @@ agent:
   grok_base_url: "${GROK_BASE_URL}"
   deepseek_api_key: "${DEEPSEEK_API_KEY}"
   deepseek_base_url: "${DEEPSEEK_BASE_URL}"
-  model: "claude-sonnet-4-5-20250514"
+  model: "deepseek-v4-flash"
   timezone: "Europe/Zurich"
   skills_dir: "skills/"
 

diff --git a/core/config.py b/core/config.py
@@ -37,7 +37,7 @@ def _resolve_env_vars(obj: object) -> object:
 class AgentConfig(BaseModel):
     name: str = "Clio"
     owner_name: str = "Matteo"
-    llm_provider: str = "anthropic"
+    llm_provider: str = "deepseek"
     anthropic_api_key: str = ""
     openai_api_key: str = ""
     openai_base_url: str = ""
@@ -47,7 +47,7 @@ class AgentConfig(BaseModel):
     grok_base_url: str = ""
     deepseek_api_key: str = ""
     deepseek_base_url: str = ""
-    model: str = "claude-4-6-sonnet"
+    model: str = "deepseek-v4-flash"
     thinking_level: str = ""  # "" (off) | "low" | "medium" | "high" — only for reasoning models
     timezone: str = "Europe/Zurich"
     skills_dir: str = "skills/"

diff --git a/core/llm.py b/core/llm.py
@@ -4,11 +4,17 @@
 
 import importlib
 import json
+import logging
 from dataclasses import dataclass
 from typing import Any, cast
 
 from anthropic import AsyncAnthropic
 
+# Dedicated logger for model chain-of-thought. Silent by default (WARNING);
+# the REPL bumps it to INFO to stream reasoning live without spamming server logs.
+reasoning_log = logging.getLogger("core.llm.reasoning")
+reasoning_log.setLevel(logging.WARNING)
+
 _DEFAULT_BASE_URLS = {
     "google": "https://generativelanguage.googleapis.com/v1beta/openai",
     "grok": "https://api.x.ai/v1",
@@ -31,6 +37,7 @@ class LLMToolCall:
 class LLMResponse:
     text: str
     tool_calls: list[LLMToolCall]
+    reasoning: str = ""  # model chain-of-thought, when the provider exposes it
     raw: object | None = None
     # Token usage for the request, when the provider reports it. Keys:
     # input_tokens, output_tokens, cache_read_input_tokens,
@@ -209,6 +216,7 @@ async def generate(
             )
             tool_calls = []
             text_parts = []
+            reasoning_parts = []
             for block in response.content:
                 block_any = cast(Any, block)
                 if getattr(block_any, "type", None) == "tool_use":
@@ -221,9 +229,15 @@ async def generate(
                     )
                 if getattr(block_any, "type", None) == "text":
                     text_parts.append(getattr(block_any, "text", ""))
+                if getattr(block_any, "type", None) == "thinking":
+                    reasoning_parts.append(getattr(block_any, "thinking", ""))
+            reasoning = "\n".join(p for p in reasoning_parts if p).strip()
+            if reasoning:
+                reasoning_log.info("%s", reasoning)
             return LLMResponse(
                 text="\n".join(text_parts).strip(),
                 tool_calls=tool_calls,
+                reasoning=reasoning,
                 raw=response.content,
                 usage=_anthropic_usage(response),
             )
@@ -247,9 +261,16 @@ async def generate(
             except json.JSONDecodeError:
                 args = {}
             tool_calls.append(LLMToolCall(id=call.id, name=call.function.name, arguments=args))
+        # DeepSeek/others expose CoT as message.reasoning_content (or .reasoning).
+        reasoning = (
+            getattr(message, "reasoning_content", None) or getattr(message, "reasoning", None) or ""
+        ).strip()
+        if reasoning:
+            reasoning_log.info("%s", reasoning)
         return LLMResponse(
             text=(message.content or "").strip(),
             tool_calls=tool_calls,
+            reasoning=reasoning,
             raw=message.model_dump(exclude_none=True),
             usage=_openai_usage(response),
         )

diff --git a/core/repl.py b/core/repl.py
@@ -0,0 +1,227 @@
+"""Local REPL channel — talk to the agent from the terminal, no Telegram.
+
+Run:  make repl   (or  uv run python -m core.repl)
+
+Builds the agent from the same config store the server uses, registers itself
+as the ``repl`` channel so permission approvals route to a y/n terminal prompt,
+then loops on stdin. Ctrl-D or ``/exit`` quits.
+
+While the agent works, a spinner shows it's busy and the chain of thought
+(model reasoning + each tool call) streams live above it.
+"""
+
+from __future__ import annotations
+
+import asyncio
+import itertools
+import logging
+import os
+import sys
+import time
+
+from core.agent import AgentCore
+from core.config_store import ConfigStore
+
+try:  # POSIX-only: lets us watch for an ESC keypress mid-turn
+    import termios
+    import tty
+except ImportError:  # pragma: no cover - non-POSIX
+    termios = tty = None
+
+log = logging.getLogger(__name__)
+
+USER_ID = "repl"
+
+# Loggers whose INFO output is the agent's "chain of thought" / activity trail.
+_THOUGHT_LOGGERS = ("core.agent", "core.executor", "core.llm.reasoning")
+_NOISY_LOGGERS = ("httpx", "httpcore", "apscheduler", "telegram")
+
+
+_DIM = "\033[2m"  # thinking / reasoning — low contrast
+_CYAN = "\033[36m"  # tool calls / agent activity — stands out
+_RESET = "\033[0m"
+
+
+class _SpinnerHandler(logging.Handler):
+    """Prints log lines above the spinner, clearing its line first.
+
+    Reasoning (``core.llm.reasoning``) renders dim; everything else
+    (tool calls, agent activity) renders cyan so it stands out.
+
+    Like the stdlib handlers, any failure while formatting or writing a
+    record is passed to ``handleError`` instead of being raised into the
+    code that issued the log call.
+    """
+
+    def __init__(self, spinner: Spinner):
+        super().__init__()
+        self.spinner = spinner  # repainted after every printed record
+
+    def emit(self, record: logging.LogRecord) -> None:
+        """Write one record to stderr above the spinner, then repaint the spinner."""
+        try:
+            message = record.getMessage()  # may raise on mismatched format args
+            if message.startswith("Processing message"):
+                return  # redundant in a REPL — you just typed it (and it shows "repl/repl/repl")
+            color = _DIM if record.name == "core.llm.reasoning" else _CYAN
+            line = f"  {color}· {message}{_RESET}"
+            # "\r\033[K" returns to column 0 and erases the spinner's line first.
+            sys.stderr.write("\r\033[K" + line + "\n")
+            sys.stderr.flush()
+            self.spinner.redraw()
+        except Exception:
+            self.handleError(record)
+
+
+class Spinner:
+    """Background \\r spinner on stderr. Start before a turn, stop after.
+
+    ``start`` is a no-op while the spinner is already running, so overlapping
+    start/stop pairs cannot leave an orphaned animation task behind.
+    """
+
+    # Frame iterator shared at class level; one glyph is consumed per tick.
+    _frames = itertools.cycle("⠋⠙⠹⠸⠼⠴⠦⠧⠇⠏")
+
+    def __init__(self) -> None:
+        self._task: asyncio.Task | None = None  # animation task; None while idle
+        self._start = 0.0  # time.monotonic() reading taken by the last start()
+        self._frame = "⠋"  # glyph currently displayed
+
+    def redraw(self) -> None:
+        """Repaint the spinner line; does nothing while the spinner is idle."""
+        if self._task is None:  # not running — startup/idle log records mustn't draw it
+            return
+        sys.stderr.write(f"\r\033[K\033[2m{self._frame} thinking… {self._elapsed():.0f}s\033[0m")
+        sys.stderr.flush()
+
+    def _elapsed(self) -> float:
+        """Seconds elapsed since the last ``start()``."""
+        return time.monotonic() - self._start
+
+    async def _run(self) -> None:
+        """Animation loop: advance one frame and repaint every 0.1 s until cancelled."""
+        while True:
+            self._frame = next(self._frames)
+            self.redraw()
+            await asyncio.sleep(0.1)
+
+    def start(self) -> None:
+        """Begin animating and reset the elapsed timer; no-op if already running."""
+        if self._task is not None:
+            return  # a second task would overwrite the first, which stop() could never cancel
+        self._start = time.monotonic()
+        self._task = asyncio.create_task(self._run())
+
+    async def stop(self) -> None:
+        """Cancel the animation task (if any) and erase the spinner line."""
+        # Mark idle first so redraw() stops painting while the task winds down.
+        task, self._task = self._task, None
+        if task is not None:
+            task.cancel()
+            try:
+                await task
+            except asyncio.CancelledError:
+                pass  # expected: the task was cancelled just above
+        sys.stderr.write("\r\033[K")
+        sys.stderr.flush()
+
+
+class ReplChannel:
+    """Minimal channel: prints approval prompts and reads a y/n from stdin.
+
+    ``watch_keys`` is a hook that ``_run_turn`` installs while its ESC watcher
+    owns stdin: ``watch_keys(False)`` hands the terminal back (reader removed,
+    saved terminal attributes restored) and ``watch_keys(True)`` re-arms the
+    watcher. It is ``None`` whenever no turn is running.
+    """
+
+    def __init__(self, agent: AgentCore, spinner: Spinner):
+        self.agent = agent
+        self.spinner = spinner
+        self.watch_keys = None  # installed and cleared by _run_turn around each turn
+
+    async def send(self, chat_id, text: str) -> None:
+        """Print an outgoing message; ``chat_id`` is not used here."""
+        print(f"\n{text}\n")
+
+    async def send_approval_request(self, user_id: str, request_id: str, description: str) -> None:
+        """Ask ``allow? [y/N]`` on the terminal and resolve ``request_id`` with the answer.
+
+        Only ``y``/``yes`` (case-insensitive) approves; anything else, including
+        a closed stdin, denies.
+        """
+        await self.spinner.stop()  # don't fight the prompt for the line
+        watch_keys = self.watch_keys
+        if watch_keys is not None:
+            # Without this the ESC watcher races input() for every keystroke and
+            # cbreak mode hides what the user types.
+            watch_keys(False)
+        try:
+            ans = await asyncio.to_thread(input, f"\n[approval] {description}\nallow? [y/N] ")
+        except EOFError:
+            ans = ""  # stdin closed: deny rather than leave the request unresolved
+        finally:
+            if watch_keys is not None:
+                watch_keys(True)
+        self.agent.permissions.resolve_approval(request_id, ans.strip().lower() in ("y", "yes"))
+        self.spinner.start()
+
+
+def _setup_logging(spinner: Spinner) -> None:
+    """Route all logging through a single ``_SpinnerHandler``.
+
+    The root logger and the loggers in ``_NOISY_LOGGERS`` are set to WARNING;
+    the loggers in ``_THOUGHT_LOGGERS`` are set to INFO.
+    """
+    handler = _SpinnerHandler(spinner)
+    root = logging.getLogger()
+    root.handlers = [handler]  # replaces whatever handlers were installed before
+    root.setLevel(logging.WARNING)
+    for name in _THOUGHT_LOGGERS:
+        logging.getLogger(name).setLevel(logging.INFO)
+    for name in _NOISY_LOGGERS:
+        logging.getLogger(name).setLevel(logging.WARNING)
+
+
+def _print_debug_config(config) -> None:
+    """Print a short summary of the loaded configuration and the REPL key bindings."""
+    a = config.agent
+    th = a.thinking_level or "off"  # empty string means thinking is disabled
+    rows = [
+        ("agent", f"{a.name} (owner {a.owner_name})"),
+        ("inference", f"{a.llm_provider} / {a.model}  thinking={th}"),
+        ("memory", f"{config.memory.extraction_provider}/{config.memory.extraction_model}"),
+        ("history", config.history.mode),
+        ("voice", "on" if config.voice.tts_enabled else "off"),
+        ("timezone", a.timezone),
+    ]
+    print(f"\n{_CYAN}── REPL debug config ──{_RESET}")
+    for k, v in rows:
+        print(f"  {_DIM}{k:>10}{_RESET}  {v}")
+    print("\nESC interrupts a turn · /clear resets context · Ctrl-D or /exit quits.\n")
+
+
+async def _run_turn(agent: AgentCore, spinner: Spinner, text: str):
+    """Run one turn, cancellable by pressing ESC. Returns None if interrupted.
+
+    While the turn runs on a POSIX TTY, stdin is switched to cbreak mode and an
+    event-loop reader watches it for a lone ESC. The ``repl`` channel receives a
+    ``watch_keys`` hook for the duration of the turn so an approval prompt can
+    suspend the watcher and read its answer from a normal terminal.
+    """
+    loop = asyncio.get_running_loop()
+    watch = termios is not None and sys.stdin.isatty()
+    # Only touch the descriptor when watching: a replaced stdin may have no fileno().
+    fd = sys.stdin.fileno() if watch else -1
+    old = termios.tcgetattr(fd) if watch else None
+    armed = False  # True while cbreak mode and the ESC reader are installed
+    # Created only after the terminal state is captured, so a failure above
+    # cannot leave a running task nobody awaits.
+    proc = asyncio.create_task(
+        agent.process(message=text, channel="repl", user_id=USER_ID, chat_id=USER_ID)
+    )
+
+    def _on_key() -> None:
+        # A lone ESC (b"\x1b") interrupts; escape sequences (arrows) read longer → ignore.
+        try:
+            if os.read(fd, 16) == b"\x1b":
+                proc.cancel()
+        except OSError:
+            pass
+
+    def _set_watch(on: bool) -> None:
+        """Arm or disarm the ESC watcher; idempotent, never re-arms a finished turn."""
+        nonlocal armed
+        if not watch or on == armed or (on and proc.done()):
+            return
+        if on:
+            tty.setcbreak(fd)
+            loop.add_reader(fd, _on_key)
+        else:
+            loop.remove_reader(fd)
+            termios.tcsetattr(fd, termios.TCSADRAIN, old)
+        armed = on
+
+    channel = agent.channels.get("repl")
+    if not isinstance(channel, ReplChannel):
+        channel = None  # nothing to hook: approvals are not routed to this terminal
+    if channel is not None:
+        channel.watch_keys = _set_watch
+    _set_watch(True)
+    spinner.start()
+    try:
+        return await proc
+    except asyncio.CancelledError:
+        return None
+    finally:
+        if channel is not None:
+            channel.watch_keys = None
+        _set_watch(False)
+        await spinner.stop()
+
+
+async def main() -> None:
+    """Build the agent from the config store and run the read-eval-print loop.
+
+    ``/exit``, ``/quit`` or Ctrl-D leave the loop and ``/clear`` resets the
+    conversation history; any other non-empty line is sent to the agent as one
+    turn. A turn that raises is reported with its traceback and the loop keeps
+    going, so one failed request does not end the session.
+    """
+    spinner = Spinner()
+    _setup_logging(spinner)
+
+    store = ConfigStore()
+    await store.seed_if_empty()
+    await store.ensure_admin_password()
+    config = await store.export_to_config()
+
+    agent = AgentCore(config)
+    agent.channels["repl"] = ReplChannel(agent, spinner)
+
+    _print_debug_config(config)
+
+    while True:
+        try:
+            # input() blocks, so it runs in a worker thread to keep the loop free.
+            text = await asyncio.to_thread(input, "> ")
+        except EOFError:
+            break
+        text = text.strip()
+        if not text:
+            continue
+        if text in ("/exit", "/quit"):
+            break
+        if text == "/clear":
+            await agent.history.clear("repl", USER_ID, USER_ID)
+            print("[context cleared]\n")
+            continue
+        try:
+            response = await _run_turn(agent, spinner, text)
+        except Exception:
+            import traceback  # local import: only needed on the failure path
+
+            traceback.print_exc()
+            print("\n[turn failed — see traceback above]\n")
+            continue
+        if response is None:
+            print("\n[interrupted]\n")
+            continue
+        if response.text:
+            print(f"\n{response.text}\n")
+        if getattr(response, "system_notice", None):
+            print(f"[system] {response.system_notice}\n")
+
+
+if __name__ == "__main__":
+    try:
+        asyncio.run(main())
+    except KeyboardInterrupt:  # Ctrl-C exits quietly instead of printing a traceback
+        pass
diff --git a/skills/voice.md b/skills/voice.md
@@ -20,17 +20,13 @@ Do NOT use voice responses when:
 
 ## Writing for voice
 
-When you add `[respond_with_voice]`, write the whole message to be *spoken*, not
-read. The medium changed, so the style changes with it. Before deciding on voice,
-ask: does this content even work aloud? If it only makes sense on screen, reply
-with text instead.
-
-A voice reply must contain only plain, speakable words:
-- No emojis, no symbols (`*`, `#`, `~`, `>`, etc.) — say the meaning instead.
-- No URLs — describe the link ("I sent the booking page") or send it as text.
+When you add `[respond_with_voice]`, the *entire response* (text preamble included) must be written to be spoken, not read. The medium changes with the tag, so the whole message changes with it. Before deciding on voice, ask: does this content even work aloud? If it only makes sense on screen, reply with text instead.
+
+A voice response must contain only plain, speakable words, from start to finish:
+- No emojis, no symbols (`*`, `#`, `~`, `>`, `:`, `;`, `-`, etc.) — say the meaning instead.
+- No URLs — describe the link ("I sent the booking page") or send it separately as text, without the voice tag.
 - No code snippets, tables, or structured/markdown formatting.
-- No bullet points or dashes as list markers — speak it as flowing sentences
-  ("First… then… finally…").
+- No bullet points or dashes as list markers — speak it as flowing sentences ("First... then... finally...").
 - Spell awkward things out: say "version one point two", not "v1.2".
 
 Keep it short and conversational, the way you'd actually say it out loud.