diff --git a/Makefile b/Makefile index d83b7de..ebb9c95 100644 --- a/Makefile +++ b/Makefile @@ -1,5 +1,6 @@ -.PHONY: help setup setup-hooks install install-dev sync lock lint format test run dev dev-agent dev-css dev-wa clean release css docs docs-dev +.PHONY: help setup setup-hooks install install-dev sync lock lint format test run repl dev dev-agent dev-css dev-wa clean release css docs docs-dev +PORT := 8001 PYTHON := uv run python UV := uv TAILWIND := ./tailwindcss @@ -23,6 +24,7 @@ help: @echo " make lock Update lockfile after changing pyproject.toml" @echo "" @echo " Development:" + @echo " make repl Chat with the agent from the terminal (no Telegram)" @echo " make dev Show instructions for running dev services" @echo " make dev-agent Run agent with auto-reload" @echo " make dev-css Run Tailwind CSS watcher" @@ -86,6 +88,10 @@ test: run: $(UV) run python -m core.main +# Local REPL — chat with the agent from the terminal (no Telegram) +repl: + $(PYTHON) -m core.repl + # Run in dev mode: instructions for running services in separate shells dev: @echo "" @@ -107,7 +113,7 @@ dev: # Dev: admin API with auto-reload on code changes (agent managed via UI) dev-agent: PYTHONWARNINGS="ignore::UserWarning:multiprocessing.resource_tracker" \ - $(UV) run uvicorn core.main:app --reload --host 0.0.0.0 --port 8000 --log-level info \ + $(UV) run uvicorn core.main:app --reload --host 0.0.0.0 --port $(PORT) --log-level info \ --reload-dir api --reload-dir core --reload-dir channels --reload-dir schema \ --reload-dir skills --reload-dir tools --reload-dir voice diff --git a/config.yml.example b/config.yml.example index 4991aa0..683a9bf 100644 --- a/config.yml.example +++ b/config.yml.example @@ -1,7 +1,7 @@ agent: name: "Clio" owner_name: "Matteo" - llm_provider: "anthropic" + llm_provider: "deepseek" anthropic_api_key: "${ANTHROPIC_API_KEY}" openai_api_key: "${OPENAI_API_KEY}" openai_base_url: "${OPENAI_BASE_URL}" @@ -11,7 +11,7 @@ agent: grok_base_url: "${GROK_BASE_URL}" deepseek_api_key: "${DEEPSEEK_API_KEY}" deepseek_base_url: "${DEEPSEEK_BASE_URL}" - model: "claude-sonnet-4-5-20250514" + model: "deepseek-v4-flash" timezone: "Europe/Zurich" skills_dir: "skills/" diff --git a/core/config.py b/core/config.py index 8d9b90a..2cf9ba2 100644 --- a/core/config.py +++ b/core/config.py @@ -37,7 +37,7 @@ def _resolve_env_vars(obj: object) -> object: class AgentConfig(BaseModel): name: str = "Clio" owner_name: str = "Matteo" - llm_provider: str = "anthropic" + llm_provider: str = "deepseek" anthropic_api_key: str = "" openai_api_key: str = "" openai_base_url: str = "" @@ -47,7 +47,7 @@ class AgentConfig(BaseModel): grok_base_url: str = "" deepseek_api_key: str = "" deepseek_base_url: str = "" - model: str = "claude-4-6-sonnet" + model: str = "deepseek-v4-flash" thinking_level: str = "" # "" (off) | "low" | "medium" | "high" — only for reasoning models timezone: str = "Europe/Zurich" skills_dir: str = "skills/" diff --git a/core/llm.py b/core/llm.py index 5b5318f..e4c2763 100644 --- a/core/llm.py +++ b/core/llm.py @@ -4,11 +4,17 @@ import importlib import json +import logging from dataclasses import dataclass from typing import Any, cast from anthropic import AsyncAnthropic +# Dedicated logger for model chain-of-thought. Silent by default (WARNING); +# the REPL bumps it to INFO to stream reasoning live without spamming server logs. +reasoning_log = logging.getLogger("core.llm.reasoning") +reasoning_log.setLevel(logging.WARNING) + _DEFAULT_BASE_URLS = { "google": "https://generativelanguage.googleapis.com/v1beta/openai", "grok": "https://api.x.ai/v1", @@ -31,6 +37,7 @@ class LLMToolCall: class LLMResponse: text: str tool_calls: list[LLMToolCall] + reasoning: str = "" # model chain-of-thought, when the provider exposes it raw: object | None = None # Token usage for the request, when the provider reports it. Keys: # input_tokens, output_tokens, cache_read_input_tokens, @@ -209,6 +216,7 @@ async def generate( ) tool_calls = [] text_parts = [] + reasoning_parts = [] for block in response.content: block_any = cast(Any, block) if getattr(block_any, "type", None) == "tool_use": @@ -221,9 +229,15 @@ async def generate( ) if getattr(block_any, "type", None) == "text": text_parts.append(getattr(block_any, "text", "")) + if getattr(block_any, "type", None) == "thinking": + reasoning_parts.append(getattr(block_any, "thinking", "")) + reasoning = "\n".join(p for p in reasoning_parts if p).strip() + if reasoning: + reasoning_log.info("%s", reasoning) return LLMResponse( text="\n".join(text_parts).strip(), tool_calls=tool_calls, + reasoning=reasoning, raw=response.content, usage=_anthropic_usage(response), ) @@ -247,9 +261,16 @@ async def generate( except json.JSONDecodeError: args = {} tool_calls.append(LLMToolCall(id=call.id, name=call.function.name, arguments=args)) + # DeepSeek/others expose CoT as message.reasoning_content (or .reasoning). + reasoning = ( + getattr(message, "reasoning_content", None) or getattr(message, "reasoning", None) or "" + ).strip() + if reasoning: + reasoning_log.info("%s", reasoning) return LLMResponse( text=(message.content or "").strip(), tool_calls=tool_calls, + reasoning=reasoning, raw=message.model_dump(exclude_none=True), usage=_openai_usage(response), ) diff --git a/core/repl.py b/core/repl.py new file mode 100644 index 0000000..e6efed6 --- /dev/null +++ b/core/repl.py @@ -0,0 +1,227 @@ +"""Local REPL channel — talk to the agent from the terminal, no Telegram. + +Run: make repl (or uv run python -m core.repl) + +Builds the agent from the same config store the server uses, registers itself +as the ``repl`` channel so permission approvals route to a y/n terminal prompt, +then loops on stdin. Ctrl-D or ``/exit`` quits. + +While the agent works, a spinner shows it's busy and the chain of thought +(model reasoning + each tool call) streams live above it. +""" + +from __future__ import annotations + +import asyncio +import itertools +import logging +import os +import sys +import time + +from core.agent import AgentCore +from core.config_store import ConfigStore + +try: # POSIX-only: lets us watch for an ESC keypress mid-turn + import termios + import tty +except ImportError: # pragma: no cover - non-POSIX + termios = tty = None + +log = logging.getLogger(__name__) + +USER_ID = "repl" + +# Loggers whose INFO output is the agent's "chain of thought" / activity trail. +_THOUGHT_LOGGERS = ("core.agent", "core.executor", "core.llm.reasoning") +_NOISY_LOGGERS = ("httpx", "httpcore", "apscheduler", "telegram") + + +_DIM = "\033[2m" # thinking / reasoning — low contrast +_CYAN = "\033[36m" # tool calls / agent activity — stands out +_RESET = "\033[0m" + + +class _SpinnerHandler(logging.Handler): + """Prints log lines above the spinner, clearing its line first. + + Reasoning (``core.llm.reasoning``) renders dim; everything else + (tool calls, agent activity) renders cyan so it stands out. + """ + + def __init__(self, spinner: Spinner): + super().__init__() + self.spinner = spinner + + def emit(self, record: logging.LogRecord) -> None: + if record.getMessage().startswith("Processing message"): + return # redundant in a REPL — you just typed it (and it shows "repl/repl/repl") + color = _DIM if record.name == "core.llm.reasoning" else _CYAN + line = f" {color}· {record.getMessage()}{_RESET}" + sys.stderr.write("\r\033[K" + line + "\n") + sys.stderr.flush() + self.spinner.redraw() + + +class Spinner: + """Background \\r spinner on stderr. Start before a turn, stop after.""" + + _frames = itertools.cycle("⠋⠙⠹⠸⠼⠴⠦⠧⠇⠏") + + def __init__(self) -> None: + self._task: asyncio.Task | None = None + self._start = 0.0 + self._frame = "⠋" + + def redraw(self) -> None: + if self._task is None: # not running — startup/idle log records mustn't draw it + return + sys.stderr.write(f"\r\033[K\033[2m{self._frame} thinking… {self._elapsed():.0f}s\033[0m") + sys.stderr.flush() + + def _elapsed(self) -> float: + return time.monotonic() - self._start + + async def _run(self) -> None: + while True: + self._frame = next(self._frames) + self.redraw() + await asyncio.sleep(0.1) + + def start(self) -> None: + self._start = time.monotonic() + self._task = asyncio.create_task(self._run()) + + async def stop(self) -> None: + if self._task: + self._task.cancel() + try: + await self._task + except asyncio.CancelledError: + pass + self._task = None + sys.stderr.write("\r\033[K") + sys.stderr.flush() + + +class ReplChannel: + """Minimal channel: prints approval prompts and reads a y/n from stdin.""" + + def __init__(self, agent: AgentCore, spinner: Spinner): + self.agent = agent + self.spinner = spinner + + async def send(self, chat_id, text: str) -> None: + print(f"\n{text}\n") + + async def send_approval_request(self, user_id: str, request_id: str, description: str) -> None: + await self.spinner.stop() # don't fight the prompt for the line + ans = await asyncio.to_thread(input, f"\n[approval] {description}\nallow? [y/N] ") + self.agent.permissions.resolve_approval(request_id, ans.strip().lower() in ("y", "yes")) + self.spinner.start() + + +def _setup_logging(spinner: Spinner) -> None: + handler = _SpinnerHandler(spinner) + root = logging.getLogger() + root.handlers = [handler] + root.setLevel(logging.WARNING) + for name in _THOUGHT_LOGGERS: + logging.getLogger(name).setLevel(logging.INFO) + for name in _NOISY_LOGGERS: + logging.getLogger(name).setLevel(logging.WARNING) + + +def _print_debug_config(config) -> None: + a = config.agent + th = a.thinking_level or "off" + rows = [ + ("agent", f"{a.name} (owner {a.owner_name})"), + ("inference", f"{a.llm_provider} / {a.model} thinking={th}"), + ("memory", f"{config.memory.extraction_provider}/{config.memory.extraction_model}"), + ("history", config.history.mode), + ("voice", "on" if config.voice.tts_enabled else "off"), + ("timezone", a.timezone), + ] + print(f"\n{_CYAN}── REPL debug config ──{_RESET}") + for k, v in rows: + print(f" {_DIM}{k:>10}{_RESET} {v}") + print("\nESC interrupts a turn · /clear resets context · Ctrl-D or /exit quits.\n") + + +async def _run_turn(agent: AgentCore, spinner: Spinner, text: str): + """Run one turn, cancellable by pressing ESC. Returns None if interrupted.""" + proc = asyncio.create_task( + agent.process(message=text, channel="repl", user_id=USER_ID, chat_id=USER_ID) + ) + fd = sys.stdin.fileno() + loop = asyncio.get_running_loop() + watch = termios is not None and sys.stdin.isatty() + old = termios.tcgetattr(fd) if watch else None + + def _on_key() -> None: + # A lone ESC (b"\x1b") interrupts; escape sequences (arrows) read longer → ignore. + try: + if os.read(fd, 16) == b"\x1b": + proc.cancel() + except OSError: + pass + + if watch: + tty.setcbreak(fd) + loop.add_reader(fd, _on_key) + spinner.start() + try: + return await proc + except asyncio.CancelledError: + return None + finally: + if watch: + loop.remove_reader(fd) + termios.tcsetattr(fd, termios.TCSADRAIN, old) + await spinner.stop() + + +async def main() -> None: + spinner = Spinner() + _setup_logging(spinner) + + store = ConfigStore() + await store.seed_if_empty() + await store.ensure_admin_password() + config = await store.export_to_config() + + agent = AgentCore(config) + agent.channels["repl"] = ReplChannel(agent, spinner) + + _print_debug_config(config) + + while True: + try: + text = await asyncio.to_thread(input, "> ") + except EOFError: + break + text = text.strip() + if not text: + continue + if text in ("/exit", "/quit"): + break + if text == "/clear": + await agent.history.clear("repl", USER_ID, USER_ID) + print("[context cleared]\n") + continue + response = await _run_turn(agent, spinner, text) + if response is None: + print("\n[interrupted]\n") + continue + if response.text: + print(f"\n{response.text}\n") + if getattr(response, "system_notice", None): + print(f"[system] {response.system_notice}\n") + + +if __name__ == "__main__": + try: + asyncio.run(main()) + except KeyboardInterrupt: + pass diff --git a/skills/voice.md b/skills/voice.md index 6b0c909..9a4f738 100644 --- a/skills/voice.md +++ b/skills/voice.md @@ -20,17 +20,13 @@ Do NOT use voice responses when: ## Writing for voice -When you add `[respond_with_voice]`, write the whole message to be *spoken*, not -read. The medium changed, so the style changes with it. Before deciding on voice, -ask: does this content even work aloud? If it only makes sense on screen, reply -with text instead. - -A voice reply must contain only plain, speakable words: -- No emojis, no symbols (`*`, `#`, `~`, `>`, etc.) — say the meaning instead. -- No URLs — describe the link ("I sent the booking page") or send it as text. +When you add `[respond_with_voice]`, the *entire response* (text preamble included) must be written to be spoken, not read. The medium changes with the tag, so the whole message changes with it. Before deciding on voice, ask: does this content even work aloud? If it only makes sense on screen, reply with text instead. + +A voice response must contain only plain, speakable words, from start to finish: +- No emojis, no symbols (`*`, `#`, `~`, `>`, `:`, `;`, `-`, etc.) — say the meaning instead. +- No URLs — describe the link ("I sent the booking page") or send it as text separately without the voice tag. - No code snippets, tables, or structured/markdown formatting. -- No bullet points or dashes as list markers — speak it as flowing sentences - ("First… then… finally…"). +- No bullet points or dashes as list markers — speak it as flowing sentences ("First... then... finally..."). - Spell awkward things out: say "version one point two", not "v1.2". Keep it short and conversational, the way you'd actually say it out loud.