Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 8 additions & 2 deletions Makefile
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
.PHONY: help setup setup-hooks install install-dev sync lock lint format test run dev dev-agent dev-css dev-wa clean release css docs docs-dev
.PHONY: help setup setup-hooks install install-dev sync lock lint format test run repl dev dev-agent dev-css dev-wa clean release css docs docs-dev

PORT := 8001
PYTHON := uv run python
UV := uv
TAILWIND := ./tailwindcss
Expand All @@ -23,6 +24,7 @@ help:
@echo " make lock Update lockfile after changing pyproject.toml"
@echo ""
@echo " Development:"
@echo " make repl Chat with the agent from the terminal (no Telegram)"
@echo " make dev Show instructions for running dev services"
@echo " make dev-agent Run agent with auto-reload"
@echo " make dev-css Run Tailwind CSS watcher"
Expand Down Expand Up @@ -86,6 +88,10 @@ test:
run:
$(UV) run python -m core.main

# Local REPL — chat with the agent from the terminal (no Telegram)
repl:
$(PYTHON) -m core.repl

# Run in dev mode: instructions for running services in separate shells
dev:
@echo ""
Expand All @@ -107,7 +113,7 @@ dev:
# Dev: admin API with auto-reload on code changes (agent managed via UI)
dev-agent:
PYTHONWARNINGS="ignore::UserWarning:multiprocessing.resource_tracker" \
$(UV) run uvicorn core.main:app --reload --host 0.0.0.0 --port 8000 --log-level info \
$(UV) run uvicorn core.main:app --reload --host 0.0.0.0 --port $(PORT) --log-level info \
--reload-dir api --reload-dir core --reload-dir channels --reload-dir schema \
--reload-dir skills --reload-dir tools --reload-dir voice

Expand Down
4 changes: 2 additions & 2 deletions config.yml.example
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
agent:
name: "Clio"
owner_name: "Matteo"
llm_provider: "anthropic"
llm_provider: "deepseek"
anthropic_api_key: "${ANTHROPIC_API_KEY}"
openai_api_key: "${OPENAI_API_KEY}"
openai_base_url: "${OPENAI_BASE_URL}"
Expand All @@ -11,7 +11,7 @@ agent:
grok_base_url: "${GROK_BASE_URL}"
deepseek_api_key: "${DEEPSEEK_API_KEY}"
deepseek_base_url: "${DEEPSEEK_BASE_URL}"
model: "claude-sonnet-4-5-20250514"
model: "deepseek-v4-flash"
timezone: "Europe/Zurich"
skills_dir: "skills/"

Expand Down
4 changes: 2 additions & 2 deletions core/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ def _resolve_env_vars(obj: object) -> object:
class AgentConfig(BaseModel):
name: str = "Clio"
owner_name: str = "Matteo"
llm_provider: str = "anthropic"
llm_provider: str = "deepseek"
anthropic_api_key: str = ""
openai_api_key: str = ""
openai_base_url: str = ""
Expand All @@ -47,7 +47,7 @@ class AgentConfig(BaseModel):
grok_base_url: str = ""
deepseek_api_key: str = ""
deepseek_base_url: str = ""
model: str = "claude-4-6-sonnet"
model: str = "deepseek-v4-flash"
thinking_level: str = "" # "" (off) | "low" | "medium" | "high" — only for reasoning models
timezone: str = "Europe/Zurich"
skills_dir: str = "skills/"
Expand Down
21 changes: 21 additions & 0 deletions core/llm.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,17 @@

import importlib
import json
import logging
from dataclasses import dataclass
from typing import Any, cast

from anthropic import AsyncAnthropic

# Dedicated logger for model chain-of-thought. Silent by default (WARNING);
# the REPL bumps it to INFO to stream reasoning live without spamming server logs.
reasoning_log = logging.getLogger("core.llm.reasoning")
reasoning_log.setLevel(logging.WARNING)

_DEFAULT_BASE_URLS = {
"google": "https://generativelanguage.googleapis.com/v1beta/openai",
"grok": "https://api.x.ai/v1",
Expand All @@ -31,6 +37,7 @@ class LLMToolCall:
class LLMResponse:
text: str
tool_calls: list[LLMToolCall]
reasoning: str = "" # model chain-of-thought, when the provider exposes it
raw: object | None = None
# Token usage for the request, when the provider reports it. Keys:
# input_tokens, output_tokens, cache_read_input_tokens,
Expand Down Expand Up @@ -209,6 +216,7 @@ async def generate(
)
tool_calls = []
text_parts = []
reasoning_parts = []
for block in response.content:
block_any = cast(Any, block)
if getattr(block_any, "type", None) == "tool_use":
Expand All @@ -221,9 +229,15 @@ async def generate(
)
if getattr(block_any, "type", None) == "text":
text_parts.append(getattr(block_any, "text", ""))
if getattr(block_any, "type", None) == "thinking":
reasoning_parts.append(getattr(block_any, "thinking", ""))
reasoning = "\n".join(p for p in reasoning_parts if p).strip()
if reasoning:
reasoning_log.info("%s", reasoning)
return LLMResponse(
text="\n".join(text_parts).strip(),
tool_calls=tool_calls,
reasoning=reasoning,
raw=response.content,
usage=_anthropic_usage(response),
)
Expand All @@ -247,9 +261,16 @@ async def generate(
except json.JSONDecodeError:
args = {}
tool_calls.append(LLMToolCall(id=call.id, name=call.function.name, arguments=args))
# DeepSeek/others expose CoT as message.reasoning_content (or .reasoning).
reasoning = (
getattr(message, "reasoning_content", None) or getattr(message, "reasoning", None) or ""
).strip()
if reasoning:
reasoning_log.info("%s", reasoning)
return LLMResponse(
text=(message.content or "").strip(),
tool_calls=tool_calls,
reasoning=reasoning,
raw=message.model_dump(exclude_none=True),
usage=_openai_usage(response),
)
Expand Down
227 changes: 227 additions & 0 deletions core/repl.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,227 @@
"""Local REPL channel — talk to the agent from the terminal, no Telegram.

Run: make repl (or uv run python -m core.repl)

Builds the agent from the same config store the server uses, registers itself
as the ``repl`` channel so permission approvals route to a y/n terminal prompt,
then loops on stdin. Ctrl-D or ``/exit`` quits.

While the agent works, a spinner shows it's busy and the chain of thought
(model reasoning + each tool call) streams live above it.
"""

from __future__ import annotations

import asyncio
import itertools
import logging
import os
import sys
import time

from core.agent import AgentCore
from core.config_store import ConfigStore

try: # POSIX-only: lets us watch for an ESC keypress mid-turn
import termios
import tty
except ImportError: # pragma: no cover - non-POSIX
termios = tty = None

# Module-level logger for the REPL itself.
log = logging.getLogger(__name__)

# Identity passed as both user_id and chat_id on every turn and by /clear.
USER_ID = "repl"

# Loggers whose INFO output is the agent's "chain of thought" / activity trail.
# _setup_logging raises these to INFO so their records reach the spinner handler.
_THOUGHT_LOGGERS = ("core.agent", "core.executor", "core.llm.reasoning")
# Loggers that _setup_logging pins at WARNING.
_NOISY_LOGGERS = ("httpx", "httpcore", "apscheduler", "telegram")


# ANSI SGR escape sequences used to style terminal output.
_DIM = "\033[2m"  # thinking / reasoning — low contrast
_CYAN = "\033[36m"  # tool calls / agent activity — stands out
_RESET = "\033[0m"  # back to default attributes


class _SpinnerHandler(logging.Handler):
"""Prints log lines above the spinner, clearing its line first.

Reasoning (``core.llm.reasoning``) renders dim; everything else
(tool calls, agent activity) renders cyan so it stands out.
"""

def __init__(self, spinner: Spinner):
super().__init__()
self.spinner = spinner

def emit(self, record: logging.LogRecord) -> None:
if record.getMessage().startswith("Processing message"):
return # redundant in a REPL — you just typed it (and it shows "repl/repl/repl")
color = _DIM if record.name == "core.llm.reasoning" else _CYAN
line = f" {color}· {record.getMessage()}{_RESET}"
sys.stderr.write("\r\033[K" + line + "\n")
sys.stderr.flush()
self.spinner.redraw()


class Spinner:
    """Background \\r spinner on stderr. Start before a turn, stop after.

    ``start`` and ``stop`` are both safe to call repeatedly: starting an
    already-running spinner is a no-op, as is stopping an idle one.
    """

    # Shared, endless iterator over the animation frames.
    _frames = itertools.cycle("⠋⠙⠹⠸⠼⠴⠦⠧⠇⠏")

    def __init__(self) -> None:
        self._task: asyncio.Task | None = None  # animation task; None while idle
        self._start = 0.0  # time.monotonic() reading taken by start()
        self._frame = "⠋"  # frame most recently drawn

    def redraw(self) -> None:
        """Repaint the spinner line; does nothing while the spinner is idle."""
        if self._task is None:  # not running — startup/idle log records mustn't draw it
            return
        sys.stderr.write(f"\r\033[K\033[2m{self._frame} thinking… {self._elapsed():.0f}s\033[0m")
        sys.stderr.flush()

    def _elapsed(self) -> float:
        """Return the seconds elapsed since the last ``start``."""
        return time.monotonic() - self._start

    async def _run(self) -> None:
        """Advance and repaint the animation every 0.1 s until cancelled."""
        while True:
            self._frame = next(self._frames)
            self.redraw()
            await asyncio.sleep(0.1)

    def start(self) -> None:
        """Begin animating. Requires a running event loop.

        A second call while already running is ignored; replacing the task
        would orphan the first one, which could then never be cancelled.
        """
        if self._task is not None:
            return
        self._start = time.monotonic()
        self._task = asyncio.create_task(self._run())

    async def stop(self) -> None:
        """Cancel the animation (if any) and erase the spinner line."""
        # Detach first so log records arriving during shutdown don't repaint.
        task, self._task = self._task, None
        if task is not None:
            task.cancel()
            try:
                await task
            except asyncio.CancelledError:
                pass
        sys.stderr.write("\r\033[K")
        sys.stderr.flush()


class ReplChannel:
    """Minimal channel: prints approval prompts and reads a y/n from stdin."""

    def __init__(self, agent: AgentCore, spinner: Spinner):
        self.agent = agent
        self.spinner = spinner

    async def send(self, chat_id, text: str) -> None:
        """Print ``text`` to stdout; ``chat_id`` is not used."""
        print(f"\n{text}\n")

    async def send_approval_request(self, user_id: str, request_id: str, description: str) -> None:
        """Ask for y/n on the terminal and resolve the pending approval.

        Only ``y`` / ``yes`` (any case, surrounding whitespace ignored)
        approves. End-of-input (Ctrl-D) counts as the default "No", so the
        request is always resolved and the spinner always restarted.
        ``user_id`` is not used.
        """
        await self.spinner.stop()  # don't fight the prompt for the line
        # NOTE(review): when this runs mid-turn on a TTY, _run_turn has put stdin
        # in cbreak mode and registered its own reader on it, so the typed answer
        # may not echo and may race with that reader — confirm on a real terminal.
        try:
            # input() blocks, so run it off the event loop.
            ans = await asyncio.to_thread(input, f"\n[approval] {description}\nallow? [y/N] ")
        except EOFError:
            ans = ""
        self.agent.permissions.resolve_approval(request_id, ans.strip().lower() in ("y", "yes"))
        self.spinner.start()


def _setup_logging(spinner: Spinner) -> None:
    """Route all logging through a single spinner-aware handler.

    The root logger keeps only that handler and sits at WARNING; the
    chain-of-thought loggers are raised to INFO and the noisy third-party
    loggers are held at WARNING.
    """
    root_logger = logging.getLogger()
    root_logger.handlers = [_SpinnerHandler(spinner)]
    root_logger.setLevel(logging.WARNING)

    level_groups = (
        (logging.INFO, _THOUGHT_LOGGERS),
        (logging.WARNING, _NOISY_LOGGERS),
    )
    for level, logger_names in level_groups:
        for logger_name in logger_names:
            logging.getLogger(logger_name).setLevel(level)


def _print_debug_config(config) -> None:
    """Print a short summary of the active configuration, then the key hints.

    Reads ``config.agent``, ``config.memory``, ``config.history`` and
    ``config.voice``; an empty ``thinking_level`` is shown as ``off``.
    """
    agent_cfg = config.agent
    thinking = agent_cfg.thinking_level or "off"
    voice_state = "on" if config.voice.tts_enabled else "off"

    summary = {
        "agent": f"{agent_cfg.name} (owner {agent_cfg.owner_name})",
        "inference": f"{agent_cfg.llm_provider} / {agent_cfg.model} thinking={thinking}",
        "memory": f"{config.memory.extraction_provider}/{config.memory.extraction_model}",
        "history": config.history.mode,
        "voice": voice_state,
        "timezone": agent_cfg.timezone,
    }

    print(f"\n{_CYAN}── REPL debug config ──{_RESET}")
    for label, value in summary.items():
        # Labels are right-aligned in a 10-character column.
        print(f" {_DIM}{label:>10}{_RESET} {value}")
    print("\nESC interrupts a turn · /clear resets context · Ctrl-D or /exit quits.\n")


async def _run_turn(agent: AgentCore, spinner: Spinner, text: str):
    """Run one agent turn, cancellable by pressing ESC.

    Returns the result of ``agent.process``, or ``None`` if the turn was
    interrupted. ESC watching is enabled only when ``termios`` is available
    and stdin is a TTY.

    All terminal setup happens inside the ``try`` so that a failure part-way
    through (registering the reader, starting the spinner) still restores the
    terminal mode and does not leave the turn task running unattended.
    """
    proc = asyncio.create_task(
        agent.process(message=text, channel="repl", user_id=USER_ID, chat_id=USER_ID)
    )
    loop = asyncio.get_running_loop()
    watch = termios is not None and sys.stdin.isatty()
    # Only touch the file descriptor when we will actually watch it.
    fd = sys.stdin.fileno() if watch else -1
    old = None  # saved terminal attributes, set once they have been read

    def _on_key() -> None:
        # A lone ESC (b"\x1b") interrupts; escape sequences (arrows) read longer → ignore.
        try:
            if os.read(fd, 16) == b"\x1b":
                proc.cancel()
        except OSError:
            pass

    try:
        if watch:
            old = termios.tcgetattr(fd)
            tty.setcbreak(fd)  # deliver keypresses immediately, without Enter
            loop.add_reader(fd, _on_key)
        spinner.start()
        return await proc
    except asyncio.CancelledError:
        return None
    finally:
        if watch:
            loop.remove_reader(fd)
            if old is not None:
                termios.tcsetattr(fd, termios.TCSADRAIN, old)
        if not proc.done():
            # Reached only when setup failed before the turn was awaited.
            proc.cancel()
        await spinner.stop()


async def main() -> None:
    """Build the agent from the config store and run the interactive loop.

    Commands: ``/exit`` or ``/quit`` (or end-of-input) leave the loop,
    ``/clear`` resets the conversation history, blank lines are ignored, and
    anything else is sent to the agent as one turn.
    """
    spinner = Spinner()
    _setup_logging(spinner)

    config_store = ConfigStore()
    await config_store.seed_if_empty()
    await config_store.ensure_admin_password()
    config = await config_store.export_to_config()

    agent = AgentCore(config)
    # Register this terminal as the "repl" channel on the agent.
    agent.channels["repl"] = ReplChannel(agent, spinner)

    _print_debug_config(config)

    quit_commands = ("/exit", "/quit")
    while True:
        try:
            # input() blocks, so it runs in a worker thread.
            raw_line = await asyncio.to_thread(input, "> ")
        except EOFError:
            break

        command = raw_line.strip()
        if not command:
            continue
        if command in quit_commands:
            break
        if command == "/clear":
            await agent.history.clear("repl", USER_ID, USER_ID)
            print("[context cleared]\n")
            continue

        reply = await _run_turn(agent, spinner, command)
        if reply is None:
            print("\n[interrupted]\n")
            continue

        if reply.text:
            print(f"\n{reply.text}\n")
        notice = getattr(reply, "system_notice", None)
        if notice:
            print(f"[system] {notice}\n")


# Script entry point: run the REPL until it returns.
if __name__ == "__main__":
    try:
        asyncio.run(main())
    except KeyboardInterrupt:
        # Ctrl-C ends the session without printing a traceback.
        pass
16 changes: 6 additions & 10 deletions skills/voice.md
Original file line number Diff line number Diff line change
Expand Up @@ -20,17 +20,13 @@ Do NOT use voice responses when:

## Writing for voice

When you add `[respond_with_voice]`, write the whole message to be *spoken*, not
read. The medium changed, so the style changes with it. Before deciding on voice,
ask: does this content even work aloud? If it only makes sense on screen, reply
with text instead.

A voice reply must contain only plain, speakable words:
- No emojis, no symbols (`*`, `#`, `~`, `>`, etc.) — say the meaning instead.
- No URLs — describe the link ("I sent the booking page") or send it as text.
When you add `[respond_with_voice]`, the *entire response* (text preamble included) must be written to be spoken, not read. The medium changes with the tag, so the whole message changes with it. Before deciding on voice, ask: does this content even work aloud? If it only makes sense on screen, reply with text instead.

A voice response must contain only plain, speakable words, from start to finish:
- No emojis, no symbols (`*`, `#`, `~`, `>`, `:`, `;`, `-`, etc.) — say the meaning instead.
- No URLs — describe the link ("I sent the booking page") or send it as text separately without the voice tag.
- No code snippets, tables, or structured/markdown formatting.
- No bullet points or dashes as list markers — speak it as flowing sentences
("First… then… finally…").
- No bullet points or dashes as list markers — speak it as flowing sentences ("First... then... finally...").
- Spell awkward things out: say "version one point two", not "v1.2".

Keep it short and conversational, the way you'd actually say it out loud.
Loading