diff --git a/api/admin.py b/api/admin.py
index e69d43e..dee923c 100644
--- a/api/admin.py
+++ b/api/admin.py
@@ -792,6 +792,7 @@ async def partial_llm() -> HTMLResponse:
deepseek_api_key = await config_store.get("agent.deepseek_api_key") or ""
deepseek_base_url = await config_store.get("agent.deepseek_base_url") or ""
model = await config_store.get("agent.model") or "claude-4-6-sonnet"
+ thinking_level = await config_store.get("agent.thinking_level") or ""
extraction_provider = await config_store.get("memory.extraction_provider") or "anthropic"
extraction_model = await config_store.get("memory.extraction_model") or "claude-haiku-4-5"
consolidation_provider = (
@@ -800,6 +801,10 @@ async def partial_llm() -> HTMLResponse:
consolidation_model = (
await config_store.get("memory.consolidation_model") or "claude-haiku-4-5"
)
+ extraction_thinking_level = await config_store.get("memory.extraction_thinking_level") or ""
+ consolidation_thinking_level = (
+ await config_store.get("memory.consolidation_thinking_level") or ""
+ )
gd_enabled = await config_store.get("goal_decomposition.enabled")
gd_enabled = gd_enabled if gd_enabled is not None else "true"
gd_provider = await config_store.get("goal_decomposition.provider") or "anthropic"
@@ -808,8 +813,11 @@ async def partial_llm() -> HTMLResponse:
tr_enabled = tr_enabled if tr_enabled is not None else "true"
tr_provider = await config_store.get("task_reflection.provider") or "anthropic"
tr_model = await config_store.get("task_reflection.model") or "claude-haiku-4-5"
+ gd_thinking_level = await config_store.get("goal_decomposition.thinking_level") or ""
+ tr_thinking_level = await config_store.get("task_reflection.thinking_level") or ""
compaction_provider = await config_store.get("compaction.provider") or "anthropic"
compaction_model = await config_store.get("compaction.model") or "claude-haiku-4-5"
+ compaction_thinking_level = await config_store.get("compaction.thinking_level") or ""
prompt_tool_usage_override = await config_store.get("prompt.tool_usage_override") or ""
prompt_history_override = await config_store.get("prompt.history_handling_override") or ""
prompt_capture_enabled = await config_store.get("admin.capture_prompts")
@@ -829,18 +837,24 @@ async def partial_llm() -> HTMLResponse:
deepseek_api_key=deepseek_api_key,
deepseek_base_url=deepseek_base_url,
model=model,
+ thinking_level=thinking_level,
extraction_provider=extraction_provider,
extraction_model=extraction_model,
+ extraction_thinking_level=extraction_thinking_level,
consolidation_provider=consolidation_provider,
consolidation_model=consolidation_model,
+ consolidation_thinking_level=consolidation_thinking_level,
gd_enabled=gd_enabled,
gd_provider=gd_provider,
gd_model=gd_model,
+ gd_thinking_level=gd_thinking_level,
tr_enabled=tr_enabled,
tr_provider=tr_provider,
tr_model=tr_model,
+ tr_thinking_level=tr_thinking_level,
compaction_provider=compaction_provider,
compaction_model=compaction_model,
+ compaction_thinking_level=compaction_thinking_level,
prompt_tool_usage_override=prompt_tool_usage_override,
prompt_history_override=prompt_history_override,
default_tool_usage=DEFAULT_TOOL_USAGE_BLOCK,
@@ -2398,6 +2412,25 @@ async def list_models(request: Request) -> dict:
return await _list_models_openai(api_key, base_url)
return {"ok": False, "error": f"Unknown service: {service}"}
+    @app.post("/setup/thinking-levels")
+    async def thinking_levels(request: Request) -> dict:
+        """Autodiscover supported reasoning-effort levels for a model.
+
+        Only Anthropic exposes this via the Models API; other providers must be
+        configured by typing the effort value (see the docs link in the UI).
+
+        The request body is a JSON object with ``service``, ``api_key`` and
+        ``model`` keys (each defaults to ""). Handled failure cases are
+        reported as ``{"ok": False, "error": ...}``.
+        """
+        payload = await request.json()
+        if not isinstance(payload, dict):
+            # Valid JSON that is not an object (list, string, null, ...) has no
+            # .get(); answer in the usual error shape instead of crashing.
+            return {"ok": False, "error": "Request body must be a JSON object"}
+        service = payload.get("service", "")
+        api_key = payload.get("api_key", "")
+        model = payload.get("model", "")
+        if service == "anthropic":
+            return await _thinking_levels_anthropic(api_key, model)
+        return {
+            "ok": False,
+            "error": "Autodiscovery is only available for Anthropic — "
+            "enter the effort value manually for this provider.",
+        }
+
return app, auth
@@ -2456,6 +2489,33 @@ async def _test_openai(api_key: str, base_url: str | None, model: str = "gpt-4o-
return {"ok": False, "error": str(exc)}
+async def _thinking_levels_anthropic(api_key: str, model: str) -> dict:
+    """Look up which reasoning-effort levels an Anthropic model reports.
+
+    Args:
+        api_key: Anthropic API key used for the lookup.
+        model: Model id to retrieve.
+
+    Returns:
+        ``{"ok": True, "supported": bool, "levels": [...]}`` on success, where
+        ``levels`` is the subset of low/medium/high/xhigh/max flagged as
+        supported in the model's capabilities, or
+        ``{"ok": False, "error": str}`` when an input is empty or the lookup
+        raises.
+    """
+    if not api_key:
+        return {"ok": False, "error": "API key is empty"}
+    if not model:
+        return {"ok": False, "error": "Enter a model id first"}
+    try:
+        from anthropic import AsyncAnthropic
+
+        # The client exists only for this single lookup; the context manager
+        # closes its HTTP resources instead of leaving them to the GC.
+        async with AsyncAnthropic(api_key=api_key) as client:
+            m = await client.models.retrieve(model)
+        caps = getattr(m, "capabilities", None)
+        if caps is not None and not isinstance(caps, dict):
+            caps = getattr(caps, "model_dump", lambda: {})() or {}
+        caps = caps or {}
+        # Anything that is not a mapping is treated as "nothing reported", in
+        # line with the per-level isinstance check below.
+        effort = caps.get("effort")
+        if not isinstance(effort, dict):
+            effort = {}
+        levels = [
+            lvl
+            for lvl in ("low", "medium", "high", "xhigh", "max")
+            if isinstance(effort.get(lvl), dict) and effort[lvl].get("supported")
+        ]
+        thinking = caps.get("thinking")
+        if not isinstance(thinking, dict):
+            thinking = {}
+        supported = bool(thinking.get("supported")) or bool(levels)
+        return {"ok": True, "supported": supported, "levels": levels}
+    except Exception as exc:
+        # Endpoint boundary: hand any SDK/network failure to the UI as text.
+        return {"ok": False, "error": str(exc)}
+
+
async def _list_models_anthropic(api_key: str) -> dict:
if not api_key:
return {"ok": False, "error": "API key is empty"}
diff --git a/api/templates/base.html b/api/templates/base.html
index 587183c..59632c3 100644
--- a/api/templates/base.html
+++ b/api/templates/base.html
@@ -42,7 +42,7 @@
}
window.showToast = showToast;
- function llmTab(provider, apiKey, model, openaiKey, openaiBaseUrl, googleKey, googleBaseUrl, grokKey, grokBaseUrl, deepseekKey, deepseekBaseUrl, extractionProvider, extractionModel, consolidationProvider, consolidationModel, gdEnabled, gdProvider, gdModel, trEnabled, trProvider, trModel, promptToolUsageOverride, promptHistoryOverride, defaultToolUsage, defaultHistoryHandling, promptCaptureEnabled, compactionProvider, compactionModel) {
+ function llmTab(provider, apiKey, model, openaiKey, openaiBaseUrl, googleKey, googleBaseUrl, grokKey, grokBaseUrl, deepseekKey, deepseekBaseUrl, extractionProvider, extractionModel, consolidationProvider, consolidationModel, gdEnabled, gdProvider, gdModel, trEnabled, trProvider, trModel, promptToolUsageOverride, promptHistoryOverride, defaultToolUsage, defaultHistoryHandling, promptCaptureEnabled, compactionProvider, compactionModel, thinkingLevel, extractionThinkingLevel, consolidationThinkingLevel, gdThinkingLevel, trThinkingLevel, compactionThinkingLevel) {
const currentProvider = provider || 'anthropic';
return {
providerOptions: [
@@ -55,6 +55,15 @@
provider: currentProvider,
apiKey: apiKey || '',
model: model || '',
+ thinkingLevel: thinkingLevel || '',
+ extractionThinkingLevel: extractionThinkingLevel || '',
+ consolidationThinkingLevel: consolidationThinkingLevel || '',
+ gdThinkingLevel: gdThinkingLevel || '',
+ trThinkingLevel: trThinkingLevel || '',
+ compactionThinkingLevel: compactionThinkingLevel || '',
+ fetchedLevels: {anthropic: [], openai: [], google: [], grok: [], deepseek: []},
+ fetchingLevels: {anthropic: false, openai: false, google: false, grok: false, deepseek: false},
+ fetchLevelsResult: {anthropic: '', openai: '', google: '', grok: '', deepseek: ''},
openaiKey: openaiKey || '',
openaiBaseUrl: openaiBaseUrl || '',
googleKey: googleKey || '',
@@ -161,6 +170,64 @@
}
return this.models[prov] || [];
},
+        // ponytail: heuristic only — a model id counts as reasoning-capable when
+        // it contains one of the substrings below (case-insensitive). Extend the
+        // list as model ids change.
+        modelSupportsThinking(modelId) {
+            const needle = (modelId || '').toLowerCase();
+            if (needle === '') return false;
+            const markers = ['opus', 'sonnet', 'thinking', 'pro', 'deep-think', 'reasoner', 'grok-4'];
+            for (const marker of markers) {
+                if (needle.includes(marker)) return true;
+            }
+            return false;
+        },
+        // Options for the thinking-level datalist: the levels previously fetched
+        // for this provider when there are any, otherwise the common default set.
+        levelOptions(prov) {
+            const discovered = this.fetchedLevels[prov];
+            const hasDiscovered = Boolean(discovered && discovered.length);
+            return hasDiscovered ? discovered : ['low', 'medium', 'high'];
+        },
+        // POSTs to /setup/thinking-levels and records the outcome per service in
+        // fetchedLevels / fetchLevelsResult; fetchingLevels flags the in-flight call.
+        fetchThinkingLevels(service, model) {
+            const key = this.keyFor(service);
+            if (!key) {
+                this.fetchLevelsResult[service] = 'Missing API key — save/enter it first';
+                return;
+            }
+            if (!model) {
+                this.fetchLevelsResult[service] = 'Enter a model id first';
+                return;
+            }
+            this.fetchingLevels[service] = true;
+            this.fetchLevelsResult[service] = '';
+            const adminToken = localStorage.getItem('admin_api_key') || '';
+            const request = {
+                method: 'POST',
+                headers: {
+                    'Content-Type': 'application/json',
+                    'Authorization': 'Bearer ' + adminToken
+                },
+                body: JSON.stringify({service: service, api_key: key, model: model})
+            };
+            fetch('/setup/thinking-levels', request)
+                .then(resp => resp.json())
+                .then(data => {
+                    if (!data.ok) {
+                        this.fetchLevelsResult[service] = 'Failed: ' + (data.error || 'unknown');
+                        return;
+                    }
+                    const hasLevels = Array.isArray(data.levels) && data.levels.length;
+                    if (hasLevels) {
+                        this.fetchedLevels[service] = data.levels;
+                        this.fetchLevelsResult[service] = 'Supported: ' + data.levels.join(', ');
+                        return;
+                    }
+                    this.fetchLevelsResult[service] = data.supported
+                        ? 'Thinking supported (no effort levels reported)'
+                        : 'Model does not support thinking';
+                })
+                .catch(err => { this.fetchLevelsResult[service] = 'Error: ' + err.message; })
+                .finally(() => { this.fetchingLevels[service] = false; });
+        },
+        // Per-provider documentation link for the reasoning-effort value; an
+        // empty string when the provider has no entry.
+        providerDocsUrl(prov) {
+            const docsByProvider = {
+                anthropic: 'https://platform.claude.com/docs/en/build-with-claude/effort',
+                openai: 'https://platform.openai.com/docs/guides/reasoning',
+                google: 'https://ai.google.dev/gemini-api/docs/thinking',
+                grok: 'https://docs.x.ai/docs/guides/reasoning',
+                deepseek: 'https://api-docs.deepseek.com/guides/reasoning_model'
+            };
+            return docsByProvider[prov] || '';
+        },
+        // Whether the given provider+model pair is exactly the main inference
+        // pair (this.provider / this.model). An empty model never matches.
+        sameAsMain(prov, model) {
+            if (!model) return false;
+            return prov === this.provider && model === this.model;
+        },
fetchModels(service) {
const apiKey = this.keyFor(service);
if (!apiKey) {
diff --git a/api/templates/partials/llm.html b/api/templates/partials/llm.html
index 27d33fa..df6613d 100644
--- a/api/templates/partials/llm.html
+++ b/api/templates/partials/llm.html
@@ -1,4 +1,31 @@
{# LLM tab partial #}
+{#- Reusable thinking-level control. `bg` adds the "shared config" badge for
+ background inference kinds that point at the same provider+model as main. -#}
+{% macro think(level, prov, model, lid, bg=false) -%}
+
+
Thinking level{% if bg %} · same config as Main inference {% endif %}
+
+
+
Leave Off for non-reasoning models. Higher = more reasoning, more tokens. Anthropic: “Fetch levels” autodiscovers supported values; other providers: see the docs link and type the value.
+
Heads up: “ ” isn't a recognized reasoning model — only set a level if you know it supports thinking, or the inference call may error.
+
+{%- endmacro %}
System Prompt Controls
@@ -175,6 +208,8 @@
Active Inference Provider
Type any model id, or pick from the list. Use “Fetch models” in the provider card below to load the live list from the API.
+
+{{ think('thinkingLevel', 'provider', 'model', 'dl-think-main') }}
@@ -197,7 +232,8 @@
Active Inference Provider
'agent.grok_base_url': grokBaseUrl,
'agent.deepseek_api_key': deepseekKey,
'agent.deepseek_base_url': deepseekBaseUrl,
- 'agent.model': model
+ 'agent.model': model,
+ 'agent.thinking_level': thinkingLevel
}})
})
.then(r => { resultOk = r.ok; return r.json(); })
@@ -240,6 +276,7 @@
Memory Models
Model used to extract memories from conversations.
+{{ think('extractionThinkingLevel', 'extractionProvider', 'extractionModel', 'dl-think-ext', true) }}
@@ -262,6 +299,7 @@
Memory Models
Model used to consolidate and merge duplicate memories.
+{{ think('consolidationThinkingLevel', 'consolidationProvider', 'consolidationModel', 'dl-think-con', true) }}
@@ -276,8 +314,10 @@
Memory Models
body: JSON.stringify({values: {
'memory.extraction_provider': extractionProvider,
'memory.extraction_model': extractionModel,
+ 'memory.extraction_thinking_level': extractionThinkingLevel,
'memory.consolidation_provider': consolidationProvider,
- 'memory.consolidation_model': consolidationModel
+ 'memory.consolidation_model': consolidationModel,
+ 'memory.consolidation_thinking_level': consolidationThinkingLevel
}})
})
.then(r => { memoryResultOk = r.ok; return r.json(); })
@@ -324,6 +364,7 @@
Goal Decomposition
Model used to classify message complexity and decompose goals.
+{{ think('gdThinkingLevel', 'gdProvider', 'gdModel', 'dl-think-gd', true) }}
@@ -338,7 +379,8 @@
Goal Decomposition
body: JSON.stringify({values: {
'goal_decomposition.enabled': gdEnabled ? 'true' : 'false',
'goal_decomposition.provider': gdProvider,
- 'goal_decomposition.model': gdModel
+ 'goal_decomposition.model': gdModel,
+ 'goal_decomposition.thinking_level': gdThinkingLevel
}})
})
.then(r => { gdResultOk = r.ok; return r.json(); })
@@ -385,6 +427,7 @@
Task Reflection
Model used to reflect on completed tasks and extract lessons.
+{{ think('trThinkingLevel', 'trProvider', 'trModel', 'dl-think-tr', true) }}
@@ -399,7 +442,8 @@
Task Reflection
body: JSON.stringify({values: {
'task_reflection.enabled': trEnabled ? 'true' : 'false',
'task_reflection.provider': trProvider,
- 'task_reflection.model': trModel
+ 'task_reflection.model': trModel,
+ 'task_reflection.thinking_level': trThinkingLevel
}})
})
.then(r => { trResultOk = r.ok; return r.json(); })
@@ -441,6 +485,7 @@
History Compaction
A small, fast model is recommended (summarization is cheap).
+{{ think('compactionThinkingLevel', 'compactionProvider', 'compactionModel', 'dl-think-cp', true) }}
@@ -454,7 +499,8 @@
History Compaction
},
body: JSON.stringify({values: {
'compaction.provider': compactionProvider,
- 'compaction.model': compactionModel
+ 'compaction.model': compactionModel,
+ 'compaction.thinking_level': compactionThinkingLevel
}})
})
.then(r => { compactionResultOk = r.ok; return r.json(); })
diff --git a/core/agent.py b/core/agent.py
index 449c7ae..ced0ec7 100644
--- a/core/agent.py
+++ b/core/agent.py
@@ -3,6 +3,7 @@
from __future__ import annotations
import asyncio
+import copy
import hashlib
import json
import logging
@@ -410,7 +411,7 @@ async def _maybe_compact(
session = await self.history.get_session(channel, user_id, chat_id)
try:
- llm = self._background_llm(cfg.provider)
+ llm = self._background_llm(cfg.provider, cfg.thinking_level)
result = await compact_messages(llm, cfg.model, session, cfg.keep_recent_turns)
except Exception:
log.exception("Conversation compaction failed")
@@ -1136,7 +1137,10 @@ async def _extract_memories(self, user_msg: str, agent_msg: str) -> None:
main agent loop.
"""
try:
- llm = self._memory_llm(self.config.memory.extraction_provider)
+ llm = self._memory_llm(
+ self.config.memory.extraction_provider,
+ self.config.memory.extraction_thinking_level,
+ )
stored = await self.memory.extract_memories(
llm=llm,
model=self.config.memory.extraction_model,
@@ -1149,29 +1153,33 @@ async def _extract_memories(self, user_msg: str, agent_msg: str) -> None:
except Exception:
log.exception("Background memory extraction failed")
- def _memory_llm(self, provider: str) -> LLMClient:
+ def _memory_llm(self, provider: str, thinking_level: str = "") -> LLMClient:
"""Return an LLM client for memory operations.
- If the requested provider matches the main inference provider the
- existing client is reused; otherwise a new one is created using the
- API key / base-URL already stored in the agent config.
+ Thin wrapper: forwards ``provider`` and ``thinking_level`` unchanged to
+ ``_background_llm`` and returns the client it produces.
"""
- return self._background_llm(provider)
+ return self._background_llm(provider, thinking_level)
- def _background_llm(self, provider: str) -> LLMClient:
+ def _background_llm(self, provider: str, thinking_level: str = "") -> LLMClient:
"""Return an LLM client for background tasks (memory, reflection, etc.).
- If the requested provider matches the main inference provider the
- existing client is reused; otherwise a new one is created using the
- API key / base-URL already stored in the agent config.
+ Background tasks carry their own thinking level, independent of the
+ main inference one. When the provider matches the main client we clone
+ it (sharing the underlying SDK connection) and override only the level;
+ otherwise a fresh client is built from the stored credentials.
+
+ Args:
+ provider: Provider name, compared as-is against ``self.llm.provider``.
+ thinking_level: Level for the returned client; "" (the default) leaves
+ thinking off on that client.
"""
if provider == self.llm.provider:
- return self.llm
+ # copy.copy is shallow: the clone shares every attribute object with
+ # self.llm; only thinking_level is rebound, so self.llm keeps its own.
+ clone = copy.copy(self.llm)
+ clone.thinking_level = (thinking_level or "").strip().lower()
+ return clone
cfg = self.config.agent
return LLMClient(
provider=provider,
+ # Credentials are read by naming convention from the agent config:
+ # "<provider>_api_key" and "<provider>_base_url".
api_key=getattr(cfg, f"{provider}_api_key", ""),
base_url=getattr(cfg, f"{provider}_base_url", None),
+ thinking_level=thinking_level,
)
def _build_embedder(self):
@@ -1218,7 +1226,7 @@ async def _maybe_decompose(self, message: str) -> DecomposedGoal | None:
Returns None if the message is simple or decomposition fails/is disabled.
"""
gd_cfg = self.config.goal_decomposition
- llm = self._background_llm(gd_cfg.provider)
+ llm = self._background_llm(gd_cfg.provider, gd_cfg.thinking_level)
try:
is_complex = await classify_complexity(llm, gd_cfg.model, message)
@@ -1246,7 +1254,7 @@ async def _reflect_on_task(self, user_msg: str, agent_msg: str, tool_log: list[d
"""
try:
tr_cfg = self.config.task_reflection
- llm = self._background_llm(tr_cfg.provider)
+ llm = self._background_llm(tr_cfg.provider, tr_cfg.thinking_level)
stored = await self.reflections.reflect_on_task(
llm=llm,
model=tr_cfg.model,
diff --git a/core/config.py b/core/config.py
index 1fc1a3b..8d9b90a 100644
--- a/core/config.py
+++ b/core/config.py
@@ -48,6 +48,7 @@ class AgentConfig(BaseModel):
deepseek_api_key: str = ""
deepseek_base_url: str = ""
model: str = "claude-4-6-sonnet"
+ thinking_level: str = "" # "" (off) | "low" | "medium" | "high" — only for reasoning models
timezone: str = "Europe/Zurich"
skills_dir: str = "skills/"
skills_db_path: str = "data/skills.db"
@@ -158,6 +159,8 @@ class MemoryConfig(BaseModel):
extraction_model: str = "claude-haiku-4-5"
consolidation_provider: str = "anthropic"
consolidation_model: str = "claude-haiku-4-5"
+ extraction_thinking_level: str = "" # "" (off) | "low" | "medium" | "high"
+ consolidation_thinking_level: str = "" # "" (off) | "low" | "medium" | "high"
extraction_cooldown_seconds: int = 120 # minimum seconds between extractions
embedding: EmbeddingConfig = EmbeddingConfig()
@@ -177,12 +180,14 @@ class GoalDecompositionConfig(BaseModel):
enabled: bool = True
provider: str = "anthropic"
model: str = "claude-haiku-4-5"
+ thinking_level: str = "" # "" (off) | "low" | "medium" | "high"
class TaskReflectionConfig(BaseModel):
enabled: bool = True
provider: str = "anthropic"
model: str = "claude-haiku-4-5"
+ thinking_level: str = "" # "" (off) | "low" | "medium" | "high"
db_path: str = "data/reflections.db"
max_reflections: int = 50 # max reflections to keep for prompt injection
@@ -197,6 +202,7 @@ class CompactionConfig(BaseModel):
enabled: bool = True
provider: str = "anthropic"
model: str = "claude-haiku-4-5"
+ thinking_level: str = "" # "" (off) | "low" | "medium" | "high"
threshold_type: str = "percent" # "percent" (of context window) or "tokens" (absolute)
threshold_percent: int = 80 # trigger at this % of the model's context window
threshold_tokens: int = 150000 # absolute trigger when threshold_type == "tokens"
diff --git a/core/llm.py b/core/llm.py
index 43ae6d4..5b5318f 100644
--- a/core/llm.py
+++ b/core/llm.py
@@ -101,8 +101,16 @@ def _normalize_model(provider: str, model: str) -> str:
class LLMClient:
- def __init__(self, provider: str, api_key: str, base_url: str | None = None):
+ def __init__(
+ self,
+ provider: str,
+ api_key: str,
+ base_url: str | None = None,
+ thinking_level: str = "",
+ ):
self.provider = _normalize_provider(provider)
+ # "" (off) | "low" | "medium" | "high" — applied by both generate() and generate_text()
+ self.thinking_level = (thinking_level or "").strip().lower()
self._client: Any
if self.provider == "anthropic":
self._client = AsyncAnthropic(api_key=api_key)
@@ -119,36 +127,53 @@ def __init__(self, provider: str, api_key: str, base_url: str | None = None):
}
self._client = cast(Any, client_class)(**client_kwargs) # type: ignore[call-arg]
+    def _reasoning_kwargs(self) -> dict[str, Any]:
+        """Provider-specific request kwargs for the configured thinking level.
+
+        Empty when no level is set, or when the level is not recognised for
+        the provider, so non-reasoning calls are untouched.
+
+        Anthropic also accepts "xhigh" and "max": the admin UI's level
+        autodiscovery can offer those two, so a discovered level must not be
+        silently dropped here. Every other provider keeps the common
+        low/medium/high set.
+
+        Returns:
+            ``thinking`` + ``output_config`` kwargs for Anthropic,
+            ``reasoning_effort`` for any other provider, or ``{}``.
+        """
+        level = self.thinking_level
+        if self.provider == "anthropic":
+            if level not in ("low", "medium", "high", "xhigh", "max"):
+                return {}
+            return {"thinking": {"type": "adaptive"}, "output_config": {"effort": level}}
+        if level not in ("low", "medium", "high"):
+            return {}
+        return {"reasoning_effort": level}
+
@classmethod
def from_agent_config(cls, config) -> LLMClient:
+ """Build a client from an agent config object.
+
+ Reads ``llm_provider`` (default "anthropic") and passes the matching
+ ``<provider>_api_key`` — plus ``<provider>_base_url`` for openai, google,
+ grok and deepseek — to the constructor, together with the config's
+ ``thinking_level`` (default ""). A provider that matches none of the
+ branches falls back to an Anthropic client.
+ """
provider = _normalize_provider(getattr(config, "llm_provider", "anthropic"))
+ thinking = getattr(config, "thinking_level", "")
if provider == "anthropic":
- return cls(provider, getattr(config, "anthropic_api_key", ""))
+ return cls(provider, getattr(config, "anthropic_api_key", ""), thinking_level=thinking)
if provider == "openai":
return cls(
provider,
getattr(config, "openai_api_key", ""),
getattr(config, "openai_base_url", ""),
+ thinking_level=thinking,
)
if provider == "google":
return cls(
provider,
getattr(config, "google_api_key", ""),
getattr(config, "google_base_url", ""),
+ thinking_level=thinking,
)
if provider == "grok":
return cls(
provider,
getattr(config, "grok_api_key", ""),
getattr(config, "grok_base_url", ""),
+ thinking_level=thinking,
)
if provider == "deepseek":
return cls(
provider,
getattr(config, "deepseek_api_key", ""),
getattr(config, "deepseek_base_url", ""),
+ thinking_level=thinking,
)
- return cls("anthropic", getattr(config, "anthropic_api_key", ""))
+ return cls("anthropic", getattr(config, "anthropic_api_key", ""), thinking_level=thinking)
async def generate(
self,
@@ -180,6 +205,7 @@ async def generate(
system=cast(Any, system_param),
messages=cast(Any, messages),
tools=cast(Any, tools),
+ **self._reasoning_kwargs(),
)
tool_calls = []
text_parts = []
@@ -210,6 +236,7 @@ async def generate(
max_tokens=max_tokens,
messages=cast(Any, full_messages),
tools=cast(Any, openai_tools),
+ **self._reasoning_kwargs(),
)
message = response.choices[0].message
tool_calls = []
@@ -267,6 +294,7 @@ async def generate_text(self, *, model: str, prompt: str, max_tokens: int = 1024
model=resolved_model,
max_tokens=max_tokens,
messages=cast(Any, [{"role": "user", "content": prompt}]),
+ **self._reasoning_kwargs(),
)
for block in response.content:
block_any = cast(Any, block)
@@ -279,5 +307,6 @@ async def generate_text(self, *, model: str, prompt: str, max_tokens: int = 1024
model=resolved_model,
max_tokens=max_tokens,
messages=cast(Any, [{"role": "user", "content": prompt}]),
+ **self._reasoning_kwargs(),
)
return (response.choices[0].message.content or "").strip()
diff --git a/core/scheduler.py b/core/scheduler.py
index 35503d5..6bb840e 100644
--- a/core/scheduler.py
+++ b/core/scheduler.py
@@ -127,7 +127,10 @@ async def run_memory_consolidation() -> None:
log.info("Scheduler running memory consolidation")
try:
- llm = agent._memory_llm(agent.config.memory.consolidation_provider)
+ llm = agent._memory_llm(
+ agent.config.memory.consolidation_provider,
+ agent.config.memory.consolidation_thinking_level,
+ )
result = await agent.memory.consolidate_and_cleanup(
llm=llm,
model=agent.config.memory.consolidation_model,
diff --git a/tests/test_compaction.py b/tests/test_compaction.py
index 6107e4c..1d1cbc4 100644
--- a/tests/test_compaction.py
+++ b/tests/test_compaction.py
@@ -189,7 +189,7 @@ async def test_maybe_compact_replaces_session_and_notifies(agent, monkeypatch) -
for m in _session_with_tool_pair():
await agent.history.append_session_message("telegram", "u", m, "")
- monkeypatch.setattr(agent, "_background_llm", lambda provider: FakeLLM("S"))
+ monkeypatch.setattr(agent, "_background_llm", lambda provider, thinking_level="": FakeLLM("S"))
response = SimpleNamespace(usage={"context_tokens": 999})
notice = await agent._maybe_compact("telegram", "u", "", response)
@@ -209,7 +209,7 @@ async def test_maybe_compact_below_threshold_noop(agent, monkeypatch) -> None:
agent.config.compaction.threshold_tokens = 100000
for m in _session_with_tool_pair():
await agent.history.append_session_message("telegram", "u", m, "")
- monkeypatch.setattr(agent, "_background_llm", lambda provider: FakeLLM("S"))
+ monkeypatch.setattr(agent, "_background_llm", lambda provider, thinking_level="": FakeLLM("S"))
response = SimpleNamespace(usage={"context_tokens": 50})
assert await agent._maybe_compact("telegram", "u", "", response) is None
diff --git a/tests/test_llm.py b/tests/test_llm.py
new file mode 100644
index 0000000..62ac2fd
--- /dev/null
+++ b/tests/test_llm.py
@@ -0,0 +1,69 @@
+"""Tests for thinking-level plumbing in LLMClient."""
+
+from __future__ import annotations
+
+from unittest.mock import AsyncMock
+
+import pytest
+
+from core.config import AgentConfig
+from core.llm import LLMClient
+
+
+def test_thinking_level_normalized_and_defaulted() -> None:
+    """The level is stripped and lower-cased; omitting it yields ""."""
+    padded = LLMClient("anthropic", "x", thinking_level=" HIGH ")
+    assert padded.thinking_level == "high"
+    unset = LLMClient("anthropic", "x")
+    assert unset.thinking_level == ""
+
+
+def test_from_agent_config_carries_thinking_level() -> None:
+    """from_agent_config forwards the config's thinking_level to the client."""
+    client = LLMClient.from_agent_config(
+        AgentConfig(llm_provider="openai", openai_api_key="x", thinking_level="low")
+    )
+    assert client.thinking_level == "low"
+
+
+@pytest.mark.asyncio
+async def test_anthropic_generate_sends_effort_when_set() -> None:
+    """generate() sends adaptive thinking plus the effort level to Anthropic."""
+    fake_response = type("R", (), {"content": [], "usage": None})()
+    create = AsyncMock(return_value=fake_response)
+    fake_messages = type("M", (), {"create": create})()
+    client = LLMClient("anthropic", "x", thinking_level="medium")
+    client._client = type("C", (), {"messages": fake_messages})()
+
+    await client.generate(model="claude-4-6-opus", system="s", messages=[], tools=[])
+
+    sent = create.await_args.kwargs
+    assert sent["thinking"] == {"type": "adaptive"}
+    assert sent["output_config"] == {"effort": "medium"}
+
+
+@pytest.mark.asyncio
+async def test_anthropic_generate_omits_effort_when_off() -> None:
+    """With no level configured, generate() adds no reasoning kwargs."""
+    fake_response = type("R", (), {"content": [], "usage": None})()
+    create = AsyncMock(return_value=fake_response)
+    fake_messages = type("M", (), {"create": create})()
+    client = LLMClient("anthropic", "x")
+    client._client = type("C", (), {"messages": fake_messages})()
+
+    await client.generate(model="claude-4-6-opus", system="s", messages=[], tools=[])
+
+    sent = create.await_args.kwargs
+    for absent in ("thinking", "output_config"):
+        assert absent not in sent
+
+
+@pytest.mark.asyncio
+async def test_anthropic_generate_text_sends_effort_when_set() -> None:
+    """generate_text() applies the client's thinking level as well."""
+    fake_response = type("R", (), {"content": []})()
+    create = AsyncMock(return_value=fake_response)
+    fake_messages = type("M", (), {"create": create})()
+    client = LLMClient("anthropic", "x", thinking_level="low")
+    client._client = type("C", (), {"messages": fake_messages})()
+
+    await client.generate_text(model="claude-4-6-opus", prompt="hi")
+
+    sent = create.await_args.kwargs
+    assert sent["thinking"] == {"type": "adaptive"}
+    assert sent["output_config"] == {"effort": "low"}
+
+
+def test_reasoning_kwargs_per_provider() -> None:
+    """_reasoning_kwargs maps the level per provider and ignores unknown values."""
+    openai_high = LLMClient("openai", "x", thinking_level="high")
+    assert openai_high._reasoning_kwargs() == {"reasoning_effort": "high"}
+
+    anthropic_off = LLMClient("anthropic", "x")
+    assert anthropic_off._reasoning_kwargs() == {}
+
+    # An unrecognised level behaves like "off".
+    anthropic_bogus = LLMClient("anthropic", "x", thinking_level="bogus")
+    assert anthropic_bogus._reasoning_kwargs() == {}
diff --git a/tests/test_scheduler.py b/tests/test_scheduler.py
index f0e9e28..46a188f 100644
--- a/tests/test_scheduler.py
+++ b/tests/test_scheduler.py
@@ -62,9 +62,10 @@ async def test_run_memory_consolidation_calls_store(monkeypatch) -> None:
memory=SimpleNamespace(
consolidation_model="model",
consolidation_provider="anthropic",
+ consolidation_thinking_level="",
),
),
- _memory_llm=lambda self_provider: llm_sentinel,
+ _memory_llm=lambda self_provider, thinking_level="": llm_sentinel,
)
set_agent_context(agent)