Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
60 changes: 60 additions & 0 deletions api/admin.py
Original file line number Diff line number Diff line change
Expand Up @@ -792,6 +792,7 @@ async def partial_llm() -> HTMLResponse:
deepseek_api_key = await config_store.get("agent.deepseek_api_key") or ""
deepseek_base_url = await config_store.get("agent.deepseek_base_url") or ""
model = await config_store.get("agent.model") or "claude-4-6-sonnet"
thinking_level = await config_store.get("agent.thinking_level") or ""
extraction_provider = await config_store.get("memory.extraction_provider") or "anthropic"
extraction_model = await config_store.get("memory.extraction_model") or "claude-haiku-4-5"
consolidation_provider = (
Expand All @@ -800,6 +801,10 @@ async def partial_llm() -> HTMLResponse:
consolidation_model = (
await config_store.get("memory.consolidation_model") or "claude-haiku-4-5"
)
extraction_thinking_level = await config_store.get("memory.extraction_thinking_level") or ""
consolidation_thinking_level = (
await config_store.get("memory.consolidation_thinking_level") or ""
)
gd_enabled = await config_store.get("goal_decomposition.enabled")
gd_enabled = gd_enabled if gd_enabled is not None else "true"
gd_provider = await config_store.get("goal_decomposition.provider") or "anthropic"
Expand All @@ -808,8 +813,11 @@ async def partial_llm() -> HTMLResponse:
tr_enabled = tr_enabled if tr_enabled is not None else "true"
tr_provider = await config_store.get("task_reflection.provider") or "anthropic"
tr_model = await config_store.get("task_reflection.model") or "claude-haiku-4-5"
gd_thinking_level = await config_store.get("goal_decomposition.thinking_level") or ""
tr_thinking_level = await config_store.get("task_reflection.thinking_level") or ""
compaction_provider = await config_store.get("compaction.provider") or "anthropic"
compaction_model = await config_store.get("compaction.model") or "claude-haiku-4-5"
compaction_thinking_level = await config_store.get("compaction.thinking_level") or ""
prompt_tool_usage_override = await config_store.get("prompt.tool_usage_override") or ""
prompt_history_override = await config_store.get("prompt.history_handling_override") or ""
prompt_capture_enabled = await config_store.get("admin.capture_prompts")
Expand All @@ -829,18 +837,24 @@ async def partial_llm() -> HTMLResponse:
deepseek_api_key=deepseek_api_key,
deepseek_base_url=deepseek_base_url,
model=model,
thinking_level=thinking_level,
extraction_provider=extraction_provider,
extraction_model=extraction_model,
extraction_thinking_level=extraction_thinking_level,
consolidation_provider=consolidation_provider,
consolidation_model=consolidation_model,
consolidation_thinking_level=consolidation_thinking_level,
gd_enabled=gd_enabled,
gd_provider=gd_provider,
gd_model=gd_model,
gd_thinking_level=gd_thinking_level,
tr_enabled=tr_enabled,
tr_provider=tr_provider,
tr_model=tr_model,
tr_thinking_level=tr_thinking_level,
compaction_provider=compaction_provider,
compaction_model=compaction_model,
compaction_thinking_level=compaction_thinking_level,
prompt_tool_usage_override=prompt_tool_usage_override,
prompt_history_override=prompt_history_override,
default_tool_usage=DEFAULT_TOOL_USAGE_BLOCK,
Expand Down Expand Up @@ -2398,6 +2412,25 @@ async def list_models(request: Request) -> dict:
return await _list_models_openai(api_key, base_url)
return {"ok": False, "error": f"Unknown service: {service}"}

@app.post("/setup/thinking-levels")
async def thinking_levels(request: Request) -> dict:
    """Report which reasoning-effort levels a model supports.

    Reads ``service``, ``api_key`` and ``model`` from the JSON request body.
    Autodiscovery is only wired up for the ``"anthropic"`` service; for any
    other service an ``{"ok": False, "error": ...}`` payload is returned that
    tells the user to type the effort value by hand.
    """
    body = await request.json()

    # Guard clause: every provider except Anthropic gets the manual-entry hint.
    if body.get("service", "") != "anthropic":
        return {
            "ok": False,
            "error": "Autodiscovery is only available for Anthropic — "
            "enter the effort value manually for this provider.",
        }

    key = body.get("api_key", "")
    model_id = body.get("model", "")
    return await _thinking_levels_anthropic(key, model_id)

return app, auth


Expand Down Expand Up @@ -2456,6 +2489,33 @@ async def _test_openai(api_key: str, base_url: str | None, model: str = "gpt-4o-
return {"ok": False, "error": str(exc)}


async def _thinking_levels_anthropic(api_key: str, model: str) -> dict:
if not api_key:
return {"ok": False, "error": "API key is empty"}
if not model:
return {"ok": False, "error": "Enter a model id first"}
try:
from anthropic import AsyncAnthropic

client = AsyncAnthropic(api_key=api_key)
m = await client.models.retrieve(model)
caps = getattr(m, "capabilities", None)
if caps is not None and not isinstance(caps, dict):
caps = getattr(caps, "model_dump", lambda: {})() or {}
caps = caps or {}
effort = caps.get("effort") or {}
levels = [
lvl
for lvl in ("low", "medium", "high", "xhigh", "max")
if isinstance(effort.get(lvl), dict) and effort[lvl].get("supported")
]
thinking = caps.get("thinking") or {}
supported = bool(thinking.get("supported")) or bool(levels)
return {"ok": True, "supported": supported, "levels": levels}
except Exception as exc:
return {"ok": False, "error": str(exc)}


async def _list_models_anthropic(api_key: str) -> dict:
if not api_key:
return {"ok": False, "error": "API key is empty"}
Expand Down
69 changes: 68 additions & 1 deletion api/templates/base.html
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@
}
window.showToast = showToast;

function llmTab(provider, apiKey, model, openaiKey, openaiBaseUrl, googleKey, googleBaseUrl, grokKey, grokBaseUrl, deepseekKey, deepseekBaseUrl, extractionProvider, extractionModel, consolidationProvider, consolidationModel, gdEnabled, gdProvider, gdModel, trEnabled, trProvider, trModel, promptToolUsageOverride, promptHistoryOverride, defaultToolUsage, defaultHistoryHandling, promptCaptureEnabled, compactionProvider, compactionModel) {
function llmTab(provider, apiKey, model, openaiKey, openaiBaseUrl, googleKey, googleBaseUrl, grokKey, grokBaseUrl, deepseekKey, deepseekBaseUrl, extractionProvider, extractionModel, consolidationProvider, consolidationModel, gdEnabled, gdProvider, gdModel, trEnabled, trProvider, trModel, promptToolUsageOverride, promptHistoryOverride, defaultToolUsage, defaultHistoryHandling, promptCaptureEnabled, compactionProvider, compactionModel, thinkingLevel, extractionThinkingLevel, consolidationThinkingLevel, gdThinkingLevel, trThinkingLevel, compactionThinkingLevel) {
const currentProvider = provider || 'anthropic';
return {
providerOptions: [
Expand All @@ -55,6 +55,15 @@
provider: currentProvider,
apiKey: apiKey || '',
model: model || '',
thinkingLevel: thinkingLevel || '',
extractionThinkingLevel: extractionThinkingLevel || '',
consolidationThinkingLevel: consolidationThinkingLevel || '',
gdThinkingLevel: gdThinkingLevel || '',
trThinkingLevel: trThinkingLevel || '',
compactionThinkingLevel: compactionThinkingLevel || '',
fetchedLevels: {anthropic: [], openai: [], google: [], grok: [], deepseek: []},
fetchingLevels: {anthropic: false, openai: false, google: false, grok: false, deepseek: false},
fetchLevelsResult: {anthropic: '', openai: '', google: '', grok: '', deepseek: ''},
openaiKey: openaiKey || '',
openaiBaseUrl: openaiBaseUrl || '',
googleKey: googleKey || '',
Expand Down Expand Up @@ -161,6 +170,64 @@
}
return this.models[prov] || [];
},
// ponytail: substring heuristic for reasoning-capable models — extend the
// list as model ids change; the thinking-level control shows only when true.
modelSupportsThinking(modelId) {
const id = (modelId || '').toLowerCase();
if (!id) return false;
return ['opus', 'sonnet', 'thinking', 'pro', 'deep-think', 'reasoner', 'grok-4']
.some(pat => id.includes(pat));
},
// Effort values offered in the thinking-level datalist. Anthropic can be
// enriched live via "Fetch levels"; others fall back to the common set.
levelOptions(prov) {
const fetched = this.fetchedLevels[prov];
if (fetched && fetched.length) return fetched;
return ['low', 'medium', 'high'];
},
fetchThinkingLevels(service, model) {
const apiKey = this.keyFor(service);
if (!apiKey) { this.fetchLevelsResult[service] = 'Missing API key — save/enter it first'; return; }
if (!model) { this.fetchLevelsResult[service] = 'Enter a model id first'; return; }
this.fetchingLevels[service] = true;
this.fetchLevelsResult[service] = '';
fetch('/setup/thinking-levels', {
method: 'POST',
headers: {
'Content-Type': 'application/json',
'Authorization': 'Bearer ' + (localStorage.getItem('admin_api_key') || '')
},
body: JSON.stringify({service: service, api_key: apiKey, model: model})
})
.then(r => r.json())
.then(d => {
if (d.ok && Array.isArray(d.levels) && d.levels.length) {
this.fetchedLevels[service] = d.levels;
this.fetchLevelsResult[service] = 'Supported: ' + d.levels.join(', ');
} else if (d.ok) {
this.fetchLevelsResult[service] = d.supported ? 'Thinking supported (no effort levels reported)' : 'Model does not support thinking';
} else {
this.fetchLevelsResult[service] = 'Failed: ' + (d.error || 'unknown');
}
})
.catch(e => { this.fetchLevelsResult[service] = 'Error: ' + e.message; })
.finally(() => { this.fetchingLevels[service] = false; });
},
// Docs for the reasoning-effort value to type, per provider.
providerDocsUrl(prov) {
return ({
anthropic: 'https://platform.claude.com/docs/en/build-with-claude/effort',
openai: 'https://platform.openai.com/docs/guides/reasoning',
google: 'https://ai.google.dev/gemini-api/docs/thinking',
grok: 'https://docs.x.ai/docs/guides/reasoning',
deepseek: 'https://api-docs.deepseek.com/guides/reasoning_model'
})[prov] || '';
},
// True when a background kind points at the exact same provider+model as
// main inference — surfaced in the UI so shared config is obvious.
sameAsMain(prov, model) {
return !!model && prov === this.provider && model === this.model;
},
fetchModels(service) {
const apiKey = this.keyFor(service);
if (!apiKey) {
Expand Down
58 changes: 52 additions & 6 deletions api/templates/partials/llm.html
Original file line number Diff line number Diff line change
@@ -1,4 +1,31 @@
{# LLM tab partial #}
{#- Reusable thinking-level control: a free-text input backed by a datalist,
plus a "Fetch levels" button (shown when the provider is 'anthropic') or a
docs link (shown otherwise), a status line and a warning line.

Arguments:
  level - Alpine expression (as a string) bound to the input via x-model.
  prov  - Alpine expression (as a string) that evaluates to the provider id;
          it is interpolated into levelOptions(), fetchThinkingLevels(),
          providerDocsUrl() and the fetchingLevels / fetchLevelsResult lookups.
  model - Alpine expression (as a string) that evaluates to the model id.
  lid   - id for the <datalist>; also used as the x-for key prefix, so each
          call site should pass a distinct value.
  bg    - when true, adds the "shared config" badge for background inference
          kinds that point at the same provider+model as main. -#}
{% macro think(level, prov, model, lid, bg=false) -%}
<div class="mt-2">
<label class="label">Thinking level{% if bg %}<span class="text-muted text-xs font-normal" x-show="sameAsMain({{ prov }}, {{ model }})"> · same config as Main inference</span>{% endif %}</label>
<div class="flex items-center gap-2">
<input type="text" class="input-sm" style="max-width:300px" x-model="{{ level }}"
list="{{ lid }}" placeholder="off — type or pick low / medium / high">
<datalist id="{{ lid }}">
<option value=""></option>
<template x-for="lvl in levelOptions({{ prov }})" :key="'{{ lid }}-' + lvl">
<option :value="lvl"></option>
</template>
</datalist>
<button type="button" class="btn-secondary btn-sm" x-show="{{ prov }} === 'anthropic'"
@click="fetchThinkingLevels({{ prov }}, {{ model }})" :disabled="fetchingLevels[{{ prov }}]">
<span x-show="!fetchingLevels[{{ prov }}]">Fetch levels</span>
<span x-show="fetchingLevels[{{ prov }}]">…</span>
</button>
<a class="text-xs text-muted underline" x-show="{{ prov }} !== 'anthropic'"
:href="providerDocsUrl({{ prov }})" target="_blank" rel="noopener">effort docs ↗</a>
</div>
<p class="text-muted text-xs mt-1" x-show="fetchLevelsResult[{{ prov }}]" x-text="fetchLevelsResult[{{ prov }}]"></p>
<p class="text-muted text-xs mt-1">Leave <strong>Off</strong> for non-reasoning models. Higher = more reasoning, more tokens. Anthropic: “Fetch levels” autodiscovers supported values; other providers: see the docs link and type the value.</p>
<p class="text-amber-600 text-xs mt-1" x-show="{{ level }} && !modelSupportsThinking({{ model }})">Heads up: “<span x-text="{{ model }}"></span>” isn't a recognized reasoning model — only set a level if you know it supports thinking, or the inference call may error.</p>
</div>
{%- endmacro %}
<div class="space-y-6" x-data="llmTab(
{{ provider|default('anthropic', true)|tojson|forceescape }},
{{ anthropic_api_key|default('', true)|tojson|forceescape }},
Expand Down Expand Up @@ -27,7 +54,13 @@
{{ default_history_handling|default('', true)|tojson|forceescape }},
{{ prompt_capture_enabled|default(false, true)|tojson|forceescape }},
{{ compaction_provider|default('anthropic', true)|tojson|forceescape }},
{{ compaction_model|default('claude-haiku-4-5', true)|tojson|forceescape }}
{{ compaction_model|default('claude-haiku-4-5', true)|tojson|forceescape }},
{{ thinking_level|default('', true)|tojson|forceescape }},
{{ extraction_thinking_level|default('', true)|tojson|forceescape }},
{{ consolidation_thinking_level|default('', true)|tojson|forceescape }},
{{ gd_thinking_level|default('', true)|tojson|forceescape }},
{{ tr_thinking_level|default('', true)|tojson|forceescape }},
{{ compaction_thinking_level|default('', true)|tojson|forceescape }}
)">
<div class="card">
<h2 class="text-base mb-1">System Prompt Controls</h2>
Expand Down Expand Up @@ -175,6 +208,8 @@ <h2 class="text-base mb-1">Active Inference Provider</h2>
</datalist>
<p class="text-muted text-xs mt-1">Type any model id, or pick from the list. Use “Fetch models” in the provider card below to load the live list from the API.</p>
</div>

{{ think('thinkingLevel', 'provider', 'model', 'dl-think-main') }}
</form>

<div class="flex items-center gap-2 mt-4">
Expand All @@ -197,7 +232,8 @@ <h2 class="text-base mb-1">Active Inference Provider</h2>
'agent.grok_base_url': grokBaseUrl,
'agent.deepseek_api_key': deepseekKey,
'agent.deepseek_base_url': deepseekBaseUrl,
'agent.model': model
'agent.model': model,
'agent.thinking_level': thinkingLevel
}})
})
.then(r => { resultOk = r.ok; return r.json(); })
Expand Down Expand Up @@ -240,6 +276,7 @@ <h2 class="text-base mb-1">Memory Models</h2>
</datalist>
<p class="text-muted text-xs mt-1">Model used to extract memories from conversations.</p>
</div>
{{ think('extractionThinkingLevel', 'extractionProvider', 'extractionModel', 'dl-think-ext', true) }}

<hr class="border-border">

Expand All @@ -262,6 +299,7 @@ <h2 class="text-base mb-1">Memory Models</h2>
</datalist>
<p class="text-muted text-xs mt-1">Model used to consolidate and merge duplicate memories.</p>
</div>
{{ think('consolidationThinkingLevel', 'consolidationProvider', 'consolidationModel', 'dl-think-con', true) }}
</form>

<div class="flex items-center gap-2 mt-4">
Expand All @@ -276,8 +314,10 @@ <h2 class="text-base mb-1">Memory Models</h2>
body: JSON.stringify({values: {
'memory.extraction_provider': extractionProvider,
'memory.extraction_model': extractionModel,
'memory.extraction_thinking_level': extractionThinkingLevel,
'memory.consolidation_provider': consolidationProvider,
'memory.consolidation_model': consolidationModel
'memory.consolidation_model': consolidationModel,
'memory.consolidation_thinking_level': consolidationThinkingLevel
}})
})
.then(r => { memoryResultOk = r.ok; return r.json(); })
Expand Down Expand Up @@ -324,6 +364,7 @@ <h2 class="text-base mb-1">Goal Decomposition</h2>
</datalist>
<p class="text-muted text-xs mt-1">Model used to classify message complexity and decompose goals.</p>
</div>
{{ think('gdThinkingLevel', 'gdProvider', 'gdModel', 'dl-think-gd', true) }}
</form>

<div class="flex items-center gap-2 mt-4">
Expand All @@ -338,7 +379,8 @@ <h2 class="text-base mb-1">Goal Decomposition</h2>
body: JSON.stringify({values: {
'goal_decomposition.enabled': gdEnabled ? 'true' : 'false',
'goal_decomposition.provider': gdProvider,
'goal_decomposition.model': gdModel
'goal_decomposition.model': gdModel,
'goal_decomposition.thinking_level': gdThinkingLevel
}})
})
.then(r => { gdResultOk = r.ok; return r.json(); })
Expand Down Expand Up @@ -385,6 +427,7 @@ <h2 class="text-base mb-1">Task Reflection</h2>
</datalist>
<p class="text-muted text-xs mt-1">Model used to reflect on completed tasks and extract lessons.</p>
</div>
{{ think('trThinkingLevel', 'trProvider', 'trModel', 'dl-think-tr', true) }}
</form>

<div class="flex items-center gap-2 mt-4">
Expand All @@ -399,7 +442,8 @@ <h2 class="text-base mb-1">Task Reflection</h2>
body: JSON.stringify({values: {
'task_reflection.enabled': trEnabled ? 'true' : 'false',
'task_reflection.provider': trProvider,
'task_reflection.model': trModel
'task_reflection.model': trModel,
'task_reflection.thinking_level': trThinkingLevel
}})
})
.then(r => { trResultOk = r.ok; return r.json(); })
Expand Down Expand Up @@ -441,6 +485,7 @@ <h2 class="text-base mb-1">History Compaction</h2>
</datalist>
<p class="text-muted text-xs mt-1">A small, fast model is recommended (summarization is cheap).</p>
</div>
{{ think('compactionThinkingLevel', 'compactionProvider', 'compactionModel', 'dl-think-cp', true) }}
</form>

<div class="flex items-center gap-2 mt-4">
Expand All @@ -454,7 +499,8 @@ <h2 class="text-base mb-1">History Compaction</h2>
},
body: JSON.stringify({values: {
'compaction.provider': compactionProvider,
'compaction.model': compactionModel
'compaction.model': compactionModel,
'compaction.thinking_level': compactionThinkingLevel
}})
})
.then(r => { compactionResultOk = r.ok; return r.json(); })
Expand Down
Loading
Loading