From 2b32568c3704ff6efdb240b6e68ce05777df500b Mon Sep 17 00:00:00 2001 From: Timothy Jaeryang Baek Date: Thu, 11 Jun 2026 12:02:00 +0100 Subject: [PATCH 1/8] refac --- cptr/env.py | 1 + .../versions/0003_add_context_compaction.py | 21 ++++ cptr/models/chats.py | 1 + cptr/utils/chat_export.py | 3 + cptr/utils/chat_task.py | 84 ++++++++++++++- cptr/utils/context.py | 54 ++++++++++ cptr/utils/summarize.py | 102 ++++++++++++++++++ 7 files changed, 262 insertions(+), 4 deletions(-) create mode 100644 cptr/migrations/versions/0003_add_context_compaction.py create mode 100644 cptr/utils/context.py create mode 100644 cptr/utils/summarize.py diff --git a/cptr/env.py b/cptr/env.py index f0c16c7..6d50a38 100644 --- a/cptr/env.py +++ b/cptr/env.py @@ -27,6 +27,7 @@ ).lower() in ("true", "1", "yes") CHAT_TOOL_MAX_CHARS = int(os.environ.get("CHAT_TOOL_MAX_CHARS", "50000")) CHAT_TOOL_COMMAND_MAX_CHARS = int(os.environ.get("CHAT_TOOL_COMMAND_MAX_CHARS", "8000")) +CHAT_COMPACT_TOKEN_THRESHOLD = int(os.environ.get("CHAT_COMPACT_TOKEN_THRESHOLD", "80000")) # ── AI stream settings ────────────────────────────────────── STREAM_CONNECT_TIMEOUT_SECONDS = float(os.environ.get("CPTR_STREAM_CONNECT_TIMEOUT", "30")) diff --git a/cptr/migrations/versions/0003_add_context_compaction.py b/cptr/migrations/versions/0003_add_context_compaction.py new file mode 100644 index 0000000..e7430f5 --- /dev/null +++ b/cptr/migrations/versions/0003_add_context_compaction.py @@ -0,0 +1,21 @@ +"""Add chat_summary to chat_messages for context compaction. + +Revision ID: 0003 +Revises: 0002 +Create Date: 2026-06-11 +""" +from alembic import op +import sqlalchemy as sa + +revision = '0003' +down_revision = '0002' +branch_labels = None +depends_on = None + + +def upgrade() -> None: + op.add_column('chat_messages', sa.Column('chat_summary', sa.Text(), nullable=True)) + + +def downgrade() -> None: + op.drop_column('chat_messages', 'chat_summary') diff --git a/cptr/models/chats.py b/cptr/models/chats.py index 88015c3..0eda313 100644 --- a/cptr/models/chats.py +++ b/cptr/models/chats.py @@ -228,6 +228,7 @@ class ChatMessage(Base): output = Column(JSON, nullable=True) # Responses API output items usage = Column(JSON, nullable=True) # {input_tokens, output_tokens, ...} meta = Column(JSON, nullable=True) # {files, followups, error, ...} + chat_summary = Column(Text, nullable=True) # Compaction summary (covers all ancestors before this msg) created_at = Column(BigInteger, nullable=False) # ── Class methods ──────────────────────────────────────── diff --git a/cptr/utils/chat_export.py b/cptr/utils/chat_export.py index 7fb00e1..dd64cad 100644 --- a/cptr/utils/chat_export.py +++ b/cptr/utils/chat_export.py @@ -47,6 +47,8 @@ async def export_chat_to_file(chat_id: str) -> None: entry["output"] = m.output or [] if m.usage: entry["usage"] = m.usage + if m.chat_summary: + entry["chat_summary"] = m.chat_summary msg_map[m.id] = entry @@ -58,6 +60,7 @@ async def export_chat_to_file(chat_id: str) -> None: chat_data = { "id": chat.id, "title": chat.title, + "summary": chat.summary, "created_at": chat.created_at, "updated_at": chat.updated_at, "history": { diff --git a/cptr/utils/chat_task.py b/cptr/utils/chat_task.py index 6353aa3..1a2335c 100644 --- a/cptr/utils/chat_task.py +++ b/cptr/utils/chat_task.py @@ -11,7 +11,9 @@ import uuid from pathlib import Path -from cptr.env import CHAT_MAX_ITERATIONS +from cptr.env import CHAT_MAX_ITERATIONS, CHAT_TOOL_MAX_CHARS +from cptr.utils.context import should_compact +from cptr.utils.summarize import summarize_messages from cptr.models import Chat, ChatMessage, Config from cptr.socket.main import emit_to_user from cptr.utils.ai import ( @@ -398,12 +400,19 @@ async def generate_chat_title( # ── Message history ───────────────────────────────────────── -async def _load_message_history(chat_id: str, message_id: str) -> list[dict]: +async def _load_message_history( + chat_id: str, message_id: str +) -> tuple[list[dict], str | None]: """Load the ancestor chain from message_id to root as LLM messages. Walks up via parent_id so only the active branch is included. The current message (message_id) is always included even if done=False, since it may contain completed tool calls from prior approval rounds. + + If any message in the chain has a chat_summary, everything before it + is skipped and the summary is returned separately for the system prompt. + + Returns (messages, chat_summary_or_None). """ all_msgs = await ChatMessage.get_all_by_chat(chat_id) msg_map = {m.id: m for m in all_msgs} @@ -416,6 +425,14 @@ async def _load_message_history(chat_id: str, message_id: str) -> list[dict]: cur = msg_map.get(cur.parent_id) if cur.parent_id else None chain.reverse() # root → leaf + # Find the most recent message with a chat_summary + existing_summary = None + for i, m in enumerate(chain): + if m.chat_summary: + chain = chain[i:] # keep this message and everything after + existing_summary = m.chat_summary + break + result = [] for m in chain: # Skip in-progress assistant placeholders, but NOT the current @@ -511,11 +528,16 @@ async def _load_message_history(chat_id: str, message_id: str) -> list[dict]: } result.append(entry) - return result + return result, existing_summary def _append_tool_to_messages(messages: list[dict], event: dict, result: str, provider: str): """Append a tool call + result to the message history for the next API call.""" + # Guard against oversized tool outputs + if len(result) > CHAT_TOOL_MAX_CHARS: + half = CHAT_TOOL_MAX_CHARS // 2 + result = result[:half] + "\n\n...(truncated)...\n\n" + result[-half:] + # Add assistant message with tool_call messages.append( { @@ -543,6 +565,30 @@ def _append_tool_to_messages(messages: list[dict], event: dict, result: str, pro ) +def _find_safe_split(messages: list[dict], target_keep: int) -> int: + """Find a safe split index that doesn't break tool call pairs. + + Returns the index where keep_zone starts. Ensures: + - Never splits between an assistant tool_call and its tool result + - keep_zone doesn't start with a tool result message + - At least 2 messages are kept + """ + n = len(messages) + split = max(2, n - target_keep) + + # Walk forward from the initial split to find a safe boundary + while split < n - 1: + msg = messages[split] + # Don't start keep_zone with a tool result — it needs its preceding assistant + if msg.get("role") == "tool": + split += 1 + continue + break + + return min(split, n - 2) # always keep at least 2 + + + # ── Connection resolution ─────────────────────────────────── @@ -651,7 +697,9 @@ def _sync_state(): base_url = connection.get("base_url") or _default_base_url(provider) system = _load_system_prompt(workspace) - messages = await _load_message_history(chat_id, message_id) + messages, loaded_summary = await _load_message_history(chat_id, message_id) + if loaded_summary: + system += f"\n\n[CONVERSATION SUMMARY]\n{loaded_summary}" if regeneration_prompt: messages.append({"role": "user", "content": regeneration_prompt}) tools = get_tool_list() @@ -679,6 +727,34 @@ def _sync_state(): approval_mode = "full" if chat_params["auto_approve_tools"] else "auto" for _iteration in range(CHAT_MAX_ITERATIONS): + # ── Context compaction: summarize older messages if too large ── + if should_compact(messages, system): + target_keep = max(2, len(messages) * 2 // 5) + split_idx = _find_safe_split(messages, target_keep) + drop_zone = messages[:split_idx] + keep_zone = messages[split_idx:] + + api_type = connection.get("api_type", "chat_completions") + summary = await summarize_messages( + drop_zone, loaded_summary, + provider, base_url, api_key, model, + api_type=api_type, + ) + + # Store on the current message — this IS the cutoff + await ChatMessage.update(message_id, chat_summary=summary) + loaded_summary = summary + + # Append summary to system prompt (works for all providers) + system = _load_system_prompt(workspace) + system += f"\n\n[CONVERSATION SUMMARY]\n{summary}" + messages = keep_zone + + logger.info( + "[task %s] compacted: dropped %d msgs, kept %d, summary=%d chars", + message_id[:8], len(drop_zone), len(keep_zone), len(summary), + ) + form_data = ChatCompletionForm( model=model, messages=messages, diff --git a/cptr/utils/context.py b/cptr/utils/context.py new file mode 100644 index 0000000..e8d10e6 --- /dev/null +++ b/cptr/utils/context.py @@ -0,0 +1,54 @@ +"""Context estimation for chat compaction. + +Uses a character-based heuristic (len/4) to estimate token counts. +A follow-up will add real usage data from API responses for precision. +""" + +from __future__ import annotations + +from cptr.env import CHAT_COMPACT_TOKEN_THRESHOLD + + +def estimate_tokens(text: str) -> int: + """Rough token estimate: len/4 for Latin text.""" + return max(1, len(text) // 4) + + +def estimate_messages_tokens(messages: list[dict]) -> int: + """Total estimated tokens for a message list.""" + total = 0 + for m in messages: + content = m.get("content", "") + if isinstance(content, list): + for block in content: + if block.get("type") == "text": + total += estimate_tokens(block.get("text", "")) + elif block.get("type") in ("image", "image_url"): + total += 1000 # rough estimate for images + else: + total += estimate_tokens(content) + # Tool call arguments + for tc in m.get("tool_calls", []): + total += estimate_tokens(tc.get("function", {}).get("arguments", "")) + total += 4 # per-message overhead (role, separators) + return total + + +def should_compact(messages: list[dict], system_prompt: str) -> bool: + """True when estimated tokens exceed the compact token threshold.""" + total = estimate_tokens(system_prompt) + estimate_messages_tokens(messages) + return total > _get_threshold() + + +def _get_threshold() -> int: + """Read threshold: config.toml > env var/default.""" + try: + from cptr.utils.config import load_config + + config = load_config() + val = config.get("chat", {}).get("compact_token_threshold") + if val is not None: + return int(val) + except Exception: + pass + return CHAT_COMPACT_TOKEN_THRESHOLD diff --git a/cptr/utils/summarize.py b/cptr/utils/summarize.py new file mode 100644 index 0000000..c5fb699 --- /dev/null +++ b/cptr/utils/summarize.py @@ -0,0 +1,102 @@ +"""Summarize older messages for context compaction. + +Uses the same LLM as the active chat to generate a rolling summary. +Falls back to naive truncation if the LLM call fails. +""" + +from __future__ import annotations + +import logging + +logger = logging.getLogger(__name__) + +DEFAULT_SUMMARIZE_PROMPT = ( + "Summarize this conversation history concisely. Include:\n" + "- Key decisions made\n" + "- Files created, modified, or deleted\n" + "- Current task state and progress\n" + "- Important context the assistant needs going forward\n" + "- Any user preferences or instructions that should persist\n\n" + "Be factual and specific. Use bullet points. Keep under 500 words." +) + + +def _get_summarize_prompt() -> str: + """Read from config.toml [chat] section, fall back to default.""" + try: + from cptr.utils.config import load_config + + config = load_config() + return config.get("chat", {}).get( + "compact_summary_prompt", DEFAULT_SUMMARIZE_PROMPT + ) + except Exception: + return DEFAULT_SUMMARIZE_PROMPT + + +async def summarize_messages( + messages: list[dict], + existing_summary: str | None, + provider: str, + base_url: str, + api_key: str, + model: str, + api_type: str = "chat_completions", +) -> str: + """Summarize messages into a compact rolling summary. + + If existing_summary is provided, it's included so the new summary + incorporates everything before it. + """ + from cptr.utils.ai import chat_completion + + parts: list[str] = [] + if existing_summary: + parts.append(f"[Previous summary]\n{existing_summary}\n") + parts.append("[Recent messages to summarize]") + for m in messages: + role = m.get("role", "unknown") + content = m.get("content", "") + if isinstance(content, list): + content = " ".join( + b.get("text", "") for b in content if b.get("type") == "text" + ) + # Truncate very long messages (e.g. tool outputs) + if len(content) > 2000: + content = content[:1000] + "\n...(truncated)...\n" + content[-500:] + parts.append(f"{role}: {content}") + + text = "\n".join(parts) + # Cap input to summarization call + if len(text) > 30_000: + text = text[:15_000] + "\n...\n" + text[-10_000:] + + try: + result = await chat_completion( + provider=provider, + base_url=base_url, + api_key=api_key, + model=model, + messages=[{"role": "user", "content": text}], + system=_get_summarize_prompt(), + max_tokens=1000, + api_type=api_type, + ) + logger.info("[summarize] LLM summary: %d chars", len(result)) + return result + except Exception: + logger.warning("[summarize] LLM call failed, using naive fallback", exc_info=True) + return _naive_summary(messages, existing_summary) + + +def _naive_summary(messages: list[dict], existing: str | None) -> str: + """Fallback when LLM summarization fails.""" + parts: list[str] = [] + if existing: + parts.append(existing) + for m in messages: + role = m.get("role", "") + content = m.get("content", "") + if isinstance(content, str) and role in ("user", "assistant"): + parts.append(f"- {role}: {content[:200]}") + return "\n".join(parts)[:2000] From 74a1c267132531a366b2affdff2fb2f86b030518 Mon Sep 17 00:00:00 2001 From: Timothy Jaeryang Baek Date: Thu, 11 Jun 2026 12:35:55 +0100 Subject: [PATCH 2/8] refac --- cptr/utils/ai.py | 20 ++++++++++++++++---- cptr/utils/chat_task.py | 10 +++++++++- cptr/utils/context.py | 28 +++++++++++++++++++++++++--- 3 files changed, 50 insertions(+), 8 deletions(-) diff --git a/cptr/utils/ai.py b/cptr/utils/ai.py index 45eb39d..e34b6eb 100644 --- a/cptr/utils/ai.py +++ b/cptr/utils/ai.py @@ -234,12 +234,18 @@ async def stream_anthropic( logger.info("[stream] anthropic status=%s", resp.status_code) resp.raise_for_status() current_block: dict = {} + usage_data: dict = {} async for line in resp.aiter_lines(): if not line.startswith("data: "): continue event = json.loads(line[6:]) etype = event.get("type") + if etype == "message_start": + msg_usage = event.get("message", {}).get("usage", {}) + if msg_usage: + usage_data["input_tokens"] = msg_usage.get("input_tokens", 0) + if etype == "content_block_start": block = event["content_block"] current_block = {"type": block["type"], "index": event["index"]} @@ -267,10 +273,11 @@ async def stream_anthropic( } elif etype == "message_delta": - usage = event.get("usage", {}) - if usage: + delta_usage = event.get("usage", {}) + if delta_usage: + usage_data["output_tokens"] = delta_usage.get("output_tokens", 0) emitted = True - yield {"type": "usage", **usage} + yield {"type": "usage", **usage_data} elif etype == "message_stop": emitted = True @@ -392,8 +399,13 @@ async def stream_openai_completions( } if chunk.get("usage"): + raw = chunk["usage"] emitted = True - yield {"type": "usage", **chunk["usage"]} + yield { + "type": "usage", + "input_tokens": raw.get("prompt_tokens", 0), + "output_tokens": raw.get("completion_tokens", 0), + } emitted = True yield {"type": "done"} diff --git a/cptr/utils/chat_task.py b/cptr/utils/chat_task.py index 1a2335c..2e938e7 100644 --- a/cptr/utils/chat_task.py +++ b/cptr/utils/chat_task.py @@ -726,9 +726,12 @@ def _sync_state(): if "tool_approval_mode" not in chat_params and "auto_approve_tools" in chat_params: approval_mode = "full" if chat_params["auto_approve_tools"] else "auto" + last_usage: dict | None = None # real usage from last API call + new_messages_since: int = 0 # messages appended since last API call + for _iteration in range(CHAT_MAX_ITERATIONS): # ── Context compaction: summarize older messages if too large ── - if should_compact(messages, system): + if should_compact(messages, system, last_usage, new_messages_since): target_keep = max(2, len(messages) * 2 // 5) split_idx = _find_safe_split(messages, target_keep) drop_zone = messages[:split_idx] @@ -749,6 +752,8 @@ def _sync_state(): system = _load_system_prompt(workspace) system += f"\n\n[CONVERSATION SUMMARY]\n{summary}" messages = keep_zone + last_usage = None # reset after compaction + new_messages_since = 0 logger.info( "[task %s] compacted: dropped %d msgs, kept %d, summary=%d chars", @@ -829,6 +834,7 @@ def _sync_state(): # Append to messages for next iteration _append_tool_to_messages(messages, event, result, provider) + new_messages_since += 2 # tool_call + tool_result restart = True break @@ -852,6 +858,8 @@ def _sync_state(): elif event["type"] == "usage": _flush_text() usage = {k: v for k, v in event.items() if k != "type"} + last_usage = usage + new_messages_since = 0 logger.info( "[task %s] save (usage): content=%d chars, output=%d items, types=%s", message_id[:8], diff --git a/cptr/utils/context.py b/cptr/utils/context.py index e8d10e6..3451f71 100644 --- a/cptr/utils/context.py +++ b/cptr/utils/context.py @@ -34,10 +34,32 @@ def estimate_messages_tokens(messages: list[dict]) -> int: return total -def should_compact(messages: list[dict], system_prompt: str) -> bool: - """True when estimated tokens exceed the compact token threshold.""" +def should_compact( + messages: list[dict], + system_prompt: str, + last_usage: dict | None = None, + new_messages_since: int = 0, +) -> bool: + """True when estimated tokens exceed the compact token threshold. + + If last_usage is provided (real data from the previous API call), + uses actual input_tokens + output_tokens as the base and only + estimates the new messages appended since that call. + Falls back to full estimation when no usage data exists. + """ + threshold = _get_threshold() + + if last_usage and last_usage.get("input_tokens"): + # Real base from last API call + estimate only new additions + base = last_usage["input_tokens"] + last_usage.get("output_tokens", 0) + if new_messages_since > 0: + new_msgs = messages[-new_messages_since:] + base += estimate_messages_tokens(new_msgs) + return base > threshold + + # Full estimation fallback total = estimate_tokens(system_prompt) + estimate_messages_tokens(messages) - return total > _get_threshold() + return total > threshold def _get_threshold() -> int: From 046864f6e4650075118e3531bf5f52d6026532ec Mon Sep 17 00:00:00 2001 From: Timothy Jaeryang Baek Date: Thu, 11 Jun 2026 13:38:55 +0100 Subject: [PATCH 3/8] refac --- cptr/frontend/src/lib/apis/chat.ts | 2 +- .../src/lib/components/chat/ChatPanel.svelte | 12 +- .../src/lib/components/chat/PlusMenu.svelte | 139 +++++++++++++++++- cptr/frontend/src/lib/stores.ts | 9 +- cptr/utils/ai.py | 33 ++++- cptr/utils/chat_task.py | 9 +- 6 files changed, 184 insertions(+), 20 deletions(-) diff --git a/cptr/frontend/src/lib/apis/chat.ts b/cptr/frontend/src/lib/apis/chat.ts index 0064f90..2ebf53a 100644 --- a/cptr/frontend/src/lib/apis/chat.ts +++ b/cptr/frontend/src/lib/apis/chat.ts @@ -67,7 +67,7 @@ export const sendMessage = ( workspace: string, chatId?: string, parentId?: string | null, - params: { tool_approval_mode?: string } = {}, + params: { tool_approval_mode?: string; plan_mode?: boolean; request_params?: Record } = {}, regenerationPrompt?: string, files?: { id: string; name: string; url: string; type: string }[] ) => diff --git a/cptr/frontend/src/lib/components/chat/ChatPanel.svelte b/cptr/frontend/src/lib/components/chat/ChatPanel.svelte index 953c65e..1e1bfba 100644 --- a/cptr/frontend/src/lib/components/chat/ChatPanel.svelte +++ b/cptr/frontend/src/lib/components/chat/ChatPanel.svelte @@ -29,7 +29,8 @@ toolApprovalMode, planMode, streamingBehavior, - selectedModelId + selectedModelId, + requestParams } from '$lib/stores'; import ChatInput from './ChatInput.svelte'; @@ -552,7 +553,7 @@ workspace, chatId, parentId, - { tool_approval_mode: mode, plan_mode: get(planMode) }, + { tool_approval_mode: mode, plan_mode: get(planMode), request_params: get(requestParams) }, undefined, files ); @@ -617,7 +618,7 @@ workspace, chatId ?? undefined, parentId, - { tool_approval_mode: mode, plan_mode: get(planMode) }, + { tool_approval_mode: mode, plan_mode: get(planMode), request_params: get(requestParams) }, undefined, files ); @@ -778,7 +779,8 @@ const mode = get(toolApprovalMode); const result = await apiSendMessage('', selectedModel, workspace, chatId, msg.parent_id, { tool_approval_mode: mode, - plan_mode: get(planMode) + plan_mode: get(planMode), + request_params: get(requestParams) }); if (result.assistant_message) { allMessages = [...allMessages, result.assistant_message]; @@ -821,7 +823,7 @@ workspace, chatId, msg.parent_id, - { tool_approval_mode: get(toolApprovalMode), plan_mode: get(planMode) } + { tool_approval_mode: get(toolApprovalMode), plan_mode: get(planMode), request_params: get(requestParams) } ); if (result.user_message && result.assistant_message) { allMessages = [...allMessages, result.user_message, result.assistant_message]; diff --git a/cptr/frontend/src/lib/components/chat/PlusMenu.svelte b/cptr/frontend/src/lib/components/chat/PlusMenu.svelte index f17bc12..e36ec3a 100644 --- a/cptr/frontend/src/lib/components/chat/PlusMenu.svelte +++ b/cptr/frontend/src/lib/components/chat/PlusMenu.svelte @@ -1,6 +1,6 @@ @@ -268,6 +314,31 @@ > + +
+ + {:else if tab === 'tools'} @@ -308,6 +379,70 @@ {/each} + {:else if tab === 'request_params'} + +
+ + +
+ + {#if paramRows.length === 0} +

No parameters configured

+ {:else} +
+ {#each paramRows as row, i} +
+ + + +
+ {/each} +
+ {/if} + + +
{/if} {/if} diff --git a/cptr/frontend/src/lib/stores.ts b/cptr/frontend/src/lib/stores.ts index 7de4a65..a4c5977 100644 --- a/cptr/frontend/src/lib/stores.ts +++ b/cptr/frontend/src/lib/stores.ts @@ -76,6 +76,7 @@ export interface UserPreferences { keybindings?: Record; // user-customised keyboard shortcuts version?: string; // last seen app version for changelog selectedModelId?: string; // last selected chat model, synced across browsers + requestParams?: Record; // arbitrary params merged into API request body } export type Theme = 'dark' | 'light' | 'system'; @@ -135,6 +136,7 @@ export const sidebarWidth = writable(220); export const theme = writable('dark'); export const toolApprovalMode = writable('auto'); export const planMode = writable(false); +export const requestParams = writable>({}); export const appVersion = writable(''); export const lastSeenVersion = writable(''); export const showChangelog = writable(false); @@ -285,7 +287,8 @@ function persistPreferences(): void { workspaceOrder: get(workspaceOrder), keybindings: get(keybindings), version: get(lastSeenVersion), - selectedModelId: get(selectedModelId) || undefined + selectedModelId: get(selectedModelId) || undefined, + requestParams: Object.keys(get(requestParams)).length ? get(requestParams) : undefined }; savePreferences(prefs as unknown as Record).catch(() => {}); }, 300); @@ -313,6 +316,9 @@ function subscribeForPersistence() { planMode.subscribe(() => { if (get(stateLoaded)) persistPreferences(); }); + requestParams.subscribe(() => { + if (get(stateLoaded)) persistPreferences(); + }); workspaceOrder.subscribe(() => { if (get(stateLoaded)) persistPreferences(); }); @@ -351,6 +357,7 @@ export async function loadPreferences(): Promise { if (prefs.keybindings) loadKeybindings(prefs.keybindings as Record); if (prefs.version) lastSeenVersion.set(prefs.version as string); if (prefs.selectedModelId) selectedModelId.set(prefs.selectedModelId as string); + if (prefs.requestParams) requestParams.set(prefs.requestParams as Record); } catch { // First run, no preferences yet } diff --git a/cptr/utils/ai.py b/cptr/utils/ai.py index e34b6eb..072ba91 100644 --- a/cptr/utils/ai.py +++ b/cptr/utils/ai.py @@ -63,6 +63,7 @@ async def chat_completion( system: str = "", max_tokens: int = 100, api_type: str = "chat_completions", + request_params: dict | None = None, ) -> str: """Simple non-streaming chat completion. Returns the text content. @@ -78,6 +79,8 @@ async def chat_completion( } if system: body["system"] = system + if request_params: + body.update(request_params) resp = await client.post( f"{base_url}/messages", json=body, @@ -95,6 +98,8 @@ async def chat_completion( } if system: body_r["instructions"] = system + if request_params: + body_r.update(request_params) resp = await client.post( f"{base_url}/responses", json=body_r, @@ -105,13 +110,16 @@ async def chat_completion( all_messages = messages[:] if system: all_messages.insert(0, {"role": "system", "content": system}) + body_cc: dict = { + "model": model, + "messages": all_messages, + "max_completion_tokens": max_tokens, + } + if request_params: + body_cc.update(request_params) resp = await client.post( f"{base_url}/chat/completions", - json={ - "model": model, - "messages": all_messages, - "max_completion_tokens": max_tokens, - }, + json=body_cc, headers={"Authorization": f"Bearer {api_key}"}, ) if resp.status_code >= 400: @@ -200,7 +208,7 @@ def _to_anthropic_messages(messages: list[dict]) -> list[dict]: async def stream_anthropic( - form_data: ChatCompletionForm, url: str, key: str + form_data: ChatCompletionForm, url: str, key: str, *, request_params: dict | None = None ) -> AsyncIterator[dict]: tools = [ { @@ -219,6 +227,8 @@ async def stream_anthropic( "stream": True, "max_tokens": 4096, } + if request_params: + body.update(request_params) # Remove None values body = {k: v for k, v in body.items() if v is not None} headers = {"x-api-key": key, "anthropic-version": "2023-06-01"} @@ -245,6 +255,9 @@ async def stream_anthropic( msg_usage = event.get("message", {}).get("usage", {}) if msg_usage: usage_data["input_tokens"] = msg_usage.get("input_tokens", 0) + for cache_key in ("cache_read_input_tokens", "cache_creation_input_tokens"): + if msg_usage.get(cache_key): + usage_data[cache_key] = msg_usage[cache_key] if etype == "content_block_start": block = event["content_block"] @@ -328,7 +341,7 @@ def _to_openai_messages(messages: list[dict], instructions: str) -> list[dict]: async def stream_openai_completions( - form_data: ChatCompletionForm, url: str, key: str + form_data: ChatCompletionForm, url: str, key: str, *, request_params: dict | None = None ) -> AsyncIterator[dict]: tools = [ { @@ -350,6 +363,8 @@ async def stream_openai_completions( } if tools: body["tools"] = tools + if request_params: + body.update(request_params) headers = {"Authorization": f"Bearer {key}"} emitted = False @@ -472,7 +487,7 @@ def _to_responses_input(messages: list[dict], instructions: str) -> list[dict]: async def stream_openai_responses( - form_data: ChatCompletionForm, url: str, key: str + form_data: ChatCompletionForm, url: str, key: str, *, request_params: dict | None = None ) -> AsyncIterator[dict]: tools = [ { @@ -493,6 +508,8 @@ async def stream_openai_responses( body["instructions"] = form_data.instructions if tools: body["tools"] = tools + if request_params: + body.update(request_params) headers = {"Authorization": f"Bearer {key}"} emitted = False diff --git a/cptr/utils/chat_task.py b/cptr/utils/chat_task.py index 2e938e7..0068a3b 100644 --- a/cptr/utils/chat_task.py +++ b/cptr/utils/chat_task.py @@ -729,6 +729,9 @@ def _sync_state(): last_usage: dict | None = None # real usage from last API call new_messages_since: int = 0 # messages appended since last API call + # Request params: arbitrary key-value pairs merged into the API request body + request_params = chat_params.get("request_params") or None + for _iteration in range(CHAT_MAX_ITERATIONS): # ── Context compaction: summarize older messages if too large ── if should_compact(messages, system, last_usage, new_messages_since): @@ -768,11 +771,11 @@ def _sync_state(): ) if provider == "anthropic": - stream = stream_anthropic(form_data, base_url, api_key) + stream = stream_anthropic(form_data, base_url, api_key, request_params=request_params) elif connection.get("api_type") == "responses": - stream = stream_openai_responses(form_data, base_url, api_key) + stream = stream_openai_responses(form_data, base_url, api_key, request_params=request_params) else: - stream = stream_openai_completions(form_data, base_url, api_key) + stream = stream_openai_completions(form_data, base_url, api_key, request_params=request_params) restart = False From dc3e2ff85e29d1ccdbf06501033a63ba3228290d Mon Sep 17 00:00:00 2001 From: Timothy Jaeryang Baek Date: Thu, 11 Jun 2026 13:46:17 +0100 Subject: [PATCH 4/8] refac --- .../src/lib/components/DropdownMenu.svelte | 85 ++++++++++--------- .../components/common/ModelSelector.svelte | 34 +++++--- 2 files changed, 67 insertions(+), 52 deletions(-) diff --git a/cptr/frontend/src/lib/components/DropdownMenu.svelte b/cptr/frontend/src/lib/components/DropdownMenu.svelte index 1173bc0..37fc2ce 100644 --- a/cptr/frontend/src/lib/components/DropdownMenu.svelte +++ b/cptr/frontend/src/lib/components/DropdownMenu.svelte @@ -29,6 +29,8 @@ maxHeight?: string; /** Optional header snippet rendered above items (e.g. search input). */ header?: Snippet; + /** Optional snippet rendered when items array is empty. */ + empty?: Snippet; /** Additional CSS classes for the menu container. */ className?: string; /** Horizontal alignment relative to anchor: 'start' (left) or 'end' (right). */ @@ -43,6 +45,7 @@ preferAbove = false, maxHeight, header, + empty, className = '', align = 'start' }: Props = $props(); @@ -148,44 +151,48 @@ {/if}
- {#each items as item} - {#if item.divider} -
- {:else} - - {/if} - {/each} + {#if items.length === 0 && empty} + {@render empty()} + {:else} + {#each items as item} + {#if item.divider} +
+ {:else} + + {/if} + {/each} + {/if}
diff --git a/cptr/frontend/src/lib/components/common/ModelSelector.svelte b/cptr/frontend/src/lib/components/common/ModelSelector.svelte index e951c13..5091d82 100644 --- a/cptr/frontend/src/lib/components/common/ModelSelector.svelte +++ b/cptr/frontend/src/lib/components/common/ModelSelector.svelte @@ -31,6 +31,7 @@ ); async function toggle() { + if ($chatModels.length === 0) return; if (open) { open = false; return; @@ -50,22 +51,26 @@ onclick={toggle} > {$chatModels.find((m) => m.id === selectedModel)?.name || 'Select model'}{$chatModels.length === 0 + ? 'No models available' + : $chatModels.find((m) => m.id === selectedModel)?.name || 'Select model'} - - - + {#if $chatModels.length > 0} + + + + {/if} -{#if open && btnEl} +{#if open && btnEl && $chatModels.length > 0} {/snippet} + {#snippet empty()} +
No matches
+ {/snippet}
{/if} From a56e4a9ff1948d4264d906ce672826dc02ff4ebe Mon Sep 17 00:00:00 2001 From: Timothy Jaeryang Baek Date: Thu, 11 Jun 2026 14:32:49 +0100 Subject: [PATCH 5/8] refac --- cptr/frontend/src/lib/apis/admin.ts | 24 ++ .../src/lib/components/Admin/Models.svelte | 227 ++++++++++++++++++ .../src/lib/components/AdminPanel.svelte | 7 +- cptr/frontend/src/lib/components/Icon.svelte | 4 + .../src/lib/components/chat/PlusMenu.svelte | 17 +- cptr/frontend/src/lib/i18n/locales/en.json | 16 +- cptr/routers/admin.py | 69 ++++++ cptr/routers/chat.py | 7 + cptr/utils/chat_task.py | 12 +- 9 files changed, 368 insertions(+), 15 deletions(-) create mode 100644 cptr/frontend/src/lib/components/Admin/Models.svelte diff --git a/cptr/frontend/src/lib/apis/admin.ts b/cptr/frontend/src/lib/apis/admin.ts index 86735f8..b12eb51 100644 --- a/cptr/frontend/src/lib/apis/admin.ts +++ b/cptr/frontend/src/lib/apis/admin.ts @@ -104,3 +104,27 @@ export const verifyConnection = (id: string) => fetchJSON<{ ok: boolean; message: string }>(`/api/admin/connections/${id}/verify`, { method: 'POST' }); + +// ── Model Config ──────────────────────────────────────────── + +export interface ModelConfigEntry { + is_active?: boolean; + params?: { request_params?: Record }; +} + +export interface ModelConfigResponse { + config: Record; + models: { id: string; name: string; provider: string; connection_id: string }[]; +} + +export const getModelConfig = async (): Promise => + fetchJSON('/api/admin/models/config'); + +export const updateModelConfig = ( + modelId: string, + update: { is_active?: boolean; params?: Record } +) => + fetchJSON(`/api/admin/models/${encodeURIComponent(modelId)}/config`, { + ...jsonBody(update), + method: 'PUT' + }); diff --git a/cptr/frontend/src/lib/components/Admin/Models.svelte b/cptr/frontend/src/lib/components/Admin/Models.svelte new file mode 100644 index 0000000..4f30297 --- /dev/null +++ b/cptr/frontend/src/lib/components/Admin/Models.svelte @@ -0,0 +1,227 @@ + + +{#snippet paramRows(rows: ParamRow[], onInput: () => void, onRemove: (i: number) => void, onAdd: () => void)} +
+ request params + {#each rows as row, i} +
+ + + +
+ {/each} + +
+{/snippet} + +
+

{$t('admin.models')}

+ + {#if loading} +
+ {:else} +
+ + + + {#if globalExpanded} + {@render paramRows( + globalRows, + () => (globalDirty = true), + (i) => { globalRows = globalRows.filter((_, idx) => idx !== i); globalDirty = true; }, + () => { globalRows = [...globalRows, { key: '', value: '' }]; globalDirty = true; } + )} + {/if} + + + {#each models as model} + + + {#if selectedId === model.id} + {@render paramRows( + model.rows, + () => (model.dirty = true), + (i) => { model.rows = model.rows.filter((_, idx) => idx !== i); model.dirty = true; }, + () => { model.rows = [...model.rows, { key: '', value: '' }]; model.dirty = true; } + )} + {/if} + {/each} + + {#if models.length === 0} +

{$t('models.noModels')}

+ {/if} +
+ +
+ +
+ {/if} +
+ + diff --git a/cptr/frontend/src/lib/components/AdminPanel.svelte b/cptr/frontend/src/lib/components/AdminPanel.svelte index a1c3249..7d953fa 100644 --- a/cptr/frontend/src/lib/components/AdminPanel.svelte +++ b/cptr/frontend/src/lib/components/AdminPanel.svelte @@ -3,6 +3,7 @@ import Modal from './Modal.svelte'; import Users from './Admin/Users.svelte'; import Connections from './Admin/Connections.svelte'; + import Models from './Admin/Models.svelte'; import AdminSettings from './Admin/Settings.svelte'; import { t } from '$lib/i18n'; @@ -12,7 +13,7 @@ let { onclose }: Props = $props(); - let activeTab = $state<'users' | 'connections' | 'settings'>('users'); + let activeTab = $state<'users' | 'connections' | 'models' | 'settings'>('users'); {$t('settings.back')} - {#each [{ id: 'users', label: $t('admin.users'), icon: 'user' }, { id: 'connections', label: $t('admin.connections'), icon: 'plug' }, { id: 'settings', label: $t('admin.settings'), icon: 'settings' }] as tab} + {#each [{ id: 'users', label: $t('admin.users'), icon: 'user' }, { id: 'connections', label: $t('admin.connections'), icon: 'plug' }, { id: 'models', label: $t('admin.models'), icon: 'cube' }, { id: 'settings', label: $t('admin.settings'), icon: 'settings' }] as tab} {/each} @@ -436,10 +434,7 @@ onclick={addParamRow} disabled={!canAddParam} > - - - - + Add parameter diff --git a/cptr/frontend/src/lib/i18n/locales/en.json b/cptr/frontend/src/lib/i18n/locales/en.json index 39910d0..69a0704 100644 --- a/cptr/frontend/src/lib/i18n/locales/en.json +++ b/cptr/frontend/src/lib/i18n/locales/en.json @@ -274,5 +274,19 @@ "admin.webCcBaseUrl": "Base URL", "admin.webCcKey": "API Key", "admin.webCcModel": "Model", - "admin.webCcHint": "Use any OpenAI-compatible endpoint (e.g. Perplexity Sonar, LiteLLM proxy)" + "admin.webCcHint": "Use any OpenAI-compatible endpoint (e.g. Perplexity Sonar, LiteLLM proxy)", + + "admin.models": "Models", + "models.defaults": "Defaults", + "models.noDefaults": "No global defaults", + "models.noParams": "No parameters", + "models.noModels": "No models available", + "models.addConnection": "Add a connection first.", + "models.failedToLoad": "Failed to load model config", + "models.failedToToggle": "Failed to toggle model", + "models.failedToSave": "Failed to save", + "models.savedDefaults": "Saved global defaults", + "models.savedParams": "Saved params for {{name}}", + "models.add": "+ Add", + "models.save": "Save" } diff --git a/cptr/routers/admin.py b/cptr/routers/admin.py index 9e2dd92..6a11aeb 100644 --- a/cptr/routers/admin.py +++ b/cptr/routers/admin.py @@ -353,3 +353,72 @@ class UpdateConnectionRequest(BaseModel): api_key: Optional[str] = None enabled: Optional[bool] = None models: Optional[list[str]] = None + + +# ── Model config ───────────────────────────────────────────── + +CONFIG_KEY_CHAT_MODELS = "chat.models" + + +@router.get("/models/config") +async def get_model_config(request: Request): + """Get per-model config and full model list (including inactive) for the admin Models tab.""" + require_admin(request) + config = await Config.get(CONFIG_KEY_CHAT_MODELS) or {} + + # Build full model list from all enabled connections (same as chat.py + # get_models but without filtering inactive models). + from cptr.routers.chat import _get_connections, _get_connection_models + + connections = [c for c in await _get_connections() if c.get("enabled", True)] + models = [] + for conn in connections: + model_ids = await _get_connection_models(conn, request.app.state) + prefix = (conn.get("prefix_id") or "").strip() + for model_id in model_ids or []: + prefixed_id = f"{prefix}/{model_id}" if prefix else model_id + models.append( + { + "id": prefixed_id, + "name": model_id, + "provider": conn.get("provider", ""), + "connection_id": conn["id"], + } + ) + + return {"config": config, "models": models} + + +class UpdateModelConfigRequest(BaseModel): + is_active: Optional[bool] = None + params: Optional[dict] = None + + +@router.put("/models/{model_id:path}/config") +async def update_model_config( + model_id: str, body: UpdateModelConfigRequest, request: Request +): + """Update config for a specific model (or '*' for global defaults).""" + require_admin(request) + all_config = await Config.get(CONFIG_KEY_CHAT_MODELS) or {} + + entry = all_config.get(model_id, {}) + + if body.is_active is not None: + entry["is_active"] = body.is_active + + if body.params is not None: + entry["params"] = body.params + + # Clean up empty entries + if not entry or (entry.keys() <= {"is_active"} and entry.get("is_active") is not False): + if not entry.get("params"): + all_config.pop(model_id, None) + else: + all_config[model_id] = entry + else: + all_config[model_id] = entry + + await Config.upsert({CONFIG_KEY_CHAT_MODELS: all_config}) + return {"ok": True} + diff --git a/cptr/routers/chat.py b/cptr/routers/chat.py index 57e7342..5167029 100644 --- a/cptr/routers/chat.py +++ b/cptr/routers/chat.py @@ -196,6 +196,13 @@ async def get_models(request: Request): ) default_model = await Config.get("chat.default_model") + + # Filter out inactive models + chat_models_config = await Config.get("chat.models") or {} + inactive = {k for k, v in chat_models_config.items() if v.get("is_active") is False} + if inactive: + models = [m for m in models if m["id"] not in inactive] + return {"models": models, "default": default_model} diff --git a/cptr/utils/chat_task.py b/cptr/utils/chat_task.py index 0068a3b..053fd4a 100644 --- a/cptr/utils/chat_task.py +++ b/cptr/utils/chat_task.py @@ -730,7 +730,17 @@ def _sync_state(): new_messages_since: int = 0 # messages appended since last API call # Request params: arbitrary key-value pairs merged into the API request body - request_params = chat_params.get("request_params") or None + # Merge order: global ("*") → per-model → chat overrides (chat wins) + chat_request_params = chat_params.get("request_params") or {} + global_rp = {} + model_rp = {} + try: + chat_models_config = await Config.get("chat.models") or {} + global_rp = chat_models_config.get("*", {}).get("params", {}).get("request_params", {}) + model_rp = chat_models_config.get(model, {}).get("params", {}).get("request_params", {}) + except Exception: + pass + request_params = {**global_rp, **model_rp, **chat_request_params} or None for _iteration in range(CHAT_MAX_ITERATIONS): # ── Context compaction: summarize older messages if too large ── From 49804e92d24282d692df1f28340fe8c418965d66 Mon Sep 17 00:00:00 2001 From: Timothy Jaeryang Baek Date: Thu, 11 Jun 2026 14:34:20 +0100 Subject: [PATCH 6/8] refac --- cptr/frontend/src/lib/components/Admin/Models.svelte | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cptr/frontend/src/lib/components/Admin/Models.svelte b/cptr/frontend/src/lib/components/Admin/Models.svelte index 4f30297..4ea41ef 100644 --- a/cptr/frontend/src/lib/components/Admin/Models.svelte +++ b/cptr/frontend/src/lib/components/Admin/Models.svelte @@ -124,7 +124,7 @@ {#snippet paramRows(rows: ParamRow[], onInput: () => void, onRemove: (i: number) => void, onAdd: () => void)} -
+
request params {#each rows as row, i}
From ae760e191ac49a2d030dcd0a868a955fd5d66bdd Mon Sep 17 00:00:00 2001 From: Timothy Jaeryang Baek Date: Thu, 11 Jun 2026 14:39:50 +0100 Subject: [PATCH 7/8] refac --- .../src/lib/components/AdminPanel.svelte | 60 ----------------- .../src/lib/components/SettingsModal.svelte | 67 +++++++++++++++++-- .../src/lib/components/Sidebar.svelte | 24 ++----- 3 files changed, 67 insertions(+), 84 deletions(-) delete mode 100644 cptr/frontend/src/lib/components/AdminPanel.svelte diff --git a/cptr/frontend/src/lib/components/AdminPanel.svelte b/cptr/frontend/src/lib/components/AdminPanel.svelte deleted file mode 100644 index 7d953fa..0000000 --- a/cptr/frontend/src/lib/components/AdminPanel.svelte +++ /dev/null @@ -1,60 +0,0 @@ - - - - - -
- {#if activeTab === 'users'} - - {:else if activeTab === 'connections'} - - {:else if activeTab === 'models'} - - {:else if activeTab === 'settings'} - - {/if} -
-
diff --git a/cptr/frontend/src/lib/components/SettingsModal.svelte b/cptr/frontend/src/lib/components/SettingsModal.svelte index 44fb49a..49d2524 100644 --- a/cptr/frontend/src/lib/components/SettingsModal.svelte +++ b/cptr/frontend/src/lib/components/SettingsModal.svelte @@ -5,16 +5,47 @@ import Account from './Settings/Account.svelte'; import Keyboard from './Settings/Keyboard.svelte'; import About from './Settings/About.svelte'; + import Users from './Admin/Users.svelte'; + import Connections from './Admin/Connections.svelte'; + import Models from './Admin/Models.svelte'; + import AdminSettings from './Admin/Settings.svelte'; + import { session } from '$lib/session'; import { t } from '$lib/i18n'; + type Tab = + | 'general' + | 'keyboard' + | 'account' + | 'about' + | 'users' + | 'connections' + | 'models' + | 'admin_settings'; + interface Props { onclose: () => void; - initialTab?: 'general' | 'keyboard' | 'account' | 'about'; + initialTab?: Tab; } let { onclose, initialTab = 'general' }: Props = $props(); - let activeTab = $state<'general' | 'keyboard' | 'account' | 'about'>(initialTab); + let activeTab = $state(initialTab); + + const isAdmin = $derived($session?.role === 'admin'); + + const personalTabs: { id: Tab; label: string; icon: string }[] = [ + { id: 'general', label: 'General', icon: 'settings' }, + { id: 'keyboard', label: 'Keyboard', icon: 'terminal' }, + { id: 'account', label: 'Account', icon: 'user' }, + { id: 'about', label: 'About', icon: 'info' } + ]; + + const adminTabs: { id: Tab; label: string; icon: string }[] = [ + { id: 'users', label: 'Users', icon: 'user' }, + { id: 'connections', label: 'Connections', icon: 'plug' }, + { id: 'models', label: 'Models', icon: 'cube' }, + { id: 'admin_settings', label: 'Configuration', icon: 'shield' } + ]; {$t('settings.back')} - {#each [{ id: 'general', label: $t('settings.general'), icon: 'settings' }, { id: 'keyboard', label: 'Keyboard', icon: 'terminal' }, { id: 'account', label: $t('settings.account'), icon: 'user' }, { id: 'about', label: $t('settings.about'), icon: 'info' }] as tab} + + + {#each personalTabs as tab} {/each} + + + {#if isAdmin} + + + {#each adminTabs as tab} + + {/each} + {/if}
@@ -56,6 +107,14 @@ {:else if activeTab === 'about'} + {:else if activeTab === 'users'} + + {:else if activeTab === 'connections'} + + {:else if activeTab === 'models'} + + {:else if activeTab === 'admin_settings'} + {/if}
diff --git a/cptr/frontend/src/lib/components/Sidebar.svelte b/cptr/frontend/src/lib/components/Sidebar.svelte index 346f21e..74f8a44 100644 --- a/cptr/frontend/src/lib/components/Sidebar.svelte +++ b/cptr/frontend/src/lib/components/Sidebar.svelte @@ -21,7 +21,7 @@ import DirectoryPicker from './DirectoryPicker.svelte'; import DropdownMenu from './DropdownMenu.svelte'; import SettingsModal from './SettingsModal.svelte'; - import AdminPanel from './AdminPanel.svelte'; + import { tooltip } from '$lib/tooltip'; import { session, clearSession } from '$lib/session'; import { getWelcome } from '$lib/apis/state'; @@ -37,8 +37,7 @@ let showPicker = $state(false); let showMenu = $state(false); let showSettings = $state(false); - let settingsTab = $state<'general' | 'account' | 'about'>('general'); - let showAdmin = $state(false); + let settingsTab = $state('general'); let wsMenuPath = $state(null); let wsMenuAnchor = $state(null); @@ -508,25 +507,14 @@ image: $session.profile_image_url || '/user.png', onclick: () => { settingsTab = 'account'; + showMenu = false; showSettings = true; } } ] : []), ...($session ? [{ divider: true, label: '', onclick: () => {} }] : []), - { label: $t('sidebar.settings'), icon: 'settings', onclick: openSettings }, - ...($session?.role === 'admin' - ? [ - { - label: $t('sidebar.admin'), - icon: 'shield', - onclick: () => { - showMenu = false; - showAdmin = true; - } - } - ] - : []), + { label: $t('sidebar.settings'), icon: 'settings', shortcut: formatChord($keybindings.openSettings), onclick: openSettings }, { divider: true, label: '', onclick: () => {} }, { label: $t('sidebar.logOut'), icon: 'log-out', onclick: logout } ]} @@ -562,10 +550,6 @@ /> {/if} -{#if showAdmin} - (showAdmin = false)} /> -{/if} -