Skip to content

Commit f422bdc

Browse files
committed
Add ability to compact threads
1 parent 18ec7c2 commit f422bdc

9 files changed

Lines changed: 478 additions & 9 deletions

File tree

llms/db.py

Lines changed: 87 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
import json
22
import os
3+
import re
34
import sqlite3
45
import threading
56
from queue import Empty, Queue
@@ -116,6 +117,92 @@ def order_by(all_columns, sort):
116117
return f"ORDER BY {', '.join(cols)} " if len(cols) > 0 else ""
117118

118119

120+
def count_tokens_approx(messages: list[dict]) -> int:
    """
    Approximate token count for chat completion messages without external libraries.

    Handles various message formats:
    - Simple string content: {"role": "user", "content": "hello"}
    - Content arrays: {"role": "user", "content": [{"type": "text", "text": "hello"}]}
    - Tool calls, thinking/reasoning fields, etc.

    Returns an integer token estimate (message overhead + reply priming included).
    """

    def count_text_tokens(text: str) -> int:
        # Heuristic tokenizer: split into digit runs, letter runs, punctuation,
        # underscore runs, and whitespace, then cost each chunk.
        if not text:
            return 0

        tokens = 0
        # [^\W\d_]+ matches letters in ANY script (not just a-z), so non-English
        # text is counted instead of being silently dropped; _+ keeps underscores
        # (common in identifiers/code) from being dropped as well.
        chunks = re.findall(r"\d+|[^\W\d_]+|[^\s\w]|_+|\s+", text)
        for chunk in chunks:
            if not chunk.strip():
                # Whitespace is mostly absorbed into adjacent tokens.
                tokens += len(chunk) // 4
            elif chunk.isdigit():
                # Digit runs group roughly three digits per token.
                tokens += (len(chunk) + 2) // 3
            elif chunk.isalpha():
                # Short words are one token; longer words ~4 chars per token.
                tokens += 1 if len(chunk) <= 4 else (len(chunk) + 3) // 4
            else:
                # Punctuation/symbols/underscores: one token per character (worst case).
                tokens += len(chunk)
        return tokens

    def extract_text_content(value) -> str:
        """Recursively extract text from various content structures."""
        if value is None:
            return ""
        if isinstance(value, str):
            return value
        if isinstance(value, list):
            texts = []
            for item in value:
                if isinstance(item, dict):
                    # Handle content blocks: {"type": "text", "text": "..."}
                    if item.get("type") == "text" and "text" in item:
                        texts.append(item["text"])
                    # Handle tool use, tool results, etc.
                    elif "content" in item:
                        texts.append(extract_text_content(item["content"]))
                    elif "text" in item:
                        texts.append(item["text"])
                elif isinstance(item, str):
                    texts.append(item)
            return " ".join(texts)
        if isinstance(value, dict):
            if "text" in value:
                return value["text"]
            if "content" in value:
                return extract_text_content(value["content"])
        return ""

    total = 0
    for message in messages:
        # Per-message structural overhead (role/content framing).
        total += 4

        role = message.get("role", "")
        total += count_text_tokens(role)

        content = message.get("content")
        text = extract_text_content(content)
        total += count_text_tokens(text)

        # Handle thinking/reasoning content
        for key in ["thinking", "reasoning", "reasoning_content"]:
            if key in message:
                text = extract_text_content(message[key])
                total += count_text_tokens(text)

        # Handle tool calls if present
        if "tool_calls" in message:
            for tool_call in message.get("tool_calls") or []:
                if isinstance(tool_call, dict):
                    fn = tool_call.get("function", {})
                    total += count_text_tokens(fn.get("name", ""))
                    total += count_text_tokens(fn.get("arguments", ""))

    # Reply priming: every completion is primed with an assistant header.
    total += 3

    return total
204+
205+
119206
class DbManager:
120207
def __init__(self, ctx, db_path, clone=None):
121208
if db_path is None:

llms/extensions/app/__init__.py

Lines changed: 110 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,15 @@
11
import asyncio
22
import json
33
import os
4+
import re
45
import time
56
from datetime import datetime
67
from typing import Any
78

89
from aiohttp import web
910

11+
from llms.db import count_tokens_approx
12+
1013
from .db import AppDB
1114

1215
g_db = None
@@ -52,6 +55,7 @@ def get_db():
5255
"metadata",
5356
"error",
5457
"ref",
58+
"contextTokens",
5559
]
5660

5761
def thread_dto(row):
@@ -321,6 +325,112 @@ async def daily_requests_summary(request):
321325

322326
ctx.add_get("requests/summary/{day}", daily_requests_summary)
323327

328+
async def sync_thread(request):
    """Backfill contextTokens for threads that are missing it.

    The "take" query parameter caps how many threads are processed in one
    call (default 200, hard max 1000). Responds with {"updated": <count>}.
    """
    user = ctx.get_username(request)
    batch_size = min(int(request.query.get("take", "200")), 1000)

    # Only threads whose contextTokens column is still NULL.
    pending = g_db.query_threads({"null": "contextTokens", "take": batch_size}, user=user)
    count = 0
    for row in pending:
        tokens = count_tokens_approx(json.loads(row["messages"]))
        await g_db.update_thread_async(row["id"], {"contextTokens": tokens}, user=user)
        count += 1

    return web.json_response({"updated": count})

ctx.add_get("threads/sync", sync_thread)
344+
345+
async def compact_thread(request):
    """Compact a thread's conversation via an LLM summarization pass.

    Loads the thread, renders the "compact" prompt template from llms.json
    defaults with the thread's messages, asks the model for a condensed
    message array (target ~30% of the original token count), and stores the
    result on the thread produced by the completion call, linking back to
    the source thread via parentId. Responds with {"id": <threadId>}.
    """
    id = request.match_info["id"]
    user = ctx.get_username(request)
    thread = g_db.get_thread(id, user=user)
    if not thread:
        raise Exception("Thread not found")

    messages_json = thread["messages"]
    thread_messages = json.loads(messages_json)
    message_count = len(thread_messages)
    token_count = count_tokens_approx(thread_messages)
    target_tokens = int(token_count * 0.3)  # 30% of original

    compact_template = ctx.config["defaults"]["compact"] if "compact" in ctx.config.get("defaults", {}) else None
    if not compact_template:
        raise Exception("'compact' template not found in llms.json defaults")

    # Work on copies so the shared config template is never mutated.
    compact_template = compact_template.copy()
    compact_template_messages = compact_template["messages"].copy()
    user_message = compact_template_messages[-1].copy()
    user_content = user_message.get("content", "")
    # The template's final message must be a non-empty string prompt.
    # Fixed: was `and`, which let non-string content (e.g. a content-block
    # list) slip past this guard and fail confusingly at the `in` check below.
    if not user_content or not isinstance(user_content, str):
        raise Exception("'compact' template has no user message")
    if "{messages_json}" not in user_content:
        raise Exception("'compact' template has no {messages_json} placeholder")
    user_content = user_content.replace("{message_count}", str(message_count), 1)
    user_content = user_content.replace("{token_count}", str(token_count), 1)
    user_content = user_content.replace("{target_tokens}", str(target_tokens), 1)
    user_content = user_content.replace("{messages_json}", messages_json, 1)
    user_message["content"] = user_content
    compact_template_messages[-1] = user_message
    compact_template["messages"] = compact_template_messages

    ctx.dbg(f"compact_thread: {id} / {message_count} / {token_count} / {target_tokens}\n{user_content}\n")
    context = {"chat": compact_template, "tools": "none", "user": user}
    # NOTE(review): chat_completion is expected to create a thread and set
    # context["threadId"] as a side effect — confirm against ctx.chat_completion.
    response = await ctx.chat_completion(compact_template, context=context)

    answer = response.get("choices", [{}])[0].get("message", {}).get("content", "")
    if not answer:
        raise Exception("No answer in compact response")

    ctx.dbg(answer)
    # Accept either {"messages": [...]} or a bare message array.
    compact_messages_response = ctx.parse_json_response(answer)
    if "messages" in compact_messages_response:
        compact_messages = compact_messages_response["messages"]
    elif (
        isinstance(compact_messages_response, list)
        and len(compact_messages_response) > 0
        and compact_messages_response[0].get("role")
    ):
        compact_messages = compact_messages_response
    else:
        raise Exception("Invalid compact messages response")

    threadId = context.get("threadId")
    if not threadId:
        raise Exception("Thread not found")
    compact_tokens = count_tokens_approx(compact_messages)

    # Carry over the original thread's fields; the compacted messages replace
    # the originals and parentId links back to the source thread.
    update_thread = {
        "user": user,
        "title": thread.get("title"),
        "systemPrompt": thread.get("systemPrompt"),
        "model": thread.get("model"),
        "modelInfo": thread.get("modelInfo"),
        "modalities": thread.get("modalities"),
        "messages": compact_messages,
        "toolHistory": thread.get("toolHistory"),
        "args": thread.get("args"),
        "tools": thread.get("tools"),
        "provider": thread.get("provider"),
        "providerModel": thread.get("providerModel"),
        "completedAt": datetime.now(),
        "metadata": thread.get("metadata"),
        "ref": thread.get("ref"),
        "providerResponse": response,
        "contextTokens": compact_tokens,
        "parentId": thread.get("id"),
    }
    await g_db.update_thread_async(threadId, update_thread, user=user)

    return web.json_response(
        {
            "id": threadId,
        }
    )

ctx.add_post("threads/{id}/compact", compact_thread)
433+
324434
async def chat_request(openai_request, context):
325435
chat = openai_request
326436
user = context.get("user", None)

llms/extensions/app/db.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
from datetime import datetime, timedelta
55
from typing import Any, Dict
66

7-
from llms.db import DbManager, order_by, select_columns, to_dto, valid_columns
7+
from llms.db import DbManager, count_tokens_approx, order_by, select_columns, to_dto, valid_columns
88

99

1010
def with_user(data, user):
@@ -58,6 +58,8 @@ def __init__(self, ctx, db_path):
5858
"error": "TEXT",
5959
"ref": "TEXT",
6060
"providerResponse": "JSON",
61+
"contextTokens": "INTEGER",
62+
"parentId": "INTEGER",
6163
},
6264
"request": {
6365
"id": "INTEGER",
@@ -351,6 +353,7 @@ def prepare_thread(self, thread, id=None, user=None):
351353
self.ctx.cache_message_inline_data(m)
352354
if "timestamp" not in m:
353355
m["timestamp"] = initial_timestamp + idx
356+
thread["contextTokens"] = count_tokens_approx(thread["messages"])
354357
return with_user(thread, user=user)
355358

356359
def create_thread(self, thread: Dict[str, Any], user=None):

llms/llms.json

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -121,6 +121,60 @@
121121
],
122122
"max_completion_tokens": 16,
123123
"stream": false
124+
},
125+
"compact": {
126+
"model": "Gemini 2.5 Flash Lite",
127+
"messages": [
128+
{
129+
"role": "system",
130+
"content": "You are a context compaction assistant. Your task is to condense chat conversation histories while preserving all information necessary for the AI to continue the conversation coherently.\n\n## Compaction Rules\n\n1. **Preserve**: User intent, decisions made, key facts established, code/data shared, current task state, any constraints or preferences expressed\n2. **Remove**: Pleasantries, redundant confirmations, superseded information, verbose explanations that led to simple conclusions, failed attempts that were abandoned\n3. **Summarize**: Multi-turn explorations into their conclusions, lengthy discussions into key points, iterative refinements into final versions\n4. **Keep verbatim**: Code snippets still relevant, specific names/values/URLs, exact user requirements, any content the user explicitly asked to remember\n\n## Output Format\n\nReturn a compacted message array in the same OpenAI chat format:\n- Merge assistant knowledge into a single \"context summary\" system or assistant message where appropriate\n- Preserve the most recent 2-3 exchanges verbatim to maintain conversational flow\n- Use concise prose, not bullet points, for summaries\n\n## Critical\n\nNever lose information that would cause the AI to:\n- Repeat questions already answered\n- Forget constraints or preferences stated\n- Lose track of the current task or goal\n- Miss context needed to understand recent messages"
131+
},
132+
{
133+
"role": "user",
134+
"content": "Compact the following conversation while preserving all context needed to continue it coherently. The conversation has {message_count} messages totaling approximately {token_count} tokens. Target approximately {target_tokens} tokens.\n\n<conversation>\n{messages_json}\n</conversation>\n\nReturn your response as a JSON object with a single \"messages\" key containing the compacted array. Do not include any text before or after the JSON. Do not wrap in markdown code fences.\n\nExample output structure:\n{\"messages\":[{\"role\":\"system\",\"content\":\"...\"},{\"role\":\"user\",\"content\":\"...\"}]}"
135+
}
136+
],
137+
"response_format": {
138+
"type": "json_schema",
139+
"json_schema": {
140+
"name": "compacted_conversation",
141+
"strict": true,
142+
"schema": {
143+
"type": "object",
144+
"properties": {
145+
"messages": {
146+
"type": "array",
147+
"items": {
148+
"type": "object",
149+
"properties": {
150+
"role": {
151+
"type": "string",
152+
"enum": [
153+
"system",
154+
"user",
155+
"assistant"
156+
]
157+
},
158+
"content": {
159+
"type": "string"
160+
}
161+
},
162+
"required": [
163+
"role",
164+
"content"
165+
],
166+
"additionalProperties": false
167+
}
168+
}
169+
},
170+
"required": [
171+
"messages"
172+
],
173+
"additionalProperties": false
174+
}
175+
}
176+
},
177+
"stream": false
124178
}
125179
},
126180
"limits": {

llms/main.py

Lines changed: 31 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1763,6 +1763,7 @@ def convert_tool_args(function_name, function_args):
17631763

17641764
return function_args
17651765

1766+
17661767
def get_tool_property(function_name, prop_name):
17671768
tool_def = g_app.get_tool_definition(function_name)
17681769
if not tool_def:
@@ -1773,6 +1774,7 @@ def get_tool_property(function_name, prop_name):
17731774
return properties.get(prop_name)
17741775
return None
17751776

1777+
17761778
async def g_exec_tool(function_name, function_args):
17771779
_log(f"g_exec_tool: {function_name}")
17781780
if function_name in g_app.tools:
@@ -2370,6 +2372,30 @@ def disable_provider(provider):
23702372
init_llms(g_config, g_providers)
23712373

23722374

2375+
def parse_json_response(text):
    """Parse a JSON payload out of an LLM response string.

    Tries, in order: the raw text, the text with surrounding markdown code
    fences stripped, and finally the first {...} or [...] span embedded in
    the text. Raises ValueError when no strategy yields valid JSON.
    """

    def defenced(s):
        # Remove a leading ``` / ```json fence and a trailing ``` fence.
        s = re.sub(r"^```(?:json)?\s*", "", s.strip())
        return re.sub(r"\s*```$", "", s)

    for candidate in (text, defenced(text)):
        try:
            return json.loads(candidate)
        except json.JSONDecodeError:
            continue

    # Last resort: pull out an embedded JSON object or array.
    embedded = re.search(r"(\{[\s\S]*\}|\[[\s\S]*\])", text)
    if embedded:
        return json.loads(embedded.group(1))

    raise ValueError("Could not parse JSON from response")
2397+
2398+
23732399
def resolve_root():
23742400
# Try to find the resource root directory
23752401
# When installed as a package, static files may be in different locations
@@ -3031,8 +3057,8 @@ async def on_chat_error(self, e: Exception, context: Dict[str, Any]):
30313057
task = filter_func(e, context)
30323058
if inspect.iscoroutine(task):
30333059
await task
3034-
except Exception as e:
3035-
_err("chat error filter failed", e)
3060+
except Exception as ex:
3061+
_err("chat error filter failed", ex)
30363062

30373063
async def on_chat_tool(self, chat: Dict[str, Any], context: Dict[str, Any]):
30383064
m_len = len(chat.get("messages", []))
@@ -3455,6 +3481,9 @@ def create_chat_with_tools(self, chat: Dict[str, Any], use_tools: str = "all") -
34553481
def chat_to_aspect_ratio(self, chat: Dict[str, Any]) -> str:
34563482
return chat_to_aspect_ratio(chat)
34573483

3484+
def parse_json_response(self, text: str) -> Dict[str, Any]:
3485+
return parse_json_response(text)
3486+
34583487

34593488
def get_extensions_path():
34603489
return os.getenv("LLMS_EXTENSIONS_DIR", home_llms_path("extensions"))

0 commit comments

Comments
 (0)