|
1 | 1 | import asyncio |
2 | 2 | import json |
3 | 3 | import os |
| 4 | +import re |
4 | 5 | import time |
5 | 6 | from datetime import datetime |
6 | 7 | from typing import Any |
7 | 8 |
|
8 | 9 | from aiohttp import web |
9 | 10 |
|
| 11 | +from llms.db import count_tokens_approx |
| 12 | + |
10 | 13 | from .db import AppDB |
11 | 14 |
|
12 | 15 | g_db = None |
@@ -52,6 +55,7 @@ def get_db(): |
52 | 55 | "metadata", |
53 | 56 | "error", |
54 | 57 | "ref", |
| 58 | + "contextTokens", |
55 | 59 | ] |
56 | 60 |
|
57 | 61 | def thread_dto(row): |
@@ -321,6 +325,112 @@ async def daily_requests_summary(request): |
321 | 325 |
|
322 | 326 | ctx.add_get("requests/summary/{day}", daily_requests_summary) |
323 | 327 |
|
| 328 | + async def sync_thread(request): |
| 329 | + user = ctx.get_username(request) |
| 330 | + take = min(int(request.query.get("take", "200")), 1000) |
| 331 | + |
| 332 | + threads = g_db.query_threads({"null": "contextTokens", "take": take}, user=user) |
| 333 | + updated = 0 |
| 334 | + for thread in threads: |
| 335 | + id = thread["id"] |
| 336 | + messages = json.loads(thread["messages"]) |
| 337 | + context_tokens = count_tokens_approx(messages) |
| 338 | + await g_db.update_thread_async(id, {"contextTokens": context_tokens}, user=user) |
| 339 | + updated += 1 |
| 340 | + |
| 341 | + return web.json_response({"updated": updated}) |
| 342 | + |
| 343 | + ctx.add_get("threads/sync", sync_thread) |
| 344 | + |
| 345 | + async def compact_thread(request): |
| 346 | + id = request.match_info["id"] |
| 347 | + user = ctx.get_username(request) |
| 348 | + thread = g_db.get_thread(id, user=user) |
| 349 | + if not thread: |
| 350 | + raise Exception("Thread not found") |
| 351 | + |
| 352 | + messages_json = thread["messages"] |
| 353 | + thread_messages = json.loads(messages_json) |
| 354 | + message_count = len(thread_messages) |
| 355 | + token_count = count_tokens_approx(thread_messages) |
| 356 | + target_tokens = int(token_count * 0.3) # 30% of original |
| 357 | + |
| 358 | + compact_template = ctx.config["defaults"]["compact"] if "compact" in ctx.config.get("defaults", {}) else None |
| 359 | + if not compact_template: |
| 360 | + raise Exception("'compact' template not found in llms.json defaults") |
| 361 | + |
| 362 | + compact_template = compact_template.copy() |
| 363 | + compact_template_messages = compact_template["messages"].copy() |
| 364 | + user_message = compact_template_messages[-1].copy() |
| 365 | + user_content = user_message.get("content", "") |
| 366 | + if not user_content and not isinstance(user_content, str): |
| 367 | + raise Exception("'compact' template has no user message") |
| 368 | + if "{messages_json}" not in user_content: |
| 369 | + raise Exception("'compact' template has no {messages_json} placeholder") |
| 370 | + user_content = user_content.replace("{message_count}", str(message_count), 1) |
| 371 | + user_content = user_content.replace("{token_count}", str(token_count), 1) |
| 372 | + user_content = user_content.replace("{target_tokens}", str(target_tokens), 1) |
| 373 | + user_content = user_content.replace("{messages_json}", messages_json, 1) |
| 374 | + user_message["content"] = user_content |
| 375 | + compact_template_messages[-1] = user_message |
| 376 | + compact_template["messages"] = compact_template_messages |
| 377 | + |
| 378 | + ctx.dbg(f"compact_thread: {id} / {message_count} / {token_count} / {target_tokens}\n{user_content}\n") |
| 379 | + context = {"chat": compact_template, "tools": "none", "user": user} |
| 380 | + response = await ctx.chat_completion(compact_template, context=context) |
| 381 | + |
| 382 | + answer = response.get("choices", [{}])[0].get("message", {}).get("content", "") |
| 383 | + if not answer: |
| 384 | + raise Exception("No answer in compact response") |
| 385 | + |
| 386 | + ctx.dbg(answer) |
| 387 | + compact_messages_response = ctx.parse_json_response(answer) |
| 388 | + if "messages" in compact_messages_response: |
| 389 | + compact_messages = compact_messages_response["messages"] |
| 390 | + elif ( |
| 391 | + isinstance(compact_messages_response, list) |
| 392 | + and len(compact_messages_response) > 0 |
| 393 | + and compact_messages_response[0].get("role") |
| 394 | + ): |
| 395 | + compact_messages = compact_messages_response |
| 396 | + else: |
| 397 | + raise Exception("Invalid compact messages response") |
| 398 | + |
| 399 | + threadId = context.get("threadId") |
| 400 | + if not threadId: |
| 401 | + raise Exception("Thread not found") |
| 402 | + compact_tokens = count_tokens_approx(compact_messages) |
| 403 | + |
| 404 | + update_thread = { |
| 405 | + "user": user, |
| 406 | + "title": thread.get("title"), |
| 407 | + "systemPrompt": thread.get("systemPrompt"), |
| 408 | + "model": thread.get("model"), |
| 409 | + "modelInfo": thread.get("modelInfo"), |
| 410 | + "modalities": thread.get("modalities"), |
| 411 | + "messages": compact_messages, |
| 412 | + "toolHistory": thread.get("toolHistory"), |
| 413 | + "args": thread.get("args"), |
| 414 | + "tools": thread.get("tools"), |
| 415 | + "provider": thread.get("provider"), |
| 416 | + "providerModel": thread.get("providerModel"), |
| 417 | + "completedAt": datetime.now(), |
| 418 | + "metadata": thread.get("metadata"), |
| 419 | + "ref": thread.get("ref"), |
| 420 | + "providerResponse": response, |
| 421 | + "contextTokens": compact_tokens, |
| 422 | + "parentId": thread.get("id"), |
| 423 | + } |
| 424 | + await g_db.update_thread_async(threadId, update_thread, user=user) |
| 425 | + |
| 426 | + return web.json_response( |
| 427 | + { |
| 428 | + "id": threadId, |
| 429 | + } |
| 430 | + ) |
| 431 | + |
| 432 | + ctx.add_post("threads/{id}/compact", compact_thread) |
| 433 | + |
324 | 434 | async def chat_request(openai_request, context): |
325 | 435 | chat = openai_request |
326 | 436 | user = context.get("user", None) |
|
0 commit comments