Skip to content

Commit 830a008

Browse files
committed
Removing attachment metadata and streamlining conversation attachments.
1 parent d238a82 commit 830a008

5 files changed

Lines changed: 116 additions & 53 deletions

File tree

src/cache/postgres_cache.py

Lines changed: 38 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
from cache.cache_error import CacheError
99
from models.cache_entry import CacheEntry
1010
from models.config import PostgreSQLDatabaseConfiguration
11+
from models.requests import Attachment
1112
from models.responses import ConversationData
1213
from utils.connection_decorator import connection
1314
from utils.types import ReferencedDocument, ToolCallSummary, ToolResultSummary
@@ -36,6 +37,7 @@ class PostgresCache(Cache):
3637
referenced_documents | jsonb | |
3738
tool_calls | jsonb | |
3839
tool_results | jsonb | |
40+
attachments | jsonb | |
3941
Indexes:
4042
"cache_pkey" PRIMARY KEY, btree (user_id, conversation_id, created_at)
4143
"timestamps" btree (created_at)
@@ -60,6 +62,7 @@ class PostgresCache(Cache):
6062
referenced_documents jsonb,
6163
tool_calls jsonb,
6264
tool_results jsonb,
65+
attachments jsonb,
6366
PRIMARY KEY(user_id, conversation_id, created_at)
6467
);
6568
"""
@@ -81,7 +84,7 @@ class PostgresCache(Cache):
8184

8285
SELECT_CONVERSATION_HISTORY_STATEMENT = """
8386
SELECT query, response, provider, model, started_at, completed_at,
84-
referenced_documents, tool_calls, tool_results
87+
referenced_documents, tool_calls, tool_results, attachments
8588
FROM cache
8689
WHERE user_id=%s AND conversation_id=%s
8790
ORDER BY created_at
@@ -90,8 +93,8 @@ class PostgresCache(Cache):
9093
INSERT_CONVERSATION_HISTORY_STATEMENT = """
9194
INSERT INTO cache(user_id, conversation_id, created_at, started_at, completed_at,
9295
query, response, provider, model, referenced_documents,
93-
tool_calls, tool_results)
94-
VALUES (%s, %s, CURRENT_TIMESTAMP, %s, %s, %s, %s, %s, %s, %s, %s, %s)
96+
tool_calls, tool_results, attachments)
97+
VALUES (%s, %s, CURRENT_TIMESTAMP, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
9598
"""
9699

97100
QUERY_CACHE_SIZE = """
@@ -321,6 +324,22 @@ def get( # pylint: disable=R0914
321324
e,
322325
)
323326

327+
# Parse attachments back into Attachment objects
328+
attachments_data = conversation_entry[9]
329+
attachments_obj = None
330+
if attachments_data:
331+
try:
332+
attachments_obj = [
333+
Attachment.model_validate(att) for att in attachments_data
334+
]
335+
except (ValueError, TypeError) as e:
336+
logger.warning(
337+
"Failed to deserialize attachments for "
338+
"conversation %s: %s",
339+
conversation_id,
340+
e,
341+
)
342+
324343
cache_entry = CacheEntry(
325344
query=conversation_entry[0],
326345
response=conversation_entry[1],
@@ -331,6 +350,7 @@ def get( # pylint: disable=R0914
331350
referenced_documents=docs_obj,
332351
tool_calls=tool_calls_obj,
333352
tool_results=tool_results_obj,
353+
attachments=attachments_obj,
334354
)
335355
result.append(cache_entry)
336356

@@ -405,6 +425,20 @@ def insert_or_append(
405425
e,
406426
)
407427

428+
attachments_json = None
429+
if cache_entry.attachments:
430+
try:
431+
attachments_as_dicts = [
432+
att.model_dump(mode="json") for att in cache_entry.attachments
433+
]
434+
attachments_json = json.dumps(attachments_as_dicts)
435+
except (TypeError, ValueError) as e:
436+
logger.warning(
437+
"Failed to serialize attachments for conversation %s: %s",
438+
conversation_id,
439+
e,
440+
)
441+
408442
# the whole operation is run in one transaction
409443
with self.connection.cursor() as cursor:
410444
cursor.execute(
@@ -421,6 +455,7 @@ def insert_or_append(
421455
referenced_documents_json,
422456
tool_calls_json,
423457
tool_results_json,
458+
attachments_json,
424459
),
425460
)
426461

src/cache/sqlite_cache.py

Lines changed: 59 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
"""Cache that uses SQLite to store cached values."""
22

33
from time import time
4+
from typing import Any
45

56
import sqlite3
67
import json
@@ -9,6 +10,7 @@
910
from cache.cache_error import CacheError
1011
from models.cache_entry import CacheEntry
1112
from models.config import SQLiteDatabaseConfiguration
13+
from models.requests import Attachment
1214
from models.responses import ConversationData
1315
from utils.connection_decorator import connection
1416
from utils.types import ReferencedDocument, ToolCallSummary, ToolResultSummary
@@ -37,6 +39,7 @@ class SQLiteCache(Cache):
3739
referenced_documents | text | |
3840
tool_calls | text | |
3941
tool_results | text | |
42+
attachments | text | |
4043
Indexes:
4144
"cache_pkey" PRIMARY KEY, btree (user_id, conversation_id, created_at)
4245
"cache_key_key" UNIQUE CONSTRAINT, btree (key)
@@ -45,6 +48,26 @@ class SQLiteCache(Cache):
4548
```
4649
"""
4750

51+
@staticmethod
52+
def _safe_json_dumps_models(
53+
items: list[Any] | None, conversation_id: str, field_name: str
54+
) -> str | None:
55+
"""Serialize a list of Pydantic models to JSON; return None if the list is empty or serialization fails."""
56+
if not items:
57+
return None
58+
59+
try:
60+
as_dicts = [item.model_dump(mode="json") for item in items]
61+
return json.dumps(as_dicts)
62+
except (TypeError, ValueError) as e:
63+
logger.warning(
64+
"Failed to serialize %s for conversation %s: %s",
65+
field_name,
66+
conversation_id,
67+
e,
68+
)
69+
return None
70+
4871
CREATE_CACHE_TABLE = """
4972
CREATE TABLE IF NOT EXISTS cache (
5073
user_id text NOT NULL,
@@ -59,6 +82,7 @@ class SQLiteCache(Cache):
5982
referenced_documents text,
6083
tool_calls text,
6184
tool_results text,
85+
attachments text,
6286
PRIMARY KEY(user_id, conversation_id, created_at)
6387
);
6488
"""
@@ -80,7 +104,7 @@ class SQLiteCache(Cache):
80104

81105
SELECT_CONVERSATION_HISTORY_STATEMENT = """
82106
SELECT query, response, provider, model, started_at, completed_at,
83-
referenced_documents, tool_calls, tool_results
107+
referenced_documents, tool_calls, tool_results, attachments
84108
FROM cache
85109
WHERE user_id=? AND conversation_id=?
86110
ORDER BY created_at
@@ -89,8 +113,8 @@ class SQLiteCache(Cache):
89113
INSERT_CONVERSATION_HISTORY_STATEMENT = """
90114
INSERT INTO cache(user_id, conversation_id, created_at, started_at, completed_at,
91115
query, response, provider, model, referenced_documents,
92-
tool_calls, tool_results)
93-
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
116+
tool_calls, tool_results, attachments)
117+
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
94118
"""
95119

96120
QUERY_CACHE_SIZE = """
@@ -301,6 +325,22 @@ def get( # pylint: disable=R0914
301325
e,
302326
)
303327

328+
# Parse attachments back into Attachment objects
329+
attachments_json_str = conversation_entry[9]
330+
attachments_obj = None
331+
if attachments_json_str:
332+
try:
333+
attachments_data = json.loads(attachments_json_str)
334+
attachments_obj = [
335+
Attachment.model_validate(att) for att in attachments_data
336+
]
337+
except (json.JSONDecodeError, ValueError) as e:
338+
logger.warning(
339+
"Failed to deserialize attachments for conversation %s: %s",
340+
conversation_id,
341+
e,
342+
)
343+
304344
cache_entry = CacheEntry(
305345
query=conversation_entry[0],
306346
response=conversation_entry[1],
@@ -311,6 +351,7 @@ def get( # pylint: disable=R0914
311351
referenced_documents=docs_obj,
312352
tool_calls=tool_calls_obj,
313353
tool_results=tool_results_obj,
354+
attachments=attachments_obj,
314355
)
315356
result.append(cache_entry)
316357

@@ -342,49 +383,20 @@ def insert_or_append(
342383
cursor = self.connection.cursor()
343384
current_time = time()
344385

345-
referenced_documents_json = None
346-
if cache_entry.referenced_documents:
347-
try:
348-
docs_as_dicts = [
349-
doc.model_dump(mode="json")
350-
for doc in cache_entry.referenced_documents
351-
]
352-
referenced_documents_json = json.dumps(docs_as_dicts)
353-
except (TypeError, ValueError) as e:
354-
logger.warning(
355-
"Failed to serialize referenced_documents for "
356-
"conversation %s: %s",
357-
conversation_id,
358-
e,
359-
)
360-
361-
tool_calls_json = None
362-
if cache_entry.tool_calls:
363-
try:
364-
tool_calls_as_dicts = [
365-
tc.model_dump(mode="json") for tc in cache_entry.tool_calls
366-
]
367-
tool_calls_json = json.dumps(tool_calls_as_dicts)
368-
except (TypeError, ValueError) as e:
369-
logger.warning(
370-
"Failed to serialize tool_calls for conversation %s: %s",
371-
conversation_id,
372-
e,
373-
)
374-
375-
tool_results_json = None
376-
if cache_entry.tool_results:
377-
try:
378-
tool_results_as_dicts = [
379-
tr.model_dump(mode="json") for tr in cache_entry.tool_results
380-
]
381-
tool_results_json = json.dumps(tool_results_as_dicts)
382-
except (TypeError, ValueError) as e:
383-
logger.warning(
384-
"Failed to serialize tool_results for conversation %s: %s",
385-
conversation_id,
386-
e,
387-
)
386+
referenced_documents_json = self._safe_json_dumps_models(
387+
cache_entry.referenced_documents,
388+
conversation_id,
389+
"referenced_documents",
390+
)
391+
tool_calls_json = self._safe_json_dumps_models(
392+
cache_entry.tool_calls, conversation_id, "tool_calls"
393+
)
394+
tool_results_json = self._safe_json_dumps_models(
395+
cache_entry.tool_results, conversation_id, "tool_results"
396+
)
397+
attachments_json = self._safe_json_dumps_models(
398+
cache_entry.attachments, conversation_id, "attachments"
399+
)
388400

389401
cursor.execute(
390402
self.INSERT_CONVERSATION_HISTORY_STATEMENT,
@@ -401,6 +413,7 @@ def insert_or_append(
401413
referenced_documents_json,
402414
tool_calls_json,
403415
tool_results_json,
416+
attachments_json,
404417
),
405418
)
406419

src/models/cache_entry.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,8 @@
22

33
from typing import Optional
44
from pydantic import BaseModel
5+
6+
from models.requests import Attachment
57
from utils.types import ReferencedDocument, ToolCallSummary, ToolResultSummary
68

79

@@ -16,6 +18,7 @@ class CacheEntry(BaseModel):
1618
referenced_documents: List of documents referenced by the response
1719
tool_calls: List of tool calls made during response generation
1820
tool_results: List of tool results from tool calls
21+
attachments: Optional list of attachments included with the query
1922
"""
2023

2124
query: str
@@ -27,3 +30,4 @@ class CacheEntry(BaseModel):
2730
referenced_documents: Optional[list[ReferencedDocument]] = None
2831
tool_calls: Optional[list[ToolCallSummary]] = None
2932
tool_results: Optional[list[ToolResultSummary]] = None
33+
attachments: Optional[list[Attachment]] = None

src/models/responses.py

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919

2020
from constants import MEDIA_TYPE_EVENT_STREAM
2121
from models.config import Action, Configuration
22+
from models.requests import Attachment
2223
from quota.quota_exceed_error import QuotaExceedError
2324
from utils.types import RAGChunk, ReferencedDocument, ToolCallSummary, ToolResultSummary
2425

@@ -867,6 +868,7 @@ class Message(BaseModel):
867868
content: The message content.
868869
type: The type of message.
869870
referenced_documents: Optional list of documents referenced in an assistant response.
871+
attachments: Optional list of attachments included with the message.
870872
"""
871873

872874
content: str = Field(
@@ -879,10 +881,6 @@ class Message(BaseModel):
879881
description="The type of message",
880882
examples=["user", "assistant", "system", "developer"],
881883
)
882-
referenced_documents: Optional[list[ReferencedDocument]] = Field(
883-
None,
884-
description="List of documents referenced in the response (assistant messages only)",
885-
)
886884

887885

888886
class ConversationTurn(BaseModel):

src/utils/query.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -309,6 +309,19 @@ def store_query_results( # pylint: disable=too-many-arguments
309309
tool_results=summary.tool_results,
310310
)
311311
try:
312+
cache_entry = CacheEntry(
313+
query=query_request.query,
314+
response=summary.llm_response,
315+
provider=provider_id,
316+
model=model_id,
317+
started_at=started_at,
318+
completed_at=completed_at,
319+
referenced_documents=summary.referenced_documents,
320+
tool_calls=summary.tool_calls,
321+
tool_results=summary.tool_results,
322+
attachments=query_request.attachments,
323+
)
324+
312325
logger.info("Storing conversation in cache")
313326
store_conversation_into_cache(
314327
user_id=user_id,

0 commit comments

Comments
 (0)