-
Notifications
You must be signed in to change notification settings - Fork 13
Expand file tree
/
Copy pathtest_repository.py
More file actions
255 lines (203 loc) · 8.21 KB
/
test_repository.py
File metadata and controls
255 lines (203 loc) · 8.21 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
from sqlite_rag.models.chunk import Chunk
from sqlite_rag.models.document import Document
from sqlite_rag.models.sentence import Sentence
from sqlite_rag.repository import Repository
class TestRepository:
    """Tests for ``Repository`` run against the connection from ``db_conn``.

    Each test unpacks the ``db_conn`` fixture into ``(conn, settings)`` and
    builds a fresh ``Repository`` from them.
    """

    def test_add_document_without_chunks(self, db_conn):
        """A document added without chunks can be read back unchanged."""
        conn, settings = db_conn
        repo = Repository(conn, settings)

        doc_id = repo.add_document(
            Document(
                content="This is a test document content.",
                uri="test_doc.txt",
                metadata={"author": "test"},
            )
        )

        # Verify the document was added
        document = repo.find_document_by_id_or_uri(doc_id)
        assert document is not None, "Document was not added to the database."
        assert document.content == "This is a test document content."
        assert document.uri == "test_doc.txt"
        assert document.metadata == {"author": "test"}

    def test_add_document_with_chunks(self, db_conn):
        """Adding a document also stores its chunks in the ``chunks`` table."""
        conn, settings = db_conn
        repo = Repository(conn, settings)

        doc = Document(
            content="This is a test document with chunks.",
            uri="test_doc_with_chunks.txt",
            metadata={"author": "test"},
        )
        doc.chunks = [
            Chunk(content="Chunk 1 content", embedding=b"\x00" * 384),
            Chunk(content="Chunk 2 content", embedding=b"\x00" * 384),
        ]
        doc_id = repo.add_document(doc)

        # Verify the document and chunks were added
        document = repo.find_document_by_id_or_uri(doc_id)
        assert document is not None, "Document was not added to the database."
        assert document.content == "This is a test document with chunks."
        assert document.uri == "test_doc_with_chunks.txt"
        assert document.metadata == {"author": "test"}

        cursor = conn.execute(
            "SELECT content, embedding FROM chunks WHERE document_id = ?", (doc_id,)
        )
        chunk_rows = cursor.fetchall()
        assert len(chunk_rows) == 2, "Chunks were not added to the database."

        # The query has no ORDER BY, so the row order is not guaranteed;
        # compare the stored chunks independently of the order returned.
        stored_chunks = sorted((row[0], row[1]) for row in chunk_rows)
        assert stored_chunks == [
            ("Chunk 1 content", b"\x00" * 384),
            ("Chunk 2 content", b"\x00" * 384),
        ]

    def test_list_documents(self, db_conn):
        """``list_documents`` returns every added document with its fields."""
        conn, settings = db_conn
        repo = Repository(conn, settings)

        doc1 = Document(
            content="Document 1 content.", uri="doc1.txt", metadata={"author": "test1"}
        )
        doc2 = Document(
            content="Document 2 content.", uri="doc2.txt", metadata={"author": "test2"}
        )
        repo.add_document(doc1)
        repo.add_document(doc2)

        documents = repo.list_documents()
        assert len(documents) == 2

        assert documents[0].id is not None
        assert documents[0].uri == "doc1.txt"
        assert documents[0].content == "Document 1 content."
        assert documents[0].metadata == {"author": "test1"}

        assert documents[1].id is not None
        assert documents[1].content == "Document 2 content."
        assert documents[1].uri == "doc2.txt"
        assert documents[1].metadata == {"author": "test2"}

    def test_list_documents_empty(self, db_conn):
        """``list_documents`` returns an empty result when nothing was added."""
        conn, settings = db_conn
        repo = Repository(conn, settings)

        documents = repo.list_documents()
        assert len(documents) == 0

    def test_find_document_by_id_or_uri_by_id(self, db_conn):
        """A document can be looked up by the id ``add_document`` returned."""
        conn, settings = db_conn
        repo = Repository(conn, settings)

        # Add a document
        doc = Document(
            content="Test document content.",
            uri="test.txt",
            metadata={"author": "test"},
        )
        doc_id = repo.add_document(doc)

        # Find by ID
        found_doc = repo.find_document_by_id_or_uri(doc_id)
        assert found_doc is not None
        assert found_doc.id == doc_id
        assert found_doc.content == "Test document content."
        assert found_doc.uri == "test.txt"
        assert found_doc.metadata == {"author": "test"}

    def test_find_document_by_id_or_uri_by_uri(self, db_conn):
        """A document can be looked up by its URI."""
        conn, settings = db_conn
        repo = Repository(conn, settings)

        # Add a document
        doc = Document(
            content="Test document content.",
            uri="test.txt",
            metadata={"author": "test"},
        )
        repo.add_document(doc)

        # Find by URI
        found_doc = repo.find_document_by_id_or_uri("test.txt")
        assert found_doc is not None
        assert found_doc.content == "Test document content."
        assert found_doc.uri == "test.txt"
        assert found_doc.metadata == {"author": "test"}

    def test_find_document_by_id_or_uri_not_found(self, db_conn):
        """Looking up an unknown identifier returns ``None``."""
        conn, settings = db_conn
        repo = Repository(conn, settings)

        # Try to find non-existent document
        found_doc = repo.find_document_by_id_or_uri("nonexistent")
        assert found_doc is None

    def test_remove_document_success(self, db_conn):
        """Removing a document also removes its chunks and their sentences."""
        conn, settings = db_conn
        repo = Repository(conn, settings)

        # Add a document with chunks and sentences
        doc = Document(
            content="Test document content.",
            uri="test.txt",
            metadata={"author": "test"},
        )
        chunk1 = Chunk(content="Chunk 1", embedding=b"\x00" * 384)
        chunk1.sentences = [
            Sentence(
                content="Sentence 1",
                embedding=b"\x00" * 384,
                start_offset=0,
                end_offset=10,
            ),
            Sentence(
                content="Sentence 2",
                embedding=b"\x00" * 384,
                start_offset=11,
                end_offset=20,
            ),
        ]
        chunk2 = Chunk(content="Chunk 2", embedding=b"\x00" * 384)
        chunk2.sentences = [
            Sentence(
                content="Sentence 3",
                embedding=b"\x00" * 384,
                start_offset=0,
                end_offset=10,
            ),
        ]
        doc.chunks = [chunk1, chunk2]
        doc_id = repo.add_document(doc)

        # Verify document, chunks, and sentences exist
        cursor = conn.cursor()
        cursor.execute("SELECT COUNT(*) FROM documents WHERE id = ?", (doc_id,))
        assert cursor.fetchone()[0] == 1

        # Remember the chunk ids now. After removal the chunks rows are gone,
        # so a subquery on `chunks` would match nothing and a sentence count
        # filtered through it would be 0 even if sentences were left behind.
        cursor.execute("SELECT id FROM chunks WHERE document_id = ?", (doc_id,))
        chunk_ids = [row[0] for row in cursor.fetchall()]
        assert len(chunk_ids) == 2

        # Only "?" placeholders are interpolated; the ids are bound as parameters.
        placeholders = ", ".join("?" for _ in chunk_ids)
        sentence_count_sql = (
            f"SELECT COUNT(*) FROM sentences WHERE chunk_id IN ({placeholders})"
        )
        cursor.execute(sentence_count_sql, chunk_ids)
        assert cursor.fetchone()[0] == 3

        # Remove document
        success = repo.remove_document(doc_id)
        assert success is True

        # Verify document, chunks, and sentences are removed
        cursor.execute("SELECT COUNT(*) FROM documents WHERE id = ?", (doc_id,))
        assert cursor.fetchone()[0] == 0

        cursor.execute("SELECT COUNT(*) FROM chunks WHERE document_id = ?", (doc_id,))
        assert cursor.fetchone()[0] == 0

        cursor.execute(sentence_count_sql, chunk_ids)
        assert cursor.fetchone()[0] == 0

    def test_remove_document_not_found(self, db_conn):
        """Removing an unknown document id reports ``False``."""
        conn, settings = db_conn
        repo = Repository(conn, settings)

        # Try to remove non-existent document
        success = repo.remove_document("nonexistent-id")
        assert success is False

    def test_document_exists_by_hash_exists(self, db_conn):
        """The hash of an added document is reported as existing."""
        conn, settings = db_conn
        repo = Repository(conn, settings)

        doc = Document(
            content="Test document content.",
            uri="test.txt",
            metadata={"author": "test"},
        )
        repo.add_document(doc)

        exists = repo.document_exists_by_hash(doc.hash())
        assert exists is True

    def test_document_exists_by_hash_not_exists(self, db_conn):
        """The hash of a document that was never added is reported as missing."""
        conn, settings = db_conn
        repo = Repository(conn, settings)

        # Check for non-existent hash
        fake_doc = Document(content="Non-existent content")
        exists = repo.document_exists_by_hash(fake_doc.hash())
        assert exists is False