Skip to content

Commit f853e61

Browse files
committed
perf(core): reuse written note content after writes
Signed-off-by: phernandez <paul@basicmachines.co>
1 parent 6f207c2 commit f853e61

4 files changed

Lines changed: 111 additions & 42 deletions

File tree

src/basic_memory/api/v2/routers/knowledge_router.py

Lines changed: 33 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -306,8 +306,11 @@ async def create_entity(
306306
):
307307
if fast:
308308
entity = await entity_service.fast_write_entity(data)
309+
written_content = None
309310
else:
310-
entity = await entity_service.create_entity(data)
311+
write_result = await entity_service.create_entity_with_content(data)
312+
entity = write_result.entity
313+
written_content = write_result.content
311314

312315
if fast:
313316
with telemetry.scope(
@@ -329,7 +332,7 @@ async def create_entity(
329332
action="create_entity",
330333
phase="search_index",
331334
):
332-
await search_service.index_entity(entity)
335+
await search_service.index_entity(entity, content=written_content)
333336
with telemetry.scope(
334337
"api.knowledge.create_entity.vector_sync",
335338
domain="knowledge",
@@ -352,8 +355,12 @@ async def create_entity(
352355
domain="knowledge",
353356
action="create_entity",
354357
phase="read_content",
358+
source="file" if fast else "memory",
355359
):
356-
content = await file_service.read_file_content(entity.file_path)
360+
if fast:
361+
content = await file_service.read_file_content(entity.file_path)
362+
else:
363+
content = written_content
357364
result = result.model_copy(update={"content": content})
358365

359366
logger.info(
@@ -421,13 +428,18 @@ async def update_entity_by_id(
421428
):
422429
if fast:
423430
entity = await entity_service.fast_write_entity(data, external_id=entity_id)
431+
written_content = None
424432
response.status_code = 200 if existing else 201
425433
else:
426434
if existing:
427-
entity = await entity_service.update_entity(existing, data)
435+
write_result = await entity_service.update_entity_with_content(existing, data)
436+
entity = write_result.entity
437+
written_content = write_result.content
428438
response.status_code = 200
429439
else:
430-
entity = await entity_service.create_entity(data)
440+
write_result = await entity_service.create_entity_with_content(data)
441+
entity = write_result.entity
442+
written_content = write_result.content
431443
if entity.external_id != entity_id:
432444
entity = await entity_repository.update(
433445
entity.id,
@@ -461,7 +473,7 @@ async def update_entity_by_id(
461473
action="update_entity",
462474
phase="search_index",
463475
):
464-
await search_service.index_entity(entity)
476+
await search_service.index_entity(entity, content=written_content)
465477
with telemetry.scope(
466478
"api.knowledge.update_entity.vector_sync",
467479
domain="knowledge",
@@ -484,8 +496,12 @@ async def update_entity_by_id(
484496
domain="knowledge",
485497
action="update_entity",
486498
phase="read_content",
499+
source="file" if fast else "memory",
487500
):
488-
content = await file_service.read_file_content(entity.file_path)
501+
if fast:
502+
content = await file_service.read_file_content(entity.file_path)
503+
else:
504+
content = written_content
489505
result = result.model_copy(update={"content": content})
490506

491507
logger.info(
@@ -563,16 +579,19 @@ async def edit_entity_by_id(
563579
find_text=data.find_text,
564580
expected_replacements=data.expected_replacements,
565581
)
582+
written_content = None
566583
else:
567584
identifier = entity.permalink or entity.file_path
568-
updated_entity = await entity_service.edit_entity(
585+
write_result = await entity_service.edit_entity_with_content(
569586
identifier=identifier,
570587
operation=data.operation,
571588
content=data.content,
572589
section=data.section,
573590
find_text=data.find_text,
574591
expected_replacements=data.expected_replacements,
575592
)
593+
updated_entity = write_result.entity
594+
written_content = write_result.content
576595

577596
if fast:
578597
with telemetry.scope(
@@ -594,7 +613,7 @@ async def edit_entity_by_id(
594613
action="edit_entity",
595614
phase="search_index",
596615
):
597-
await search_service.index_entity(updated_entity)
616+
await search_service.index_entity(updated_entity, content=written_content)
598617
with telemetry.scope(
599618
"api.knowledge.edit_entity.vector_sync",
600619
domain="knowledge",
@@ -617,8 +636,12 @@ async def edit_entity_by_id(
617636
domain="knowledge",
618637
action="edit_entity",
619638
phase="read_content",
639+
source="file" if fast else "memory",
620640
):
621-
content = await file_service.read_file_content(updated_entity.file_path)
641+
if fast:
642+
content = await file_service.read_file_content(updated_entity.file_path)
643+
else:
644+
content = written_content
622645
result = result.model_copy(update={"content": content})
623646

624647
logger.info(

src/basic_memory/services/entity_service.py

Lines changed: 57 additions & 20 deletions
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,7 @@
11
"""Service for managing entities in the database."""
22

33
from collections.abc import Callable
4+
from dataclasses import dataclass
45
from datetime import datetime
56
from pathlib import Path
67
from typing import List, Optional, Sequence, Tuple, Union
@@ -50,6 +51,14 @@
5051
from basic_memory.utils import build_canonical_permalink
5152

5253

54+
@dataclass(frozen=True)
55+
class EntityWriteResult:
56+
"""Persisted entity plus the markdown written during this call."""
57+
58+
entity: EntityModel
59+
content: str
60+
61+
5362
class EntityService(BaseService[EntityModel]):
5463
"""Service for managing entities in the database."""
5564

@@ -79,7 +88,7 @@ def __init__(
7988

8089
async def detect_file_path_conflicts(
8190
self, file_path: str, skip_check: bool = False
82-
) -> List[Entity]:
91+
) -> List[str]:
8392
"""Detect potential file path conflicts for a given file path.
8493
8594
This checks for entities with similar file paths that might cause conflicts:
@@ -93,28 +102,19 @@ async def detect_file_path_conflicts(
93102
skip_check: If True, skip the check and return empty list (optimization for bulk operations)
94103
95104
Returns:
96-
List of entities that might conflict with the given file path
105+
List of file paths that might conflict with the given file path
97106
"""
98107
if skip_check:
99108
return []
100109

101110
from basic_memory.utils import detect_potential_file_conflicts
102111

103-
conflicts = []
104-
105-
# Get all existing file paths
106-
all_entities = await self.repository.find_all()
107-
existing_paths = [entity.file_path for entity in all_entities]
112+
# Load only file paths. Conflict detection is on the hot write path and
113+
# does not need observations or relations.
114+
existing_paths = await self.repository.get_all_file_paths()
108115

109116
# Use the enhanced conflict detection utility
110-
conflicting_paths = detect_potential_file_conflicts(file_path, existing_paths)
111-
112-
# Find the entities corresponding to conflicting paths
113-
for entity in all_entities:
114-
if entity.file_path in conflicting_paths:
115-
conflicts.append(entity)
116-
117-
return conflicts
117+
return detect_potential_file_conflicts(file_path, existing_paths)
118118

119119
async def resolve_permalink(
120120
self,
@@ -143,8 +143,7 @@ async def resolve_permalink(
143143
)
144144
if conflicts:
145145
logger.warning(
146-
f"Detected potential file path conflicts for '{file_path_str}': "
147-
f"{[entity.file_path for entity in conflicts]}"
146+
f"Detected potential file path conflicts for '{file_path_str}': {conflicts}"
148147
)
149148

150149
# If markdown has explicit permalink, try to validate it
@@ -255,6 +254,10 @@ async def create_or_update_entity(self, schema: EntitySchema) -> Tuple[EntityMod
255254

256255
async def create_entity(self, schema: EntitySchema) -> EntityModel:
257256
"""Create a new entity and write to filesystem."""
257+
return (await self.create_entity_with_content(schema)).entity
258+
259+
async def create_entity_with_content(self, schema: EntitySchema) -> EntityWriteResult:
260+
"""Create a new entity and return both the entity row and written markdown."""
258261
logger.debug(f"Creating entity: {schema.title}")
259262

260263
# Get file path and ensure it's a Path object
@@ -328,10 +331,19 @@ async def create_entity(self, schema: EntitySchema) -> EntityModel:
328331
action="create",
329332
phase="update_checksum",
330333
):
331-
return await self.repository.update(entity.id, {"checksum": checksum})
334+
updated = await self.repository.update(entity.id, {"checksum": checksum})
335+
if not updated: # pragma: no cover
336+
raise ValueError(f"Failed to update entity checksum after create: {entity.id}")
337+
return EntityWriteResult(entity=updated, content=final_content)
332338

333339
async def update_entity(self, entity: EntityModel, schema: EntitySchema) -> EntityModel:
334340
"""Update an entity's content and metadata."""
341+
return (await self.update_entity_with_content(entity, schema)).entity
342+
343+
async def update_entity_with_content(
344+
self, entity: EntityModel, schema: EntitySchema
345+
) -> EntityWriteResult:
346+
"""Update an entity and return both the entity row and written markdown."""
335347
logger.debug(
336348
f"Updating entity with permalink: {entity.permalink} content-type: {schema.content_type}"
337349
)
@@ -444,8 +456,10 @@ async def update_entity(self, entity: EntityModel, schema: EntitySchema) -> Enti
444456
phase="update_checksum",
445457
):
446458
entity = await self.repository.update(entity.id, {"checksum": checksum})
459+
if not entity: # pragma: no cover
460+
raise ValueError(f"Failed to update entity checksum after update: {file_path}")
447461

448-
return entity
462+
return EntityWriteResult(entity=entity, content=final_content)
449463

450464
async def fast_write_entity(
451465
self,
@@ -988,6 +1002,27 @@ async def edit_entity(
9881002
EntityNotFoundError: If the entity cannot be found
9891003
ValueError: If required parameters are missing for the operation or replacement count doesn't match expected
9901004
"""
1005+
return (
1006+
await self.edit_entity_with_content(
1007+
identifier=identifier,
1008+
operation=operation,
1009+
content=content,
1010+
section=section,
1011+
find_text=find_text,
1012+
expected_replacements=expected_replacements,
1013+
)
1014+
).entity
1015+
1016+
async def edit_entity_with_content(
1017+
self,
1018+
identifier: str,
1019+
operation: str,
1020+
content: str,
1021+
section: Optional[str] = None,
1022+
find_text: Optional[str] = None,
1023+
expected_replacements: int = 1,
1024+
) -> EntityWriteResult:
1025+
"""Edit an entity and return both the entity row and written markdown."""
9911026
logger.debug(f"Editing entity: {identifier}, operation: {operation}")
9921027

9931028
with telemetry.scope(
@@ -1055,8 +1090,10 @@ async def edit_entity(
10551090
phase="update_checksum",
10561091
):
10571092
entity = await self.repository.update(entity.id, {"checksum": checksum})
1093+
if not entity: # pragma: no cover
1094+
raise ValueError(f"Failed to update entity checksum after edit: {file_path}")
10581095

1059-
return entity
1096+
return EntityWriteResult(entity=entity, content=new_content)
10601097

10611098
def apply_edit_operation(
10621099
self,

tests/api/v2/test_knowledge_router.py

Lines changed: 6 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -355,6 +355,7 @@ async def test_update_entity_by_id(
355355
response = await client.put(
356356
f"{v2_project_url}/knowledge/entities/{original_external_id}",
357357
json=update_data,
358+
params={"fast": False},
358359
)
359360

360361
assert response.status_code == 200
@@ -363,6 +364,8 @@ async def test_update_entity_by_id(
363364
# V2 update must return external_id field
364365
assert updated_entity.external_id is not None
365366
assert updated_entity.api_version == "v2"
367+
assert updated_entity.content is not None
368+
assert "Updated content via V2" in updated_entity.content
366369

367370
# Verify file was updated
368371
file_path = file_service.get_entity_path(updated_entity)
@@ -532,6 +535,7 @@ async def test_edit_entity_by_id_append(
532535
response = await client.patch(
533536
f"{v2_project_url}/knowledge/entities/{original_external_id}",
534537
json=edit_data,
538+
params={"fast": False},
535539
)
536540

537541
assert response.status_code == 200
@@ -540,6 +544,8 @@ async def test_edit_entity_by_id_append(
540544
# V2 patch must return external_id field
541545
assert edited_entity.external_id is not None
542546
assert edited_entity.api_version == "v2"
547+
assert edited_entity.content is not None
548+
assert "Appended content" in edited_entity.content
543549

544550
# Verify file has both original and appended content
545551
file_path = file_service.get_entity_path(edited_entity)

tests/api/v2/test_knowledge_router_telemetry.py

Lines changed: 15 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -61,11 +61,12 @@ async def test_create_entity_emits_root_and_nested_spans(monkeypatch) -> None:
6161
entity = _fake_entity()
6262

6363
class FakeEntityService:
64-
async def create_entity(self, data):
65-
return entity
64+
async def create_entity_with_content(self, data):
65+
return SimpleNamespace(entity=entity, content="telemetry content")
6666

6767
class FakeSearchService:
68-
async def index_entity(self, entity):
68+
async def index_entity(self, entity, content=None):
69+
assert content == "telemetry content"
6970
return None
7071

7172
class FakeTaskScheduler:
@@ -74,7 +75,7 @@ def schedule(self, *args, **kwargs):
7475

7576
class FakeFileService:
7677
async def read_file_content(self, path):
77-
return "telemetry content"
78+
raise AssertionError("non-fast create should not re-read file content")
7879

7980
result = await knowledge_router_module.create_entity(
8081
project_id="project-123",
@@ -115,11 +116,12 @@ async def test_update_entity_emits_root_and_nested_spans(monkeypatch) -> None:
115116
entity = _fake_entity()
116117

117118
class FakeEntityService:
118-
async def update_entity(self, existing, data):
119-
return entity
119+
async def update_entity_with_content(self, existing, data):
120+
return SimpleNamespace(entity=entity, content="updated telemetry content")
120121

121122
class FakeSearchService:
122-
async def index_entity(self, entity):
123+
async def index_entity(self, entity, content=None):
124+
assert content == "updated telemetry content"
123125
return None
124126

125127
class FakeEntityRepository:
@@ -132,7 +134,7 @@ def schedule(self, *args, **kwargs):
132134

133135
class FakeFileService:
134136
async def read_file_content(self, path):
135-
return "updated telemetry content"
137+
raise AssertionError("non-fast update should not re-read file content")
136138

137139
response = Response()
138140
result = await knowledge_router_module.update_entity_by_id(
@@ -178,11 +180,12 @@ async def test_edit_entity_emits_root_and_nested_spans(monkeypatch) -> None:
178180
entity = _fake_entity()
179181

180182
class FakeEntityService:
181-
async def edit_entity(self, **kwargs):
182-
return entity
183+
async def edit_entity_with_content(self, **kwargs):
184+
return SimpleNamespace(entity=entity, content="edited telemetry content")
183185

184186
class FakeSearchService:
185-
async def index_entity(self, entity):
187+
async def index_entity(self, entity, content=None):
188+
assert content == "edited telemetry content"
186189
return None
187190

188191
class FakeEntityRepository:
@@ -195,7 +198,7 @@ def schedule(self, *args, **kwargs):
195198

196199
class FakeFileService:
197200
async def read_file_content(self, path):
198-
return "edited telemetry content"
201+
raise AssertionError("non-fast edit should not re-read file content")
199202

200203
result = await knowledge_router_module.edit_entity_by_id(
201204
data=EditEntityRequest(operation="append", content="edited telemetry content"),

0 commit comments

Comments
 (0)