Skip to content

Commit c50d97e

Browse files
committed
fix(sync): instrument single markdown indexing
Signed-off-by: phernandez <paul@basicmachines.co>
1 parent e2e6557 commit c50d97e

2 files changed

Lines changed: 79 additions & 12 deletions

File tree

src/basic_memory/indexing/batch_indexer.py

Lines changed: 26 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
from loguru import logger
1212
from sqlalchemy.exc import IntegrityError
1313

14+
from basic_memory import telemetry
1415
from basic_memory.config import BasicMemoryConfig
1516
from basic_memory.file_utils import compute_checksum, has_frontmatter, remove_frontmatter
1617
from basic_memory.markdown.schemas import EntityMarkdown
@@ -190,35 +191,48 @@ async def index_markdown_file(
190191
if not self._is_markdown(file):
191192
raise ValueError(f"index_markdown_file requires markdown input: {file.path}")
192193

193-
prepared = await self._prepare_markdown_file(file)
194+
with telemetry.span("index.markdown_file.prepare", path=file.path):
195+
prepared = await self._prepare_markdown_file(file)
194196
if existing_permalink_by_path is None:
195-
existing_permalink_by_path = {
196-
path: permalink
197-
for path, permalink in (
198-
await self.entity_repository.get_file_path_to_permalink_map()
199-
).items()
200-
}
197+
with telemetry.span("index.markdown_file.load_permalink_map", path=file.path):
198+
existing_permalink_by_path = {
199+
path: permalink
200+
for path, permalink in (
201+
await self.entity_repository.get_file_path_to_permalink_map()
202+
).items()
203+
}
201204

202205
reserved_permalinks = {
203206
permalink
204207
for path, permalink in existing_permalink_by_path.items()
205208
if path != file.path and permalink
206209
}
207-
prepared = await self._normalize_markdown_file(prepared, reserved_permalinks)
210+
with telemetry.span("index.markdown_file.normalize", path=file.path):
211+
prepared = await self._normalize_markdown_file(prepared, reserved_permalinks)
208212
existing_permalink_by_path[file.path] = prepared.markdown.frontmatter.permalink
209213

210-
persisted = await self._persist_markdown_file(prepared, is_new=new)
214+
with telemetry.span("index.markdown_file.persist", path=file.path, is_new=new):
215+
persisted = await self._persist_markdown_file(prepared, is_new=new)
211216
existing_permalink_by_path[file.path] = persisted.entity.permalink
212-
await self._resolve_batch_relations([persisted.entity.id], max_concurrent=1)
213217

214-
refreshed = await self.entity_repository.find_by_ids([persisted.entity.id])
218+
with telemetry.span(
219+
"index.markdown_file.reload_entity",
220+
path=file.path,
221+
entity_id=persisted.entity.id,
222+
):
223+
refreshed = await self.entity_repository.find_by_ids([persisted.entity.id])
215224
if len(refreshed) != 1: # pragma: no cover
216225
raise ValueError(f"Failed to reload indexed entity for {file.path}")
217226
entity = refreshed[0]
218227
prepared_entity = self._build_prepared_entity(persisted.prepared, entity)
219228

220229
if index_search:
221-
return await self._refresh_search_index(prepared_entity, entity)
230+
with telemetry.span(
231+
"index.markdown_file.refresh_search_index",
232+
path=file.path,
233+
entity_id=entity.id,
234+
):
235+
return await self._refresh_search_index(prepared_entity, entity)
222236

223237
return IndexedEntity(
224238
path=prepared_entity.path,

tests/sync/test_sync_service_telemetry.py

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,12 +4,16 @@
44

55
import importlib
66
from contextlib import contextmanager
7+
from pathlib import Path
8+
from textwrap import dedent
79
from types import SimpleNamespace
10+
from unittest.mock import AsyncMock
811

912
import pytest
1013

1114
from basic_memory.sync.sync_service import SyncReport
1215

16+
batch_indexer_module = importlib.import_module("basic_memory.indexing.batch_indexer")
1317
sync_service_module = importlib.import_module("basic_memory.sync.sync_service")
1418

1519

@@ -30,6 +34,14 @@ def fake_span(name: str, **attrs):
3034
return operations, spans, fake_operation, fake_span
3135

3236

37+
def _write_markdown(project_root: Path, relative_path: str, content: str) -> Path:
38+
"""Create one markdown file under the test project."""
39+
file_path = project_root / relative_path
40+
file_path.parent.mkdir(parents=True, exist_ok=True)
41+
file_path.write_text(content, encoding="utf-8")
42+
return file_path
43+
44+
3345
@pytest.mark.asyncio
3446
async def test_sync_emits_phase_spans(sync_service, project_config, monkeypatch) -> None:
3547
operations, spans, fake_operation, fake_span = _capture_sync_telemetry()
@@ -97,6 +109,47 @@ async def fake_update(project_id, values):
97109
]
98110

99111

112+
@pytest.mark.asyncio
113+
async def test_sync_one_markdown_file_emits_index_phase_spans(
114+
sync_service, test_project, monkeypatch
115+
) -> None:
116+
_, spans, _, fake_span = _capture_sync_telemetry()
117+
monkeypatch.setattr(batch_indexer_module.telemetry, "span", fake_span)
118+
monkeypatch.setattr(sync_service.search_service, "index_entity_data", AsyncMock())
119+
120+
_write_markdown(
121+
Path(test_project.path),
122+
"notes/telemetry.md",
123+
dedent(
124+
f"""\
125+
---
126+
title: Telemetry Note
127+
type: note
128+
permalink: {test_project.name}/notes/telemetry
129+
---
130+
131+
# Telemetry Note
132+
133+
Body content.
134+
"""
135+
),
136+
)
137+
138+
result = await sync_service.sync_one_markdown_file("notes/telemetry.md")
139+
140+
index_spans = [(name, attrs) for name, attrs in spans if name.startswith("index.markdown_file")]
141+
assert [name for name, _ in index_spans] == [
142+
"index.markdown_file.prepare",
143+
"index.markdown_file.load_permalink_map",
144+
"index.markdown_file.normalize",
145+
"index.markdown_file.persist",
146+
"index.markdown_file.reload_entity",
147+
]
148+
assert index_spans[0][1] == {"path": "notes/telemetry.md"}
149+
assert index_spans[3][1] == {"path": "notes/telemetry.md", "is_new": True}
150+
assert index_spans[4][1]["entity_id"] == result.entity.id
151+
152+
100153
@pytest.mark.asyncio
101154
async def test_sync_file_emits_failure_span(sync_service, monkeypatch) -> None:
102155
_, spans, _, fake_span = _capture_sync_telemetry()

0 commit comments

Comments
 (0)