Skip to content

Commit d525af2

Browse files
phernandez and claude committed
Implement SQLite UPSERT approach for entity permalink conflicts
Replace complex try/catch exception handling with a cleaner UPSERT approach that handles permalink and file_path conflicts at the repository level.

Key changes:
- Add `upsert_entity()` method to EntityRepository with hybrid approach
- Check for existing entity by file_path first (update case)
- Handle permalink conflicts with automatic suffix generation
- Simplify EntityService.create_entity_from_markdown() to use UPSERT
- Update tests to match new behavior
- Fix alembic import order lint issue

The UPSERT approach provides better conflict resolution:
- Updates existing entities when file_path matches
- Generates unique permalinks (e.g., "test/note-1") for conflicts
- Eliminates complex string parsing of IntegrityError messages
- More reliable and maintainable than exception handling

Fixes #139 - UNIQUE constraint failed: entity.permalink

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
Signed-off-by: phernandez <paul@basicmachines.co>
1 parent d8237b3 commit d525af2

5 files changed

Lines changed: 339 additions & 113 deletions

File tree

src/basic_memory/alembic/env.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -8,12 +8,12 @@
88

99
from alembic import context
1010

11-
from basic_memory.models import Base
12-
1311
# set config.env to "test" for pytest to prevent logging to file in utils.setup_logging()
1412
os.environ["BASIC_MEMORY_ENV"] = "test"
1513

16-
from basic_memory.config import app_config
14+
# Import after setting environment variable # noqa: E402
15+
from basic_memory.config import app_config # noqa: E402
16+
from basic_memory.models import Base # noqa: E402
1717

1818
# this is the Alembic Config object, which provides
1919
# access to the values within the .ini file in use.

src/basic_memory/repository/entity_repository.py

Lines changed: 115 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,10 +3,13 @@
33
from pathlib import Path
44
from typing import List, Optional, Sequence, Union
55

6+
from sqlalchemy import select
7+
from sqlalchemy.exc import IntegrityError
68
from sqlalchemy.ext.asyncio import AsyncSession, async_sessionmaker
79
from sqlalchemy.orm import selectinload
810
from sqlalchemy.orm.interfaces import LoaderOption
911

12+
from basic_memory import db
1013
from basic_memory.models.knowledge import Entity, Observation, Relation
1114
from basic_memory.repository.repository import Repository
1215

@@ -96,3 +99,115 @@ async def find_by_permalinks(self, permalinks: List[str]) -> Sequence[Entity]:
9699

97100
result = await self.execute_query(query)
98101
return list(result.scalars().all())
102+
103+
async def upsert_entity(self, entity: Entity) -> Entity:
    """Insert a new entity, or update the one already stored for its file path.

    Resolution order:

    1. If a row with the same ``file_path`` and ``project_id`` exists, its
       scalar fields are overwritten from ``entity`` and that row is returned.
    2. Otherwise ``entity`` is inserted as-is.
    3. If the insert raises ``IntegrityError``, the session is rolled back and
       the insert is retried through ``_handle_permalink_conflict``, which
       assigns a suffixed permalink.

    Args:
        entity: The entity to insert or update.

    Returns:
        The stored entity, re-queried with ``self.get_load_options()`` applied.

    Raises:
        RuntimeError: If the entity cannot be read back after being written.
    """
    async with db.scoped_session(self.session_maker) as session:
        # Set project_id if applicable and not already set
        self._set_project_id_if_needed(entity)

        # Check for an existing entity with the same file_path first
        existing_by_path = await session.execute(
            select(Entity).where(
                Entity.file_path == entity.file_path,
                Entity.project_id == entity.project_id,
            )
        )
        existing_path_entity = existing_by_path.scalar_one_or_none()

        if existing_path_entity:
            # Update case: copy the mutable scalar fields onto the stored row.
            for key in (
                "title",
                "entity_type",
                "entity_metadata",
                "content_type",
                "permalink",
                "checksum",
                "updated_at",
            ):
                setattr(existing_path_entity, key, getattr(entity, key))

            await session.flush()
            return await self._reload_by_file_path(session, entity, "update")

        # No existing entity with this file_path: try a plain insert. Only the
        # add/flush pair can raise IntegrityError, so keep the try body to that.
        try:
            session.add(entity)
            await session.flush()
        except IntegrityError:
            # Treated as a permalink conflict with a different file:
            # roll back and retry with a generated unique permalink.
            await session.rollback()
            return await self._handle_permalink_conflict(entity, session)

        return await self._reload_by_file_path(session, entity, "insert")


async def _reload_by_file_path(
    self, session: AsyncSession, entity: Entity, action: str
) -> Entity:
    """Re-query ``entity`` with relationship loaders applied.

    The lookup is scoped by both ``file_path`` and ``project_id`` so that an
    identical path stored under another project can neither be returned by
    mistake nor make ``scalar_one_or_none()`` fail on multiple rows.

    Args:
        session: The session the entity was written in.
        entity: The entity whose ``file_path`` / ``project_id`` identify the row.
        action: Verb used in the error message (``"update"`` or ``"insert"``).

    Raises:
        RuntimeError: If no matching row is found.
    """
    query = (
        select(Entity)
        .where(
            Entity.file_path == entity.file_path,
            Entity.project_id == entity.project_id,
        )
        .options(*self.get_load_options())
    )
    result = await session.execute(query)
    found = result.scalar_one_or_none()
    if not found:  # pragma: no cover
        raise RuntimeError(f"Failed to retrieve entity after {action}: {entity.file_path}")
    return found
179+
async def _handle_permalink_conflict(self, entity: Entity, session: AsyncSession) -> Entity:
    """Insert ``entity`` under the first free ``<permalink>-<n>`` variant.

    Probes ``<base>-1``, ``<base>-2``, ... within the entity's project until a
    permalink with no existing row is found, assigns it to ``entity`` and
    inserts the entity in the given session.

    Args:
        entity: The entity whose insert failed; its ``permalink`` is mutated.
        session: The (already rolled back) session to insert into.

    Returns:
        The inserted entity, re-queried with ``self.get_load_options()`` applied.

    Raises:
        RuntimeError: If the entity cannot be read back after the insert.
    """
    base_permalink = entity.permalink
    suffix = 1

    # Find a permalink that is not yet used in this project
    while True:
        test_permalink = f"{base_permalink}-{suffix}"
        existing = await session.execute(
            select(Entity).where(
                Entity.permalink == test_permalink,
                Entity.project_id == entity.project_id,
            )
        )
        if existing.scalar_one_or_none() is None:
            entity.permalink = test_permalink
            break
        suffix += 1

    # Insert with the permalink that was free at probe time. A concurrent
    # writer could still claim it before the flush; in that case the
    # resulting IntegrityError propagates to the caller.
    session.add(entity)
    await session.flush()

    # Reload with relationships, scoped by project so that an identical
    # file_path in another project cannot be matched.
    query = (
        select(Entity)
        .where(
            Entity.file_path == entity.file_path,
            Entity.project_id == entity.project_id,
        )
        .options(*self.get_load_options())
    )
    result = await session.execute(query)
    found = result.scalar_one_or_none()
    if not found:  # pragma: no cover
        raise RuntimeError(f"Failed to retrieve entity after insert: {entity.file_path}")
    return found

src/basic_memory/services/entity_service.py

Lines changed: 8 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -292,57 +292,21 @@ async def create_entity_from_markdown(
292292
293293
Creates the entity with null checksum to indicate sync not complete.
294294
Relations will be added in second pass.
295+
296+
Uses UPSERT approach to handle permalink/file_path conflicts cleanly.
295297
"""
296298
logger.debug(f"Creating entity: {markdown.frontmatter.title} file_path: {file_path}")
297299
model = entity_model_from_markdown(file_path, markdown)
298300

299301
# Mark as incomplete because we still need to add relations
300302
model.checksum = None
301-
# Repository will set project_id automatically
303+
304+
# Use UPSERT to handle conflicts cleanly
302305
try:
303-
return await self.repository.add(model)
304-
except IntegrityError as e:
305-
# Handle different types of UNIQUE constraint failures
306-
if "UNIQUE constraint failed: entity.file_path" in str(e):
307-
# File path conflict - update existing entity
308-
logger.info(
309-
f"Entity already exists for file_path={file_path}, updating instead of creating"
310-
)
311-
return await self.update_entity_and_observations(file_path, markdown)
312-
elif "UNIQUE constraint failed: entity.permalink" in str(e):
313-
# Permalink conflict - check if it's the same file or different file
314-
existing_entity = await self.repository.get_by_permalink(model.permalink)
315-
if existing_entity and existing_entity.file_path == str(file_path):
316-
# Same file - update existing entity
317-
logger.info(
318-
f"Entity already exists for permalink={model.permalink}, updating instead of creating"
319-
)
320-
return await self.update_entity_and_observations(file_path, markdown)
321-
else:
322-
# Different file with same permalink - generate unique permalink
323-
logger.info(
324-
f"Permalink conflict for {model.permalink}, generating unique permalink"
325-
)
326-
# Generate unique permalink
327-
base_permalink = model.permalink
328-
suffix = 1
329-
while await self.repository.get_by_permalink(model.permalink):
330-
model.permalink = f"{base_permalink}-{suffix}"
331-
suffix += 1
332-
logger.debug(f"Using unique permalink: {model.permalink}")
333-
# Try to create with unique permalink
334-
try:
335-
return await self.repository.add(model)
336-
except IntegrityError as e:
337-
logger.error(
338-
f"IntegrityError while adding entity with unique permalink: {model.permalink}. Error: {e}"
339-
)
340-
raise EntityCreationError(
341-
f"Failed to create entity with unique permalink: {model.permalink}"
342-
)
343-
else:
344-
# Re-raise if it's a different integrity error
345-
raise
306+
return await self.repository.upsert_entity(model)
307+
except Exception as e:
308+
logger.error(f"Failed to upsert entity for {file_path}: {e}")
309+
raise EntityCreationError(f"Failed to create entity: {str(e)}") from e
346310

347311
async def update_entity_and_observations(
348312
self, file_path: Path, markdown: EntityMarkdown

0 commit comments

Comments
 (0)