@@ -24,6 +24,11 @@ class PostgresSearchRepository(SearchRepositoryBase):
2424 - GIN indexes for performance
2525 - ts_rank() function for relevance scoring
2626 - JSONB containment operators for metadata search
27+
28+ Note: This implementation uses UPSERT patterns (INSERT ... ON CONFLICT) instead of
29+ delete-then-insert to handle race conditions during parallel entity indexing.
30+ The partial unique index uix_search_index_permalink_project prevents duplicate
31+ permalinks per project.
2732 """
2833
2934 async def init_search_index (self ):
@@ -41,6 +46,63 @@ async def init_search_index(self):
4146 # - CREATE INDEX USING GIN on metadata jsonb_path_ops
4247 pass
4348
async def index_item(self, search_index_row: SearchIndexRow) -> None:
    """Insert one search row, or overwrite the row that already owns its permalink.

    The statement is a PostgreSQL UPSERT whose conflict target is the partial
    unique index ``uix_search_index_permalink_project`` -- ``(permalink,
    project_id) WHERE permalink IS NOT NULL``. Two workers indexing the same
    entity at the same time therefore converge on a single row instead of
    racing a delete against an insert.

    Rows whose permalink is NULL are not covered by that partial index, so
    the ``ON CONFLICT`` clause never fires for them and they are inserted
    as-is.

    Args:
        search_index_row: The row to write. Its values are bound as statement
            parameters; ``project_id`` is taken from this repository, not
            from the row.
    """
    # NOTE(review): the conflict target only covers the permalink index. A
    # NULL-permalink row that collides on the table's primary key (believed
    # to be (id, type, project_id)) is presumably expected to have been
    # removed by the caller beforehand -- confirm.
    upsert_stmt = text("""
        INSERT INTO search_index (
            id, title, content_stems, content_snippet, permalink, file_path, type, metadata,
            from_id, to_id, relation_type,
            entity_id, category,
            created_at, updated_at,
            project_id
        ) VALUES (
            :id, :title, :content_stems, :content_snippet, :permalink, :file_path, :type, :metadata,
            :from_id, :to_id, :relation_type,
            :entity_id, :category,
            :created_at, :updated_at,
            :project_id
        )
        ON CONFLICT (permalink, project_id) WHERE permalink IS NOT NULL DO UPDATE SET
            id = EXCLUDED.id,
            title = EXCLUDED.title,
            content_stems = EXCLUDED.content_stems,
            content_snippet = EXCLUDED.content_snippet,
            file_path = EXCLUDED.file_path,
            type = EXCLUDED.type,
            metadata = EXCLUDED.metadata,
            from_id = EXCLUDED.from_id,
            to_id = EXCLUDED.to_id,
            relation_type = EXCLUDED.relation_type,
            entity_id = EXCLUDED.entity_id,
            category = EXCLUDED.category,
            created_at = EXCLUDED.created_at,
            updated_at = EXCLUDED.updated_at
    """)

    async with db.scoped_session(self.session_maker) as session:
        # Raw SQL cannot bind Python dicts, so JSON columns are serialized
        # to strings before being handed to the driver.
        params = search_index_row.to_insert(serialize_json=True)
        # Scope the row to this repository's project.
        params["project_id"] = self.project_id

        await session.execute(upsert_stmt, params)
        logger.debug(f"indexed row {search_index_row}")
        await session.commit()
105+
44106 def _prepare_search_term (self , term : str , is_prefix : bool = True ) -> str :
45107 """Prepare a search term for tsquery format.
46108
@@ -316,10 +378,14 @@ async def search(
316378 async def bulk_index_items (self , search_index_rows : List [SearchIndexRow ]) -> None :
317379 """Index multiple items in a single batch operation using UPSERT.
318380
319- Uses INSERT ... ON CONFLICT DO UPDATE to handle re-indexing of existing
320- entities (e.g., during forward reference resolution) without requiring
321- a separate delete operation. This eliminates race conditions between
322- delete and insert operations in separate transactions.
381+ Uses INSERT ... ON CONFLICT to handle race conditions during parallel
382+ entity indexing. The partial unique index uix_search_index_permalink_project
383+ on (permalink, project_id) WHERE permalink IS NOT NULL prevents duplicate
384+ permalinks.
385+
386+ For rows with non-null permalinks (entities), conflicts are resolved by
387+ updating the existing row. For rows with null permalinks (observations,
388+ relations), the partial index doesn't apply and they are inserted directly.
323389
324390 Args:
325391 search_index_rows: List of SearchIndexRow objects to index
@@ -338,11 +404,10 @@ async def bulk_index_items(self, search_index_rows: List[SearchIndexRow]) -> Non
338404 insert_data ["project_id" ] = self .project_id
339405 insert_data_list .append (insert_data )
340406
341- # Use UPSERT (INSERT ... ON CONFLICT) to handle re-indexing
342- # Primary key is (id, type, project_id)
343- # This handles race conditions during forward reference resolution
344- # where an entity might be re-indexed before the delete commits
345- # Syntax works for both SQLite 3.24+ and PostgreSQL
407+ # Use upsert to handle race conditions during parallel indexing
408+ # ON CONFLICT (permalink, project_id) matches the partial unique index
409+ # uix_search_index_permalink_project WHERE permalink IS NOT NULL
410+ # For rows with NULL permalinks (observations, relations), no conflict occurs
346411 await session .execute (
347412 text ("""
348413 INSERT INTO search_index (
@@ -358,12 +423,13 @@ async def bulk_index_items(self, search_index_rows: List[SearchIndexRow]) -> Non
358423 :created_at, :updated_at,
359424 :project_id
360425 )
361- ON CONFLICT (id, type, project_id) DO UPDATE SET
426+ ON CONFLICT (permalink, project_id) WHERE permalink IS NOT NULL DO UPDATE SET
427+ id = EXCLUDED.id,
362428 title = EXCLUDED.title,
363429 content_stems = EXCLUDED.content_stems,
364430 content_snippet = EXCLUDED.content_snippet,
365- permalink = EXCLUDED.permalink,
366431 file_path = EXCLUDED.file_path,
432+ type = EXCLUDED.type,
367433 metadata = EXCLUDED.metadata,
368434 from_id = EXCLUDED.from_id,
369435 to_id = EXCLUDED.to_id,
0 commit comments