5858}
5959
6060
61+ def _strip_nul (value : str ) -> str :
62+ """Strip NUL bytes that PostgreSQL text columns cannot store.
63+
64+ rclone preallocation on virtual filesystems (e.g. Google Drive File Stream)
65+ can pad files with \\ x00 bytes. See: rclone/rclone#6801
66+ """
67+ return value .replace ("\x00 " , "" )
68+
69+
6170def _mtime_to_datetime (entity : Entity ) -> datetime :
6271 """Convert entity mtime (file modification time) to datetime.
6372
@@ -402,7 +411,7 @@ async def index_entity_file(
402411 id = entity .id ,
403412 entity_id = entity .id ,
404413 type = SearchItemType .ENTITY .value ,
405- title = entity .title ,
414+ title = _strip_nul ( entity .title ) ,
406415 permalink = entity .permalink , # Required for Postgres NOT NULL constraint
407416 file_path = entity .file_path ,
408417 metadata = {
@@ -461,7 +470,7 @@ async def index_entity_markdown(
461470 # Store full content for vector embedding quality.
462471 # The chunker in the vector pipeline splits this into
463472 # appropriately-sized pieces for embedding.
464- content_snippet = content
473+ content_snippet = _strip_nul ( content )
465474
466475 if entity .permalink :
467476 content_stems .extend (self ._generate_variants (entity .permalink ))
@@ -473,7 +482,7 @@ async def index_entity_markdown(
473482 if entity_tags :
474483 content_stems .extend (entity_tags )
475484
476- entity_content_stems = "\n " .join (p for p in content_stems if p and p .strip ())
485+ entity_content_stems = _strip_nul ( "\n " .join (p for p in content_stems if p and p .strip () ))
477486
478487 # Truncate to stay under Postgres's 8KB index row limit
479488 if len (entity_content_stems ) > MAX_CONTENT_STEMS_SIZE : # pragma: no cover
@@ -484,7 +493,7 @@ async def index_entity_markdown(
484493 SearchIndexRow (
485494 id = entity .id ,
486495 type = SearchItemType .ENTITY .value ,
487- title = entity .title ,
496+ title = _strip_nul ( entity .title ) ,
488497 content_stems = entity_content_stems ,
489498 content_snippet = content_snippet ,
490499 permalink = entity .permalink ,
@@ -510,8 +519,8 @@ async def index_entity_markdown(
510519 seen_permalinks .add (obs_permalink )
511520
512521 # Index with parent entity's file path since that's where it's defined
513- obs_content_stems = " \n " . join (
514- p for p in self ._generate_variants (obs .content ) if p and p .strip ()
522+ obs_content_stems = _strip_nul (
523+ " \n " . join ( p for p in self ._generate_variants (obs .content ) if p and p .strip () )
515524 )
516525 # Truncate to stay under Postgres's 8KB index row limit
517526 if len (obs_content_stems ) > MAX_CONTENT_STEMS_SIZE : # pragma: no cover
@@ -520,9 +529,9 @@ async def index_entity_markdown(
520529 SearchIndexRow (
521530 id = obs .id ,
522531 type = SearchItemType .OBSERVATION .value ,
523- title = f"{ obs .category } : { obs .content [:100 ]} ..." ,
532+ title = _strip_nul ( f"{ obs .category } : { obs .content [:100 ]} ..." ) ,
524533 content_stems = obs_content_stems ,
525- content_snippet = obs .content ,
534+ content_snippet = _strip_nul ( obs .content ) ,
526535 permalink = obs_permalink ,
527536 file_path = entity .file_path ,
528537 category = obs .category ,
@@ -539,14 +548,14 @@ async def index_entity_markdown(
539548 # Add relation rows (only outgoing relations defined in this file)
540549 for rel in entity .outgoing_relations :
541550 # Create descriptive title showing the relationship
542- relation_title = (
551+ relation_title = _strip_nul (
543552 f"{ rel .from_entity .title } → { rel .to_entity .title } "
544553 if rel .to_entity
545554 else f"{ rel .from_entity .title } "
546555 )
547556
548- rel_content_stems = " \n " . join (
549- p for p in self ._generate_variants (relation_title ) if p and p .strip ()
557+ rel_content_stems = _strip_nul (
558+ " \n " . join ( p for p in self ._generate_variants (relation_title ) if p and p .strip () )
550559 )
551560 rows_to_index .append (
552561 SearchIndexRow (
0 commit comments