Skip to content

Commit 53c29a3

Browse files
phernandez and claude committed
fix: resolve FTS5 search syntax errors with special characters
Enhances search term preparation to handle special characters gracefully while preserving functionality:

- Improves FTS5 query preparation with targeted special character handling
- Preserves boolean operators (AND, OR, NOT) without modification
- Quotes problematic characters that cause syntax errors
- Maintains wildcard patterns for legitimate use cases
- Adds comprehensive error handling with graceful fallback

Includes extensive test coverage:

- 10 new test cases for various search scenarios
- Programming terms (C++, function(), email@domain.com) now searchable
- Malformed syntax handled without crashes
- Boolean and wildcard functionality preserved

Fixes search crashes when users enter queries containing special characters.

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
1 parent d8c13bf commit 53c29a3

3 files changed

Lines changed: 120 additions & 16 deletions

File tree

src/basic_memory/repository/search_repository.py

Lines changed: 32 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -128,25 +128,32 @@ def _prepare_search_term(self, term: str, is_prefix: bool = True) -> str:
128128
is_prefix: Whether to add prefix search capability (* suffix)
129129
130130
For FTS5:
131-
- Special characters and phrases need to be quoted
132-
- Terms with spaces or special chars need quotes
133131
- Boolean operators (AND, OR, NOT) are preserved for complex queries
132+
- Terms with FTS5 special characters are quoted to prevent syntax errors
133+
- Simple terms get prefix wildcards for better matching
134134
"""
135-
if "*" in term:
136-
return term
137-
138135
# Check for explicit boolean operators - if present, return the term as is
139136
boolean_operators = [" AND ", " OR ", " NOT "]
140137
if any(op in f" {term} " for op in boolean_operators):
141138
return term
142139

143-
# List of FTS5 special characters that need escaping/quoting
144-
special_chars = ["/", "-", ".", " ", "(", ")", "[", "]", '"', "'"]
145-
146-
# Check if term contains any special characters
147-
needs_quotes = any(c in term for c in special_chars)
140+
# Check if term is already a proper wildcard pattern (alphanumeric + *)
141+
# e.g., "hello*", "test*world" - these should be left alone
142+
if "*" in term and all(c.isalnum() or c in "*_-" for c in term):
143+
return term
148144

149-
if needs_quotes:
145+
# Characters that can cause FTS5 syntax errors when used as operators
146+
# We're more conservative here - only quote when we detect problematic patterns
147+
problematic_chars = ['"', "'", "(", ")", "[", "]", "+", "!", "@", "#", "$", "%", "^", "&", "=", "|", "\\", "~", "`"]
148+
149+
# Characters that indicate we should quote (spaces, dots, colons, etc.)
150+
needs_quoting_chars = [" ", ".", ":", ";", ",", "<", ">", "?", "/"]
151+
152+
# Check if term needs quoting
153+
has_problematic = any(c in term for c in problematic_chars)
154+
has_spaces_or_special = any(c in term for c in needs_quoting_chars)
155+
156+
if has_problematic or has_spaces_or_special:
150157
# Escape any existing quotes by doubling them
151158
escaped_term = term.replace('"', '""')
152159
# Quote the entire term to handle special characters safely
@@ -273,9 +280,20 @@ async def search(
273280
"""
274281

275282
logger.trace(f"Search {sql} params: {params}")
276-
async with db.scoped_session(self.session_maker) as session:
277-
result = await session.execute(text(sql), params)
278-
rows = result.fetchall()
283+
try:
284+
async with db.scoped_session(self.session_maker) as session:
285+
result = await session.execute(text(sql), params)
286+
rows = result.fetchall()
287+
except Exception as e:
288+
# Handle FTS5 syntax errors and provide user-friendly feedback
289+
if "fts5: syntax error" in str(e).lower():
290+
logger.warning(f"FTS5 syntax error for search term: {search_text}, error: {e}")
291+
# Return empty results rather than crashing
292+
return []
293+
else:
294+
# Re-raise other database errors
295+
logger.error(f"Database error during search: {e}")
296+
raise
279297

280298
results = [
281299
SearchIndexRow(

src/basic_memory/services/project_service.py

Lines changed: 4 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -159,7 +159,9 @@ async def _ensure_single_default_project(self) -> None:
159159
multiple projects might have is_default=True or no project is marked as default.
160160
"""
161161
if not self.repository:
162-
raise ValueError("Repository is required for _ensure_single_default_project") # pragma: no cover
162+
raise ValueError(
163+
"Repository is required for _ensure_single_default_project"
164+
) # pragma: no cover
163165

164166
# Get all projects with is_default=True
165167
db_projects = await self.repository.find_all()
@@ -597,4 +599,4 @@ def get_system_status(self) -> SystemStatus:
597599
database_size=db_size_readable,
598600
watch_status=watch_status,
599601
timestamp=datetime.now(),
600-
)
602+
)

tests/repository/test_search_repository.py

Lines changed: 84 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -301,3 +301,87 @@ def test_directory_property():
301301
project_id=1,
302302
)
303303
assert row3.directory == ""
304+
305+
306+
class TestSearchTermPreparation:
307+
"""Test cases for FTS5 search term preparation."""
308+
309+
def test_simple_terms_get_prefix_wildcard(self, search_repository):
310+
"""Simple alphanumeric terms should get prefix matching."""
311+
assert search_repository._prepare_search_term("hello") == "hello*"
312+
assert search_repository._prepare_search_term("project") == "project*"
313+
assert search_repository._prepare_search_term("test123") == "test123*"
314+
315+
def test_terms_with_existing_wildcard_unchanged(self, search_repository):
316+
"""Terms that already contain * should remain unchanged."""
317+
assert search_repository._prepare_search_term("hello*") == "hello*"
318+
assert search_repository._prepare_search_term("test*world") == "test*world"
319+
320+
def test_boolean_operators_preserved(self, search_repository):
321+
"""Boolean operators should be preserved without modification."""
322+
assert search_repository._prepare_search_term("hello AND world") == "hello AND world"
323+
assert search_repository._prepare_search_term("cat OR dog") == "cat OR dog"
324+
assert search_repository._prepare_search_term("project NOT meeting") == "project NOT meeting"
325+
assert search_repository._prepare_search_term("(hello AND world) OR test") == "(hello AND world) OR test"
326+
327+
def test_programming_terms_should_work(self, search_repository):
328+
"""Programming-related terms with special chars should be searchable."""
329+
# These should be quoted to handle special characters safely
330+
assert search_repository._prepare_search_term("C++") == '"C++"*'
331+
assert search_repository._prepare_search_term("function()") == '"function()"*'
332+
assert search_repository._prepare_search_term("email@domain.com") == '"email@domain.com"*'
333+
assert search_repository._prepare_search_term("array[index]") == '"array[index]"*'
334+
assert search_repository._prepare_search_term("config.json") == '"config.json"*'
335+
336+
def test_malformed_fts5_syntax_quoted(self, search_repository):
337+
"""Malformed FTS5 syntax should be quoted to prevent errors."""
338+
# Multiple operators without proper syntax
339+
assert search_repository._prepare_search_term("+++invalid+++") == '"+++invalid+++"*'
340+
assert search_repository._prepare_search_term("!!!error!!!") == '"!!!error!!!"*'
341+
assert search_repository._prepare_search_term("@#$%^&*()") == '"@#$%^&*()"*'
342+
343+
def test_quoted_strings_handled_properly(self, search_repository):
344+
"""Strings with quotes should have quotes escaped."""
345+
assert search_repository._prepare_search_term('say "hello"') == '"say ""hello"""*'
346+
assert search_repository._prepare_search_term("it's working") == '"it\'s working"*'
347+
348+
def test_file_paths_no_prefix_wildcard(self, search_repository):
349+
"""File paths should not get prefix wildcards."""
350+
assert search_repository._prepare_search_term("config.json", is_prefix=False) == '"config.json"'
351+
assert search_repository._prepare_search_term("docs/readme.md", is_prefix=False) == '"docs/readme.md"'
352+
353+
def test_spaces_handled_correctly(self, search_repository):
354+
"""Terms with spaces should be quoted."""
355+
assert search_repository._prepare_search_term("hello world") == '"hello world"*'
356+
assert search_repository._prepare_search_term("project planning") == '"project planning"*'
357+
358+
@pytest.mark.asyncio
359+
async def test_search_with_special_characters_returns_results(self, search_repository):
360+
"""Integration test: search with special characters should work gracefully."""
361+
# This test ensures the search doesn't crash with FTS5 syntax errors
362+
363+
# These should all return empty results gracefully, not crash
364+
results1 = await search_repository.search(search_text="C++")
365+
assert isinstance(results1, list) # Should not crash
366+
367+
results2 = await search_repository.search(search_text="function()")
368+
assert isinstance(results2, list) # Should not crash
369+
370+
results3 = await search_repository.search(search_text="+++malformed+++")
371+
assert isinstance(results3, list) # Should not crash, return empty results
372+
373+
results4 = await search_repository.search(search_text="email@domain.com")
374+
assert isinstance(results4, list) # Should not crash
375+
376+
@pytest.mark.asyncio
377+
async def test_boolean_search_still_works(self, search_repository):
378+
"""Boolean search operations should continue to work."""
379+
# These should not crash and should respect boolean logic
380+
results1 = await search_repository.search(search_text="hello AND world")
381+
assert isinstance(results1, list)
382+
383+
results2 = await search_repository.search(search_text="cat OR dog")
384+
assert isinstance(results2, list)
385+
386+
results3 = await search_repository.search(search_text="project NOT meeting")
387+
assert isinstance(results3, list)

0 commit comments

Comments
 (0)