Skip to content

Commit 90d5754

Browse files
authored
feat: Implement boolean search (#18)
1 parent e6496df commit 90d5754

5 files changed

Lines changed: 169 additions & 36 deletions

File tree

src/basic_memory/mcp/tools/search.py

Lines changed: 14 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ async def search(query: SearchQuery, page: int = 1, page_size: int = 10) -> Sear
2323
Args:
2424
query: SearchQuery object with search parameters including:
2525
- text: Full-text search (e.g., "project planning")
26+
Supports boolean operators: AND, OR, NOT (must be uppercase) and parentheses for grouping
2627
- title: Search only in titles (e.g., "Meeting notes")
2728
- permalink: Exact permalink match (e.g., "docs/meeting-notes")
2829
- permalink_match: Pattern matching for permalinks (e.g., "docs/*-notes")
@@ -33,22 +34,24 @@ async def search(query: SearchQuery, page: int = 1, page_size: int = 10) -> Sear
3334
page_size: The number of results to return per page (default 10)
3435
3536
Returns:
36-
SearchResponse with:
37-
- results: List of matching SearchResult objects with:
38-
- id: Internal ID
39-
- title: Document/entity title
40-
- type: Content type (entity, observation, relation)
41-
- score: Relevance score (higher = more relevant)
42-
- permalink: Permalink for accessing the content
43-
- file_path: File path on disk
44-
- metadata: Additional metadata about the result
45-
- current_page: Current page number
46-
- page_size: Number of results per page
37+
SearchResponse with results and pagination info
4738
4839
Examples:
4940
# Basic text search
5041
results = await search(SearchQuery(text="project planning"))
5142
43+
# Boolean AND search (both terms must be present)
44+
results = await search(SearchQuery(text="project AND planning"))
45+
46+
# Boolean OR search (either term can be present)
47+
results = await search(SearchQuery(text="project OR meeting"))
48+
49+
# Boolean NOT search (exclude terms)
50+
results = await search(SearchQuery(text="project NOT meeting"))
51+
52+
# Boolean search with grouping
53+
results = await search(SearchQuery(text="(project OR planning) AND notes"))
54+
5255
# Search with type filter
5356
results = await search(SearchQuery(
5457
text="meeting notes",

src/basic_memory/repository/search_repository.py

Lines changed: 20 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -94,10 +94,16 @@ def _prepare_search_term(self, term: str, is_prefix: bool = True) -> str:
9494
For FTS5:
9595
- Special characters and phrases need to be quoted
9696
- Terms with spaces or special chars need quotes
97+
- Boolean operators (AND, OR, NOT) and parentheses are preserved
9798
"""
9899
if "*" in term:
99100
return term
100101

102+
# Check for boolean operators - if present, return the term as is
103+
boolean_operators = [" AND ", " OR ", " NOT ", "(", ")"]
104+
if any(op in f" {term} " for op in boolean_operators):
105+
return term
106+
101107
# List of special characters that need quoting (excluding *)
102108
special_chars = ["/", "-", ".", " ", "(", ")", "[", "]", '"', "'"]
103109

@@ -130,9 +136,20 @@ async def search(
130136

131137
# Handle text search for title and content
132138
if search_text:
133-
search_text = self._prepare_search_term(search_text.strip())
134-
params["text"] = search_text
135-
conditions.append("(title MATCH :text OR content_stems MATCH :text)")
139+
has_boolean = any(
140+
op in f" {search_text} " for op in [" AND ", " OR ", " NOT ", "(", ")"]
141+
)
142+
143+
if has_boolean:
144+
# If boolean operators are present, use the raw query
145+
# No need to prepare it, FTS5 will understand the operators
146+
params["text"] = search_text
147+
conditions.append("(title MATCH :text OR content_stems MATCH :text)")
148+
else:
149+
# Standard search with term preparation
150+
processed_text = self._prepare_search_term(search_text.strip())
151+
params["text"] = processed_text
152+
conditions.append("(title MATCH :text OR content_stems MATCH :text)")
136153

137154
# Handle title match search
138155
if title:

src/basic_memory/schemas/search.py

Lines changed: 19 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -28,18 +28,24 @@ class SearchQuery(BaseModel):
2828
Use ONE of these primary search modes:
2929
- permalink: Exact permalink match
3030
- permalink_match: Path pattern with *
31-
- text: Full-text search of title/content
31+
- text: Full-text search of title/content (supports boolean operators: AND, OR, NOT)
3232
3333
Optionally filter results by:
3434
- types: Limit to specific item types
3535
- entity_types: Limit to specific entity types
3636
- after_date: Only items after date
37+
38+
Boolean search examples:
39+
- "python AND flask" - Find items with both terms
40+
- "python OR django" - Find items with either term
41+
- "python NOT django" - Find items with python but not django
42+
- "(python OR flask) AND web" - Use parentheses for grouping
3743
"""
3844

3945
# Primary search modes (use ONE of these)
4046
permalink: Optional[str] = None # Exact permalink match
4147
permalink_match: Optional[str] = None # Glob permalink match
42-
text: Optional[str] = None # Full-text search
48+
text: Optional[str] = None # Full-text search (now supports boolean operators)
4349
title: Optional[str] = None # title only search
4450

4551
# Optional filters
@@ -66,6 +72,17 @@ def no_criteria(self) -> bool:
6672
and self.entity_types is None
6773
)
6874

75+
def has_boolean_operators(self) -> bool:
76+
"""Check if the text query contains boolean operators (AND, OR, NOT) or grouping parentheses."""
77+
if not self.text: # pragma: no cover
78+
return False
79+
80+
# Match the boolean operators only as space-delimited uppercase words
81+
# to avoid matching substrings like "GRAND" containing "AND"
82+
boolean_patterns = [" AND ", " OR ", " NOT ", "(", ")"]
83+
text = f" {self.text} " # Add spaces to ensure we match word boundaries
84+
return any(pattern in text for pattern in boolean_patterns)
85+
6986

7087
class SearchResult(BaseModel):
7188
"""Search result with score and metadata."""
@@ -93,13 +110,3 @@ class SearchResponse(BaseModel):
93110
results: List[SearchResult]
94111
current_page: int
95112
page_size: int
96-
97-
98-
# Schema for future advanced search endpoint
99-
class AdvancedSearchQuery(BaseModel):
100-
"""Advanced full-text search with explicit FTS5 syntax."""
101-
102-
query: str # Raw FTS5 query (e.g., "foo AND bar")
103-
types: Optional[List[SearchItemType]] = None
104-
entity_types: Optional[List[str]] = None
105-
after_date: Optional[Union[datetime, str]] = None

tests/schemas/test_search.py

Lines changed: 0 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,6 @@
77
SearchQuery,
88
SearchResult,
99
SearchResponse,
10-
AdvancedSearchQuery,
1110
)
1211

1312

@@ -118,12 +117,3 @@ def test_search_response():
118117
response = SearchResponse(results=results, current_page=1, page_size=1)
119118
assert len(response.results) == 2
120119
assert response.results[0].score > response.results[1].score
121-
122-
123-
def test_advanced_search():
124-
"""Test advanced search query."""
125-
query = AdvancedSearchQuery(
126-
query="title:search AND content:implementation", types=[SearchItemType.ENTITY]
127-
)
128-
assert query.query == "title:search AND content:implementation"
129-
assert query.types == [SearchItemType.ENTITY]

tests/services/test_search_service.py

Lines changed: 116 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -220,3 +220,119 @@ async def test_update_index(search_service, full_entity):
220220
# Search for new title
221221
results = await search_service.search(SearchQuery(text="OMG I AM UPDATED"))
222222
assert len(results) > 1
223+
224+
225+
@pytest.mark.asyncio
226+
async def test_boolean_and_search(search_service, test_graph):
227+
"""Test boolean AND search."""
228+
# No entities are created here; the searches below rely on existing fixture data
229+
# This assumes the test_graph fixture already has entities with relevant terms
230+
231+
# Test AND operator - both terms must be present
232+
results = await search_service.search(SearchQuery(text="Root AND Entity"))
233+
assert len(results) >= 1
234+
235+
# Verify the result contains both terms
236+
found = False
237+
for result in results:
238+
if (result.title and "Root" in result.title and "Entity" in result.title) or (
239+
result.content_snippet
240+
and "Root" in result.content_snippet
241+
and "Entity" in result.content_snippet
242+
):
243+
found = True
244+
break
245+
assert found, "Boolean AND search failed to find items containing both terms"
246+
247+
# Verify that items with only one term are not returned
248+
results = await search_service.search(SearchQuery(text="NonexistentTerm AND Root"))
249+
assert len(results) == 0, "Boolean AND search returned results when it shouldn't have"
250+
251+
252+
@pytest.mark.asyncio
253+
async def test_boolean_or_search(search_service, test_graph):
254+
"""Test boolean OR search."""
255+
# Test OR operator - either term can be present
256+
results = await search_service.search(SearchQuery(text="Root OR Connected"))
257+
258+
# Should find both "Root Entity" and "Connected Entity"
259+
assert len(results) >= 2
260+
261+
# Verify we find items with either term
262+
root_found = False
263+
connected_found = False
264+
265+
for result in results:
266+
if result.permalink == "test/root":
267+
root_found = True
268+
elif "connected" in result.permalink.lower():
269+
connected_found = True
270+
271+
assert root_found, "Boolean OR search failed to find 'Root' term"
272+
assert connected_found, "Boolean OR search failed to find 'Connected' term"
273+
274+
275+
@pytest.mark.asyncio
276+
async def test_boolean_not_search(search_service, test_graph):
277+
"""Test boolean NOT search."""
278+
# Test NOT operator - exclude certain terms
279+
results = await search_service.search(SearchQuery(text="Entity NOT Connected"))
280+
281+
# Should find "Root Entity" but not "Connected Entity"
282+
for result in results:
283+
assert "connected" not in result.permalink.lower(), (
284+
"Boolean NOT search returned excluded term"
285+
)
286+
287+
288+
@pytest.mark.asyncio
289+
async def test_boolean_group_search(search_service, test_graph):
290+
"""Test boolean grouping with parentheses."""
291+
# Test grouping - (A OR B) AND C
292+
results = await search_service.search(SearchQuery(title="(Root OR Connected) AND Entity"))
293+
294+
# Should find both entities that contain "Entity" and either "Root" or "Connected"
295+
assert len(results) >= 2
296+
297+
for result in results:
298+
# Each result should contain "Entity" and either "Root" or "Connected"
299+
contains_entity = "entity" in result.title.lower()
300+
contains_root_or_connected = (
301+
"root" in result.title.lower() or "connected" in result.title.lower()
302+
)
303+
304+
assert contains_entity and contains_root_or_connected, (
305+
"Boolean grouped search returned incorrect results"
306+
)
307+
308+
309+
@pytest.mark.asyncio
310+
async def test_boolean_operators_detection(search_service):
311+
"""Test detection of boolean operators in query."""
312+
# Test various queries that should be detected as boolean
313+
boolean_queries = [
314+
"term1 AND term2",
315+
"term1 OR term2",
316+
"term1 NOT term2",
317+
"(term1 OR term2) AND term3",
318+
"complex (nested OR grouping) AND term",
319+
]
320+
321+
for query_text in boolean_queries:
322+
query = SearchQuery(text=query_text)
323+
assert query.has_boolean_operators(), f"Failed to detect boolean operators in: {query_text}"
324+
325+
# Test queries that should not be detected as boolean
326+
non_boolean_queries = [
327+
"normal search query",
328+
"brand name", # Should not detect "AND" within "brand"
329+
"understand this concept", # Should not detect "AND" within "understand"
330+
"command line",
331+
"sandbox testing",
332+
]
333+
334+
for query_text in non_boolean_queries:
335+
query = SearchQuery(text=query_text)
336+
assert not query.has_boolean_operators(), (
337+
f"Incorrectly detected boolean operators in: {query_text}"
338+
)

0 commit comments

Comments
 (0)