Skip to content

Commit 8c05a9e

Browse files
committed
fix: stabilize semantic search defaults, FTS fallback, and postgres project sync
Signed-off-by: phernandez <paul@basicmachines.co>
1 parent a6d8d4c commit 8c05a9e

14 files changed

Lines changed: 691 additions & 36 deletions

File tree

.github/workflows/test.yml

Lines changed: 35 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -102,6 +102,41 @@ jobs:
102102
uv pip install pytest pytest-cov
103103
just test-postgres
104104
105+
test-semantic:
106+
name: Test Semantic (Python 3.12)
107+
timeout-minutes: 45
108+
runs-on: ubuntu-latest
109+
110+
steps:
111+
- uses: actions/checkout@v4
112+
with:
113+
submodules: true
114+
115+
- name: Set up Python 3.12
116+
uses: actions/setup-python@v4
117+
with:
118+
python-version: "3.12"
119+
cache: "pip"
120+
121+
- name: Install uv
122+
run: |
123+
pip install uv
124+
125+
- uses: extractions/setup-just@v3
126+
127+
- name: Create virtual env
128+
run: |
129+
uv venv
130+
131+
- name: Install dependencies
132+
run: |
133+
uv pip install -e ".[dev,semantic]"
134+
135+
- name: Run tests (Semantic)
136+
run: |
137+
uv pip install pytest pytest-cov
138+
just test-semantic
139+
105140
coverage:
106141
name: Coverage Summary (combined, Python 3.12)
107142
timeout-minutes: 30

justfile

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -209,6 +209,9 @@ update-deps:
209209
# Run all code quality checks and tests
210210
check: lint format typecheck test
211211

212+
# Run all code quality checks and all test suites, including semantic benchmarks
213+
check-all: lint format typecheck test test-semantic
214+
212215
# Generate Alembic migration with descriptive message
213216
migration message:
214217
cd src/basic_memory/alembic && alembic revision --autogenerate -m "{{message}}"

src/basic_memory/config.py

Lines changed: 7 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,6 @@
11
"""Configuration management for basic-memory."""
22

3+
import importlib.util
34
import json
45
import os
56
from dataclasses import dataclass
@@ -38,6 +39,11 @@ class DatabaseBackend(str, Enum):
3839
POSTGRES = "postgres"
3940

4041

42+
def _default_semantic_search_enabled() -> bool:
43+
"""Enable semantic search by default when semantic extras are installed."""
44+
return importlib.util.find_spec("fastembed") is not None
45+
46+
4147
@dataclass
4248
class ProjectConfig:
4349
"""Configuration for a specific basic-memory project."""
@@ -138,7 +144,7 @@ class BasicMemoryConfig(BaseSettings):
138144

139145
# Semantic search configuration
140146
semantic_search_enabled: bool = Field(
141-
default=False,
147+
default_factory=_default_semantic_search_enabled,
142148
description="Enable semantic search (vector/hybrid retrieval). Works on both SQLite and Postgres backends. Requires semantic extras.",
143149
)
144150
semantic_embedding_provider: str = Field(

src/basic_memory/mcp/clients/knowledge.py

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -224,11 +224,12 @@ async def delete_directory(self, directory: str) -> DirectoryDeleteResult:
224224

225225
# --- Resolution ---
226226

227-
async def resolve_entity(self, identifier: str) -> str:
227+
async def resolve_entity(self, identifier: str, *, strict: bool = False) -> str:
228228
"""Resolve a string identifier to an entity external_id.
229229
230230
Args:
231231
identifier: The identifier to resolve (permalink, title, or path)
232+
strict: If True, require exact matching (no fuzzy fallback)
232233
233234
Returns:
234235
The resolved entity external_id (UUID)
@@ -239,7 +240,7 @@ async def resolve_entity(self, identifier: str) -> str:
239240
response = await call_post(
240241
self.http_client,
241242
f"{self._base_path}/resolve",
242-
json={"identifier": identifier},
243+
json={"identifier": identifier, "strict": strict},
243244
)
244245
data = response.json()
245246
return data["external_id"]

src/basic_memory/mcp/tools/read_note.py

Lines changed: 22 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -14,6 +14,11 @@
1414
from basic_memory.utils import validate_project_path
1515

1616

17+
def _is_exact_title_match(identifier: str, title: str) -> bool:
18+
"""Return True when identifier exactly matches a title (case-insensitive)."""
19+
return identifier.strip().casefold() == title.strip().casefold()
20+
21+
1722
@mcp.tool(
1823
description="Read a markdown note by title or permalink.",
1924
# TODO: re-enable once MCP client rendering is working
@@ -118,7 +123,7 @@ async def read_note(
118123

119124
try:
120125
# Try to resolve identifier to entity ID
121-
entity_id = await knowledge_client.resolve_entity(entity_path)
126+
entity_id = await knowledge_client.resolve_entity(entity_path, strict=True)
122127

123128
# Fetch content using entity ID
124129
response = await resource_client.read(entity_id, page=page, page_size=page_size)
@@ -148,17 +153,29 @@ async def read_note(
148153

149154
# Handle both SearchResponse object and error strings
150155
if title_results and hasattr(title_results, "results") and title_results.results:
151-
result = title_results.results[0] # Get the first/best match
152-
if result.permalink:
156+
# Trigger: direct resolution failed and title search returned candidates.
157+
# Why: avoid returning unrelated notes when search yields only fuzzy matches.
158+
# Outcome: fetch content only when a true exact title match exists.
159+
result = next(
160+
(
161+
candidate
162+
for candidate in title_results.results
163+
if _is_exact_title_match(identifier, candidate.title)
164+
),
165+
None,
166+
)
167+
if not result:
168+
logger.info(f"No exact title match found for: {identifier}")
169+
elif result.permalink:
153170
try:
154171
# Resolve the permalink to entity ID
155-
entity_id = await knowledge_client.resolve_entity(result.permalink)
172+
entity_id = await knowledge_client.resolve_entity(result.permalink, strict=True)
156173

157174
# Fetch content using the entity ID
158175
response = await resource_client.read(entity_id, page=page, page_size=page_size)
159176

160177
if response.status_code == 200:
161-
logger.info(f"Found note by title search: {result.permalink}")
178+
logger.info(f"Found note by exact title search: {result.permalink}")
162179
if output_format in ("ascii", "ansi"):
163180
return format_note_preview_ascii(
164181
response.text,

src/basic_memory/mcp/tools/search.py

Lines changed: 21 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -6,6 +6,7 @@
66
from loguru import logger
77
from fastmcp import Context
88

9+
from basic_memory.config import ConfigManager
910
from basic_memory.mcp.container import get_container
1011
from basic_memory.mcp.project_context import get_project_client, resolve_project_and_path
1112
from basic_memory.mcp.formatting import format_search_results_ascii
@@ -18,6 +19,17 @@
1819
)
1920

2021

22+
def _semantic_search_enabled_for_text_search() -> bool:
    """Report the semantic-search setting for both MCP and CLI invocation paths.

    Returns:
        The ``semantic_search_enabled`` value from the container's config, or
        from a freshly loaded ``ConfigManager`` config when reading the
        container raises ``RuntimeError``.
    """
    try:
        enabled = get_container().config.semantic_search_enabled
    except RuntimeError:
        # Trigger: the MCP container has not been initialized, as happens when
        #          a tool is invoked directly (e.g. `bm tool search-notes`).
        # Why: the CLI path must honor the same semantic default as the server.
        # Outcome: read the setting straight from config so text-mode retrieval
        #          behaves the same in both paths.
        enabled = ConfigManager().config.semantic_search_enabled
    return enabled
31+
32+
2133
def _format_search_error_response(
2234
project: str, error_message: str, query: str, search_type: str = "text"
2335
) -> str:
@@ -268,7 +280,8 @@ async def search_notes(
268280
- `search_notes("work-docs", "'exact phrase'")` - Search for exact phrase match
269281
270282
### Advanced Boolean Searches
271-
- `search_notes("my-project", "term1 term2")` - Find content with both terms (implicit AND)
283+
- `search_notes("my-project", "term1 term2")` - Strict implicit-AND first; retries with
284+
relaxed OR terms only if strict search returns no results
272285
- `search_notes("my-project", "term1 AND term2")` - Explicit AND search (both terms required)
273286
- `search_notes("my-project", "term1 OR term2")` - Either term can be present
274287
- `search_notes("my-project", "term1 NOT term2")` - Include term1 but exclude term2
@@ -282,7 +295,8 @@ async def search_notes(
282295
### Search Type Examples
283296
- `search_notes("my-project", "Meeting", search_type="title")` - Search only in titles
284297
- `search_notes("work-docs", "docs/meeting-*", search_type="permalink")` - Pattern match permalinks
285-
- `search_notes("research", "keyword", search_type="text")` - Full-text search (default)
298+
- `search_notes("research", "keyword", search_type="text")` - Text search (default; auto-upgrades
299+
to hybrid when semantic search is enabled)
286300
287301
### Filtering Options
288302
- `search_notes("my-project", "query", types=["entity"])` - Search only entities
@@ -325,7 +339,8 @@ async def search_notes(
325339
page: The page number of results to return (default 1)
326340
page_size: The number of results to return per page (default 10)
327341
search_type: Type of search to perform, one of:
328-
"text", "title", "permalink", "vector", "semantic", "hybrid" (default: "text")
342+
"text", "title", "permalink", "vector", "semantic", "hybrid" (default: "text";
343+
text mode auto-upgrades to hybrid when semantic search is enabled)
329344
output_format: "default" returns structured data, "ascii" returns a plain text table,
330345
"ansi" returns a colorized table for TUI clients.
331346
types: Optional list of note types to search (e.g., ["note", "person"])
@@ -345,6 +360,7 @@ async def search_notes(
345360
Examples:
346361
# Basic text search
347362
results = await search_notes("project planning")
363+
# Plain multi-term text uses strict matching first, then relaxed OR fallback if needed
348364
349365
# Boolean AND search (both terms must be present)
350366
results = await search_notes("project AND planning")
@@ -424,12 +440,8 @@ async def search_notes(
424440
search_query.text = query
425441
# Upgrade to hybrid when semantic search is available —
426442
# combines FTS keyword matching with vector similarity for better results
427-
try:
428-
container = get_container()
429-
if container.config.semantic_search_enabled:
430-
search_query.retrieval_mode = SearchRetrievalMode.HYBRID
431-
except RuntimeError:
432-
pass # Container not initialized (e.g., CLI context) — stay with FTS
443+
if _semantic_search_enabled_for_text_search():
444+
search_query.retrieval_mode = SearchRetrievalMode.HYBRID
433445
elif search_type in ("vector", "semantic"):
434446
search_query.text = query
435447
search_query.retrieval_mode = SearchRetrievalMode.VECTOR

src/basic_memory/services/project_service.py

Lines changed: 5 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -205,9 +205,9 @@ async def add_project(self, name: str, path: str, set_default: bool = False) ->
205205
f"Projects cannot share directory trees."
206206
)
207207

208-
if self.config_manager.config.database_backend != DatabaseBackend.POSTGRES:
209-
# First add to config file (this will validate the project doesn't exist)
210-
self.config_manager.add_project(name, resolved_path)
208+
# First add to config file (this validates project uniqueness and keeps
209+
# config + database aligned for all backends).
210+
self.config_manager.add_project(name, resolved_path)
211211

212212
# Then add to database
213213
project_data = {
@@ -307,9 +307,8 @@ async def set_default_project(self, name: str) -> None:
307307
# Update database
308308
await self.repository.set_as_default(project.id)
309309

310-
# Update config file only in local mode (cloud mode uses database only)
311-
if self.config_manager.config.database_backend != DatabaseBackend.POSTGRES:
312-
self.config_manager.set_default_project(name)
310+
# Keep config and database default project in sync for all backends.
311+
self.config_manager.set_default_project(name)
313312

314313
logger.info(f"Project '{name}' set as default in configuration and database")
315314

0 commit comments

Comments
 (0)