diff --git a/src/basic_memory/cli/commands/cloud/rclone_commands.py b/src/basic_memory/cli/commands/cloud/rclone_commands.py index 09cb16ce..463b43f1 100644 --- a/src/basic_memory/cli/commands/cloud/rclone_commands.py +++ b/src/basic_memory/cli/commands/cloud/rclone_commands.py @@ -20,6 +20,7 @@ from rich.console import Console from basic_memory.cli.commands.cloud.rclone_installer import is_rclone_installed +from basic_memory.config import resolve_data_dir from basic_memory.utils import normalize_project_path console = Console() @@ -138,13 +139,16 @@ def get_bmignore_filter_path() -> Path: def get_project_bisync_state(project_name: str) -> Path: """Get path to project's bisync state directory. + Honors ``BASIC_MEMORY_CONFIG_DIR`` so isolated instances each keep their + own bisync state alongside their config. + Args: project_name: Name of the project Returns: Path to bisync state directory for this project """ - return Path.home() / ".basic-memory" / "bisync-state" / project_name + return resolve_data_dir() / "bisync-state" / project_name def bisync_initialized(project_name: str) -> bool: diff --git a/src/basic_memory/config.py b/src/basic_memory/config.py index 61eb4556..dbdd651b 100644 --- a/src/basic_memory/config.py +++ b/src/basic_memory/config.py @@ -50,6 +50,44 @@ def _default_semantic_search_enabled() -> bool: ) +def resolve_data_dir() -> Path: + """Resolve the Basic Memory data directory. + + Single source of truth for the per-user state directory. Honors + ``BASIC_MEMORY_CONFIG_DIR`` so each process/worktree can isolate config + and database state; otherwise falls back to ``/.basic-memory``. + + Cross-platform: ``Path.home()`` reads ``$HOME`` on POSIX and + ``%USERPROFILE%`` on Windows, so there's no need to check ``$HOME`` + explicitly here. + """ + if config_dir := os.getenv("BASIC_MEMORY_CONFIG_DIR"): + return Path(config_dir) + return Path.home() / DATA_DIR_NAME + + +def default_fastembed_cache_dir() -> str: + """Return the default cache directory used for FastEmbed model artifacts. + + Resolution order: + 1. ``FASTEMBED_CACHE_PATH`` env var — honors FastEmbed's own convention + so users who already configure it through the environment keep working. + 2. ``/fastembed_cache`` — the same stable, + user-writable directory Basic Memory already uses for config and + the default SQLite database. Honors ``BASIC_MEMORY_CONFIG_DIR``. + + Why not ``tempfile.gettempdir()``? + FastEmbed's own default is ``/fastembed_cache``, which is + ephemeral in many sandboxed MCP runtimes (e.g. Codex CLI wipes /tmp + between invocations). The model then disappears and every subsequent + ONNX load raises ``NO_SUCHFILE``. Persisting the cache under the + per-user data directory works identically on macOS, Linux, and Windows. + """ + if env_override := os.getenv("FASTEMBED_CACHE_PATH"): + return env_override + return str(resolve_data_dir() / "fastembed_cache") + + @dataclass class ProjectConfig: """Configuration for a specific basic-memory project.""" @@ -222,7 +260,13 @@ def __init__(self, **data: Any) -> None: ... ) semantic_embedding_cache_dir: str | None = Field( default=None, - description="Optional cache directory for FastEmbed model artifacts.", + description=( + "Optional override for the FastEmbed model cache directory. " + "When unset, Basic Memory resolves this at runtime to " + "/fastembed_cache (or FASTEMBED_CACHE_PATH " + "when that env var is set) so the model persists across runs " + "without hardcoding a path into config.json." + ), ) semantic_embedding_threads: int | None = Field( default=None, @@ -709,11 +753,7 @@ def ensure_project_paths_exists(self) -> "BasicMemoryConfig": # pragma: no cove @property def data_dir_path(self) -> Path: """Get app state directory for config and default SQLite database.""" - if config_dir := os.getenv("BASIC_MEMORY_CONFIG_DIR"): - return Path(config_dir) - - home = os.getenv("HOME", Path.home()) - return Path(home) / DATA_DIR_NAME + return resolve_data_dir() # Module-level cache for configuration @@ -731,16 +771,7 @@ class ConfigManager: def __init__(self) -> None: """Initialize the configuration manager.""" - home = os.getenv("HOME", Path.home()) - if isinstance(home, str): - home = Path(home) - - # Allow override via environment variable - if config_dir := os.getenv("BASIC_MEMORY_CONFIG_DIR"): - self.config_dir = Path(config_dir) - else: - self.config_dir = home / DATA_DIR_NAME - + self.config_dir = resolve_data_dir() self.config_file = self.config_dir / CONFIG_FILE_NAME # Ensure config directory exists diff --git a/src/basic_memory/ignore_utils.py b/src/basic_memory/ignore_utils.py index e68fc5a7..df43f3ed 100644 --- a/src/basic_memory/ignore_utils.py +++ b/src/basic_memory/ignore_utils.py @@ -4,6 +4,8 @@ from pathlib import Path from typing import Set +from basic_memory.config import resolve_data_dir + # Common directories and patterns to ignore by default # These are used as fallback if .bmignore doesn't exist @@ -61,9 +63,11 @@ def get_bmignore_path() -> Path: """Get path to .bmignore file. Returns: - Path to ~/.basic-memory/.bmignore + Path to /.bmignore, honoring + ``BASIC_MEMORY_CONFIG_DIR`` so isolated instances each keep their + own ignore file. """ - return Path.home() / ".basic-memory" / ".bmignore" + return resolve_data_dir() / ".bmignore" def create_default_bmignore() -> None: @@ -176,7 +180,8 @@ def load_gitignore_patterns(base_path: Path, use_gitignore: bool = True) -> Set[ """Load gitignore patterns from .gitignore file and .bmignore. Combines patterns from: - 1. ~/.basic-memory/.bmignore (user's global ignore patterns) + 1. /.bmignore (user's global ignore patterns, honors + BASIC_MEMORY_CONFIG_DIR) 2. {base_path}/.gitignore (project-specific patterns, if use_gitignore=True) Args: diff --git a/src/basic_memory/repository/embedding_provider_factory.py b/src/basic_memory/repository/embedding_provider_factory.py index a0e7dbcb..8f8b13d5 100644 --- a/src/basic_memory/repository/embedding_provider_factory.py +++ b/src/basic_memory/repository/embedding_provider_factory.py @@ -3,7 +3,7 @@ import os from threading import Lock -from basic_memory.config import BasicMemoryConfig +from basic_memory.config import BasicMemoryConfig, default_fastembed_cache_dir from basic_memory.repository.embedding_provider import EmbeddingProvider type ProviderCacheKey = tuple[ @@ -12,7 +12,7 @@ int | None, int, int, - str | None, + str, int | None, int | None, ] @@ -22,6 +22,20 @@ _FASTEMBED_MAX_THREADS = 8 +def _resolve_cache_dir(app_config: BasicMemoryConfig) -> str: + """Resolve the effective FastEmbed cache dir for this config. + + Uses an explicit ``is not None`` check — an empty string override from + config or ``BASIC_MEMORY_SEMANTIC_EMBEDDING_CACHE_DIR`` is an invalid + path, not a request to fall back to the default, and FastEmbed's error + message is clearer than silently swapping in a different directory. + """ + configured = app_config.semantic_embedding_cache_dir + if configured is not None: + return configured + return default_fastembed_cache_dir() + + def _available_cpu_count() -> int | None: """Return the CPU budget available to this process when the runtime exposes it.""" process_cpu_count = getattr(os, "process_cpu_count", None) @@ -61,7 +75,12 @@ def _resolve_fastembed_runtime_knobs( def _provider_cache_key(app_config: BasicMemoryConfig) -> ProviderCacheKey: - """Build a stable cache key from provider-relevant semantic embedding config.""" + """Build a stable cache key from provider-relevant semantic embedding config. + + Uses the *resolved* cache dir — not the raw config field — so different + FASTEMBED_CACHE_PATH values produce distinct cache keys even when the + config field itself is unset. + """ resolved_threads, resolved_parallel = _resolve_fastembed_runtime_knobs(app_config) return ( app_config.semantic_embedding_provider.strip().lower(), @@ -69,7 +88,7 @@ def _provider_cache_key(app_config: BasicMemoryConfig) -> ProviderCacheKey: app_config.semantic_embedding_dimensions, app_config.semantic_embedding_batch_size, app_config.semantic_embedding_request_concurrency, - app_config.semantic_embedding_cache_dir, + _resolve_cache_dir(app_config), resolved_threads, resolved_parallel, ) @@ -103,8 +122,12 @@ def create_embedding_provider(app_config: BasicMemoryConfig) -> EmbeddingProvide from basic_memory.repository.fastembed_provider import FastEmbedEmbeddingProvider resolved_threads, resolved_parallel = _resolve_fastembed_runtime_knobs(app_config) - if app_config.semantic_embedding_cache_dir is not None: - extra_kwargs["cache_dir"] = app_config.semantic_embedding_cache_dir + # Trigger: cache_dir is resolved rather than passed through directly. + # Why: FastEmbed's own default caches to /fastembed_cache, + # which disappears in sandboxed MCP runtimes (e.g. Codex CLI). See #741. + # Outcome: always pass an explicit, user-writable cache dir so the ONNX + # model persists across runs. + extra_kwargs["cache_dir"] = _resolve_cache_dir(app_config) if resolved_threads is not None: extra_kwargs["threads"] = resolved_threads if resolved_parallel is not None: diff --git a/src/basic_memory/services/project_service.py b/src/basic_memory/services/project_service.py index 6ede7df2..e01d43b5 100644 --- a/src/basic_memory/services/project_service.py +++ b/src/basic_memory/services/project_service.py @@ -1137,12 +1137,10 @@ def get_system_status(self) -> SystemStatus: # Get watch service status if available watch_status = None - watch_status_path = Path.home() / ".basic-memory" / WATCH_STATUS_JSON + watch_status_path = self.config_manager.config.data_dir_path / WATCH_STATUS_JSON if watch_status_path.exists(): - try: # pragma: no cover - watch_status = json.loads( # pragma: no cover - watch_status_path.read_text(encoding="utf-8") - ) + try: + watch_status = json.loads(watch_status_path.read_text(encoding="utf-8")) except Exception: # pragma: no cover pass diff --git a/src/basic_memory/sync/watch_service.py b/src/basic_memory/sync/watch_service.py index 4a7d29b7..549fe4a2 100644 --- a/src/basic_memory/sync/watch_service.py +++ b/src/basic_memory/sync/watch_service.py @@ -89,7 +89,7 @@ def __init__( self.app_config = app_config self.project_repository = project_repository self.state = WatchServiceState() - self.status_path = Path.home() / ".basic-memory" / WATCH_STATUS_JSON + self.status_path = app_config.data_dir_path / WATCH_STATUS_JSON self.status_path.parent.mkdir(parents=True, exist_ok=True) self._ignore_patterns_cache: dict[Path, Set[str]] = {} self._sync_service_factory = sync_service_factory diff --git a/src/basic_memory/utils.py b/src/basic_memory/utils.py index 1dceb745..74b02896 100644 --- a/src/basic_memory/utils.py +++ b/src/basic_memory/utils.py @@ -262,7 +262,8 @@ def setup_logging( Args: log_level: DEBUG, INFO, WARNING, ERROR - log_to_file: Write to ~/.basic-memory/basic-memory.log with rotation + log_to_file: Write to /basic-memory.log with rotation + (honors BASIC_MEMORY_CONFIG_DIR) log_to_stdout: Write to stderr (for Docker/cloud deployments) structured_context: Bind tenant_id, fly_region, etc. for cloud observability """ @@ -281,7 +282,11 @@ def setup_logging( # Why: multiple basic-memory processes can share the same log directory at once. # Outcome: use per-process log files on Windows so log rotation stays local. log_filename = f"basic-memory-{os.getpid()}.log" if os.name == "nt" else "basic-memory.log" - log_path = Path.home() / ".basic-memory" / log_filename + # Deferred import: basic_memory.config imports from this module at load time, + # so resolving the data dir via a top-level import would cycle. + from basic_memory.config import resolve_data_dir + + log_path = resolve_data_dir() / log_filename log_path.parent.mkdir(parents=True, exist_ok=True) if os.name == "nt": _cleanup_windows_log_files(log_path.parent, log_path.name) diff --git a/test-int/conftest.py b/test-int/conftest.py index 4a961063..5ceaa689 100644 --- a/test-int/conftest.py +++ b/test-int/conftest.py @@ -307,7 +307,13 @@ async def test_project(config_home, engine_factory) -> Project: @pytest.fixture def config_home(tmp_path, monkeypatch) -> Path: + # Patch both HOME and USERPROFILE so Path.home() returns the test dir on + # every platform — Path.home() reads HOME on POSIX and USERPROFILE on + # Windows, and ConfigManager.data_dir_path now goes through Path.home() + # via resolve_data_dir(). Must mirror tests/conftest.py:config_home. monkeypatch.setenv("HOME", str(tmp_path)) + if os.name == "nt": + monkeypatch.setenv("USERPROFILE", str(tmp_path)) # Set BASIC_MEMORY_HOME to the test directory monkeypatch.setenv("BASIC_MEMORY_HOME", str(tmp_path / "basic-memory")) return tmp_path diff --git a/tests/cli/test_ignore_utils.py b/tests/cli/test_ignore_utils.py index b6470b6c..c86146e4 100644 --- a/tests/cli/test_ignore_utils.py +++ b/tests/cli/test_ignore_utils.py @@ -5,12 +5,29 @@ from basic_memory.ignore_utils import ( DEFAULT_IGNORE_PATTERNS, + get_bmignore_path, load_gitignore_patterns, should_ignore_path, filter_files, ) +def test_get_bmignore_path_honors_basic_memory_config_dir(tmp_path, monkeypatch): + """Regression guard for #742: .bmignore must follow BASIC_MEMORY_CONFIG_DIR.""" + custom_dir = tmp_path / "instance-y" / "state" + monkeypatch.setenv("BASIC_MEMORY_CONFIG_DIR", str(custom_dir)) + + assert get_bmignore_path() == custom_dir / ".bmignore" + + +def test_get_bmignore_path_defaults_under_home(tmp_path, monkeypatch): + """Without BASIC_MEMORY_CONFIG_DIR, .bmignore lives under ~/.basic-memory.""" + monkeypatch.delenv("BASIC_MEMORY_CONFIG_DIR", raising=False) + monkeypatch.setattr(Path, "home", classmethod(lambda cls: tmp_path)) + + assert get_bmignore_path() == tmp_path / ".basic-memory" / ".bmignore" + + def test_load_default_patterns_only(): """Test loading default patterns when no .gitignore exists.""" with tempfile.TemporaryDirectory() as temp_dir: diff --git a/tests/repository/test_openai_provider.py b/tests/repository/test_openai_provider.py index b55d0c41..f40c0133 100644 --- a/tests/repository/test_openai_provider.py +++ b/tests/repository/test_openai_provider.py @@ -248,6 +248,7 @@ def test_embedding_provider_factory_uses_provider_defaults_when_dimensions_not_s def test_embedding_provider_factory_forwards_fastembed_runtime_knobs(): """Factory should forward FastEmbed runtime tuning config fields.""" + reset_embedding_provider_cache() config = BasicMemoryConfig( env="test", projects={"test-project": "/tmp/basic-memory-test"}, @@ -265,6 +266,66 @@ def test_embedding_provider_factory_forwards_fastembed_runtime_knobs(): assert provider.parallel == 2 +def test_embedding_provider_factory_uses_default_cache_dir_when_unset(config_home, monkeypatch): + """Factory should pass the data-dir-relative default when cache_dir is None. + + Legacy configs that carry an explicit ``semantic_embedding_cache_dir: null`` + must still get a user-writable cache path rather than letting FastEmbed fall + back to ``/fastembed_cache``. See #741. + """ + monkeypatch.delenv("BASIC_MEMORY_CONFIG_DIR", raising=False) + monkeypatch.delenv("FASTEMBED_CACHE_PATH", raising=False) + reset_embedding_provider_cache() + + config = BasicMemoryConfig( + env="test", + projects={"test-project": str(config_home / "project")}, + default_project="test-project", + semantic_search_enabled=True, + semantic_embedding_provider="fastembed", + semantic_embedding_cache_dir=None, + ) + + provider = create_embedding_provider(config) + assert isinstance(provider, FastEmbedEmbeddingProvider) + expected = str(config_home / ".basic-memory" / "fastembed_cache") + assert provider.cache_dir == expected + + +def test_embedding_provider_factory_cache_key_reflects_resolved_cache_dir( + config_home, tmp_path, monkeypatch +): + """Changing FASTEMBED_CACHE_PATH must yield a distinct cached provider. + + The provider cache key uses the *resolved* cache dir rather than the raw + (nullable) config field, so env-driven path changes invalidate the cache + instead of silently returning a stale provider pointing at the old path. + """ + monkeypatch.delenv("BASIC_MEMORY_CONFIG_DIR", raising=False) + monkeypatch.delenv("FASTEMBED_CACHE_PATH", raising=False) + reset_embedding_provider_cache() + + base_kwargs = dict( + env="test", + projects={"test-project": str(config_home / "project")}, + default_project="test-project", + semantic_search_enabled=True, + semantic_embedding_provider="fastembed", + semantic_embedding_cache_dir=None, + ) + + provider_a = create_embedding_provider(BasicMemoryConfig(**base_kwargs)) + assert isinstance(provider_a, FastEmbedEmbeddingProvider) + + monkeypatch.setenv("FASTEMBED_CACHE_PATH", str(tmp_path / "alt-cache")) + provider_b = create_embedding_provider(BasicMemoryConfig(**base_kwargs)) + + assert isinstance(provider_b, FastEmbedEmbeddingProvider) + assert provider_b is not provider_a + assert provider_a.cache_dir == str(config_home / ".basic-memory" / "fastembed_cache") + assert provider_b.cache_dir == str(tmp_path / "alt-cache") + + def test_fastembed_provider_reports_runtime_log_attrs(): """FastEmbed should expose the resolved runtime knobs for batch startup logs.""" provider = FastEmbedEmbeddingProvider(batch_size=128, threads=4, parallel=2) diff --git a/tests/services/test_project_service.py b/tests/services/test_project_service.py index c4c0cb26..57063f18 100644 --- a/tests/services/test_project_service.py +++ b/tests/services/test_project_service.py @@ -126,6 +126,27 @@ async def test_get_system_status(project_service: ProjectService): assert status.database_size +@pytest.mark.asyncio +async def test_get_system_status_reads_watch_status_from_config_dir( + project_service: ProjectService, tmp_path, monkeypatch +): + """Regression guard for #742: watch-status.json is read from the configured + data dir, not hardcoded to ~/.basic-memory.""" + import json as _json + from basic_memory.config import WATCH_STATUS_JSON + + custom_dir = tmp_path / "instance-v" / "state" + custom_dir.mkdir(parents=True) + (custom_dir / WATCH_STATUS_JSON).write_text( + _json.dumps({"running": True, "error_count": 7}), encoding="utf-8" + ) + monkeypatch.setenv("BASIC_MEMORY_CONFIG_DIR", str(custom_dir)) + + status = project_service.get_system_status() + + assert status.watch_status == {"running": True, "error_count": 7} + + @pytest.mark.asyncio async def test_get_statistics(project_service: ProjectService, test_graph, test_project): """Test getting statistics.""" diff --git a/tests/sync/test_watch_service.py b/tests/sync/test_watch_service.py index ee659a6f..5ea4b237 100644 --- a/tests/sync/test_watch_service.py +++ b/tests/sync/test_watch_service.py @@ -3,12 +3,14 @@ import asyncio import json from pathlib import Path +from unittest.mock import MagicMock import pytest from watchfiles import Change +from basic_memory.config import BasicMemoryConfig, WATCH_STATUS_JSON from basic_memory.models.project import Project -from basic_memory.sync.watch_service import WatchServiceState +from basic_memory.sync.watch_service import WatchService, WatchServiceState async def create_test_file(path: Path, content: str = "test content") -> None: @@ -25,6 +27,23 @@ def test_watch_service_init(watch_service, project_config): assert watch_service.status_path.parent.exists() +def test_watch_service_status_path_honors_basic_memory_config_dir(tmp_path, monkeypatch): + """Regression guard for #742: watch-status.json follows BASIC_MEMORY_CONFIG_DIR. + + WatchService previously hardcoded ``Path.home() / ".basic-memory"`` which + split state across instances running under an isolated config dir. Ensure + the status path now lives under the configured data dir. + """ + custom_dir = tmp_path / "instance-z" / "state" + monkeypatch.setenv("BASIC_MEMORY_CONFIG_DIR", str(custom_dir)) + + app_config = BasicMemoryConfig(projects={"main": {"path": str(tmp_path / "project")}}) + service = WatchService(app_config=app_config, project_repository=MagicMock()) + + assert service.status_path == custom_dir / WATCH_STATUS_JSON + assert service.status_path.parent.exists() + + def test_state_add_event(): """Test adding events to state.""" state = WatchServiceState() diff --git a/tests/test_config.py b/tests/test_config.py index b852ead5..e394aaf4 100644 --- a/tests/test_config.py +++ b/tests/test_config.py @@ -10,6 +10,8 @@ ConfigManager, ProjectEntry, ProjectMode, + default_fastembed_cache_dir, + resolve_data_dir, ) from pathlib import Path @@ -129,6 +131,54 @@ def test_app_database_path_defaults_to_home_data_dir(self, config_home, monkeypa assert config.data_dir_path == config_home / ".basic-memory" assert config.app_database_path == config_home / ".basic-memory" / "memory.db" + def test_semantic_embedding_cache_dir_field_stays_none_by_default( + self, config_home, monkeypatch + ): + """The raw config field stays None so it isn't persisted into config.json. + + Resolution to a concrete path happens in embedding_provider_factory at + provider construction time, so ``BASIC_MEMORY_CONFIG_DIR`` and + ``FASTEMBED_CACHE_PATH`` changes take effect on every run instead of + being frozen by the first save. See #741. + """ + monkeypatch.delenv("BASIC_MEMORY_CONFIG_DIR", raising=False) + monkeypatch.delenv("FASTEMBED_CACHE_PATH", raising=False) + + config = BasicMemoryConfig() + + assert config.semantic_embedding_cache_dir is None + + def test_semantic_embedding_cache_dir_not_persisted_in_model_dump( + self, config_home, monkeypatch + ): + """model_dump must not bake a resolved cache path into config.json. + + Regression guard for #741: persisting the default would freeze stale + paths when users later change BASIC_MEMORY_CONFIG_DIR or + FASTEMBED_CACHE_PATH. + """ + monkeypatch.delenv("BASIC_MEMORY_CONFIG_DIR", raising=False) + monkeypatch.delenv("FASTEMBED_CACHE_PATH", raising=False) + + dumped = BasicMemoryConfig().model_dump(mode="json") + + assert dumped["semantic_embedding_cache_dir"] is None + + def test_semantic_embedding_cache_dir_explicit_user_value_preserved( + self, config_home, monkeypatch + ): + """An explicit user override still round-trips through model_dump.""" + monkeypatch.delenv("BASIC_MEMORY_CONFIG_DIR", raising=False) + monkeypatch.delenv("FASTEMBED_CACHE_PATH", raising=False) + + config = BasicMemoryConfig(semantic_embedding_cache_dir="/custom/explicit/path") + + assert config.semantic_embedding_cache_dir == "/custom/explicit/path" + assert ( + config.model_dump(mode="json")["semantic_embedding_cache_dir"] + == "/custom/explicit/path" + ) + def test_explicit_default_project_preserved(self, config_home, monkeypatch): """Test that a valid explicit default_project is not overwritten by model_post_init.""" monkeypatch.delenv("BASIC_MEMORY_HOME", raising=False) @@ -252,6 +302,65 @@ def test_config_file_without_default_project_key(self, config_home, monkeypatch) assert loaded.default_project == "work" +class TestDataDirHelpers: + """Module-level helpers that resolve the Basic Memory data directory.""" + + def test_resolve_data_dir_defaults_to_home_dot_basic_memory(self, config_home, monkeypatch): + """Without BASIC_MEMORY_CONFIG_DIR, resolver returns ~/.basic-memory.""" + monkeypatch.delenv("BASIC_MEMORY_CONFIG_DIR", raising=False) + + assert resolve_data_dir() == config_home / ".basic-memory" + + def test_resolve_data_dir_honors_config_dir_env(self, tmp_path, monkeypatch): + """BASIC_MEMORY_CONFIG_DIR overrides the default location.""" + custom = tmp_path / "elsewhere" + monkeypatch.setenv("BASIC_MEMORY_CONFIG_DIR", str(custom)) + + assert resolve_data_dir() == custom + + def test_default_fastembed_cache_dir_uses_data_dir(self, config_home, monkeypatch): + """Default cache path is a subdir of the Basic Memory data dir.""" + monkeypatch.delenv("BASIC_MEMORY_CONFIG_DIR", raising=False) + monkeypatch.delenv("FASTEMBED_CACHE_PATH", raising=False) + + assert default_fastembed_cache_dir() == str( + config_home / ".basic-memory" / "fastembed_cache" + ) + + def test_default_fastembed_cache_dir_env_override(self, tmp_path, monkeypatch): + """FASTEMBED_CACHE_PATH is preferred when set.""" + custom = tmp_path / "custom-cache" + monkeypatch.setenv("FASTEMBED_CACHE_PATH", str(custom)) + monkeypatch.setenv("BASIC_MEMORY_CONFIG_DIR", str(tmp_path / "state")) + + assert default_fastembed_cache_dir() == str(custom) + + def test_default_fastembed_cache_dir_never_falls_back_to_fastembed_tmp_default( + self, config_home, monkeypatch + ): + """Regression guard for #741. + + FastEmbed's own fallback when ``cache_dir`` is ``None`` is + ``/fastembed_cache`` — the exact path that disappears in + sandboxed MCP runtimes (Codex CLI). Ensure Basic Memory's resolver + never lands on that path. + + Compared as exact paths rather than ``startswith(tempfile.gettempdir())`` + because the test runner itself can legitimately live under ``/tmp`` + (pytest's ``tmp_path`` does on Linux CI), and that's not the bug we + care about here. + """ + monkeypatch.delenv("BASIC_MEMORY_CONFIG_DIR", raising=False) + monkeypatch.delenv("FASTEMBED_CACHE_PATH", raising=False) + + resolved = Path(default_fastembed_cache_dir()) + + # Must equal the data-dir-relative default. + assert resolved == config_home / ".basic-memory" / "fastembed_cache" + # And must not equal FastEmbed's own /fastembed_cache fallback. + assert resolved != Path(tempfile.gettempdir()) / "fastembed_cache" + + class TestConfigManager: """Test ConfigManager functionality.""" diff --git a/tests/test_rclone_commands.py b/tests/test_rclone_commands.py index 9b1c7c4e..83d9d412 100644 --- a/tests/test_rclone_commands.py +++ b/tests/test_rclone_commands.py @@ -83,12 +83,21 @@ def test_get_project_remote_strips_app_data_prefix(): assert get_project_remote(project, "my-bucket") == "basic-memory-cloud:my-bucket/research" -def test_get_project_bisync_state(): +def test_get_project_bisync_state(monkeypatch): + monkeypatch.delenv("BASIC_MEMORY_CONFIG_DIR", raising=False) state_path = get_project_bisync_state("research") expected = Path.home() / ".basic-memory" / "bisync-state" / "research" assert state_path == expected +def test_get_project_bisync_state_honors_basic_memory_config_dir(tmp_path, monkeypatch): + """Regression guard for #742: bisync state dir follows BASIC_MEMORY_CONFIG_DIR.""" + custom_dir = tmp_path / "instance-w" / "state" + monkeypatch.setenv("BASIC_MEMORY_CONFIG_DIR", str(custom_dir)) + + assert get_project_bisync_state("research") == custom_dir / "bisync-state" / "research" + + def test_bisync_initialized_false_when_not_exists(tmp_path, monkeypatch): monkeypatch.setattr( "basic_memory.cli.commands.cloud.rclone_commands.get_project_bisync_state", diff --git a/tests/utils/test_setup_logging.py b/tests/utils/test_setup_logging.py index 316c29b4..9bf02a8a 100644 --- a/tests/utils/test_setup_logging.py +++ b/tests/utils/test_setup_logging.py @@ -2,6 +2,7 @@ import os import sys +from pathlib import Path from basic_memory import utils @@ -11,6 +12,7 @@ def test_setup_logging_uses_shared_log_file_off_windows(monkeypatch, tmp_path) - added_sinks: list[str] = [] monkeypatch.setenv("BASIC_MEMORY_ENV", "dev") + monkeypatch.delenv("BASIC_MEMORY_CONFIG_DIR", raising=False) monkeypatch.setattr(utils.os, "name", "posix") monkeypatch.setattr(utils.Path, "home", lambda: tmp_path) monkeypatch.setattr(utils.logger, "remove", lambda *args, **kwargs: None) @@ -32,6 +34,7 @@ def test_setup_logging_uses_per_process_log_file_on_windows(monkeypatch, tmp_pat added_sinks: list[str] = [] monkeypatch.setenv("BASIC_MEMORY_ENV", "dev") + monkeypatch.delenv("BASIC_MEMORY_CONFIG_DIR", raising=False) monkeypatch.setattr(utils.os, "name", "nt") monkeypatch.setattr(utils.os, "getpid", lambda: 4242) monkeypatch.setattr(utils.Path, "home", lambda: tmp_path) @@ -63,6 +66,7 @@ def test_setup_logging_trims_stale_windows_pid_logs(monkeypatch, tmp_path) -> No stale_logs.append(log_path) monkeypatch.setenv("BASIC_MEMORY_ENV", "dev") + monkeypatch.delenv("BASIC_MEMORY_CONFIG_DIR", raising=False) monkeypatch.setattr(utils.os, "name", "nt") monkeypatch.setattr(utils.os, "getpid", lambda: 4242) monkeypatch.setattr(utils.Path, "home", lambda: tmp_path) @@ -82,6 +86,44 @@ def test_setup_logging_trims_stale_windows_pid_logs(monkeypatch, tmp_path) -> No ] +def test_setup_logging_honors_basic_memory_config_dir(monkeypatch, tmp_path) -> None: + """Regression guard for #742: log path must follow BASIC_MEMORY_CONFIG_DIR. + + Prior to #742 the log path was hardcoded to ``~/.basic-memory/``, which + split state across instances when users set BASIC_MEMORY_CONFIG_DIR to + isolate config and the database elsewhere. + + Asserts on the log *directory* rather than the exact filename because + Windows uses a per-process ``basic-memory-.log`` while POSIX + shares a single ``basic-memory.log``. The thing this regression guard + cares about is that the log lives under the redirected config dir, + not at ``Path.home() / ".basic-memory"``. Patching ``utils.os.name`` + to force one branch would break ``Path(str)`` dispatch on the other + platform, so we stay platform-agnostic. + """ + added_sinks: list[str] = [] + + custom_dir = tmp_path / "instance-x" / "state" + monkeypatch.setenv("BASIC_MEMORY_ENV", "dev") + monkeypatch.setenv("BASIC_MEMORY_CONFIG_DIR", str(custom_dir)) + monkeypatch.setattr(utils.logger, "remove", lambda *args, **kwargs: None) + monkeypatch.setattr( + utils.logger, + "add", + lambda sink, **kwargs: added_sinks.append(str(sink)), + ) + monkeypatch.setattr(utils.telemetry, "get_logfire_handler", lambda: None) + monkeypatch.setattr(utils.telemetry, "pop_telemetry_warnings", lambda: []) + + utils.setup_logging(log_to_file=True) + + assert len(added_sinks) == 1 + log_path = Path(added_sinks[0]) + assert log_path.parent == custom_dir + assert log_path.name.startswith("basic-memory") + assert log_path.suffix == ".log" + + def test_setup_logging_test_env_uses_stderr_only(monkeypatch) -> None: """Test mode should add one stderr sink and return before other branches run.""" added_sinks: list[object] = []