Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -38,3 +38,6 @@ tailwindcss
# Tailwind CSS build output
api/static/style.css
node_modules

# Local embedding models (fastembed cache)
models/
7 changes: 7 additions & 0 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,13 @@ COPY tools/ tools/
COPY voice/ voice/
COPY api/ api/

# Prefetch the local embedding model (semantic memory, Tier 2) so it is bundled
# in the image — no runtime download, works offline. Stored in /app/models,
# OUTSIDE the /app/data volume so the mounted volume cannot shadow it. Keep the
# default in sync with EmbeddingConfig (core/config.py).
ARG EMBED_MODEL=BAAI/bge-small-en-v1.5
RUN uv run python -m core.embeddings prefetch "${EMBED_MODEL}" /app/models

# Build CSS with Tailwind CSS v4 standalone CLI
RUN ARCH=$(dpkg --print-architecture) && \
if [ "$ARCH" = "arm64" ]; then TW_ARCH="linux-arm64"; else TW_ARCH="linux-x64"; fi && \
Expand Down
171 changes: 135 additions & 36 deletions api/admin.py
Original file line number Diff line number Diff line change
Expand Up @@ -906,25 +906,47 @@ async def partial_search() -> HTMLResponse:
max_results=max_results,
)

@app.get("/partials/memory", dependencies=[Depends(auth)])
async def partial_memory() -> HTMLResponse:
"""Memory tab partial."""
# Memory config
memory_long_term_limit = await config_store.get("memory.long_term_limit") or "50"
async def _render_memory_partial() -> HTMLResponse:
"""Build the Memory tab partial (config + stored memories).

# Memory data — read directly from DB (works even when agent is stopped)
Shared by the tab load and the post-delete refresh so both render the
full embedding/lifecycle config, not just the memory tables.
"""
import aiosqlite

async def _cfg(key: str, default: str) -> str:
val = await config_store.get(key)
return default if val is None or val == "" else str(val)

async def _bool(key: str, default: str) -> str:
val = await config_store.get(key)
return default if val is None else str(val).lower()

ctx: dict[str, object] = {
"memory_long_term_limit": await _cfg("memory.long_term_limit", "50"),
"emb_enabled": await _bool("memory.embedding.enabled", "true"),
"emb_provider": await _cfg("memory.embedding.provider", "local"),
"emb_model": await _cfg("memory.embedding.model", "BAAI/bge-small-en-v1.5"),
"emb_base_url": await _cfg("memory.embedding.base_url", ""),
"emb_top_k": await _cfg("memory.embedding.injection_top_k", "12"),
"hygiene_enabled": await _bool("memory.hygiene_enabled", "true"),
"default_importance": await _cfg("memory.default_importance", "5.0"),
"archive_after_days": await _cfg("memory.archive_after_days", "90"),
"archive_max_importance": await _cfg("memory.archive_max_importance", "4.0"),
"archive_min_idle_days": await _cfg("memory.archive_min_idle_days", "45"),
"hygiene_threshold": await _cfg("memory.hygiene_similarity_threshold", "0.45"),
}

# Memory data — read directly from DB (works even when agent is stopped)
memory_db = await config_store.get("memory.db_path") or "data/memory.db"
long_term = []
short_term = []
long_term: list[dict] = []
short_term: list[dict] = []
if Path(memory_db).exists():
cols = "id, category, subject, content, source, confidence, created_at, updated_at"
async with aiosqlite.connect(memory_db) as db:
db.row_factory = aiosqlite.Row
cursor = await db.execute(f"SELECT {cols} FROM long_term ORDER BY updated_at DESC")
long_term = [dict(row) for row in await cursor.fetchall()]

cursor = await db.execute(
"SELECT id, content, context, expires_at, created_at "
"FROM short_term WHERE expires_at > datetime('now') "
Expand All @@ -933,12 +955,14 @@ async def partial_memory() -> HTMLResponse:
short_term = [dict(row) for row in await cursor.fetchall()]

return _render_partial(
"partials/memory.html",
long_term=long_term,
short_term=short_term,
memory_long_term_limit=memory_long_term_limit,
"partials/memory.html", long_term=long_term, short_term=short_term, **ctx
)

@app.get("/partials/memory", dependencies=[Depends(auth)])
async def partial_memory() -> HTMLResponse:
"""Memory tab partial."""
return await _render_memory_partial()

@app.get("/partials/history", dependencies=[Depends(auth)])
async def partial_history() -> HTMLResponse:
"""History tab partial."""
Expand Down Expand Up @@ -1243,7 +1267,18 @@ async def patch_config(body: ConfigPatchIn) -> dict:
agent.llm = LLMClient.from_agent_config(new_config.agent)
agent.executor.tool_env = tool_env(new_config)
agent.history_mode = new_config.history.mode
agent.memory.long_term_limit = new_config.memory.long_term_limit
mem_cfg = new_config.memory
agent.memory.long_term_limit = mem_cfg.long_term_limit
# Rebuild the embedder (lazy — no model load here) and refresh the
# Tier 3/4 lifecycle knobs so memory config changes apply live.
agent.memory.embedder = agent._build_embedder()
agent.memory.injection_top_k = mem_cfg.embedding.injection_top_k
agent.memory.default_importance = mem_cfg.default_importance
agent.memory.archive_after_days = mem_cfg.archive_after_days
agent.memory.archive_max_importance = mem_cfg.archive_max_importance
agent.memory.archive_min_idle_days = mem_cfg.archive_min_idle_days
agent.memory.hygiene_enabled = mem_cfg.hygiene_enabled
agent.memory.hygiene_similarity_threshold = mem_cfg.hygiene_similarity_threshold
agent.reflections.max_reflections = new_config.task_reflection.max_reflections
if new_config.search.enabled and new_config.search.api_key:
from tavily import TavilyClient
Expand Down Expand Up @@ -1277,15 +1312,16 @@ async def system_prompt_preview(body: PromptPreviewIn) -> dict:

memories = ""
if body.include_memories:
query = message or None
if agent_state.agent:
memories = await agent_state.agent.memory.format_for_prompt()
memories = await agent_state.agent.memory.format_for_prompt(query=query)
else:
from core.memory import MemoryStore

memories = await MemoryStore(
db_path=config.memory.db_path,
long_term_limit=config.memory.long_term_limit,
).format_for_prompt()
).format_for_prompt(query=query)

reflections = ""
if body.include_reflections and config.task_reflection.enabled:
Expand Down Expand Up @@ -1964,28 +2000,91 @@ async def delete_memory(request: Request) -> HTMLResponse:
if cursor.rowcount == 0:
raise HTTPException(404, f"Memory {memory_id} not found in {tier}")

# Return refreshed memory partial
memory_long_term_limit = await config_store.get("memory.long_term_limit") or "50"
long_term = []
short_term = []
cols = "id, category, subject, content, source, confidence, created_at, updated_at"
async with aiosqlite.connect(agent.memory.db_path) as db:
db.row_factory = aiosqlite.Row
cursor = await db.execute(f"SELECT {cols} FROM long_term ORDER BY updated_at DESC")
long_term = [dict(row) for row in await cursor.fetchall()]
cursor = await db.execute(
"SELECT id, content, context, expires_at, created_at "
"FROM short_term WHERE expires_at > datetime('now') "
"ORDER BY created_at DESC"
)
short_term = [dict(row) for row in await cursor.fetchall()]
return _render_partial(
"partials/memory.html",
long_term=long_term,
short_term=short_term,
memory_long_term_limit=memory_long_term_limit,
# Return refreshed memory partial (full config + tables)
return await _render_memory_partial()

@app.get("/memory/embedding/status", dependencies=[Depends(auth)])
async def embedding_status() -> dict:
"""Report embedding config + whether a local model is already on disk."""
from core.embeddings import LOCAL_PROVIDERS

config = await config_store.export_to_config()
emb = config.memory.embedding
is_local = emb.provider in LOCAL_PROVIDERS
model_ready: bool | None = None
if is_local:
cache = Path(emb.cache_dir)
model_ready = cache.exists() and any(cache.rglob("*.onnx"))
return {
"enabled": emb.enabled,
"provider": emb.provider,
"model": emb.model,
"local": is_local,
"model_ready": model_ready,
"cache_dir": emb.cache_dir,
}

@app.post("/memory/embedding/prefetch", dependencies=[Depends(auth)])
async def embedding_prefetch() -> dict:
"""Download the local embedding model now (also done at Docker build)."""
from core.embeddings import LOCAL_PROVIDERS, prefetch_local_model

config = await config_store.export_to_config()
emb = config.memory.embedding
if emb.provider not in LOCAL_PROVIDERS:
raise HTTPException(400, "Prefetch only applies to the local embedding provider")
try:
dim = await asyncio.to_thread(prefetch_local_model, emb.model, emb.cache_dir)
except Exception as exc:
log.exception("Embedding model prefetch failed")
raise HTTPException(500, f"Prefetch failed: {exc}") from exc
return {"ok": True, "model": emb.model, "dimensions": dim, "cache_dir": emb.cache_dir}

@app.post("/memory/embedding/test", dependencies=[Depends(auth)])
async def embedding_test() -> dict:
"""Embed a few probe sentences and report dimension + a sanity cosine."""
from core.embeddings import (
LOCAL_PROVIDERS,
EmbeddingClient,
LocalEmbeddingClient,
cosine_similarity,
)

config = await config_store.export_to_config()
emb = config.memory.embedding
try:
if emb.provider in LOCAL_PROVIDERS:
client: object = LocalEmbeddingClient(model=emb.model, cache_dir=emb.cache_dir)
else:
cfg = config.agent
api_key = emb.api_key or getattr(cfg, f"{emb.provider}_api_key", "")
base_url = emb.base_url or getattr(cfg, f"{emb.provider}_base_url", "") or None
if not api_key:
raise HTTPException(400, f"No API key configured for provider {emb.provider}")
client = EmbeddingClient(
provider=emb.provider,
api_key=api_key,
model=emb.model,
base_url=base_url,
dimensions=emb.dimensions,
)
probes = ["allergic to shellfish", "cannot eat prawns", "the weather is sunny today"]
vecs = await client.embed(probes) # type: ignore[attr-defined]
if len(vecs) < 3 or not vecs[0]:
raise HTTPException(500, "Embedding returned no vectors")
return {
"ok": True,
"model": emb.model,
"dimensions": len(vecs[0]),
"similar_pair": round(cosine_similarity(vecs[0], vecs[1]), 3),
"unrelated_pair": round(cosine_similarity(vecs[0], vecs[2]), 3),
}
except HTTPException:
raise
except Exception as exc:
log.exception("Embedding test failed")
raise HTTPException(500, f"Test failed: {exc}") from exc

@app.post("/memory/consolidate", dependencies=[Depends(auth)])
async def trigger_consolidation() -> HTMLResponse:
agent = agent_state.agent
Expand Down
Loading
Loading