From 114d8dec6e6dabe4ff382e944039fb5f091f43da Mon Sep 17 00:00:00 2001 From: Anil Murty Date: Tue, 12 May 2026 10:45:54 -0700 Subject: [PATCH] Address deferred items from issue #48 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Three fixes, one per sub-item. 1. FastAPI lifespan migration (Greptile P2 from #47) Convert the two `@app.on_event("startup" | "shutdown")` handlers in `cmd_serve.py` to a single `@asynccontextmanager`-decorated lifespan and pass it to `create_app(..., lifespan=...)`. The legacy `on_event` API is deprecated in newer FastAPI releases. `create_app()` grows an optional `lifespan` parameter (default `None`) so existing test callers keep working without changes. As a side benefit, this also delays `scheduler.start()` until after uvicorn binds the port — matching the existing reasoning for the `server.state` write. A failed bind no longer spins up an orphan retention scheduler. 2. Tune doctor's drift baseline check `DriftConfig.enabled` defaults to True, so every new agent (0–9 completed sessions) was tripping a `warning` on every `tj doctor` run — pure noise, since collection-in-progress is the expected state. Downgraded to `info` and reworded as "Collecting baseline: agent (N/M)". Threshold-reached agents are silent ("ok"); agents with drift explicitly disabled are ignored. Multiple in-progress agents are reported in one line instead of returning at the first match. Also tightened the two "skipped" messages in the spans-stats check so they say "running through the HTTP API fallback" instead of the misleading "non-DuckDB backend" — the backend IS DuckDB, the CLI session just doesn't have direct access because `tj serve` holds the write lock. 3. cost_budget_daily now fires in the budget breach demo Pricing reference mismatch, not a bug in the alert engine. The demo used model id `claude-sonnet-4-20250514` which isn't in `pricing/models.toml` (only the short-form ids `claude-{opus,sonnet,haiku}-4-X` are listed). The cost engine fell back to default rates ($0.50/$2.00 per MTok), making the 10-call total $0.0315 — below the $0.05 daily budget, so daily never breached. Switching the demo to `claude-sonnet-4-6` brings total spend to $0.2205 with real Sonnet rates, and both `cost_budget_session` and `cost_budget_daily` fire as the demo's comments describe. (Adding dated-form model ids like `claude-sonnet-4-20250514` to `pricing/models.toml` is a broader concern — real users running real LLM calls will see them too — but that's a separate change.) Verified: 416 tests pass, ruff + mypy clean, `tj serve` starts and shuts down cleanly with the new lifespan, `tj doctor` reports drift-in-progress as info, demo fires both budget alerts on re-run. Co-Authored-By: Claude Opus 4.7 (1M context) --- .../alerts_and_drift/budget_breach_demo.py | 6 ++-- tokenjam/api/app.py | 9 ++++- tokenjam/cli/cmd_doctor.py | 30 ++++++++++++----- tokenjam/cli/cmd_serve.py | 33 +++++++++++-------- 4 files changed, 53 insertions(+), 25 deletions(-) diff --git a/examples/alerts_and_drift/budget_breach_demo.py b/examples/alerts_and_drift/budget_breach_demo.py index 89a7151..b5766bf 100644 --- a/examples/alerts_and_drift/budget_breach_demo.py +++ b/examples/alerts_and_drift/budget_breach_demo.py @@ -22,7 +22,7 @@ # --------------------------------------------------------------------------- -# Pricing reference (from pricing/models.toml for claude-sonnet-4-20250514): +# Pricing reference (from pricing/models.toml for claude-sonnet-4-6): # input = $3.00 / MTok -> $0.000003 per token # output = $15.00 / MTok -> $0.000015 per token # @@ -44,7 +44,7 @@ def run_expensive_agent() -> None: input_tokens = 1000 + (i * 200) output_tokens = 500 + (i * 100) - # Estimate cost (using claude-sonnet-4-20250514 rates) + # Estimate cost (using claude-sonnet-4-6 rates) est_cost = (input_tokens * 3e-6) + (output_tokens * 15e-6) cumulative_cost += est_cost @@ -52,7 +52,7 @@ def run_expensive_agent() -> None: f" ~${est_cost:.4f} (cumulative ~${cumulative_cost:.4f})") record_llm_call( - model="claude-sonnet-4-20250514", + model="claude-sonnet-4-6", provider="anthropic", input_tokens=input_tokens, output_tokens=output_tokens, diff --git a/tokenjam/api/app.py b/tokenjam/api/app.py index 5ffae46..2d7daf5 100644 --- a/tokenjam/api/app.py +++ b/tokenjam/api/app.py @@ -3,7 +3,7 @@ from html import escape as html_escape from pathlib import Path -from typing import TYPE_CHECKING +from typing import TYPE_CHECKING, Any, AsyncContextManager, Callable from fastapi import FastAPI from fastapi.middleware.cors import CORSMiddleware @@ -23,18 +23,25 @@ def create_app( config: TjConfig, db: StorageBackend, ingest_pipeline: IngestPipeline, + lifespan: Callable[[FastAPI], AsyncContextManager[Any]] | None = None, ) -> FastAPI: """ Build and return the FastAPI app. db and ingest_pipeline are passed in (not imported globally) so tests can inject mocks easily. + + `lifespan`, if provided, is a FastAPI lifespan context manager — used by + `tj serve` to start/stop the retention scheduler and write server.state + only after uvicorn has bound the port (so a failed bind can't clobber a + running daemon's state file). """ app = FastAPI( title="TokenJam", version="0.1.0", docs_url="/docs", redoc_url=None, + lifespan=lifespan, ) # CORS — local only by default diff --git a/tokenjam/cli/cmd_doctor.py b/tokenjam/cli/cmd_doctor.py index 119f8f8..9f633d7 100644 --- a/tokenjam/cli/cmd_doctor.py +++ b/tokenjam/cli/cmd_doctor.py @@ -127,13 +127,24 @@ def _check_schema_vs_capture(config: object) -> dict: def _check_drift_inactive(config: object, db: object) -> dict: + """Report drift-baseline progress for any agent that hasn't reached threshold yet. + + Drift detection is enabled by default, so brand-new agents (0–9 sessions) + would otherwise trip a warning on every `tj doctor` run — pure noise, + since collection-in-progress is the expected state. Downgraded to `info` + so the user can see which agents are still building a baseline without + treating it as a problem. + """ + in_progress: list[str] = [] for agent_id, ac in config.agents.items(): - if ac.drift.enabled: - count = db.get_completed_session_count(agent_id) - if count < ac.drift.baseline_sessions: - return {"name": "Drift detection", "level": "warning", - "message": f"Agent '{agent_id}' has drift enabled but only " - f"{count}/{ac.drift.baseline_sessions} baseline sessions."} + if not ac.drift.enabled: + continue + count = db.get_completed_session_count(agent_id) + if count < ac.drift.baseline_sessions: + in_progress.append(f"{agent_id} ({count}/{ac.drift.baseline_sessions})") + if in_progress: + return {"name": "Drift detection", "level": "info", + "message": "Collecting baseline: " + ", ".join(in_progress)} return {"name": "Drift detection", "level": "ok", "message": "Drift detection status is consistent."} @@ -188,7 +199,8 @@ def _check_spans_stats(db: object) -> dict: conn = getattr(db, "conn", None) if conn is None: return {"name": "Spans column statistics", "level": "info", - "message": "Skipped — non-DuckDB backend."} + "message": "Skipped — CLI is running through the HTTP API " + "fallback (stop `tj serve` to access the DB directly)."} try: corrupt = check_spans_stats_corruption(conn) except duckdb.Error as e: @@ -222,7 +234,9 @@ def _attempt_repairs(checks: list[dict], db: object, output_json: bool) -> None: if conn is None: if not output_json: console.print( - " [yellow]Repair skipped — non-DuckDB backend.[/yellow]" + " [yellow]Repair skipped — CLI is using the HTTP API " + "fallback. Stop `tj serve` and retry so doctor has " + "direct DB access.[/yellow]" ) continue try: diff --git a/tokenjam/cli/cmd_serve.py b/tokenjam/cli/cmd_serve.py index 0862ca2..5d006b3 100644 --- a/tokenjam/cli/cmd_serve.py +++ b/tokenjam/cli/cmd_serve.py @@ -1,7 +1,9 @@ from __future__ import annotations import click +from contextlib import asynccontextmanager from pathlib import Path +from typing import AsyncIterator from tokenjam.utils.formatting import console @@ -19,12 +21,12 @@ def cmd_serve(ctx: click.Context, host: str | None, port: int | None, bind_port = port or config.api.port import uvicorn + from fastapi import FastAPI from tokenjam.api.app import create_app from tokenjam.core.ingest import build_default_pipeline db = ctx.obj["db"] pipeline = build_default_pipeline(db, config) - app = create_app(config, db, pipeline) # Schedule retention cleanup using a separate DB connection per run # to avoid concurrent write conflicts with uvicorn worker threads. @@ -46,22 +48,20 @@ def _retention_job() -> None: hour=0, minute=0, ) - scheduler.start() - @app.on_event("shutdown") - async def _shutdown_scheduler() -> None: - scheduler.shutdown(wait=False) - - # Write the resolved config path so other subcommands (e.g. onboard --codex) - # can find the secret this server is using regardless of CWD. Defer the - # write to a FastAPI startup event so it only fires after uvicorn binds - # the port — otherwise a failed-to-bind serve clobbers the state file - # of the running daemon (D2). + # ~/.local/share/tj/server.state lets other subcommands (e.g. `tj onboard + # --codex`) find the config this server is using regardless of CWD. We + # write it from the lifespan so it only happens after uvicorn binds the + # port — a failed bind must NOT clobber the running daemon's state file. + # Same reasoning for `scheduler.start()`: don't fire off a background + # thread for a server that's about to exit with EADDRINUSE. import json as _json _state_path = Path.home() / ".local" / "share" / "tj" / "server.state" - @app.on_event("startup") - async def _write_server_state() -> None: + @asynccontextmanager + async def _lifespan(_app: FastAPI) -> AsyncIterator[None]: + # startup + scheduler.start() _state_path.parent.mkdir(parents=True, exist_ok=True) _state_path.write_text( _json.dumps({ @@ -70,6 +70,13 @@ async def _write_server_state() -> None: "pid": __import__("os").getpid(), }) ) + try: + yield + finally: + # shutdown + scheduler.shutdown(wait=False) + + app = create_app(config, db, pipeline, lifespan=_lifespan) console.print(f"[bold]tj serve[/bold] starting on http://{bind_host}:{bind_port}") console.print(f" API docs: http://{bind_host}:{bind_port}/docs")