diff --git a/api/composers.py b/api/composers.py index dc85450..9a8b2bb 100644 --- a/api/composers.py +++ b/api/composers.py @@ -78,7 +78,12 @@ def list_composers(): try: local = WorkspaceLocalComposer.from_dict(c) except SchemaError as e: - print(f"Schema drift in {db_path}: {e}") + _logger.warning( + "Schema drift in %s: %s (%s)", + db_path, + e, + type(e).__name__, + ) continue # Use the typed view downstream so the dataclass is # load-bearing, not just a filter (Brad's review): the @@ -91,9 +96,20 @@ def list_composers(): c["workspaceFolder"] = workspace_folder composers.append((local, c)) except SchemaError as e: - print(f"Schema drift in {db_path}: {e}") + _logger.warning( + "Schema drift in %s: %s (%s)", + db_path, + e, + type(e).__name__, + ) except Exception as e: - print(f"Failed reading composers from {db_path}: {e}") + _logger.error( + "Failed reading composers from %s: %s (%s)", + db_path, + e, + type(e).__name__, + exc_info=True, + ) composers.sort(key=lambda pair: to_epoch_ms(pair[0].last_updated_at), reverse=True) return jsonify([c for _, c in composers]) @@ -152,7 +168,12 @@ def get_composer(composer_id): # Same drift list_composers() logs and skips at line ~78, # so a single-composer fetch can't silently return malformed # JSON the list endpoint hid. - print(f"Schema drift in workspace-local composer {composer_id}: {e}") + _logger.warning( + "Schema drift in workspace-local composer %s: %s (%s)", + composer_id, + e, + type(e).__name__, + ) continue # Match list_composers() at line 89 and the global # fallback below: `conversation` is normalised to [] @@ -163,7 +184,12 @@ def get_composer(composer_id): payload["conversation"] = payload.get("conversation") or [] return jsonify(payload) except SchemaError as e: - print(f"Schema drift in {db_path}: {e}") + _logger.warning( + "Schema drift in %s: %s (%s)", + db_path, + e, + type(e).__name__, + ) except (OSError, sqlite3.Error, json.JSONDecodeError, ValueError): pass @@ -186,7 +212,12 @@ def get_composer(composer_id): # Don't return malformed JSON to the client — surface the drift # as a 404 + log, matching the silent-skip behaviour of the # list endpoints for the same row. - print(f"Schema drift in composer {composer_id}: {e}") + _logger.warning( + "Schema drift in composer %s: %s (%s)", + composer_id, + e, + type(e).__name__, + ) return jsonify({"error": "Composer schema drift"}), 404 payload = dict(composer.raw) payload["conversation"] = payload.get("conversation") or [] diff --git a/api/config_api.py b/api/config_api.py index f5d4e47..ffefd69 100644 --- a/api/config_api.py +++ b/api/config_api.py @@ -6,6 +6,7 @@ src/app/api/get-username/route.ts GET /api/get-username """ +import logging import os import subprocess import sys @@ -16,6 +17,7 @@ from utils.workspace_path import set_workspace_path_override bp = Blueprint("config_api", __name__) +_logger = logging.getLogger(__name__) @bp.route("/api/detect-environment") @@ -44,7 +46,12 @@ def detect_environment(): }) except Exception as e: - print(f"Failed to detect environment: {e}") + _logger.warning( + "Failed to detect environment: %s (%s)", + e, + type(e).__name__, + exc_info=True, + ) return jsonify({"os": "unknown", "isWSL": False, "isRemote": False}) @@ -80,7 +87,12 @@ def validate_path(): ) except Exception as e: - print(f"Validation error: {e}") + _logger.error( + "Validation error: %s (%s)", + e, + type(e).__name__, + exc_info=True, + ) return jsonify({"valid": False, "error": "Failed to validate path"}), 500 @@ -135,5 +147,10 @@ def get_username(): return jsonify({"username": username}) except Exception as e: - print(f"Failed to get username: {e}") + _logger.warning( + "Failed to get username: %s (%s)", + e, + type(e).__name__, + exc_info=True, + ) return jsonify({"username": "YOUR_USERNAME"}) diff --git a/api/export_api.py b/api/export_api.py index 4c8ddf2..b484bb1 100644 --- a/api/export_api.py +++ b/api/export_api.py @@ -6,6 +6,7 @@ import io import json +import logging import os import sqlite3 import zipfile @@ -32,6 +33,7 @@ ) bp = Blueprint("export_api", __name__) +_logger = logging.getLogger(__name__) def _get_state_dir() -> str: @@ -181,7 +183,13 @@ def export_chats(): exported.append({"path": rel_path, "content": md, "updatedAt": updated_at_ms}) except Exception as e: - print(f"Error processing composer {composer_id} for export: {e}") + _logger.error( + "Error processing composer %s for export: %s (%s)", + composer_id, + e, + type(e).__name__, + exc_info=True, + ) count = len(exported) if count == 0: @@ -208,7 +216,10 @@ def export_chats(): ) except Exception as e: - print(f"Export error: {e}") - import traceback - traceback.print_exc() + _logger.error( + "Export failed: %s (%s)", + e, + type(e).__name__, + exc_info=True, + ) return jsonify({"error": f"Export failed: {str(e)}"}), 500 diff --git a/api/pdf.py b/api/pdf.py index c5c0088..c47c1d9 100644 --- a/api/pdf.py +++ b/api/pdf.py @@ -4,11 +4,13 @@ """ import io +import logging import re from flask import Blueprint, Response, jsonify, request bp = Blueprint("pdf", __name__) +_logger = logging.getLogger(__name__) def _safe_text(text: str) -> str: @@ -168,9 +170,12 @@ def footer(self): ) except Exception as e: - print(f"Failed to generate PDF: {e}") - import traceback - traceback.print_exc() + _logger.error( + "Failed to generate PDF: %s (%s)", + e, + type(e).__name__, + exc_info=True, + ) return jsonify({"error": f"Failed to generate PDF: {str(e)}"}), 500 diff --git a/api/search.py b/api/search.py index 35e511c..eaede04 100644 --- a/api/search.py +++ b/api/search.py @@ -164,7 +164,12 @@ def search(): # Drift logged so the operator can see why a chat dropped # out of search results; bad row still skipped so search # keeps returning results from the well-formed ones. - print(f"Schema drift in bubble {bid}: {e}") + _logger.warning( + "Schema drift in bubble %s: %s (%s)", + bid, + e, + type(e).__name__, + ) except (json.JSONDecodeError, ValueError): pass @@ -178,7 +183,12 @@ def search(): try: composer = Composer.from_dict(json.loads(row["value"]), composer_id=composer_id) except SchemaError as e: - print(f"Schema drift in composer {composer_id}: {e}") + _logger.warning( + "Schema drift in composer %s: %s (%s)", + composer_id, + e, + type(e).__name__, + ) continue except (json.JSONDecodeError, TypeError, ValueError): continue diff --git a/app.py b/app.py index 1caa215..afebb1c 100644 --- a/app.py +++ b/app.py @@ -4,6 +4,7 @@ from the Cursor editor's AI chat feature. """ +import logging import os import sys from datetime import datetime @@ -35,6 +36,11 @@ def _get_base_path(): def create_app(exclusion_rules_path=None): + logging.basicConfig( + level=logging.INFO, + format="%(asctime)s %(levelname)s %(name)s %(funcName)s: %(message)s", + ) + base = _get_base_path() app = Flask( __name__, diff --git a/scripts/export.py b/scripts/export.py index f4f93a2..6cea025 100644 --- a/scripts/export.py +++ b/scripts/export.py @@ -74,6 +74,18 @@ _logger = logging.getLogger(__name__) +def _configure_cli_logging() -> None: + """Route log records to stderr so stdout stays for export progress lines.""" + root = logging.getLogger() + if root.handlers: + return + logging.basicConfig( + level=logging.INFO, + format="%(levelname)s: %(message)s", + stream=sys.stderr, + ) + + def _json_dump_safe(value) -> str: """Best-effort JSON serialization for exclusion matching.""" try: @@ -165,6 +177,7 @@ def parse_args(): def main(): + _configure_cli_logging() opts = parse_args() since = opts["since"] out_dir = os.path.abspath(opts["out_dir"]) @@ -215,10 +228,9 @@ def main(): with _open_global_db(workspace_path) as (global_db, global_db_path): if global_db is None: - print( - f"Note: Cursor IDE global storage not found at {global_db_path}" - " — skipping IDE chats.", - file=sys.stderr, + _logger.info( + "Cursor IDE global storage not found at %s — skipping IDE chats.", + global_db_path, ) else: project_layouts_map = load_project_layouts_map(global_db) @@ -347,7 +359,12 @@ def main(): try: cli_projects = list_cli_projects(get_cli_chats_path()) except Exception as e: - print(f"Warning: Could not enumerate CLI chats ({e}) — skipping.", file=sys.stderr) + _logger.warning( + "Could not enumerate CLI chats: %s (%s) — skipping", + e, + type(e).__name__, + exc_info=True, + ) cli_projects = [] for cp in cli_projects: @@ -378,7 +395,13 @@ def main(): messages = traverse_blobs(session["db_path"]) bubbles = messages_to_bubbles(messages, created_ms) except Exception as e: - print(f"Warning: Could not read CLI session {session_id}: {e}", file=sys.stderr) + _logger.warning( + "Could not read CLI session %s: %s (%s)", + session_id, + e, + type(e).__name__, + exc_info=True, + ) continue if not bubbles: diff --git a/services/cli_tabs.py b/services/cli_tabs.py index 6edad8f..f28aafc 100644 --- a/services/cli_tabs.py +++ b/services/cli_tabs.py @@ -1,9 +1,12 @@ from __future__ import annotations +import logging from datetime import datetime from flask import current_app, jsonify +_logger = logging.getLogger(__name__) + from utils.cli_chat_reader import list_cli_projects, messages_to_bubbles, traverse_blobs from utils.exclusion_rules import build_searchable_text, is_excluded_by_rules from utils.workspace_path import get_cli_chats_path @@ -44,13 +47,25 @@ def _get_cli_workspace_tabs(workspace_id: str): try: messages = traverse_blobs(session["db_path"]) except Exception as e: - print(f"CLI: could not read session {session_id}: {e}") + _logger.warning( + "Could not read CLI session %s: %s (%s)", + session_id, + e, + type(e).__name__, + exc_info=True, + ) continue try: bubbles = messages_to_bubbles(messages, created_ms) except Exception as e: - print(f"CLI: could not convert session {session_id} to bubbles: {e}") + _logger.warning( + "Could not convert CLI session %s to bubbles: %s (%s)", + session_id, + e, + type(e).__name__, + exc_info=True, + ) continue if not bubbles: continue @@ -113,5 +128,11 @@ def _get_cli_workspace_tabs(workspace_id: str): return jsonify({"tabs": tabs}) except Exception as e: - print(f"Failed to get CLI workspace tabs: {e}") + _logger.error( + "Failed to get CLI workspace tabs for %s: %s (%s)", + workspace_id, + e, + type(e).__name__, + exc_info=True, + ) return jsonify({"error": "Failed to get CLI workspace tabs"}), 500 diff --git a/services/workspace_listing.py b/services/workspace_listing.py index dafb9e0..a23dbfd 100644 --- a/services/workspace_listing.py +++ b/services/workspace_listing.py @@ -134,8 +134,12 @@ def _safe_fetchall(query: str, params: tuple = ()) -> list: cid, e, ) - except Exception: - _logger.exception("Failed to load composer rows from global storage") + except Exception as e: + _logger.error( + "Failed to load composer rows from global storage: %s", + e, + exc_info=True, + ) # Group workspace entries by normalized folder path folder_to_entries: dict[str, list] = {} diff --git a/services/workspace_tabs.py b/services/workspace_tabs.py index 36b7143..f993e7d 100644 --- a/services/workspace_tabs.py +++ b/services/workspace_tabs.py @@ -1,5 +1,6 @@ from __future__ import annotations +import hashlib import json import logging import os @@ -37,27 +38,43 @@ -def _try_loads_kv_value(raw: str | None) -> Any | None: - """Parse a cursorDiskKV ``value`` column; ``None`` on missing or unparseable input (no raise).""" +def _loads_kv_value_logged(key: str, raw: object | None) -> Any | None: + """Parse a cursorDiskKV ``value``; log and return ``None`` on decode failure.""" if raw is None: return None + if not isinstance(raw, (str, bytes, bytearray)): + payload_len, payload_fp = _kv_payload_log_meta(raw) + _logger.warning( + "Failed to decode cursorDiskKV value for %s: unsupported type %s (payload_len=%d, payload_sha256=%s)", + key, + type(raw).__name__, + payload_len, + payload_fp, + ) + return None try: return json.loads(raw) - except (json.JSONDecodeError, TypeError, ValueError): + except (json.JSONDecodeError, TypeError, ValueError) as e: + payload_len, payload_fp = _kv_payload_log_meta(raw) + _logger.warning( + "Failed to decode cursorDiskKV value for %s: %s (payload_len=%d, payload_sha256=%s)", + key, + e, + payload_len, + payload_fp, + ) return None -_KV_VALUE_LOG_LIMIT = 200 - - -def _kv_value_log_preview(value: object | None, limit: int = _KV_VALUE_LOG_LIMIT) -> str: - """Truncated KV payload for warning logs (avoids multi-MB log lines on bad rows).""" +def _kv_payload_log_meta(value: object | None) -> tuple[int, str | None]: + """Byte length and short SHA-256 prefix for logs without emitting raw KV payloads.""" if value is None: - return "None" - text = value if isinstance(value, str) else str(value) - if len(text) > limit: - return text[:limit] + "..." - return text + return 0, None + if isinstance(value, bytes): + payload = value + else: + payload = str(value).encode("utf-8", errors="replace") + return len(payload), hashlib.sha256(payload).hexdigest()[:12] def assemble_workspace_tabs( @@ -128,12 +145,15 @@ def _safe_fetchall(query: str, params: tuple = ()) -> list: continue try: parsed = json.loads(row["value"]) + except (json.JSONDecodeError, TypeError, ValueError) as e: + payload_len, payload_fp = _kv_payload_log_meta(row["value"]) _logger.warning( - "Failed to decode Bubble from %s: %s (value_preview=%r)", + "Failed to decode Bubble from %s: %s (payload_len=%d, payload_sha256=%s)", row["key"], e, - _kv_value_log_preview(row["value"]), + payload_len, + payload_fp, ) continue try: @@ -160,7 +180,7 @@ def _safe_fetchall(query: str, params: tuple = ()) -> list: if len(parts) < 2: continue chat_id = parts[1] - ctx = _try_loads_kv_value(row["value"]) + ctx = _loads_kv_value_logged(row["key"], row["value"]) if not isinstance(ctx, dict): continue @@ -178,7 +198,10 @@ def _safe_fetchall(query: str, params: tuple = ()) -> list: project_layouts_map.setdefault(chat_id, []) for layout in layouts: if isinstance(layout, str): - layout = _try_loads_kv_value(layout) + layout = _loads_kv_value_logged( + f"{row['key']}:projectLayout", + layout, + ) if not isinstance(layout, dict): continue if isinstance(layout, dict) and layout.get("rootPath"): @@ -213,12 +236,14 @@ def _safe_fetchall(query: str, params: tuple = ()) -> list: try: parsed = json.loads(row["value"]) except (json.JSONDecodeError, TypeError, ValueError) as e: + payload_len, payload_fp = _kv_payload_log_meta(row["value"]) _logger.warning( - "Failed to decode Composer from composerData:%s: %s (key=%s, value_preview=%r)", + "Failed to decode Composer from composerData:%s: %s (key=%s, payload_len=%d, payload_sha256=%s)", composer_id, e, row["key"], - _kv_value_log_preview(row["value"]), + payload_len, + payload_fp, ) continue try: diff --git a/tests/test_models_wired_at_read_sites.py b/tests/test_models_wired_at_read_sites.py index 491ca58..0e80c89 100644 --- a/tests/test_models_wired_at_read_sites.py +++ b/tests/test_models_wired_at_read_sites.py @@ -130,9 +130,11 @@ def test_workspace_tabs_endpoint_calls_bubble_from_dict(self): def test_bubble_schema_drift_is_logged_not_swallowed_silently(self): # CodeRabbit: SchemaError used to be lumped in with JSONDecodeError / - # ValueError and skipped silently. Schema drift must now print a + # ValueError and skipped silently. Schema drift must now log a # `Schema drift in bubble ` line so disappearing bubbles can be # traced. The well-formed row still loads alongside. + import logging + from app import create_app # Seed a deliberately-malformed bubble row that will trip # Bubble.from_dict's "expected non-empty str" gate on the bubble_id by @@ -147,17 +149,14 @@ def test_bubble_schema_drift_is_logged_not_swallowed_silently(self): app = create_app() app.config["TESTING"] = True app.config["EXCLUSION_RULES"] = [] - import io - from contextlib import redirect_stdout - captured = io.StringIO() - with redirect_stdout(captured): + with self.assertLogs("api.search", level="WARNING") as logs: client = app.test_client() response = client.get("/api/search?q=sentinel-wired") self.assertEqual(response.status_code, 200) - out = captured.getvalue() - self.assertIn("Schema drift in bubble", out, - msg=f"expected drift log line, got stdout:\n{out!r}") - self.assertIn("bub-bad", out, + messages = "\n".join(logs.output) + self.assertIn("Schema drift in bubble", messages, + msg=f"expected drift log line, got logs:\n{messages!r}") + self.assertIn("bub-bad", messages, msg="drift log must include the offending bubble id") def test_workspace_tabs_endpoint_calls_composer_from_dict(self): diff --git a/utils/cli_chat_reader.py b/utils/cli_chat_reader.py index 2b2be00..14dbd0c 100644 --- a/utils/cli_chat_reader.py +++ b/utils/cli_chat_reader.py @@ -33,7 +33,10 @@ from __future__ import annotations import json +import logging from models import CliSessionMeta, SchemaError + +_logger = logging.getLogger(__name__) import os import re import sqlite3 @@ -100,7 +103,11 @@ def traverse_blobs(db_path: str) -> list[dict]: json.loads(bytes.fromhex(meta_row[0]).decode("utf-8")) ) except (SchemaError, ValueError, UnicodeDecodeError, TypeError) as e: - print(f"Schema drift in CLI session meta at {db_path}: {e}") + _logger.warning( + "Schema drift in CLI session meta at %s: %s", + db_path, + e, + ) return [] root_id: str = meta.latest_root_blob_id