diff --git a/.agents/TOOL_INDEX.yaml b/.agents/TOOL_INDEX.yaml index 48ef5f4..98d4381 100644 --- a/.agents/TOOL_INDEX.yaml +++ b/.agents/TOOL_INDEX.yaml @@ -47,6 +47,9 @@ tools: - name: memory-working-cleanup-expired description: mark expired unpromoted working-memory items as expired source: examples/mcp_server/server.py + - name: braincore-snapshot + description: build an audited BrainCore memory snapshot through the CLI-backed reference server + source: examples/mcp_server/server.py - name: memory-search-visual description: search OCR, caption, and layout metadata without exposing raw artifacts source: examples/mcp_server/server.py diff --git a/.github/workflows/codeql.yml b/.github/workflows/codeql.yml index 25bf59a..ebc6e1e 100644 --- a/.github/workflows/codeql.yml +++ b/.github/workflows/codeql.yml @@ -31,12 +31,12 @@ jobs: uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd - name: Initialize CodeQL - uses: github/codeql-action/init@9e0d7b8d25671d64c341c19c0152d693099fb5ba + uses: github/codeql-action/init@7211b7c8077ea37d8641b6271f6a365a22a5fbfa with: languages: ${{ matrix.language }} build-mode: ${{ matrix.build-mode }} - name: Perform CodeQL analysis - uses: github/codeql-action/analyze@9e0d7b8d25671d64c341c19c0152d693099fb5ba + uses: github/codeql-action/analyze@7211b7c8077ea37d8641b6271f6a365a22a5fbfa with: category: "/language:${{ matrix.language }}" diff --git a/AGENTS.md b/AGENTS.md index 854f4ed..7588ef6 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -53,6 +53,29 @@ customers may inspect: 8. Never commit generated image prompts or draft visual instructions into the public repo. +## Dependabot Hard Rule + +Never merge a Dependabot-authored PR in this repository. + +This is a standing hard stop. It applies even if the user says "merge all PRs", +"approved", "proceed", "finish the PRs", or similar broad approval language. +Dependabot PRs are read-only dependency proposals only. + +Required handling: + +1. 
Inspect Dependabot PRs only as evidence of requested dependency changes. +2. If a dependency update is accepted, create an organization-owned curated + branch from current `main` and apply the dependency change there. +3. Run the full Gitea-first public release gate, security review, tests, and CI + on the curated branch/PR. +4. Merge only the curated non-Dependabot PR after the gate passes. +5. Close the Dependabot PR as superseded, deferred, or rejected with the reason + documented in the incident/release notes. + +If any open PR list includes `app/dependabot`, do not treat "all PRs" as a +merge instruction for those PRs. Stop that part of the workflow and report that +Dependabot PRs must be superseded by a curated branch instead. + ## Human-readable boundaries - The example MCP server is a reference implementation only. diff --git a/CHANGELOG.md b/CHANGELOG.md index b56ac82..f38d5d7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,10 +4,21 @@ All notable changes to BrainCore are documented in this file. ## [Unreleased] +## [1.2.0] - 2026-05-23 + ### Added - Added additive memory governance migration `022_memory_governance.sql` with prompt recall, feedback, quality audit, context audit, lifecycle outbox, cue, compaction, and source attribution support. +- Added additive assistant memory source migration `023_assistant_memory_sources.sql` + with `vestige_memory` and `pai_auto_memory` source types for deterministic + assistant-memory migration sources. - Added governed memory CLI commands for event ingestion, recall/read auditing, status updates, feedback, compaction, conflict detection, and source attribution. - Added memory governance policy checks to CI and local sanitization coverage. +- Added assistant memory import review commands so imported assistant memories + stay non-prompt-eligible until operator review and memory governance approve + promotion. +- Added audited BrainCore snapshot builds with compact, risk, and deep preload + profiles. 
+- Added BrainCore shadow-eval support for snapshot behavior checks. ### Fixed - Governed prompt search now excludes archived, quarantined, suppressed, retired, and retired-superseded memories by default while preserving an explicit operator inspection override. @@ -24,7 +35,7 @@ All notable changes to BrainCore are documented in this file. - Added CLI and MCP-first lifecycle administration surfaces. The browser/admin web app remains a future upgrade path. - The open-source preserve schema is now documented as 50 tables after - migrations `001` through `022` plus the runtime migration ledger bootstrap. + migrations `001` through `023` plus the runtime migration ledger bootstrap. - Lifecycle `suppressed` and `retired` overlays are enforced in retrieval and procedure search paths without mutating BrainCore native truth rows. - Memory governance metadata, lifecycle sensitivity/redaction values, and cue diff --git a/CITATION.cff b/CITATION.cff index 739a42a..5aa97a1 100644 --- a/CITATION.cff +++ b/CITATION.cff @@ -1,8 +1,8 @@ cff-version: 1.2.0 title: "BrainCore" message: "If you use BrainCore, please cite it using the metadata below." -version: "1.1.6" -date-released: "2026-04-30" +version: "1.2.0" +date-released: "2026-05-23" authors: - family-names: Doney given-names: Trent @@ -29,7 +29,7 @@ preferred-citation: - family-names: Doney given-names: Trent name: "Trent Doney" - version: "1.1.6" - date-released: "2026-04-30" + version: "1.2.0" + date-released: "2026-05-23" repository-code: "https://github.com/SynapseGrid-Labs/BrainCore" url: "https://github.com/SynapseGrid-Labs/BrainCore" diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 24e2f29..640ce0e 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -20,7 +20,7 @@ what the repo actually ships: 4. Do not turn the example MCP server into a claim that the repo ships a larger tool surface than it does. 5. 
Keep the launch truth surface aligned: `001` through `023`, - `45-table preserve schema`, `v1.1.6`, and the committed benchmark + `50-table preserve schema`, `v1.2.0`, and the committed benchmark artifacts. 6. Do not add, replace, regenerate, compress, optimize, or overwrite image assets unless the human owner supplied the final approved asset for this PR. diff --git a/README.md b/README.md index 1e08f72..2525cce 100644 --- a/README.md +++ b/README.md @@ -41,7 +41,7 @@ BrainCore processes operational artifacts and automatically: All knowledge is stored in PostgreSQL with pgvector, enabling four core retrieval streams (SQL + full-text + vector + temporal) plus optional graph-path retrieval with Reciprocal Rank Fusion.

- BrainCore memory lifecycle control flow showing retrieve, inject, omit, feedback, and control outcomes while native evidence remains unchanged. + BrainCore v1.2 memory lifecycle control flow from sources through extraction, review, approved memory, snapshot retrieval, prompt packaging, feedback, suppression, retirement, and append-only audit logs.

## Quality Standard @@ -66,7 +66,7 @@ software: ## Features -- **Source ingestion**: incident notes plus deterministic parsers for Claude Code, Codex, Codex shared memory, Discord, Telegram, Grafana, personal memory, Asana task exports, and Git commits +- **Source ingestion**: incident notes plus deterministic parsers for Claude Code, Codex, Codex shared memory, Discord, Telegram, Grafana, personal memory, assistant memory exports, Asana task exports, Git commits, and curated project documentation - **Hybrid retrieval**: Structured SQL + FTS + vector similarity + temporal expansion, with optional graph path search, fused with RRF (`k=60`) - **Trust classes**: `deterministic`, `corroborated_llm`, `single_source_llm`, `human_curated` - **Enterprise memory lifecycle overlay**: Suppress or retire recall targets without destroying native evidence, with append-only feedback, score, and audit trails @@ -313,7 +313,7 @@ data sources ```

- BrainCore architecture diagram showing sources, archive, extract, consolidate, retrieve, publish, hybrid retrieval, and evidence audit guarantees. + BrainCore v1.2 architecture diagram showing source ingestion, archive, extract, load, review, governed memories, snapshot builder, MCP and CLI retrieval, storage indexes, audit lineage, and schema management.

### Trust classes @@ -457,9 +457,9 @@ The response includes: That makes it possible to debug retrieval behavior without guessing which stream contributed the hit. -## The 9 Data Sources +## The 12 Data Sources -BrainCore currently ships `9 deterministic parsers`. +BrainCore currently ships `12 deterministic source parsers`. | Parser | Input | Typical output | |---|---|---| @@ -472,6 +472,9 @@ BrainCore currently ships `9 deterministic parsers`. | `personal-memory-parser.ts` | personal memory markdown | curated memory and reference facts | | `asana-parser.ts` | Asana task export JSON/JSONL | task state, routing, project, and custom-field facts | | `git-parser.ts` | git commit JSON/JSONL or local repository | commit timeline, author, and touched-file facts | +| `pai-auto-memory-parser.ts` | PAI auto-memory markdown | imported assistant-memory facts for review-gated promotion | +| `vestige-parser.ts` | Vestige JSON/JSONL export | imported assistant-memory facts for review-gated promotion | +| `project-doc-parser.ts` | curated project documentation manifest | explicit project facts gated by value review | The repo also contains extractor infrastructure files such as `deterministic.ts`, `semantic.ts`, `quality-gate.ts`, diff --git a/assets/README.md b/assets/README.md index b431f42..92896c9 100644 --- a/assets/README.md +++ b/assets/README.md @@ -8,8 +8,8 @@ active assets should be referenced by public docs. 
| File | Status | Dimensions | Format | Usage | |---|---|---|---|---| | `og.jpg` | Active | 1672 by 941 | JPEG | README hero block and social preview source | -| `lifecycle-control-flow.jpg` | Active | 1672 by 941 | JPEG | README lifecycle control flow overview | -| `architecture.jpg` | Active | 1672 by 941 | JPEG | README architecture section | +| `lifecycle-control-flow.jpg` | Active | 1671 by 941 | JPEG | README lifecycle control flow overview | +| `architecture.jpg` | Active | 1671 by 941 | JPEG | README architecture section | | `maintained-by-trent.jpg` | Active | 1200 by 397 | JPEG | README author note banner | | `manifest.json` | Reference | n/a | JSON | Hash and dimension manifest | | `README.md` | Reference | n/a | Markdown | Asset notes | diff --git a/assets/architecture.jpg b/assets/architecture.jpg index 4170b4d..2a3356c 100644 Binary files a/assets/architecture.jpg and b/assets/architecture.jpg differ diff --git a/assets/lifecycle-control-flow.jpg b/assets/lifecycle-control-flow.jpg index 1680b55..68d25c8 100644 Binary files a/assets/lifecycle-control-flow.jpg and b/assets/lifecycle-control-flow.jpg differ diff --git a/assets/manifest.json b/assets/manifest.json index d6f87f9..1988d0e 100644 --- a/assets/manifest.json +++ b/assets/manifest.json @@ -9,12 +9,16 @@ "format": "JPEG", "width": 1672, "height": 941, - "sha256": "3b88eef1b8df5994e58c7f2e208bcd9d976eeefbb1a31fcf0077e34043c7da3d", + "sha256": "4c579e101e0946225bc5f430babcff5cf0ccdeb476020dbe06fb605b7c3b5ca9", "visible_claims": [ "BrainCore", - "Enterprise memory lifecycle for AI agents", - "PostgreSQL + pgvector · CLI/MCP controls", - "v1.1.6" + "Evidence-first enterprise memory lifecycle for AI agents", + "PostgreSQL + pgvector · CLI/MCP controls · Open Source", + "v1.2", + "Governed memory", + "Audit trails", + "Traceable context", + "Evidence-first design" ] }, { @@ -22,24 +26,23 @@ "status": "active", "usage": "README lifecycle control flow overview", "format": "JPEG", - "width": 1672, + 
"width": 1671, "height": 941, - "sha256": "13f5a1470d382d7658e10b09d58b17f96f61889e0de204483a5f7a11ae2c61e5", + "sha256": "016e990502245c50964d4923c69dbe4f507b3adfdedcce3718d92ff14d8d7184", "visible_claims": [ - "Memory Lifecycle Control Flow", - "Recall audit, feedback, suppression, and retirement without destroying evidence", - "Retrieve", - "Inject", - "Omit", + "BrainCore v1.2 Memory Lifecycle Control Flow", + "Native evidence is preserved", + "Sources", + "Extraction", + "Facts", + "Review Queue", + "Approved Memory", + "Snapshot Retrieval", + "Prompt Package", "Feedback", - "Control", - "Active", - "Suppressed", - "Retired", - "Native evidence remains unchanged", - "Append-only archive", - "Source provenance", - "Immutable records" + "Suppression Overlay", + "Retirement Overlay", + "Audit Logs (Append-Only)" ] }, { @@ -47,28 +50,24 @@ "status": "active", "usage": "README architecture section", "format": "JPEG", - "width": 1672, + "width": 1671, "height": 941, - "sha256": "3d9766f0fdc7ac29aea9d7721d2dafb01d98cdc55b99ddec859a31d00862c8d3", + "sha256": "955d16e45d599ee3c0350af27ea6c66996dff7915243e5f4c7050f44a08ae065", "visible_claims": [ - "BrainCore Architecture", - "Sources", + "BrainCore v1.2 Architecture", + "Ingest", "Archive", "Extract", - "Consolidate", + "Load", + "Review", + "Govern", "Retrieve", - "Publish", - "Hybrid Retrieval", - "SQL", - "Full-text", - "Vector", - "Temporal", - "Graph path", - "Evidence + Audit Guarantees", - "Append-only archive", - "Trust classes", - "Source provenance", - "Lifecycle audit trail" + "Audit", + "Source Ingestion", + "Durable Storage & Index Layer", + "Cross-Cutting Enablers", + "Audit & Lineage", + "Migration & Schema Management (v1.2)" ] }, { diff --git a/assets/og.jpg b/assets/og.jpg index 718a091..15542fc 100644 Binary files a/assets/og.jpg and b/assets/og.jpg differ diff --git a/benchmarks/claims-to-evidence.yaml b/benchmarks/claims-to-evidence.yaml index e5efb25..72680ce 100644 --- 
a/benchmarks/claims-to-evidence.yaml +++ b/benchmarks/claims-to-evidence.yaml @@ -272,20 +272,21 @@ the single source of truth for the fusion k parameter. Source-tree invariant. -- claim: "9 deterministic parsers" +- claim: "12 deterministic source parsers" source: "src/extract" source_type: "file_lines" glob: "*-parser.ts" - expected: 9 + expected: 12 tolerance: 0 framing: "smoke-regression" notes: > Counts files matching src/extract/*-parser.ts. Current set: asana-parser.ts, codex-parser.ts, codex-shared-parser.ts, discord-parser.ts, git-parser.ts, grafana-parser.ts, - personal-memory-parser.ts, session-parser.ts, telegram-parser.ts. deterministic.ts, + pai-auto-memory-parser.ts, personal-memory-parser.ts, project-doc-parser.ts, + session-parser.ts, telegram-parser.ts, vestige-parser.ts. deterministic.ts, semantic.ts, load.ts, project-resolver.ts, quality-gate.ts, verify.ts are - infrastructure files, not parsers. Source-tree invariant. + infrastructure files, not source parsers. Source-tree invariant. 
- claim: "PostgreSQL 15+ (tested on 16)" source: "benchmarks/README.md" diff --git a/examples/mcp_server/requirements.txt b/examples/mcp_server/requirements.txt index d911367..1be64bb 100644 --- a/examples/mcp_server/requirements.txt +++ b/examples/mcp_server/requirements.txt @@ -2,6 +2,6 @@ mcp[cli]>=1.27.1 psycopg[binary]>=3.3.4 psycopg-pool>=3.3.1 pydantic>=2.13.4 -numpy>=2.4.5 +numpy>=2.4.6 pgvector>=0.4.2 requests>=2.34.2 diff --git a/examples/mcp_server/server.py b/examples/mcp_server/server.py index 437d7b6..358e13a 100644 --- a/examples/mcp_server/server.py +++ b/examples/mcp_server/server.py @@ -36,7 +36,9 @@ from __future__ import annotations import importlib.util +import json import os +import subprocess import sys import types from pathlib import Path @@ -257,6 +259,29 @@ def embed_query(text: str): # noqa: ARG001 # signature match app = FastMCP("braincore-example-mcp") + +def _braincore_cli_command() -> list[str]: + raw = os.environ.get("BRAINCORE_CLI", "braincore") + return raw.split() + + +def _run_braincore_cli(args: list[str]) -> dict[str, Any]: + command = [*_braincore_cli_command(), *args] + completed = subprocess.run( + command, + check=False, + capture_output=True, + text=True, + timeout=30, + ) + if completed.returncode != 0: + raise RuntimeError(completed.stderr.strip() or completed.stdout.strip() or f"BrainCore CLI exited {completed.returncode}") + output = completed.stdout.strip() + start = output.find("{") + if start == -1: + raise RuntimeError("BrainCore CLI did not return JSON output") + return json.loads(output[start:]) + # Deferred connection pool. Created on first tool invocation. 
_pool: Optional[ConnectionPool] = None @@ -289,6 +314,41 @@ def _get_pool() -> ConnectionPool: return _pool +@app.tool(name="braincore-snapshot") +def braincore_snapshot_tool( + cwd: str, + git_root: Optional[str] = None, + prompt: Optional[str] = None, + mode: str = "shadow", + max_tokens: int = 3000, + limit: int = 20, +) -> dict[str, Any]: + """Build an audited BrainCore memory snapshot through the BrainCore CLI. + + This reference tool is intentionally read-only. It uses the same + ``braincore snapshot build`` surface operators use during runtime cutover. + Set ``BRAINCORE_CLI`` when the executable is not on PATH. + """ + args = [ + "snapshot", + "build", + "--cwd", + cwd, + "--mode", + mode, + "--max-tokens", + str(max_tokens), + "--limit", + str(limit), + "--json", + ] + if git_root: + args.extend(["--git-root", git_root]) + if prompt: + args.extend(["--prompt", prompt]) + return _run_braincore_cli(args) + + @app.tool(name="memory-search") def memory_search_tool( query: str, diff --git a/package.json b/package.json index a5be8f4..323fed0 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "braincore", - "version": "1.1.6", + "version": "1.2.0", "description": "Evidence-first enterprise memory lifecycle for AI infrastructure — preserves, audits, and retrieves operational knowledge for agents", "type": "module", "repository": { diff --git a/scripts/assistant-memory-refresh.sh b/scripts/assistant-memory-refresh.sh new file mode 100755 index 0000000..b7d2996 --- /dev/null +++ b/scripts/assistant-memory-refresh.sh @@ -0,0 +1,75 @@ +#!/usr/bin/env bash +set -euo pipefail + +ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." 
&& pwd)" +LOCK_FILE="${BRAINCORE_ASSISTANT_MEMORY_LOCK:-/tmp/braincore-assistant-memory-refresh.lock}" +PAI_AUTO_MEMORY_DIR="${BRAINCORE_PAI_AUTO_MEMORY_DIR:-}" +VESTIGE_EXPORT_PATH="${BRAINCORE_VESTIGE_EXPORT_PATH:-}" +DRY_RUN="${BRAINCORE_ASSISTANT_MEMORY_DRY_RUN:-0}" + +usage() { + cat <<'EOF' +Usage: scripts/assistant-memory-refresh.sh [--dry-run] + +Refresh BrainCore assistant-memory evidence from configured sources. + +Environment: + BRAINCORE_POSTGRES_DSN Required by BrainCore CLI. + BRAINCORE_TENANT Optional tenant override. + BRAINCORE_PAI_AUTO_MEMORY_DIR Optional PAI auto-memory markdown directory. + BRAINCORE_VESTIGE_EXPORT_PATH Optional Vestige JSON/JSONL export path. + BRAINCORE_ASSISTANT_MEMORY_LOCK Optional flock path. + BRAINCORE_ASSISTANT_MEMORY_DRY_RUN Set 1 for dry run. +EOF +} + +for arg in "$@"; do + case "$arg" in + --dry-run) DRY_RUN=1 ;; + -h|--help) usage; exit 0 ;; + *) echo "Unknown argument: $arg" >&2; usage >&2; exit 2 ;; + esac +done + +if [[ -z "${BRAINCORE_POSTGRES_DSN:-}" ]]; then + echo "BRAINCORE_POSTGRES_DSN is required." >&2 + exit 2 +fi + +run_extract() { + local label="$1" + shift + echo "[$(date -Is)] Refreshing ${label}" + if [[ "$DRY_RUN" == "1" ]]; then + bun src/cli.ts extract "$@" --dry-run + else + bun src/cli.ts extract "$@" + fi +} + +cd "$ROOT_DIR" +exec 9>"$LOCK_FILE" +if ! flock -n 9; then + echo "Another BrainCore assistant-memory refresh is already running: $LOCK_FILE" >&2 + exit 75 +fi + +if [[ -n "$PAI_AUTO_MEMORY_DIR" && -d "$PAI_AUTO_MEMORY_DIR" ]]; then + if find "$PAI_AUTO_MEMORY_DIR" -maxdepth 1 -type f -name '*.md' ! -name 'MEMORY.md' -print -quit | grep -q .; then + run_extract "PAI auto-memory" --pai-auto-memory "$PAI_AUTO_MEMORY_DIR" + else + echo "Skipping PAI auto-memory; no markdown files found: $PAI_AUTO_MEMORY_DIR" >&2 + fi +else + echo "Skipping PAI auto-memory; BRAINCORE_PAI_AUTO_MEMORY_DIR is unset or not a directory." 
>&2 +fi + +if [[ -n "$VESTIGE_EXPORT_PATH" ]]; then + if [[ -f "$VESTIGE_EXPORT_PATH" ]]; then + run_extract "Vestige export" --vestige-export "$VESTIGE_EXPORT_PATH" + else + echo "Skipping Vestige export; file not found: $VESTIGE_EXPORT_PATH" >&2 + fi +else + echo "Skipping Vestige export; BRAINCORE_VESTIGE_EXPORT_PATH is not set." +fi diff --git a/scripts/memory-policy-gate.sh b/scripts/memory-policy-gate.sh index 891bf0d..21585ca 100755 --- a/scripts/memory-policy-gate.sh +++ b/scripts/memory-policy-gate.sh @@ -76,7 +76,23 @@ bun test src/__tests__/migrate.test.ts src/__tests__/memory-governance.test.ts echo "" echo "--- Python static checks ---" -python3 -m unittest tests/test_memory_search_governance.py -v +if [[ -z "${BRAINCORE_PYTHON:-}" ]]; then + echo "FAIL: BRAINCORE_PYTHON must point to a Python interpreter with psycopg installed." + echo " Example: BRAINCORE_PYTHON=/path/to/venv/bin/python3" + echo " Install dependencies with: pip install -r mcp/requirements.txt" + exit 1 +fi + +if ! 
"$BRAINCORE_PYTHON" - < { + test("parses Vestige JSON export into one source item per memory", async () => { + const dir = await mkdtemp(join(tmpdir(), "braincore-vestige-")); + const path = join(dir, "vestige.json"); + await writeFile(path, JSON.stringify([ + { + id: "mem-1", + content: "Remember that BrainCore imports must stay review gated.", + nodeType: "decision", + createdAt: "2026-05-18T00:00:00Z", + source: "agent-memory-atlas", + tags: ["braincore", "migration"], + retentionStrength: 0.77, + hasEmbedding: true, + embeddingModel: "nomic-embed-text-v1.5", + }, + ])); + + const [item] = await parseVestigeExport(path); + + expect(item.sourceType).toBe("vestige_memory"); + expect(item.sourceKey).toBe("vestige_memory:mem-1"); + expect(item.result.scope_path).toBe("assistant:vestige/decision"); + expect(item.result.facts).toContainEqual(expect.objectContaining({ + subject: "vestige_memory:mem-1", + predicate: "vestige_memory_content", + fact_kind: "decision", + })); + expect(item.result.facts).toContainEqual(expect.objectContaining({ + predicate: "vestige_embedding_model", + object_value: "nomic-embed-text-v1.5", + })); + }); + + test("parses PAI auto-memory markdown with provenance", async () => { + const dir = await mkdtemp(join(tmpdir(), "braincore-pai-auto-")); + await mkdir(dir, { recursive: true }); + await writeFile(join(dir, "feedback_review_gate.md"), `--- +name: Review gate +summary: ignored +description: Never skip review gates. +type: feedback +originSessionId: session-1 +tags: [braincore] +--- +Review gates are required before prompt eligibility. 
+`); + await writeFile(join(dir, "MEMORY.md"), "# index only\n"); + + const [item] = await parsePaiAutoMemory(dir); + + expect(item.sourceType).toBe("pai_auto_memory"); + expect(item.sourceKey).toBe("pai_auto_memory:feedback_review_gate"); + expect(item.result.scope_path).toBe("assistant:pai/auto/feedback"); + expect(item.result.entities).toContainEqual({ name: "session-1", type: "session" }); + expect(item.result.facts).toContainEqual(expect.objectContaining({ + predicate: "pai_auto_memory_content", + fact_kind: "constraint", + })); + }); +}); diff --git a/src/__tests__/assistant-memory-review.test.ts b/src/__tests__/assistant-memory-review.test.ts new file mode 100644 index 0000000..c244626 --- /dev/null +++ b/src/__tests__/assistant-memory-review.test.ts @@ -0,0 +1,234 @@ +process.env.BRAINCORE_POSTGRES_DSN ??= ["postgresql", "://", "postgres:postgres@localhost:5432/postgres"].join(""); + +import { describe, expect, test } from "bun:test"; +import { + listAssistantMemoryReviews, + assistantMemoryReviewStats, + demoteAssistantMemoryPromotion, + getAssistantMemoryReview, + promoteAssistantMemoryReview, + rejectAssistantMemoryReview, + renderAssistantReviewQueueMarkdown, + queueAssistantMemoryReview, +} from "../memory/assistant-review"; + +function makeSql(resolver: (query: string, values: unknown[]) => unknown[] | Promise) { + const calls: Array<{ query: string; values: unknown[] }> = []; + const sql = ((strings: TemplateStringsArray, ...values: unknown[]) => { + const query = strings.join("?"); + calls.push({ query, values }); + return Promise.resolve(resolver(query, values)); + }) as any; + sql.json = (value: unknown) => value; + sql.begin = async (callback: (tx: any) => unknown) => callback(sql); + return { sql, calls }; +} + +describe("assistant memory review", () => { + test("lists tenant-bound assistant memory import review rows", async () => { + const { sql, calls } = makeSql(() => [{ + review_id: "review-1", + status: "pending", + reason: 
"assistant_memory_import_review", + source_type: "vestige_memory", + source_key: "vestige:1", + scope_path: "project:memory", + original_path: "memory.jsonl", + fact_count: 3, + created_at: "2026-05-18", + }]); + + const rows = await listAssistantMemoryReviews(sql, { status: "pending", limit: 1 }); + + expect(rows[0].reviewId).toBe("review-1"); + expect(rows[0].factCount).toBe(3); + expect(calls[0].query).toContain("preserve.review_queue"); + expect(calls[0].query).toContain("JOIN preserve.artifact"); + expect(calls[0].values).toContain("assistant_memory_import_review"); + }); + + test("queues assistant memory imports and reopens rejected reviews for refreshed imports", async () => { + const { sql, calls } = makeSql(() => []); + + await queueAssistantMemoryReview(sql, "00000000-0000-4000-8000-000000000001", "tenant-a"); + + expect(calls[0].query).toContain("status = 'pending'::preserve.review_status"); + expect(calls[0].query).toContain("rq.status = 'rejected'::preserve.review_status"); + expect(calls[0].values).toContain("assistant_memory_import_review"); + expect(calls[1].query).toContain("INSERT INTO preserve.review_queue"); + expect(calls[1].query).toContain("a.source_type = ANY"); + expect(calls[1].values).toContain("tenant-a"); + }); + + test("rejects assistant memory review and demotes any approved prompt memory", async () => { + const { sql, calls } = makeSql((query) => { + if (query.includes("FROM preserve.review_queue rq")) return [{ review_id: "review-1", review_status: "approved", artifact_id: "artifact-1" }]; + if (query.includes("FROM preserve.memory m")) return [{ memory_id: "memory-1" }]; + return []; + }); + + const updated = await rejectAssistantMemoryReview(sql, "review-1", { notes: "not useful", actor: "test" }); + + expect(updated).toBe(true); + expect(calls.some((call) => call.query.includes("rq.status IN ('pending'::preserve.review_status,'approved'::preserve.review_status)"))).toBe(true); + expect(calls.some((call) => 
call.query.includes("DELETE FROM preserve.memory_support"))).toBe(true); + expect(calls.some((call) => call.query.includes("governance_status = 'suppressed'"))).toBe(true); + expect(calls.some((call) => call.query.includes("lifecycle_state = 'retired'"))).toBe(true); + expect(calls.some((call) => call.query.includes("status = 'rejected'::preserve.review_status"))).toBe(true); + expect(calls.some((call) => call.query.includes("preservation_state = 'archived'"))).toBe(true); + expect(calls.some((call) => call.values.includes("not useful"))).toBe(true); + }); + + test("promotion writes governed prompt memory, support links, review approval, and artifact eligibility", async () => { + const { sql, calls } = makeSql((query) => { + if (query.includes("FROM preserve.review_queue rq") && query.includes("FOR UPDATE")) { + return [{ + review_id: "review-1", + artifact_id: "artifact-1", + source_type: "pai_auto_memory", + source_key: "pai:1", + scope_path: "project:memory", + project_entity_id: "project-1", + original_path: "auto.md", + }]; + } + if (query.includes("JOIN preserve.fact f")) { + return [{ + fact_id: "fact-1", + episode_id: "episode-1", + predicate: "pai_auto_memory_content", + object_value: { content: "Codex must use BrainCore native snapshots." 
}, + confidence: 0.91, + priority: 1, + }]; + } + if (query.includes("INSERT INTO preserve.memory")) return [{ memory_id: "memory-1" }]; + return []; + }); + + const result = await promoteAssistantMemoryReview(sql, "review-1", { notes: "approved", scopePath: "project:braincore" }); + + expect(result.memoryId).toBe("memory-1"); + expect(result.scopePath).toBe("project:braincore"); + expect(result.trustClass).toBe("human_curated"); + expect(result.idempotent).toBe(false); + expect(calls.some((call) => call.query.includes("DELETE FROM preserve.memory_support"))).toBe(true); + expect(calls.some((call) => call.query.includes("INSERT INTO preserve.memory_support"))).toBe(true); + expect(calls.some((call) => call.query.includes("status = 'approved'::preserve.review_status"))).toBe(true); + expect(calls.some((call) => call.query.includes("can_promote_memory = true"))).toBe(true); + expect(calls.some((call) => call.values.includes("project:braincore"))).toBe(true); + }); + + test("promotion reports repeat approvals as idempotent", async () => { + const { sql } = makeSql((query) => { + if (query.includes("FROM preserve.review_queue rq") && query.includes("FOR UPDATE")) { + return [{ + review_id: "review-1", + review_status: "approved", + artifact_id: "artifact-1", + source_type: "vestige_memory", + source_key: "vestige:1", + scope_path: "project:memory", + project_entity_id: "project-1", + original_path: "memory.jsonl", + }]; + } + if (query.includes("JOIN preserve.fact f")) { + return [{ fact_id: "fact-1", episode_id: "episode-1", predicate: "vestige_memory_content", object_value: "Memory text", confidence: 0.8 }]; + } + if (query.includes("INSERT INTO preserve.memory")) return [{ memory_id: "memory-1" }]; + return []; + }); + + const result = await promoteAssistantMemoryReview(sql, "review-1"); + + expect(result.idempotent).toBe(true); + }); + + test("loads detailed review facts for operator preview", async () => { + const { sql } = makeSql((query) => { + if 
(query.includes("GROUP BY rq.review_id")) return [{ + review_id: "review-1", status: "pending", reason: "assistant_memory_import_review", artifact_id: "artifact-1", source_type: "pai_auto_memory", source_key: "pai:1", fact_count: 1, + }]; + if (query.includes("JOIN preserve.fact f")) return [{ fact_id: "fact-1", predicate: "pai_auto_memory_content", object_value: { content: "Preview this memory" }, confidence: 0.9 }]; + return []; + }); + + const detail = await getAssistantMemoryReview(sql, "review-1"); + + expect(detail?.facts[0].value).toBe("Preview this memory"); + }); + + test("summarizes review queue stats and renders export markdown", async () => { + const { sql } = makeSql(() => [ + { status: "pending", source_type: "pai_auto_memory", count: 2 }, + { status: "approved", source_type: "vestige_memory", count: 1 }, + ]); + + const stats = await assistantMemoryReviewStats(sql); + const markdown = renderAssistantReviewQueueMarkdown([{ reviewId: "r1", status: "pending", reason: "assistant_memory_import_review", sourceType: "pai_auto_memory", sourceKey: "pai:1", factCount: 2 }]); + + expect(stats.total).toBe(3); + expect(stats.byStatus.pending).toBe(2); + expect(markdown).toContain("# BrainCore Assistant Memory Review Queue"); + expect(markdown).toContain("pai:1"); + }); + + test("demotion suppresses assistant-import prompt memory and resets review for re-review", async () => { + const { sql, calls } = makeSql((query) => { + if (query.includes("FROM preserve.memory") && query.includes("FOR UPDATE")) { + return [{ + memory_id: "memory-1", + governance_meta: { assistantImport: true, reviewId: "11111111-1111-4111-8111-111111111111", sourceKey: "pai:1" }, + governance_status: "validated", + source_class: "imported_knowledge", + trust_class: "human_curated", + }]; + } + if (query.includes("UPDATE preserve.review_queue")) return [{ artifact_id: "artifact-1" }]; + return []; + }); + + const result = await demoteAssistantMemoryPromotion(sql, "memory-1", { notes: "bad 
memory" }); + + expect(result.demoted).toBe(true); + expect(result.resetReview).toBe(true); + expect(result.reviewId).toBe("11111111-1111-4111-8111-111111111111"); + expect(calls.some((call) => call.query.includes("governance_status = 'suppressed'"))).toBe(true); + expect(calls.some((call) => call.query.includes("status = 'pending'::preserve.review_status"))).toBe(true); + expect(calls.some((call) => call.query.includes("governance_meta->>'assistantImport' = 'true'"))).toBe(true); + }); + + test("demotion refuses non-assistant imported memories", async () => { + const { sql } = makeSql(() => []); + + await expect(demoteAssistantMemoryPromotion(sql, "memory-1", { notes: "wrong workflow" })).rejects.toThrow("not found"); + }); + + test("demotion recovers review through support links when metadata was redacted", async () => { + const { sql, calls } = makeSql((query) => { + if (query.includes("FROM preserve.memory") && query.includes("FOR UPDATE")) { + return [{ + memory_id: "memory-1", + governance_meta: { reviewId: "[REDACTED_TOKEN]", sourceKey: "pai_auto_memory:[REDACTED_TOKEN]" }, + governance_status: "validated", + source_class: "imported_knowledge", + trust_class: "human_curated", + }]; + } + if (query.includes("FROM preserve.memory_support ms")) { + return [{ review_id: "22222222-2222-4222-8222-222222222222", source_key: "pai_auto_memory:feedback_codex_review_before_approve" }]; + } + if (query.includes("UPDATE preserve.review_queue")) return [{ artifact_id: "artifact-1" }]; + return []; + }); + + const result = await demoteAssistantMemoryPromotion(sql, "memory-1", { notes: "rollback drill" }); + + expect(result.resetReview).toBe(true); + expect(result.reviewId).toBe("22222222-2222-4222-8222-222222222222"); + expect(result.sourceKey).toBe("pai_auto_memory:feedback_codex_review_before_approve"); + expect(calls.some((call) => call.query.includes("FROM preserve.memory_support ms"))).toBe(true); + }); + +}); diff --git a/src/__tests__/braincore-shadow-eval.test.ts 
b/src/__tests__/braincore-shadow-eval.test.ts new file mode 100644 index 0000000..719b8db --- /dev/null +++ b/src/__tests__/braincore-shadow-eval.test.ts @@ -0,0 +1,52 @@ +import { describe, expect, test } from "bun:test"; +import { runBrainCoreShadowEval } from "../memory/shadow-eval"; + +process.env.BRAINCORE_POSTGRES_DSN ??= ["postgresql", "://", "postgres:postgres@localhost:5432/postgres"].join(""); + +describe("BrainCore shadow eval", () => { + test("computes pass/fail metrics from snapshot output", async () => { + const sql = ((strings: TemplateStringsArray, ...values: unknown[]) => { + const query = strings.join("?"); + if (query.includes("SELECT") && query.includes("m.memory_id::text")) { + return Promise.resolve([{ + memory_id: "33333333-3333-3333-3333-333333333333", memory_type: "heuristic", title: "BrainCore recall", narrative: "BrainCore native memory snapshot works.", confidence: 0.9, scope_path: "project:memory", priority: 1, namespace: "semantic", governance_status: "validated", source_class: "imported_knowledge", trust_class: "human_curated", quality_score: 0.9, strength: 0.8, token_count: 8, text_rank: 1, + }]); + } + return Promise.resolve([]); + }) as any; + sql.json = (value: unknown) => value; + + const result = await runBrainCoreShadowEval(sql, [{ + name: "memory", cwd: "/workspace/memory", prompt: "braincore memory", expectedTerms: ["BrainCore native"], forbiddenTerms: ["forbidden"], maxTokens: 500, + }]); + + expect(result.total).toBe(1); + expect(result.passed).toBe(true); + expect(result.usefulRate).toBe(1); + }); + + test("treats expected-empty negative controls as useful only when no prompt package is returned", async () => { + const sql = ((strings: TemplateStringsArray, ...values: unknown[]) => { + const query = strings.join("?"); + if (query.includes("SELECT") && query.includes("m.memory_id::text") && JSON.stringify(values).includes("project:memory")) { + return Promise.resolve([{ + memory_id: "33333333-3333-4333-8333-333333333333", 
memory_type: "heuristic", title: "BrainCore recall", narrative: "BrainCore native memory snapshot works.", confidence: 0.9, scope_path: "project:memory", priority: 1, namespace: "semantic", governance_status: "validated", source_class: "imported_knowledge", trust_class: "human_curated", quality_score: 0.9, strength: 0.8, token_count: 8, text_rank: 1, + }]); + } + return Promise.resolve([]); + }) as any; + sql.json = (value: unknown) => value; + + const result = await runBrainCoreShadowEval(sql, [ + { name: "positive", cwd: "/workspace/memory", prompt: "braincore memory", expectedTerms: ["BrainCore native"], maxTokens: 500 }, + { name: "negative", cwd: "/workspace/unrelated-demo", prompt: "unrelated persona workflow", expectEmpty: true, forbiddenTerms: ["BrainCore native"], maxTokens: 500 }, + ]); + + expect(result.total).toBe(2); + expect(result.passed).toBe(true); + expect(result.badRecall).toBe(0); + expect(result.cases[1].useful).toBe(true); + expect(result.cases[1].promptEligible).toBe(0); + }); + +}); diff --git a/src/__tests__/braincore-snapshot.test.ts b/src/__tests__/braincore-snapshot.test.ts new file mode 100644 index 0000000..d6e7a17 --- /dev/null +++ b/src/__tests__/braincore-snapshot.test.ts @@ -0,0 +1,121 @@ +import { describe, expect, test } from "bun:test"; +import { renderBrainCoreSnapshot, resolveSnapshotBudget, resolveSnapshotDomains } from "../memory/snapshot"; +import type { ContextRecallResult } from "../memory/governance"; + +describe("BrainCore snapshot", () => { + test("infers workspace project domain from cwd", () => { + const domains = resolveSnapshotDomains( + "/workspace/memory", + undefined, + "braincore memory runtime", + ); + expect(domains).toContain("memory"); + expect(domains).not.toContain("braincore"); + }); + + test("infers configured project marker domains without hard-coded workspace paths", () => { + const previous = process.env.BRAINCORE_PROJECT_DOMAIN_MARKERS; + process.env.BRAINCORE_PROJECT_DOMAIN_MARKERS = "workspaces"; 
+ try { + const domains = resolveSnapshotDomains( + "/workspace/workspaces/memory/docs", + "/workspace/repo-root", + ); + expect(domains).toEqual(["memory", "repo-root"]); + } finally { + if (previous === undefined) { + delete process.env.BRAINCORE_PROJECT_DOMAIN_MARKERS; + } else { + process.env.BRAINCORE_PROJECT_DOMAIN_MARKERS = previous; + } + } + }); + + test("renders no-results gate without pretending imports are prompt eligible", () => { + const recall: ContextRecallResult = { + trigger: "braincore_snapshot", + mode: "shadow", + injected: false, + results: [], + promptPackage: [], + omitted: [], + totalTokens: 0, + }; + + const markdown = renderBrainCoreSnapshot( + { cwd: "/workspace/memory", gitRoot: "/workspace/braincore-demo", mode: "shadow" }, + ["memory"], + recall, + ); + + expect(markdown).toContain("# BrainCore Memory Snapshot"); + expect(markdown).toContain("Candidate domains: memory"); + expect(markdown).toContain("No Prompt-Eligible BrainCore Memories"); + expect(markdown).toContain("remain gated until explicitly approved"); + }); + + test("enforces snapshot token budget on rendered output", async () => { + const sql = (() => Promise.resolve([])) as any; + sql.json = (value: unknown) => value; + const result = await (await import("../memory/snapshot")).buildBrainCoreSnapshot(sql, { + cwd: "/workspace/memory", + gitRoot: "/workspace/memory", + prompt: "memory ".repeat(200), + mode: "shadow", + maxTokens: 40, + }); + + expect(result.truncated).toBe(true); + expect(result.tokenEstimate).toBeLessThanOrEqual(40); + expect(result.markdown).toContain("Budget Notice"); + }); + + test("does not treat prompt words as candidate domains", () => { + const domains = resolveSnapshotDomains( + "/workspace/memory", + "/workspace/project-root", + "For verification only inspect Codex shared memory snapshot", + ); + expect(domains).toEqual(["project-root"]); + }); + + test("profile budgets cap max-token overrides", () => { + 
expect(resolveSnapshotBudget("compact")).toBe(1200); + expect(resolveSnapshotBudget("risk")).toBe(3000); + expect(resolveSnapshotBudget("deep")).toBe(5000); + expect(resolveSnapshotBudget("compact", 3000)).toBe(1200); + expect(resolveSnapshotBudget("risk", 1000)).toBe(1000); + }); + + test("compact profile renders bounded memory cards with intact metadata", () => { + const recall: ContextRecallResult = { + trigger: "braincore_snapshot", + mode: "shadow", + injected: false, + results: [], + promptPackage: [{ + section: "validated_facts", + memoryId: "11111111-1111-4111-8111-111111111111", + role: "fact", + reason: "braincore-runtime-snapshot", + content: "Important memory. ".repeat(300), + tokenCount: 300, + governanceStatus: "validated", + }], + omitted: [], + totalTokens: 300, + }; + + const markdown = renderBrainCoreSnapshot( + { cwd: "/workspace/memory", gitRoot: "/workspace/project-root", mode: "shadow", profile: "compact" }, + ["memory", "project-root"], + recall, + "compact", + ); + + expect(markdown).toContain("Profile: compact"); + expect(markdown).toContain("Memory ID: 11111111-1111-4111-8111-111111111111"); + expect(markdown).toContain("Governance: validated"); + expect(markdown).toContain("Full narrative retained in BrainCore"); + }); +}); diff --git a/src/__tests__/migrate.test.ts b/src/__tests__/migrate.test.ts index 8d4ba27..1044c90 100644 --- a/src/__tests__/migrate.test.ts +++ b/src/__tests__/migrate.test.ts @@ -29,6 +29,8 @@ describe("migration plan", () => { "020_embedding_index_roles.sql", "021_enterprise_lifecycle.sql", "022_memory_governance.sql", + "023_assistant_memory_sources.sql", + "024_project_doc_sources.sql", ]); }); @@ -183,6 +185,19 @@ describe("migration plan", () => { expect(marker).not.toContain("'preserve.memory_edge'::regclass"); }); + test("assistant memory source marker checks additive source enum values", () => { + const marker = markerSqlForMigration("023_assistant_memory_sources.sql"); + 
expect(marker).toContain("vestige_memory"); + expect(marker).toContain("pai_auto_memory"); + expect(marker).toContain("source_type"); + }); + + test("project doc source marker checks additive source enum value", () => { + const marker = markerSqlForMigration("024_project_doc_sources.sql"); + expect(marker).toContain("project_doc"); + expect(marker).toContain("source_type"); + }); + test("migration checksums are stable sha256 strings", () => { expect(migrationChecksum("SELECT 1;")).toMatch(/^[a-f0-9]{64}$/); expect(migrationChecksum("SELECT 1;")).toBe(migrationChecksum("SELECT 1;")); diff --git a/src/__tests__/project-doc-parser.test.ts b/src/__tests__/project-doc-parser.test.ts new file mode 100644 index 0000000..797f394 --- /dev/null +++ b/src/__tests__/project-doc-parser.test.ts @@ -0,0 +1,65 @@ +import { describe, expect, test } from "bun:test"; +import { mkdtemp, writeFile } from "fs/promises"; +import { tmpdir } from "os"; +import { join } from "path"; +import { parseProjectDocManifest } from "../extract/project-doc-parser"; + +process.env.BRAINCORE_POSTGRES_DSN ??= "postgres://test:test@localhost:5432/test"; + +describe("project doc parser", () => { + test("parses only manifest-selected facts with project scope and evidence", async () => { + const dir = await mkdtemp(join(tmpdir(), "braincore-project-doc-")); + const docPath = join(dir, "README.md"); + const manifestPath = join(dir, "manifest.json"); + await writeFile(docPath, [ + "# Project Ops", + "", + "## Authority Model", + "The task tracker is the source of truth for task state.", + "The legacy scheduler is retired.", + ].join("\n")); + await writeFile(manifestPath, JSON.stringify({ + projectKey: "example_project", + scopePath: "project:example_project", + docs: [{ + id: "authority-model", + path: docPath, + title: "Authority model", + facts: [{ + predicate: "runtime_authority", + objectValue: "The legacy scheduler is retired; local runtime owners are authoritative.", + factKind: "constraint", + 
segmentLabel: "Authority Model", + }], + }], + })); + + const [item] = await parseProjectDocManifest(manifestPath); + + expect(item.sourceType).toBe("project_doc"); + expect(item.sourceKey).toMatch(/^project_doc:example_project:authority-model:/); + expect(item.result.scope_path).toBe("project:example_project/doc:authority-model"); + expect(item.result.entities).toContainEqual({ name: "example_project", type: "project" }); + expect(item.result.facts).toHaveLength(1); + expect(item.result.facts[0]).toMatchObject({ + subject: "example_project", + predicate: "runtime_authority", + assertion_class: "deterministic", + segment_ids: ["seg_2"], + }); + }); + + test("rejects manifests without explicit facts", async () => { + const dir = await mkdtemp(join(tmpdir(), "braincore-project-doc-bad-")); + const docPath = join(dir, "README.md"); + const manifestPath = join(dir, "manifest.json"); + await writeFile(docPath, "# Empty\n"); + await writeFile(manifestPath, JSON.stringify({ + projectKey: "example_project", + scopePath: "project:example_project", + docs: [{ id: "empty", path: docPath, facts: [] }], + })); + + await expect(parseProjectDocManifest(manifestPath)).rejects.toThrow("requires explicit facts"); + }); +}); diff --git a/src/__tests__/project-doc-review.test.ts b/src/__tests__/project-doc-review.test.ts new file mode 100644 index 0000000..2540d0f --- /dev/null +++ b/src/__tests__/project-doc-review.test.ts @@ -0,0 +1,196 @@ +process.env.BRAINCORE_POSTGRES_DSN ??= ["postgresql", "://", "postgres:postgres@localhost:5432/postgres"].join(""); + +import { describe, expect, test } from "bun:test"; +import { mkdtemp, writeFile } from "fs/promises"; +import { tmpdir } from "os"; +import { join } from "path"; +import { + applyProjectDocReviewDecisions, + listProjectDocReviews, + queueProjectDocReview, + renderProjectDocReviewPacket, +} from "../memory/project-doc-review"; + +function makeSql(resolver: (query: string, values: unknown[]) => unknown[] | Promise) { + const calls: 
Array<{ query: string; values: unknown[] }> = []; + const sql = ((strings: TemplateStringsArray, ...values: unknown[]) => { + const query = strings.join("?"); + calls.push({ query, values }); + return Promise.resolve(resolver(query, values)); + }) as any; + sql.json = (value: unknown) => value; + sql.begin = async (callback: (tx: any) => unknown) => callback(sql); + return { sql, calls }; +} + +describe("project doc review", () => { + test("lists and renders project doc value-review rows", async () => { + const { sql, calls } = makeSql(() => [{ + review_id: "review-1", + status: "pending", + source_key: "project_doc:example_project:authority-model:abc", + scope_path: "project:example_project/doc:authority-model", + original_path: "/ops/README.md", + fact_count: 3, + created_at: "2026-05-22", + }]); + + const rows = await listProjectDocReviews(sql, { status: "pending", limit: 1 }); + const markdown = renderProjectDocReviewPacket(rows); + + expect(rows[0].reviewId).toBe("review-1"); + expect(rows[0].factCount).toBe(3); + expect(markdown).toContain("Value gate"); + expect(calls[0].query).toContain("a.source_type = 'project_doc'::preserve.source_type"); + }); + + test("queues review only for project_doc artifacts", async () => { + const { sql, calls } = makeSql(() => []); + + await queueProjectDocReview(sql, "00000000-0000-4000-8000-000000000001", "tenant-a"); + + expect(calls[0].query).toContain("preserve.review_queue"); + expect(calls[0].query).toContain("'project_doc'::preserve.source_type"); + expect(calls[0].values).toContain("tenant-a"); + }); + + test("approved decisions require useful operator context", async () => { + const dir = await mkdtemp(join(tmpdir(), "braincore-project-doc-review-")); + const path = join(dir, "decisions.json"); + await writeFile(path, JSON.stringify({ decisions: [{ reviewId: "review-1", decision: "approved", title: "short" }] })); + const { sql } = makeSql((query) => { + if (query.includes("FROM preserve.review_queue rq")) return [{ + 
review_id: "review-1", + artifact_id: "artifact-1", + source_key: "project_doc:example_project:authority-model:abc", + scope_path: "project:example_project/doc:authority-model", + project_entity_id: null, + original_path: "/ops/README.md", + }]; + return []; + }); + + await expect(applyProjectDocReviewDecisions(sql, path)).rejects.toThrow("title"); + }); + + test("approved decisions write governed memory and support links", async () => { + const dir = await mkdtemp(join(tmpdir(), "braincore-project-doc-review-ok-")); + const path = join(dir, "decisions.json"); + await writeFile(path, JSON.stringify({ decisions: [{ + reviewId: "review-1", + decision: "approved", + title: "Example project authority model", + content: "The task tracker owns task state while local runtime owners govern execution state.", + materiality: "Prevents agents from treating stale legacy control-plane docs as current.", + retrievalUseCase: "Inject when an agent opens example_project or asks what controls task and runtime state.", + }] })); + const { sql, calls } = makeSql((query) => { + if (query.includes("FROM preserve.review_queue rq")) return [{ + review_id: "review-1", + artifact_id: "artifact-1", + source_key: "project_doc:example_project:authority-model:abc", + scope_path: "project:example_project/doc:authority-model", + project_entity_id: null, + original_path: "/ops/README.md", + }]; + if (query.includes("JOIN preserve.fact f")) return [{ + fact_id: "fact-1", + episode_id: "episode-1", + predicate: "runtime_authority", + object_value: "The legacy scheduler is retired.", + confidence: 0.94, + priority: 2, + created_at: "2026-05-22", + }]; + if (query.includes("INSERT INTO preserve.memory")) return [{ memory_id: "memory-1" }]; + return []; + }); + + const result = await applyProjectDocReviewDecisions(sql, path, { actor: "test" }); + + expect(result).toEqual({ approved: 1, rejected: 0, memories: ["memory-1"] }); + expect(calls.some((call) => 
call.query.includes("'human_curated'::preserve.memory_trust_class"))).toBe(true); + expect(calls.some((call) => call.query.includes("INSERT INTO preserve.memory_support"))).toBe(true); + expect(calls.some((call) => call.query.includes("status = 'approved'::preserve.review_status"))).toBe(true); + }); + + test("rejected approved decisions demote published project-doc memory", async () => { + const dir = await mkdtemp(join(tmpdir(), "braincore-project-doc-review-reject-")); + const path = join(dir, "decisions.json"); + await writeFile(path, JSON.stringify({ decisions: [{ + reviewId: "review-1", + decision: "rejected", + notes: "obsolete guidance", + }] })); + const { sql, calls } = makeSql((query) => { + if (query.includes("FROM preserve.review_queue rq")) return [{ + review_id: "review-1", + review_status: "approved", + artifact_id: "artifact-1", + source_key: "project_doc:example_project:authority-model:abc", + scope_path: "project:example_project/doc:authority-model", + project_entity_id: null, + original_path: "/ops/README.md", + }]; + if (query.includes("FROM preserve.memory m")) return [{ memory_id: "memory-1" }]; + return []; + }); + + const result = await applyProjectDocReviewDecisions(sql, path, { actor: "test" }); + + expect(result).toEqual({ approved: 0, rejected: 1, memories: [] }); + expect(calls.some((call) => call.query.includes("DELETE FROM preserve.memory_support"))).toBe(true); + expect(calls.some((call) => call.query.includes("governance_status = 'suppressed'"))).toBe(true); + expect(calls.some((call) => call.query.includes("lifecycle_state = 'retired'"))).toBe(true); + expect(calls.some((call) => call.query.includes("status = 'rejected'::preserve.review_status"))).toBe(true); + expect(calls.some((call) => call.query.includes("preservation_state = 'archived'"))).toBe(true); + }); + + test("project-doc re-approvals use a stable review fingerprint independent of title", async () => { + const dir = await mkdtemp(join(tmpdir(), 
"braincore-project-doc-review-fingerprint-")); + const firstPath = join(dir, "first.json"); + const secondPath = join(dir, "second.json"); + const baseDecision = { + reviewId: "review-1", + decision: "approved", + content: "The task tracker owns task state while local runtime owners govern execution state.", + materiality: "Prevents agents from treating stale legacy control-plane docs as current.", + retrievalUseCase: "Inject when an agent opens example_project or asks what controls task and runtime state.", + }; + await writeFile(firstPath, JSON.stringify({ decisions: [{ ...baseDecision, title: "Example project authority model" }] })); + await writeFile(secondPath, JSON.stringify({ decisions: [{ ...baseDecision, title: "Edited example project authority model" }] })); + const fingerprints: string[] = []; + const { sql } = makeSql((query, values) => { + if (query.includes("FROM preserve.review_queue rq")) return [{ + review_id: "review-1", + review_status: "approved", + artifact_id: "artifact-1", + source_key: "project_doc:example_project:authority-model:abc", + scope_path: "project:example_project/doc:authority-model", + project_entity_id: null, + original_path: "/ops/README.md", + }]; + if (query.includes("JOIN preserve.fact f")) return [{ + fact_id: "fact-1", + episode_id: "episode-1", + predicate: "runtime_authority", + object_value: "The legacy scheduler is retired.", + confidence: 0.94, + priority: 2, + created_at: "2026-05-22", + }]; + if (query.includes("INSERT INTO preserve.memory (")) { + fingerprints.push(values[2] as string); + return [{ memory_id: "memory-1" }]; + } + return []; + }); + + await applyProjectDocReviewDecisions(sql, firstPath, { actor: "test" }); + await applyProjectDocReviewDecisions(sql, secondPath, { actor: "test" }); + + expect(fingerprints.length).toBe(2); + expect(fingerprints[1]).toBe(fingerprints[0]); + }); + +}); diff --git a/src/cli.ts b/src/cli.ts index 990b3c8..5885200 100644 --- a/src/cli.ts +++ b/src/cli.ts @@ -38,6 +38,9 @@ 
function printUsage(): void { console.log(" --grafana Extract Grafana dashboards/alerts"); console.log(" --asana-export Extract exported Asana task JSON/JSONL"); console.log(" --git-commits Extract local git commits or exported JSON/JSONL"); + console.log(" --vestige-export Extract Vestige JSON/JSONL memory export"); + console.log(" --pai-auto-memory Extract PAI auto-memory markdown files"); + console.log(" --project-doc-manifest Extract curated project documentation manifest"); console.log(" --since Git ref for --git-commits repo scans"); console.log(" --pending Extract all pending artifacts"); console.log(" --use-claude Escalate to Claude CLI for semantic"); @@ -84,6 +87,7 @@ function printUsage(): void { console.log(" backfill-intelligence Add lifecycle intelligence rows for existing targets"); console.log(" stats Show lifecycle outbox/intelligence counts"); console.log(" memory Lifecycle admin surface for memory targets"); + console.log(" snapshot Build an audited BrainCore memory snapshot"); console.log(" status-set Set lifecycle intelligence status only"); console.log(" feedback-record Record lifecycle feedback and score audit"); console.log(" context Context recall audit surface"); @@ -103,7 +107,7 @@ function printUsage(): void { console.log(" --vacuum VACUUM ANALYZE core tables"); console.log(" --detect-stale Detect & demote stale memories"); console.log(" --stats Show table counts, index sizes, staleness"); - console.log(" migrate Run database migrations 001-021"); + console.log(" migrate Run database migrations 001-024"); console.log(" help, --help, -h Show this help message"); } @@ -211,6 +215,17 @@ function printMemoryAdminUsage(): void { console.log(" event --process [--limit ]"); console.log(" event --prune [--before ]"); console.log(" recall --trigger [--goal ] [--cue ] [--max-tokens ] [--mode shadow|eval|default_on|off]"); + console.log(" assistant-review list [--status pending|approved|rejected|deferred] [--limit ]"); + console.log(" 
project-doc-review list [--status pending|approved|rejected|deferred] [--limit ]"); + console.log(" project-doc-review export [--status pending] [--limit ] [--json]"); + console.log(" project-doc-review apply-decisions --decisions [--actor ]"); + console.log(" assistant-review stats"); + console.log(" assistant-review show --review-id [--fact-limit ]"); + console.log(" assistant-review export [--status pending] [--limit ]"); + console.log(" assistant-review approve --review-id [--scope ] [--notes ]"); + console.log(" assistant-review reject --review-id [--notes ]"); + console.log(" assistant-review suppress --review-id [--notes ]"); + console.log(" assistant-review demote --memory-id [--notes ]"); console.log(" read --memory-id [--max-tokens ]"); console.log(" status --memory-id --set [--reason ]"); console.log(" feedback --memory-id --signal [--outcome ]"); @@ -231,6 +246,20 @@ function printContextUsage(): void { console.log(" --goal --scope --session-key --injected --total-tokens "); } +function printSnapshotUsage(): void { + console.log("Usage: braincore snapshot build --cwd [options]"); + console.log(""); + console.log("Options:"); + console.log(" --git-root Repository root used for domain inference"); + console.log(" --prompt Goal or task prompt used as recall cues"); + console.log(" --mode shadow|eval|default_on|off"); + console.log(" --profile compact|risk|deep"); + console.log(" --max-tokens Recall budget (default 3000)"); + console.log(" --limit Result limit (default 20)"); + console.log(" --json Print structured result instead of markdown"); + console.log(" snapshot eval --case-json [--json]"); +} + // Handle help flags explicitly BEFORE the commands[] dispatch so they never // touch the database proxy or fall through the unknown-command error branch. 
if (!command || isHelpArg(command)) { @@ -278,6 +307,11 @@ if (command === "context" && isHelpArg(args[0])) { process.exit(0); } +if (command === "snapshot" && isHelpArg(args[0])) { + printSnapshotUsage(); + process.exit(0); +} + // ── Commands ───────────────────────────────────────────────────────────────── const commands: Record Promise> = { @@ -335,6 +369,9 @@ const commands: Record Promise> = { const grafana = hasFlag("grafana"); const asanaExport = getFlag("asana-export"); const gitCommits = getFlag("git-commits"); + const vestigeExport = getFlag("vestige-export"); + const paiAutoMemory = getFlag("pai-auto-memory"); + const projectDocManifest = getFlag("project-doc-manifest"); const since = getFlag("since"); if (sessionPath) { @@ -382,6 +419,21 @@ const commands: Record Promise> = { return; } + if (vestigeExport) { + await extractVestigeExport(vestigeExport, dryRun); + return; + } + + if (paiAutoMemory) { + await extractPaiAutoMemory(paiAutoMemory, dryRun); + return; + } + + if (projectDocManifest) { + await extractProjectDocManifest(projectDocManifest, dryRun); + return; + } + if (!incidentPath && !pending) { console.error( "Usage: braincore extract --incident [--use-claude] [--skip-semantic] [--dry-run]", @@ -416,6 +468,15 @@ const commands: Record Promise> = { console.error( " braincore extract --git-commits [--since ] [--dry-run]", ); + console.error( + " braincore extract --vestige-export [--dry-run]", + ); + console.error( + " braincore extract --pai-auto-memory [--dry-run]", + ); + console.error( + " braincore extract --project-doc-manifest [--dry-run]", + ); } if (incidentPath) { @@ -920,6 +981,67 @@ const commands: Record Promise> = { process.exit(1); }, + snapshot: async () => { + const subcommand = args[0]; + if (subcommand === "eval") { + const caseJson = getFlag("case-json"); + if (!caseJson) { + console.error("Usage: braincore snapshot eval --case-json [--json]"); + process.exit(1); + } + const { readFile } = await import("fs/promises"); + const 
{ sql, testConnection } = await import("./db"); + const { runBrainCoreShadowEval } = await import("./memory/shadow-eval"); + const connected = await testConnection(); + if (!connected) { process.exit(1); } + try { + const cases = JSON.parse(await readFile(caseJson, "utf8")); + if (!Array.isArray(cases)) throw new Error("case-json must contain an array"); + const result = await runBrainCoreShadowEval(sql, cases); + console.log(JSON.stringify(result, null, 2)); + if (!result.passed && !hasFlag("no-fail")) process.exitCode = 1; + } finally { + await sql.end(); + } + return; + } + + if (subcommand !== "build") { + printSnapshotUsage(); + process.exit(subcommand && !isHelpArg(subcommand) ? 1 : 0); + } + const cwd = getFlag("cwd"); + if (!cwd) { + console.error("Usage: braincore snapshot build --cwd [--prompt ]"); + process.exit(1); + } + const { sql, testConnection } = await import("./db"); + const { buildBrainCoreSnapshot } = await import("./memory/snapshot"); + const connected = await testConnection(); + if (!connected) { process.exit(1); } + try { + const profile = getFlag("profile"); + if (profile && !["compact", "risk", "deep"].includes(profile)) { + console.error("Invalid snapshot profile. Expected compact, risk, or deep."); + process.exit(1); + } + const maxTokens = Number(getFlag("max-tokens") ?? "0"); + const limit = Number(getFlag("limit") ?? "20"); + const result = await buildBrainCoreSnapshot(sql, { + cwd, + gitRoot: getFlag("git-root"), + prompt: getFlag("prompt"), + maxTokens: Number.isFinite(maxTokens) && maxTokens > 0 ? maxTokens : undefined, + mode: (getFlag("mode") as import("./memory/governance").ContextInjectionMode | undefined) ?? "shadow", + limit: Number.isFinite(limit) ? limit : 20, + profile: profile as import("./memory/snapshot").BrainCoreSnapshotProfile | undefined, + }); + console.log(hasFlag("json") ? 
JSON.stringify(result, null, 2) : result.markdown); + } finally { + await sql.end(); + } + }, + memory: async () => { const subcommand = args[0]; if (!subcommand || isHelpArg(subcommand)) { @@ -932,6 +1054,151 @@ const commands: Record Promise> = { const connected = await testConnection(); if (!connected) { process.exit(1); } + if (subcommand === "assistant-review") { + const action = args[1] ?? "list"; + const { + assistantMemoryReviewStats, + demoteAssistantMemoryPromotion, + getAssistantMemoryReview, + listAssistantMemoryReviews, + promoteAssistantMemoryReview, + rejectAssistantMemoryReview, + renderAssistantReviewQueueMarkdown, + } = await import("./memory/assistant-review"); + + if (action === "list") { + const limit = Number(getFlag("limit") ?? "50"); + const rows = await listAssistantMemoryReviews(sql, { + status: getFlag("status") ?? "pending", + limit: Number.isFinite(limit) ? limit : 50, + }); + console.log(JSON.stringify(rows, null, 2)); + await sql.end(); + return; + } + + if (action === "stats") { + const stats = await assistantMemoryReviewStats(sql); + console.log(JSON.stringify(stats, null, 2)); + await sql.end(); + return; + } + + if (action === "show") { + const reviewId = getFlag("review-id"); + if (!reviewId) { + console.error("Usage: braincore memory assistant-review show --review-id [--fact-limit ]"); + await sql.end(); + process.exit(1); + } + const factLimit = Number(getFlag("fact-limit") ?? "20"); + const detail = await getAssistantMemoryReview(sql, reviewId, { factLimit: Number.isFinite(factLimit) ? factLimit : 20 }); + console.log(JSON.stringify(detail, null, 2)); + await sql.end(); + return; + } + + if (action === "export") { + const limit = Number(getFlag("limit") ?? "200"); + const rows = await listAssistantMemoryReviews(sql, { + status: getFlag("status") ?? "pending", + limit: Number.isFinite(limit) ? limit : 200, + }); + console.log(hasFlag("json") ? 
JSON.stringify(rows, null, 2) : renderAssistantReviewQueueMarkdown(rows)); + await sql.end(); + return; + } + + if (action === "demote" || action === "rollback") { + const memoryId = getFlag("memory-id"); + if (!memoryId) { + console.error("Usage: braincore memory assistant-review demote --memory-id [--notes ]"); + await sql.end(); + process.exit(1); + } + const result = await demoteAssistantMemoryPromotion(sql, memoryId, { + notes: getFlag("notes"), + actor: getFlag("actor") ?? "braincore-cli", + }); + console.log(JSON.stringify(result, null, 2)); + await sql.end(); + return; + } + + const reviewId = getFlag("review-id"); + if (!reviewId) { + console.error("Usage: braincore memory assistant-review --review-id [--scope ] [--notes ]"); + await sql.end(); + process.exit(1); + } + + if (action === "approve" || action === "promote") { + const result = await promoteAssistantMemoryReview(sql, reviewId, { + notes: getFlag("notes"), + actor: getFlag("actor") ?? "braincore-cli", + scopePath: getFlag("scope"), + }); + console.log(JSON.stringify(result, null, 2)); + await sql.end(); + return; + } + + if (action === "reject" || action === "suppress") { + const updated = await rejectAssistantMemoryReview(sql, reviewId, { + notes: getFlag("notes"), + suppressed: action === "suppress", + }); + console.log(JSON.stringify({ reviewId, updated }, null, 2)); + await sql.end(); + return; + } + + console.error(`Unknown assistant-review action: ${action}`); + printMemoryAdminUsage(); + await sql.end(); + process.exit(1); + } + + if (subcommand === "project-doc-review") { + const action = args[1] ?? "list"; + const { + applyProjectDocReviewDecisions, + listProjectDocReviews, + renderProjectDocReviewPacket, + } = await import("./memory/project-doc-review"); + + if (action === "list" || action === "export") { + const limit = Number(getFlag("limit") ?? (action === "export" ? "200" : "50")); + const rows = await listProjectDocReviews(sql, { + status: getFlag("status") ?? 
"pending", + limit: Number.isFinite(limit) ? limit : action === "export" ? 200 : 50, + }); + console.log(hasFlag("json") ? JSON.stringify(rows, null, 2) : renderProjectDocReviewPacket(rows)); + await sql.end(); + return; + } + + if (action === "apply-decisions") { + const decisions = getFlag("decisions"); + if (!decisions) { + console.error("Usage: braincore memory project-doc-review apply-decisions --decisions [--actor ]"); + await sql.end(); + process.exit(1); + } + const result = await applyProjectDocReviewDecisions(sql, decisions, { + actor: getFlag("actor") ?? "braincore-cli", + }); + console.log(JSON.stringify(result, null, 2)); + await sql.end(); + return; + } + + console.error(`Unknown project-doc-review action: ${action}`); + printMemoryAdminUsage(); + await sql.end(); + process.exit(1); + } + if (subcommand === "status-set" || subcommand === "feedback-record") { const targetKind = getFlag("target-kind"); const targetId = getFlag("target-id"); @@ -2092,6 +2359,8 @@ async function loadSourceItems( const { loadExtraction } = await import("./extract/load"); const { ensureSourceArtifact } = await import("./extract/source-loader"); + const { queueProjectDocReview } = await import("./memory/project-doc-review"); + const { queueAssistantMemoryReview } = await import("./memory/assistant-review"); const { sql, testConnection } = await import("./db"); console.log("\n[2/3] Loading into preserve schema..."); @@ -2116,6 +2385,11 @@ async function loadSourceItems( sql, item.sourceContent, ); + if (item.sourceType === "project_doc") { + await queueProjectDocReview(sql, artifact.artifactId); + } else if (item.sourceType === "vestige_memory" || item.sourceType === "pai_auto_memory") { + await queueAssistantMemoryReview(sql, artifact.artifactId); + } factsCreated += result.factsCreated; segmentsCreated += result.segmentsCreated; if (result.warnings.length > 0) { @@ -2223,6 +2497,42 @@ async function extractSession(sessionPath: string): Promise { await sql.end(); } +async 
function extractVestigeExport(path: string, dryRun?: boolean): Promise { + const { parseVestigeExport } = await import("./extract/vestige-parser"); + console.log("\n=== BrainCore Extract: Vestige Memory Export ===\n"); + console.log("[1/3] Parsing Vestige JSON/JSONL export..."); + const items = await parseVestigeExport(path); + if (items.length === 0) { + console.error("No Vestige memories found in export; refusing zero-record import."); + process.exit(1); + } + await loadSourceItems("Vestige memory", items, dryRun); +} + +async function extractPaiAutoMemory(path: string, dryRun?: boolean): Promise { + const { parsePaiAutoMemory } = await import("./extract/pai-auto-memory-parser"); + console.log("\n=== BrainCore Extract: PAI Auto Memory ===\n"); + console.log("[1/3] Parsing PAI auto-memory markdown files..."); + const items = await parsePaiAutoMemory(path); + if (items.length === 0) { + console.error("No PAI auto-memory files found; refusing zero-record import."); + process.exit(1); + } + await loadSourceItems("PAI auto memory", items, dryRun); +} + +async function extractProjectDocManifest(path: string, dryRun?: boolean): Promise { + const { parseProjectDocManifest } = await import("./extract/project-doc-parser"); + console.log("\n=== BrainCore Extract: Project Documentation ===\n"); + console.log("[1/3] Parsing curated project documentation manifest..."); + const items = await parseProjectDocManifest(path); + if (items.length === 0) { + console.error("No project documentation entries found; refusing zero-record import."); + process.exit(1); + } + await loadSourceItems("Project documentation", items, dryRun); +} + async function extractPersonalMemory(): Promise { const { parsePersonalMemory } = await import("./extract/personal-memory-parser"); const { loadExtraction } = await import("./extract/load"); diff --git a/src/extract/load.ts b/src/extract/load.ts index 76763f8..555284b 100644 --- a/src/extract/load.ts +++ b/src/extract/load.ts @@ -207,6 +207,27 @@ async 
function resolveEntity( return row.entity_id; } + +function lookupEntityId( + entityIdMap: Map, + name: string, +): string | undefined { + for (const type of [ + "incident", + "device", + "service", + "project", + "file", + "config_item", + "pattern_scope", + "session", + ]) { + const id = entityIdMap.get(`${type}:${name}`); + if (id) return id; + } + return undefined; +} + /** * Load all deterministic + semantic extractions into the preserve schema. * The `db` parameter is the postgres.js Sql instance (for db.json() access). @@ -407,26 +428,19 @@ export async function loadExtraction( continue; } - // Resolve subject entity — look up by all possible types - let subjectEntityId = - entityIdMap.get(`incident:${fact.subject}`) || - entityIdMap.get(`device:${fact.subject}`) || - entityIdMap.get(`service:${fact.subject}`); + // Resolve subject entity across known entity types before falling back. + let subjectEntityId = lookupEntityId(entityIdMap, fact.subject); if (!subjectEntityId) { - const eid = await resolveEntity(tx, fact.subject, "incident"); - entityIdMap.set(`incident:${fact.subject}`, eid); + const eid = await resolveEntity(tx, fact.subject, "config_item"); + entityIdMap.set(`config_item:${fact.subject}`, eid); subjectEntityId = eid; } // Resolve object entity if it references a known entity name let objectEntityId: string | null = null; if (typeof fact.object_value === "string") { - objectEntityId = - entityIdMap.get(`incident:${fact.object_value}`) || - entityIdMap.get(`device:${fact.object_value}`) || - entityIdMap.get(`service:${fact.object_value}`) || - null; + objectEntityId = lookupEntityId(entityIdMap, fact.object_value) ?? 
null; } const [factValidFrom, factValidTo] = validateDateRange(fact.valid_from, fact.valid_to); @@ -503,10 +517,7 @@ export async function loadExtraction( continue; } - let subjectEntityId = - entityIdMap.get(`incident:${fact.subject}`) || - entityIdMap.get(`device:${fact.subject}`) || - entityIdMap.get(`service:${fact.subject}`); + let subjectEntityId = lookupEntityId(entityIdMap, fact.subject); if (!subjectEntityId) { const eid = await resolveEntity(tx, fact.subject, "config_item"); diff --git a/src/extract/pai-auto-memory-parser.ts b/src/extract/pai-auto-memory-parser.ts new file mode 100644 index 0000000..6d1f2dd --- /dev/null +++ b/src/extract/pai-auto-memory-parser.ts @@ -0,0 +1,147 @@ +import { readFile, readdir } from "fs/promises"; +import { basename, join } from "path"; +import { parse as parseYAML } from "yaml"; +import type { DeterministicResult, Entity, Fact, Segment } from "./deterministic"; +import { assertUniqueSourceKeys, type SourceExtraction } from "./source-export"; + +interface PaiAutoFrontmatter { + name?: string; + description?: string; + type?: string; + tags?: string[]; + originSessionId?: string; + created?: string; + updated?: string; +} + +export async function parsePaiAutoMemory(memoryDir: string): Promise { + const files = (await readdir(memoryDir)) + .filter((file) => file.endsWith(".md") && file !== "MEMORY.md") + .sort(); + const items: SourceExtraction[] = []; + + for (const file of files) { + const path = join(memoryDir, file); + const raw = await readFile(path, "utf-8"); + const slug = basename(file, ".md"); + const { frontmatter, body } = parseFrontmatter(raw); + const title = frontmatter.name || slug; + const memoryType = frontmatter.type || inferType(slug); + const sourceKey = `pai_auto_memory:${safeKeyPart(slug)}`; + const segment: Segment = { + ordinal: 1, + section_label: title.slice(0, 100), + content: buildSegmentContent(frontmatter, body, slug).slice(0, 8000), + line_start: 1, + line_end: raw.split(/\r?\n/).length, + }; 
+ const segRef = ["seg_1"]; + const entities: Entity[] = [ + { name: sourceKey, type: "config_item" as any }, + ]; + const facts: Fact[] = [ + fact(sourceKey, "pai_auto_memory_type", memoryType, "state", "deterministic", segRef), + fact(sourceKey, "pai_auto_memory_content", body.slice(0, 4000), factKindForType(memoryType), "human_curated", segRef), + ]; + + if (frontmatter.description) { + facts.push(fact(sourceKey, "description", frontmatter.description, "state", "human_curated", segRef)); + } + if (frontmatter.originSessionId) { + facts.push(fact(sourceKey, "origin_session", frontmatter.originSessionId, "state", "deterministic", segRef)); + entities.push({ name: frontmatter.originSessionId, type: "session" as any }); + } + for (const tag of frontmatter.tags || []) { + facts.push(fact(sourceKey, "tagged", tag, "state", "deterministic", segRef)); + } + + const result: DeterministicResult = { + entities, + facts, + segments: [segment], + episode: { + type: "session", + title: `PAI auto-memory import: ${title}`, + start_at: frontmatter.created, + summary: `Imported one PAI auto-memory file with type=${memoryType}.`, + }, + scope_path: `assistant:pai/auto/${memoryType}`, + source_key: sourceKey, + }; + + items.push({ + sourceKey, + sourceType: "pai_auto_memory", + originalPath: path, + sourceContent: raw, + result, + }); + } + + assertUniqueSourceKeys(items); + return items; +} + +function parseFrontmatter(raw: string): { frontmatter: PaiAutoFrontmatter; body: string } { + const fmMatch = raw.match(/^---\r?\n([\s\S]*?)\r?\n---/); + if (!fmMatch) return { frontmatter: {}, body: raw.trim() }; + try { + return { + frontmatter: (parseYAML(fmMatch[1]) as PaiAutoFrontmatter) || {}, + body: raw.slice(fmMatch[0].length).trim(), + }; + } catch { + return { frontmatter: {}, body: raw.slice(fmMatch[0].length).trim() }; + } +} + +function buildSegmentContent(frontmatter: PaiAutoFrontmatter, body: string, slug: string): string { + return [ + "Source: PAI auto memory", + `Slug: 
${slug}`, + `Name: ${frontmatter.name || slug}`, + `Type: ${frontmatter.type || inferType(slug)}`, + `Description: ${frontmatter.description || "none"}`, + `Origin session: ${frontmatter.originSessionId || "none"}`, + `Tags: ${(frontmatter.tags || []).join(", ") || "none"}`, + "", + body, + ].join("\n"); +} + +function fact( + subject: string, + predicate: string, + objectValue: unknown, + factKind: string, + assertionClass: "deterministic" | "human_curated", + segmentIds: string[], +): Fact { + return { + subject, + predicate, + object_value: objectValue, + fact_kind: factKind, + assertion_class: assertionClass as any, + confidence: assertionClass === "deterministic" ? 1.0 : 0.9, + segment_ids: segmentIds, + }; +} + +function inferType(slug: string): string { + if (slug.startsWith("feedback_")) return "feedback"; + if (slug.startsWith("project_")) return "project"; + if (slug.startsWith("playbook_")) return "playbook"; + if (slug.startsWith("reference_")) return "reference"; + return "auto"; +} + +function factKindForType(memoryType: string): string { + if (memoryType === "feedback") return "constraint"; + if (memoryType === "playbook") return "remediation"; + return "state"; +} + +function safeKeyPart(value: string): string { + return value.trim().replace(/\s+/g, "_").replace(/[^A-Za-z0-9_.:-]/g, "_"); +} diff --git a/src/extract/project-doc-parser.ts b/src/extract/project-doc-parser.ts new file mode 100644 index 0000000..1c1eb99 --- /dev/null +++ b/src/extract/project-doc-parser.ts @@ -0,0 +1,156 @@ +import { readFile } from "fs/promises"; +import { basename } from "path"; +import { createHash } from "crypto"; +import type { SourceExtraction } from "./source-export"; +import type { DeterministicResult, Entity, Fact, Segment } from "./deterministic"; + +export interface ProjectDocManifest { + projectKey: string; + scopePath: string; + docs: ProjectDocManifestDoc[]; +} + +export interface ProjectDocManifestDoc { + id: string; + path: string; + title?: string; + 
sourceKey?: string; + facts?: ProjectDocManifestFact[]; +} + +export interface ProjectDocManifestFact { + subject?: string; + predicate: string; + objectValue: unknown; + factKind?: string; + confidence?: number; + segmentLabel?: string; +} + +function sanitizeId(value: string): string { + return value.trim().toLowerCase().replace(/[^a-z0-9_.-]+/g, "-").replace(/^-+|-+$/g, ""); +} + +function sha256(value: string): string { + return createHash("sha256").update(value, "utf-8").digest("hex"); +} + +function assertManifest(value: unknown): ProjectDocManifest { + const manifest = value as ProjectDocManifest; + if (!manifest || typeof manifest !== "object") throw new Error("Project doc manifest must be a JSON object."); + if (!manifest.projectKey || typeof manifest.projectKey !== "string") throw new Error("Project doc manifest requires projectKey."); + if (!manifest.scopePath || typeof manifest.scopePath !== "string") throw new Error("Project doc manifest requires scopePath."); + if (!manifest.scopePath.startsWith("project:")) throw new Error("Project doc manifest scopePath must start with project:."); + if (!Array.isArray(manifest.docs) || manifest.docs.length === 0) throw new Error("Project doc manifest requires at least one doc."); + for (const doc of manifest.docs) { + if (!doc.id || typeof doc.id !== "string") throw new Error("Every project doc manifest entry requires id."); + if (!doc.path || typeof doc.path !== "string") throw new Error(`Project doc ${doc.id} requires path.`); + if (!Array.isArray(doc.facts) || doc.facts.length === 0) throw new Error(`Project doc ${doc.id} requires explicit facts; no raw-doc promotion is allowed.`); + for (const fact of doc.facts) { + if (!fact.predicate || typeof fact.predicate !== "string") throw new Error(`Project doc ${doc.id} has fact without predicate.`); + if (fact.objectValue === undefined || fact.objectValue === null || String(fact.objectValue).trim() === "") { + throw new Error(`Project doc ${doc.id} has empty fact 
value for ${fact.predicate}.`); + } + } + } + return manifest; +} + +function splitSegments(raw: string): Segment[] { + const lines = raw.split(/\r?\n/); + const segments: Segment[] = []; + let current: { label: string; start: number; lines: string[] } = { label: "Document", start: 1, lines: [] }; + let inFence = false; + for (let i = 0; i < lines.length; i++) { + const line = lines[i]; + if (line.trimStart().startsWith("```")) inFence = !inFence; + const heading = !inFence ? line.match(/^(#{1,3})\s+(.+)$/) : null; + if (heading && current.lines.join("\n").trim()) { + segments.push({ + ordinal: segments.length + 1, + section_label: current.label, + content: current.lines.join("\n").trim(), + line_start: current.start, + line_end: i, + }); + current = { label: heading[2].trim(), start: i + 1, lines: [line] }; + } else { + if (heading && !current.lines.join("\n").trim()) current.label = heading[2].trim(); + current.lines.push(line); + } + } + if (current.lines.join("\n").trim()) { + segments.push({ + ordinal: segments.length + 1, + section_label: current.label, + content: current.lines.join("\n").trim(), + line_start: current.start, + line_end: lines.length, + }); + } + return segments.length > 0 ? 
segments : [{ ordinal: 1, section_label: "Document", content: raw.trim(), line_start: 1, line_end: lines.length }]; +} + +function factSegmentIds(fact: ProjectDocManifestFact, segments: Segment[]): string[] { + if (fact.segmentLabel) { + const needle = fact.segmentLabel.toLowerCase(); + const match = segments.find((segment) => segment.section_label.toLowerCase().includes(needle)); + if (match) return [`seg_${match.ordinal}`]; + } + return [`seg_${segments[0].ordinal}`]; +} + +export async function parseProjectDocManifest(path: string): Promise { + const manifestRaw = await readFile(path, "utf-8"); + const manifest = assertManifest(JSON.parse(manifestRaw)); + const items: SourceExtraction[] = []; + const seen = new Set(); + + for (const doc of manifest.docs) { + const raw = await readFile(doc.path, "utf-8"); + const docId = sanitizeId(doc.id); + const sourceKey = doc.sourceKey ?? `project_doc:${sanitizeId(manifest.projectKey)}:${docId}:${sha256(doc.path).slice(0, 10)}`; + if (seen.has(sourceKey)) throw new Error(`Duplicate project_doc sourceKey: ${sourceKey}`); + seen.add(sourceKey); + + const segments = splitSegments(raw); + const entities: Entity[] = [ + { name: manifest.projectKey, type: "project" }, + { name: doc.path, type: "file" }, + ]; + const facts: Fact[] = doc.facts!.map((fact) => ({ + subject: fact.subject ?? manifest.projectKey, + predicate: fact.predicate, + object_value: fact.objectValue, + fact_kind: fact.factKind ?? "constraint", + assertion_class: "deterministic", + confidence: fact.confidence ?? 1, + segment_ids: factSegmentIds(fact, segments), + })); + + const result: DeterministicResult = { + entities, + facts, + segments, + episode: { + type: "project_doc", + title: doc.title ?? 
basename(doc.path), + summary: `${manifest.projectKey} project documentation evidence from ${doc.path}`, + }, + scope_path: `${manifest.scopePath}/doc:${docId}`, + source_key: sourceKey, + owner_surface: "project", + owner_key: manifest.projectKey, + }; + + items.push({ + sourceKey, + sourceType: "project_doc", + originalPath: doc.path, + sourceContent: raw, + result, + }); + } + + return items; +} diff --git a/src/extract/quality-gate.ts b/src/extract/quality-gate.ts index 7a9bb53..16d2e58 100644 --- a/src/extract/quality-gate.ts +++ b/src/extract/quality-gate.ts @@ -17,6 +17,9 @@ * - asana_task: must have source_key formatted as asana_task: * - git_commit: must have source_key formatted as git_commit:: * - claude_session: must have source_key + * - vestige_memory: must have source_key formatted as vestige_memory: + * - pai_auto_memory: must have source_key formatted as pai_auto_memory: + * - project_doc: must have source_key, project scope_path, and segment evidence * - personal_memory: must have scope_path */ @@ -205,6 +208,46 @@ function validateGitCommit( return null; } +function validateVestigeMemory( + fact: FactCandidate, + ctx: ValidationContext, +): string | null { + if (!ctx.sourceKey || !/^vestige_memory:[^:\s]+$/.test(ctx.sourceKey)) { + return 'vestige_memory fact rejected: missing source_key'; + } + if (!fact.segment_ids || fact.segment_ids.length === 0) { + return 'vestige_memory fact rejected: no segment evidence'; + } + return null; +} + + +function validateProjectDoc( + fact: FactCandidate, + ctx: ValidationContext, +): string | null { + if (!ctx.sourceKey || !/^project_doc:[^\s]+$/.test(ctx.sourceKey)) { + return `project_doc fact (${fact.subject}/${fact.predicate}) rejected: missing source_key`; + } + if (!ctx.scopePath || !ctx.scopePath.startsWith("project:")) { + return `project_doc fact (${fact.subject}/${fact.predicate}) rejected: missing project scope_path`; + } + if (!fact.segment_ids || fact.segment_ids.length === 0) { + return 
`project_doc fact (${fact.subject}/${fact.predicate}) rejected: no segment evidence`; + } + return null; +} + +function validatePaiAutoMemory( + fact: FactCandidate, + ctx: ValidationContext, +): string | null { + if (!ctx.sourceKey || !/^pai_auto_memory:[^\s]+$/.test(ctx.sourceKey)) { + return 'pai_auto_memory fact rejected: missing source_key'; + } + return null; +} + const VALIDATORS: Record< string, (fact: FactCandidate, ctx: ValidationContext) => string | null @@ -219,6 +262,9 @@ const VALIDATORS: Record< telegram_chat: validateTelegramChat, asana_task: validateAsanaTask, git_commit: validateGitCommit, + vestige_memory: validateVestigeMemory, + pai_auto_memory: validatePaiAutoMemory, + project_doc: validateProjectDoc, }; // ── Deduplication Check ──────────────────────────────────────────────────────── diff --git a/src/extract/source-export.ts b/src/extract/source-export.ts index e83138e..0f3bc5e 100644 --- a/src/extract/source-export.ts +++ b/src/extract/source-export.ts @@ -2,7 +2,7 @@ import { readFile } from "fs/promises"; export interface SourceExtraction { sourceKey: string; - sourceType: "asana_task" | "git_commit"; + sourceType: "asana_task" | "git_commit" | "vestige_memory" | "pai_auto_memory" | "project_doc"; originalPath: string; sourceContent: string; result: import("./deterministic").DeterministicResult; diff --git a/src/extract/vestige-parser.ts b/src/extract/vestige-parser.ts new file mode 100644 index 0000000..dd3d3ce --- /dev/null +++ b/src/extract/vestige-parser.ts @@ -0,0 +1,191 @@ +import { createHash } from "crypto"; +import type { DeterministicResult, Entity, Fact, Segment } from "./deterministic"; +import { assertUniqueSourceKeys, readJsonOrJsonl, toSafeString, type SourceExtraction } from "./source-export"; + +interface VestigeRecord { + id?: unknown; + content?: unknown; + nodeType?: unknown; + createdAt?: unknown; + updatedAt?: unknown; + lastAccessed?: unknown; + nextReview?: unknown; + source?: unknown; + tags?: unknown; + 
stability?: unknown; + difficulty?: unknown; + storageStrength?: unknown; + retrievalStrength?: unknown; + retentionStrength?: unknown; + utilityScore?: unknown; + timesRetrieved?: unknown; + timesUseful?: unknown; + hasEmbedding?: unknown; + embeddingModel?: unknown; +} + +const NODE_TYPE_TO_FACT_KIND: Record = { + decision: "decision", + preference: "constraint", + feedback: "constraint", + gotcha: "lesson", + learning: "lesson", + workflow: "state", + mapping: "state", + environment: "state", +}; + +export async function parseVestigeExport(exportPath: string): Promise { + const records = (await readJsonOrJsonl(exportPath)) as VestigeRecord[]; + const items: SourceExtraction[] = []; + + for (const record of records) { + const content = toSafeString(record.content); + if (!content) continue; + + const rawId = toSafeString(record.id) || sha256(content).slice(0, 16); + const sourceKey = `vestige_memory:${safeKeyPart(rawId)}`; + const nodeType = (toSafeString(record.nodeType) || "memory").toLowerCase(); + const tags = normalizeStringList(record.tags); + const source = toSafeString(record.source); + const sourceContent = JSON.stringify(record, null, 2); + const segment: Segment = { + ordinal: 1, + section_label: `${nodeType}: ${rawId}`.slice(0, 100), + content: buildSegmentContent(record, content, nodeType, tags), + line_start: 1, + line_end: content.split(/\r?\n/).length, + }; + const segRef = ["seg_1"]; + const entities: Entity[] = [ + { name: sourceKey, type: "config_item" as any }, + ]; + if (source) entities.push({ name: source, type: "config_item" as any }); + + const facts: Fact[] = [ + fact(sourceKey, "vestige_node_type", nodeType, "state", "deterministic", segRef), + fact(sourceKey, "vestige_memory_content", content.slice(0, 4000), factKindForNode(nodeType), "human_curated", segRef), + ]; + + addOptionalFact(facts, sourceKey, "vestige_source", source, segRef); + addOptionalFact(facts, sourceKey, "vestige_created_at", toSafeString(record.createdAt), segRef); + 
addOptionalFact(facts, sourceKey, "vestige_updated_at", toSafeString(record.updatedAt), segRef); + addOptionalFact(facts, sourceKey, "vestige_last_accessed", toSafeString(record.lastAccessed), segRef); + addOptionalFact(facts, sourceKey, "vestige_next_review", toSafeString(record.nextReview), segRef); + addOptionalFact(facts, sourceKey, "vestige_embedding_model", toSafeString(record.embeddingModel), segRef); + + for (const tag of tags) { + facts.push(fact(sourceKey, "tagged", tag, "state", "deterministic", segRef)); + } + + for (const [predicate, value] of Object.entries({ + vestige_stability: record.stability, + vestige_difficulty: record.difficulty, + vestige_storage_strength: record.storageStrength, + vestige_retrieval_strength: record.retrievalStrength, + vestige_retention_strength: record.retentionStrength, + vestige_utility_score: record.utilityScore, + vestige_times_retrieved: record.timesRetrieved, + vestige_times_useful: record.timesUseful, + vestige_has_embedding: record.hasEmbedding, + })) { + if (value !== undefined && value !== null && value !== "") { + facts.push(fact(sourceKey, predicate, value, "state", "deterministic", segRef)); + } + } + + const result: DeterministicResult = { + entities: deduplicateEntities(entities), + facts, + segments: [segment], + episode: { + type: "session", + title: `Vestige memory import: ${rawId}`, + start_at: toSafeString(record.createdAt), + summary: `Imported one Vestige memory with nodeType=${nodeType}.`, + }, + scope_path: `assistant:vestige/${nodeType}`, + source_key: sourceKey, + }; + + items.push({ + sourceKey, + sourceType: "vestige_memory", + originalPath: `${exportPath}#${rawId}`, + sourceContent, + result, + }); + } + + assertUniqueSourceKeys(items); + return items; +} + +function buildSegmentContent(record: VestigeRecord, content: string, nodeType: string, tags: string[]): string { + return [ + `Source: Vestige`, + `Node type: ${nodeType}`, + `ID: ${toSafeString(record.id) || "unknown"}`, + `Created: 
${toSafeString(record.createdAt) || "unknown"}`, + `Updated: ${toSafeString(record.updatedAt) || "unknown"}`, + `Tags: ${tags.join(", ") || "none"}`, + `Embedding model: ${toSafeString(record.embeddingModel) || "none"}`, + "", + content, + ].join("\n").slice(0, 8000); +} + +function fact( + subject: string, + predicate: string, + objectValue: unknown, + factKind: string, + assertionClass: "deterministic" | "human_curated", + segmentIds: string[], +): Fact { + return { + subject, + predicate, + object_value: objectValue, + fact_kind: factKind, + assertion_class: assertionClass as any, + confidence: assertionClass === "deterministic" ? 1.0 : 0.85, + segment_ids: segmentIds, + }; +} + +function addOptionalFact( + facts: Fact[], + subject: string, + predicate: string, + value: string | undefined, + segmentIds: string[], +): void { + if (value) facts.push(fact(subject, predicate, value, "state", "deterministic", segmentIds)); +} + +function normalizeStringList(value: unknown): string[] { + if (!Array.isArray(value)) return []; + return value.map((item) => toSafeString(item)).filter((item): item is string => Boolean(item)); +} + +function factKindForNode(nodeType: string): string { + return NODE_TYPE_TO_FACT_KIND[nodeType] || "state"; +} + +function safeKeyPart(value: string): string { + return value.trim().replace(/\s+/g, "_").replace(/[^A-Za-z0-9_.:-]/g, "_"); +} + +function sha256(text: string): string { + return createHash("sha256").update(text, "utf-8").digest("hex"); +} + +function deduplicateEntities(entities: Entity[]): Entity[] { + const seen = new Map(); + for (const entity of entities) { + const key = `${entity.type}:${entity.name}`; + if (!seen.has(key)) seen.set(key, entity); + } + return [...seen.values()]; +} diff --git a/src/memory/assistant-review.ts b/src/memory/assistant-review.ts new file mode 100644 index 0000000..12a4b97 --- /dev/null +++ b/src/memory/assistant-review.ts @@ -0,0 +1,654 @@ +import { createHash } from "crypto"; +import { config } from 
"../config"; +import type postgres from "postgres"; +import { estimateTokenCount, redactValue, type MemoryTrustClass } from "./governance"; + +const ASSISTANT_MEMORY_SOURCE_TYPES = ["vestige_memory", "pai_auto_memory"] as const; +const ASSISTANT_REVIEW_REASON = "assistant_memory_import_review"; + +export interface AssistantReviewRow { + reviewId: string; + status: string; + reason: string; + sourceType: string; + sourceKey: string; + scopePath?: string; + originalPath?: string; + factCount: number; + createdAt?: string; +} + +export interface AssistantReviewPromotionResult { + reviewId: string; + artifactId: string; + memoryId: string; + sourceKey: string; + scopePath?: string; + supportCount: number; + trustClass: MemoryTrustClass; + idempotent: boolean; +} + +export interface AssistantReviewDemotionResult { + memoryId: string; + reviewId?: string; + artifactId?: string; + sourceKey?: string; + resetReview: boolean; + demoted: boolean; +} + +export interface AssistantReviewDetail extends AssistantReviewRow { + artifactId: string; + facts: Array<{ predicate: string; value: string; confidence?: number; factId: string }>; +} + +export interface AssistantReviewStats { + total: number; + byStatus: Record; + bySourceType: Record; +} + +export async function listAssistantMemoryReviews( + sql: postgres.Sql, + options: { tenant?: string; status?: string; limit?: number } = {}, +): Promise { + const tenant = options.tenant ?? config.tenant; + const status = options.status ?? "pending"; + const limit = Math.max(1, Math.min(200, options.limit ?? 
50)); + const rows = await sql` + SELECT + rq.review_id::text, + rq.status::text, + rq.reason, + a.source_type::text, + a.source_key, + a.scope_path, + a.original_path, + rq.created_at::text, + COUNT(f.fact_id)::int AS fact_count + FROM preserve.review_queue rq + JOIN preserve.artifact a ON a.artifact_id = rq.target_id + LEFT JOIN preserve.extraction_run er ON er.artifact_id = a.artifact_id + LEFT JOIN preserve.fact f ON f.created_run_id = er.run_id AND f.current_status = 'active' + WHERE rq.target_type = 'artifact' + AND rq.reason = ${ASSISTANT_REVIEW_REASON} + AND rq.status = ${status}::preserve.review_status + AND a.tenant = ${tenant} + AND a.source_type = ANY(${ASSISTANT_MEMORY_SOURCE_TYPES}::preserve.source_type[]) + GROUP BY rq.review_id, rq.status, rq.reason, a.source_type, a.source_key, a.scope_path, a.original_path, rq.created_at + ORDER BY rq.created_at ASC + LIMIT ${limit} + `; + return rows.map((row: any) => ({ + reviewId: row.review_id, + status: row.status, + reason: row.reason, + sourceType: row.source_type, + sourceKey: row.source_key, + scopePath: row.scope_path ?? undefined, + originalPath: row.original_path ?? undefined, + factCount: Number(row.fact_count ?? 0), + createdAt: row.created_at ?? undefined, + })); +} + +export async function getAssistantMemoryReview( + sql: postgres.Sql, + reviewId: string, + options: { tenant?: string; factLimit?: number } = {}, +): Promise { + const tenant = options.tenant ?? config.tenant; + const factLimit = Math.max(1, Math.min(50, options.factLimit ?? 
20)); + const [review] = await sql` + SELECT + rq.review_id::text, + rq.status::text, + rq.reason, + a.artifact_id::text, + a.source_type::text, + a.source_key, + a.scope_path, + a.original_path, + rq.created_at::text, + COUNT(f.fact_id)::int AS fact_count + FROM preserve.review_queue rq + JOIN preserve.artifact a ON a.artifact_id = rq.target_id + LEFT JOIN preserve.extraction_run er ON er.artifact_id = a.artifact_id + LEFT JOIN preserve.fact f ON f.created_run_id = er.run_id AND f.current_status = 'active' + WHERE rq.review_id = ${reviewId} + AND rq.target_type = 'artifact' + AND rq.reason = ${ASSISTANT_REVIEW_REASON} + AND a.tenant = ${tenant} + AND a.source_type = ANY(${ASSISTANT_MEMORY_SOURCE_TYPES}::preserve.source_type[]) + GROUP BY rq.review_id, rq.status, rq.reason, a.artifact_id, a.source_type, a.source_key, a.scope_path, a.original_path, rq.created_at + LIMIT 1 + `; + if (!review) return null; + const facts = await sql` + SELECT + f.fact_id::text, + f.predicate, + f.object_value, + f.confidence::float + FROM preserve.extraction_run er + JOIN preserve.fact f ON f.created_run_id = er.run_id + WHERE er.artifact_id = ${review.artifact_id} + AND f.tenant = ${tenant} + AND f.current_status = 'active' + ORDER BY + CASE WHEN f.predicate IN ('vestige_memory_content','pai_auto_memory_content') THEN 0 ELSE 1 END, + f.priority ASC, + f.created_at ASC + LIMIT ${factLimit} + `; + return { + reviewId: review.review_id, + status: review.status, + reason: review.reason, + artifactId: review.artifact_id, + sourceType: review.source_type, + sourceKey: review.source_key, + scopePath: review.scope_path ?? undefined, + originalPath: review.original_path ?? undefined, + factCount: Number(review.fact_count ?? 0), + createdAt: review.created_at ?? undefined, + facts: facts.map((fact: any) => ({ + factId: fact.fact_id, + predicate: fact.predicate, + value: objectValueText(fact.object_value), + confidence: fact.confidence === null || fact.confidence === undefined ? 
undefined : Number(fact.confidence), + })), + }; +} + +export async function assistantMemoryReviewStats( + sql: postgres.Sql, + options: { tenant?: string } = {}, +): Promise { + const tenant = options.tenant ?? config.tenant; + const rows = await sql` + SELECT rq.status::text, a.source_type::text, COUNT(*)::int AS count + FROM preserve.review_queue rq + JOIN preserve.artifact a ON a.artifact_id = rq.target_id + WHERE rq.target_type = 'artifact' + AND rq.reason = ${ASSISTANT_REVIEW_REASON} + AND a.tenant = ${tenant} + AND a.source_type = ANY(${ASSISTANT_MEMORY_SOURCE_TYPES}::preserve.source_type[]) + GROUP BY rq.status, a.source_type + ORDER BY rq.status, a.source_type + `; + const stats: AssistantReviewStats = { total: 0, byStatus: {}, bySourceType: {} }; + for (const row of rows as any[]) { + const count = Number(row.count ?? 0); + stats.total += count; + stats.byStatus[row.status] = (stats.byStatus[row.status] ?? 0) + count; + stats.bySourceType[row.source_type] = (stats.bySourceType[row.source_type] ?? 
0) + count; + } + return stats; +} + +export function renderAssistantReviewQueueMarkdown(rows: AssistantReviewRow[]): string { + const lines = [ + '# BrainCore Assistant Memory Review Queue', + '', + `Rows: ${rows.length}`, + '', + ]; + for (const row of rows) { + lines.push(`## ${row.sourceKey}`); + lines.push(`- Review ID: ${row.reviewId}`); + lines.push(`- Status: ${row.status}`); + lines.push(`- Source type: ${row.sourceType}`); + if (row.scopePath) lines.push(`- Scope: ${row.scopePath}`); + lines.push(`- Facts: ${row.factCount}`); + lines.push(''); + } + return lines.join('\n').trimEnd() + '\n'; +} + +export async function queueAssistantMemoryReview(sql: postgres.Sql, artifactId: string, tenant = config.tenant): Promise { + await sql` + UPDATE preserve.review_queue rq + SET status = 'pending'::preserve.review_status, + reviewer_notes = 'requeued after refreshed assistant memory import', + resolved_at = NULL + FROM preserve.artifact a + WHERE rq.target_type = 'artifact' + AND rq.target_id = a.artifact_id + AND rq.reason = ${ASSISTANT_REVIEW_REASON} + AND rq.status = 'rejected'::preserve.review_status + AND a.artifact_id = ${artifactId}::uuid + AND a.tenant = ${tenant} + AND a.source_type = ANY(${ASSISTANT_MEMORY_SOURCE_TYPES}::preserve.source_type[]) + `; + await sql` + INSERT INTO preserve.review_queue (target_type, target_id, reason, status) + SELECT 'artifact', ${artifactId}::uuid, ${ASSISTANT_REVIEW_REASON}, 'pending'::preserve.review_status + WHERE EXISTS ( + SELECT 1 FROM preserve.artifact a + WHERE a.artifact_id = ${artifactId}::uuid + AND a.tenant = ${tenant} + AND a.source_type = ANY(${ASSISTANT_MEMORY_SOURCE_TYPES}::preserve.source_type[]) + ) + AND NOT EXISTS ( + SELECT 1 FROM preserve.review_queue rq + WHERE rq.target_type = 'artifact' + AND rq.target_id = ${artifactId}::uuid + AND rq.reason = ${ASSISTANT_REVIEW_REASON} + ) + `; +} + +export async function rejectAssistantMemoryReview( + sql: postgres.Sql, + reviewId: string, + options: { tenant?: 
string; notes?: string; suppressed?: boolean; actor?: string } = {}, +): Promise { + const tenant = options.tenant ?? config.tenant; + const notes = options.suppressed ? `suppressed: ${options.notes ?? "not prompt eligible"}` : options.notes ?? "rejected"; + return sql.begin(async (tx) => { + const [target] = await tx` + SELECT rq.review_id::text, rq.status::text AS review_status, a.artifact_id::text + FROM preserve.review_queue rq + JOIN preserve.artifact a ON a.artifact_id = rq.target_id + WHERE rq.review_id = ${reviewId} + AND rq.target_type = 'artifact' + AND rq.reason = ${ASSISTANT_REVIEW_REASON} + AND rq.status IN ('pending'::preserve.review_status,'approved'::preserve.review_status) + AND a.tenant = ${tenant} + AND a.source_type = ANY(${ASSISTANT_MEMORY_SOURCE_TYPES}::preserve.source_type[]) + FOR UPDATE OF rq, a + LIMIT 1 + `; + if (!target) return false; + + const [publishedMemory] = await tx` + SELECT m.memory_id::text + FROM preserve.memory m + JOIN preserve.memory_support ms ON ms.memory_id = m.memory_id + JOIN preserve.fact f ON f.fact_id = ms.fact_id + JOIN preserve.extraction_run er ON er.run_id = f.created_run_id + WHERE er.artifact_id = ${target.artifact_id} + AND m.tenant = ${tenant} + AND ms.notes = 'assistant memory import review' + AND m.lifecycle_state != 'retired'::preserve.lifecycle_state + FOR UPDATE OF m + LIMIT 1 + `; + if (publishedMemory) { + await tx` + DELETE FROM preserve.memory_support + WHERE memory_id = ${publishedMemory.memory_id} + AND notes = 'assistant memory import review' + `; + await tx` + UPDATE preserve.memory + SET governance_status = 'suppressed'::preserve.memory_governance_status, + lifecycle_state = 'retired'::preserve.lifecycle_state, + governance_meta = COALESCE(governance_meta, '{}'::jsonb) || ${tx.json(redactValue({ + assistantImportRejected: true, + reviewId, + rejectedAt: new Date().toISOString(), + rejectedBy: options.actor ?? 
"braincore-cli", + rejectionReason: notes, + }) as any)}::jsonb, + updated_at = now() + WHERE memory_id = ${publishedMemory.memory_id} + AND tenant = ${tenant} + `; + } + + await tx` + UPDATE preserve.review_queue + SET status = 'rejected'::preserve.review_status, + reviewer_notes = ${notes}, + resolved_at = now() + WHERE review_id = ${reviewId} + `; + await tx` + UPDATE preserve.artifact + SET can_promote_memory = false, + preservation_state = 'archived'::preserve.preservation_state, + updated_at = now() + WHERE artifact_id = ${target.artifact_id} + AND tenant = ${tenant} + `; + return true; + }); +} + +function isUuid(value: unknown): value is string { + return typeof value === "string" && /^[0-9a-f]{8}-[0-9a-f]{4}-[1-5][0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$/i.test(value); +} + +export async function promoteAssistantMemoryReview( + sql: postgres.Sql, + reviewId: string, + options: { tenant?: string; notes?: string; actor?: string; scopePath?: string } = {}, +): Promise { + const tenant = options.tenant ?? 
config.tenant; + return sql.begin(async (tx) => { + const [target] = await tx` + SELECT + rq.review_id::text, + rq.status::text AS review_status, + a.artifact_id::text, + a.source_type::text, + a.source_key, + a.scope_path, + a.project_entity_id::text, + a.original_path + FROM preserve.review_queue rq + JOIN preserve.artifact a ON a.artifact_id = rq.target_id + WHERE rq.review_id = ${reviewId} + AND rq.target_type = 'artifact' + AND rq.reason = ${ASSISTANT_REVIEW_REASON} + AND rq.status IN ('pending','approved') + AND a.tenant = ${tenant} + AND a.source_type = ANY(${ASSISTANT_MEMORY_SOURCE_TYPES}::preserve.source_type[]) + FOR UPDATE OF rq, a + LIMIT 1 + `; + if (!target) { + throw new Error("Assistant memory review row not found, not pending, or not in the active tenant."); + } + + const facts = await tx` + SELECT + f.fact_id::text, + f.episode_id::text, + f.predicate, + f.object_value, + f.confidence::float, + f.priority, + f.created_at::text + FROM preserve.extraction_run er + JOIN preserve.fact f ON f.created_run_id = er.run_id + WHERE er.artifact_id = ${target.artifact_id} + AND f.tenant = ${tenant} + AND f.current_status = 'active' + ORDER BY + CASE WHEN f.predicate IN ('vestige_memory_content','pai_auto_memory_content') THEN 0 ELSE 1 END, + f.priority ASC, + f.created_at ASC + LIMIT 25 + `; + if (facts.length === 0) { + throw new Error("Assistant memory review target has no active facts to promote."); + } + + const narrative = buildAssistantMemoryNarrative(facts as any[]); + const title = buildAssistantMemoryTitle(target.source_type, target.source_key, narrative); + const promotedScopePath = options.scopePath?.trim() || target.scope_path || null; + const confidence = clampConfidence(Math.max(...facts.map((fact: any) => Number(fact.confidence ?? 
0.7)))); + const tokenCount = estimateTokenCount(`${title}\n${narrative}`); + const fingerprint = sha256(`${tenant}|assistant-import|${target.source_key}`); + const trustClass: MemoryTrustClass = "human_curated"; + const meta = { + assistantImport: true, + reviewId, + sourceType: target.source_type, + sourceKey: target.source_key, + originalPath: redactValue(target.original_path), + originalScopePath: target.scope_path, + promotedScopePath, + reviewedBy: options.actor ?? "braincore-cli", + reviewedAt: new Date().toISOString(), + }; + + const [memory] = await tx` + INSERT INTO preserve.memory ( + memory_type, project_entity_id, tenant, fingerprint, title, narrative, + support_count, contradiction_count, confidence, lifecycle_state, + pipeline_version, model_name, prompt_version, scope_path, priority, + last_supported_at, namespace, governance_status, source_class, trust_class, + salience, strength, stability, quality_score, token_count, governance_meta + ) VALUES ( + 'heuristic'::preserve.memory_type, + ${target.project_entity_id}, + ${tenant}, + ${fingerprint}, + ${title}, + ${narrative}, + ${facts.length}, + 0, + ${confidence}, + 'published'::preserve.lifecycle_state, + 'assistant-memory-review', + 'deterministic-import', + 'assistant-memory-review-v1', + ${promotedScopePath}, + 3, + now(), + 'semantic'::preserve.memory_namespace, + 'validated'::preserve.memory_governance_status, + 'imported_knowledge'::preserve.memory_source_class, + ${trustClass}::preserve.memory_trust_class, + 0.7, + 0.7, + 0.6, + ${confidence}, + ${tokenCount}, + ${tx.json(meta as any)} + ) + ON CONFLICT (tenant, fingerprint) DO UPDATE SET + title = EXCLUDED.title, + narrative = EXCLUDED.narrative, + support_count = EXCLUDED.support_count, + confidence = EXCLUDED.confidence, + lifecycle_state = EXCLUDED.lifecycle_state, + governance_status = EXCLUDED.governance_status, + trust_class = EXCLUDED.trust_class, + scope_path = EXCLUDED.scope_path, + quality_score = EXCLUDED.quality_score, + 
token_count = EXCLUDED.token_count, + governance_meta = COALESCE(preserve.memory.governance_meta, '{}'::jsonb) || EXCLUDED.governance_meta, + last_supported_at = now(), + updated_at = now() + RETURNING memory_id::text + `; + + await tx` + DELETE FROM preserve.memory_support + WHERE memory_id = ${memory.memory_id} + AND notes = 'assistant memory import review' + `; + for (const fact of facts as any[]) { + await tx` + INSERT INTO preserve.memory_support (memory_id, fact_id, episode_id, support_type, notes) + VALUES (${memory.memory_id}, ${fact.fact_id}, ${fact.episode_id}, 'supporting', 'assistant memory import review') + `; + } + + await tx` + UPDATE preserve.review_queue + SET status = 'approved'::preserve.review_status, + reviewer_notes = ${options.notes ?? "approved for BrainCore prompt recall"}, + resolved_at = now() + WHERE review_id = ${reviewId} + `; + await tx` + UPDATE preserve.artifact + SET can_promote_memory = true, + preservation_state = 'published'::preserve.preservation_state, + updated_at = now() + WHERE artifact_id = ${target.artifact_id} + AND tenant = ${tenant} + `; + + return { + reviewId, + artifactId: target.artifact_id, + memoryId: memory.memory_id, + sourceKey: target.source_key, + scopePath: promotedScopePath ?? undefined, + supportCount: facts.length, + trustClass, + idempotent: target.review_status === "approved", + }; + }); +} + +export async function demoteAssistantMemoryPromotion( + sql: postgres.Sql, + memoryId: string, + options: { tenant?: string; notes?: string; actor?: string } = {}, +): Promise { + const tenant = options.tenant ?? 
config.tenant; + return sql.begin(async (tx) => { + const [memory] = await tx` + SELECT + memory_id::text, + fingerprint, + governance_meta, + governance_status::text, + source_class::text, + trust_class::text + FROM preserve.memory + WHERE memory_id = ${memoryId} + AND tenant = ${tenant} + AND source_class = 'imported_knowledge'::preserve.memory_source_class + AND trust_class = 'human_curated'::preserve.memory_trust_class + AND ( + governance_meta->>'assistantImport' = 'true' + OR EXISTS ( + SELECT 1 FROM preserve.memory_support ms + WHERE ms.memory_id = preserve.memory.memory_id + AND ms.notes = 'assistant memory import review' + ) + ) + FOR UPDATE + LIMIT 1 + `; + if (!memory) { + throw new Error("Assistant memory promotion not found for the active tenant."); + } + + const meta = (memory.governance_meta ?? {}) as Record; + let reviewId = isUuid(meta.reviewId) ? meta.reviewId : undefined; + let sourceKey = typeof meta.sourceKey === "string" && !meta.sourceKey.includes("[REDACTED") ? meta.sourceKey : undefined; + + if (!reviewId) { + const [linkedReview] = await tx` + SELECT rq.review_id::text, a.source_key + FROM preserve.memory_support ms + JOIN preserve.fact f ON f.fact_id = ms.fact_id + JOIN preserve.extraction_run er ON er.run_id = f.created_run_id + JOIN preserve.artifact a ON a.artifact_id = er.artifact_id + JOIN preserve.review_queue rq ON rq.target_id = a.artifact_id + WHERE ms.memory_id = ${memoryId} + AND ms.notes = 'assistant memory import review' + AND rq.target_type = 'artifact' + AND rq.reason = ${ASSISTANT_REVIEW_REASON} + AND a.tenant = ${tenant} + LIMIT 1 + `; + if (linkedReview) { + reviewId = linkedReview.review_id; + sourceKey = linkedReview.source_key; + } + } + + await tx` + DELETE FROM preserve.memory_support + WHERE memory_id = ${memoryId} + AND notes = 'assistant memory import review' + `; + + await tx` + UPDATE preserve.memory + SET governance_status = 'suppressed'::preserve.memory_governance_status, + lifecycle_state = 
'retired'::preserve.lifecycle_state, + governance_meta = COALESCE(governance_meta, '{}'::jsonb) || ${tx.json(redactValue({ + assistantImportDemoted: true, + demotedAt: new Date().toISOString(), + demotedBy: options.actor ?? "braincore-cli", + demotionReason: options.notes ?? "assistant memory promotion rollback", + }) as any)}::jsonb, + updated_at = now() + WHERE memory_id = ${memoryId} + AND tenant = ${tenant} + `; + + let artifactId: string | undefined; + let resetReview = false; + if (reviewId) { + const [review] = await tx` + UPDATE preserve.review_queue rq + SET status = 'pending'::preserve.review_status, + reviewer_notes = ${options.notes ?? "promotion demoted for re-review"}, + resolved_at = NULL + FROM preserve.artifact a + WHERE rq.review_id = ${reviewId} + AND rq.target_type = 'artifact' + AND rq.target_id = a.artifact_id + AND rq.reason = ${ASSISTANT_REVIEW_REASON} + AND a.tenant = ${tenant} + RETURNING a.artifact_id::text + `; + if (review) { + const reviewedArtifactId = review.artifact_id as string; + artifactId = reviewedArtifactId; + resetReview = true; + await tx` + UPDATE preserve.artifact + SET can_promote_memory = false, + preservation_state = 'archived'::preserve.preservation_state, + updated_at = now() + WHERE artifact_id = ${reviewedArtifactId} + AND tenant = ${tenant} + `; + } + } + + return { memoryId, reviewId, artifactId, sourceKey, resetReview, demoted: true }; + }); +} + +function buildAssistantMemoryTitle(sourceType: string, sourceKey: string, narrative: string): string { + const firstLine = narrative.split("\n").find((line) => line.trim())?.trim(); + if (firstLine && firstLine.length <= 90) return firstLine; + const label = sourceType === "vestige_memory" ? 
"Vestige import" : "PAI auto-memory import"; + return `${label}: ${sourceKey}`.slice(0, 120); +} + +function buildAssistantMemoryNarrative(facts: Array<{ predicate: string; object_value: unknown }>): string { + const contentFacts = facts.filter((fact) => fact.predicate.endsWith("_content")); + const source = contentFacts[0] ?? facts[0]; + const content = objectValueText(source.object_value).trim(); + const metadata = facts + .filter((fact) => fact !== source) + .map((fact) => `${fact.predicate}: ${objectValueText(fact.object_value)}`) + .filter((line) => line.length > 0) + .slice(0, 8); + const lines = [content || "Assistant memory import approved for BrainCore recall."]; + if (metadata.length > 0) { + lines.push("", "Supporting metadata:", ...metadata.map((line) => `- ${line}`)); + } + return lines.join("\n"); +} + +function objectValueText(value: unknown): string { + if (value === null || value === undefined) return ""; + if (typeof value === "string") return value; + if (typeof value === "number" || typeof value === "boolean") return String(value); + if (Array.isArray(value)) return value.map(objectValueText).filter(Boolean).join(", "); + if (typeof value === "object") { + const object = value as Record; + for (const key of ["content", "text", "summary", "title", "value"]) { + if (key in object) return objectValueText(object[key]); + } + return JSON.stringify(redactValue(object)); + } + return String(value); +} + +function clampConfidence(value: number): number { + if (!Number.isFinite(value)) return 0.7; + return Math.max(0.01, Math.min(0.99, Math.round(value * 100) / 100)); +} + +function sha256(value: string): string { + return createHash("sha256").update(value).digest("hex"); +} diff --git a/src/memory/project-doc-review.ts b/src/memory/project-doc-review.ts new file mode 100644 index 0000000..4763af7 --- /dev/null +++ b/src/memory/project-doc-review.ts @@ -0,0 +1,347 @@ +import { readFile } from "fs/promises"; +import { createHash } from "crypto"; +import 
/**
 * Builds the reviewer-notes text recorded for a project-doc decision.
 *
 * Joins the free-form notes with labelled `materiality:` and
 * `retrieval_use_case:` lines, one per line. Each field is trimmed first, and
 * blank or non-string fields are omitted entirely. The decisions come from an
 * operator-supplied JSON file, so a field may be missing, whitespace-only, or
 * of the wrong type. When nothing remains, the decision verb itself
 * (`"approved"` / `"rejected"`) is used as the note.
 *
 * @param decision The decision entry being applied.
 * @returns Non-empty notes text.
 */
function decisionNotes(decision: ProjectDocReviewDecision): string {
  const clean = (value: unknown): string => (typeof value === "string" ? value.trim() : "");
  const notes = clean(decision.notes);
  const materiality = clean(decision.materiality);
  const retrievalUseCase = clean(decision.retrievalUseCase);
  const parts = [
    notes,
    materiality ? `materiality: ${materiality}` : "",
    retrievalUseCase ? `retrieval_use_case: ${retrievalUseCase}` : "",
  ].filter((part) => part.length > 0);
  return parts.join("\n") || decision.decision;
}
50)); + const rows = await sql` + SELECT + rq.review_id::text, + rq.status::text, + a.source_key, + a.scope_path, + a.original_path, + rq.created_at::text, + COUNT(f.fact_id)::int AS fact_count + FROM preserve.review_queue rq + JOIN preserve.artifact a ON a.artifact_id = rq.target_id + LEFT JOIN preserve.extraction_run er ON er.artifact_id = a.artifact_id + LEFT JOIN preserve.fact f ON f.created_run_id = er.run_id AND f.current_status = 'active' + WHERE rq.target_type = 'artifact' + AND rq.reason = ${PROJECT_DOC_REVIEW_REASON} + AND rq.status = ${status}::preserve.review_status + AND a.tenant = ${tenant} + AND a.source_type = 'project_doc'::preserve.source_type + GROUP BY rq.review_id, rq.status, a.source_key, a.scope_path, a.original_path, rq.created_at + ORDER BY rq.created_at ASC + LIMIT ${limit} + `; + return rows.map((row: any) => ({ + reviewId: row.review_id, + status: row.status, + sourceKey: row.source_key, + scopePath: row.scope_path ?? undefined, + originalPath: row.original_path ?? undefined, + factCount: Number(row.fact_count ?? 0), + createdAt: row.created_at ?? 
/**
 * Renders project-doc review rows as a Markdown review packet.
 *
 * The packet starts with a title and a row count, followed by one `##` section
 * per row. `Scope` and `Source` bullets appear only when the corresponding
 * field is truthy. Every section carries a fixed `Decision: pending` bullet and
 * the value-gate reminder. Trailing whitespace is trimmed and exactly one
 * newline terminates the output.
 *
 * @param rows Review rows to render, in display order.
 * @returns The Markdown text.
 */
export function renderProjectDocReviewPacket(rows: ProjectDocReviewRow[]): string {
  const packet: string[] = ["# BrainCore Project Doc Value Review", "", `Rows: ${rows.length}`, ""];
  rows.forEach((entry) => {
    const optional: string[] = [];
    if (entry.scopePath) optional.push(`- Scope: ${entry.scopePath}`);
    if (entry.originalPath) optional.push(`- Source: ${entry.originalPath}`);
    packet.push(
      `## ${entry.sourceKey}`,
      `- Review ID: ${entry.reviewId}`,
      `- Status: ${entry.status}`,
      ...optional,
      `- Facts: ${entry.factCount}`,
      "- Decision: pending",
      "- Value gate: must improve project decisions, be current, non-duplicative, and evidence-backed.",
      "",
    );
  });
  return `${packet.join("\n").trimEnd()}\n`;
}
config.tenant; + const parsed = JSON.parse(await readFile(decisionPath, "utf-8")) as ProjectDocReviewDecisionFile; + if (!Array.isArray(parsed.decisions)) throw new Error("Project doc decisions file requires decisions array."); + const memories: string[] = []; + let approved = 0; + let rejected = 0; + + await sql.begin(async (tx) => { + for (const decision of parsed.decisions) { + if (!decision.reviewId) throw new Error("Every project-doc decision requires reviewId."); + const [target] = await tx` + SELECT + rq.review_id::text, + rq.status::text AS review_status, + a.artifact_id::text, + a.source_key, + a.scope_path, + a.project_entity_id::text, + a.original_path + FROM preserve.review_queue rq + JOIN preserve.artifact a ON a.artifact_id = rq.target_id + WHERE rq.review_id = ${decision.reviewId} + AND rq.target_type = 'artifact' + AND rq.reason = ${PROJECT_DOC_REVIEW_REASON} + AND rq.status IN ('pending'::preserve.review_status,'approved'::preserve.review_status) + AND a.tenant = ${tenant} + AND a.source_type = 'project_doc'::preserve.source_type + FOR UPDATE OF rq, a + LIMIT 1 + `; + if (!target) throw new Error(`Project doc review row not found or not actionable: ${decision.reviewId}`); + + if (decision.decision === "rejected") { + const [publishedMemory] = await tx` + SELECT m.memory_id::text + FROM preserve.memory m + JOIN preserve.memory_support ms ON ms.memory_id = m.memory_id + JOIN preserve.fact f ON f.fact_id = ms.fact_id + JOIN preserve.extraction_run er ON er.run_id = f.created_run_id + WHERE er.artifact_id = ${target.artifact_id} + AND m.tenant = ${tenant} + AND ms.notes = 'project doc value review' + AND m.lifecycle_state != 'retired'::preserve.lifecycle_state + FOR UPDATE OF m + LIMIT 1 + `; + if (publishedMemory) { + await tx` + DELETE FROM preserve.memory_support + WHERE memory_id = ${publishedMemory.memory_id} + AND notes = 'project doc value review' + `; + await tx` + UPDATE preserve.memory + SET governance_status = 
'suppressed'::preserve.memory_governance_status, + lifecycle_state = 'retired'::preserve.lifecycle_state, + governance_meta = COALESCE(governance_meta, '{}'::jsonb) || ${tx.json(redactValue({ + projectDocReviewRejected: true, + reviewId: decision.reviewId, + rejectedAt: new Date().toISOString(), + rejectedBy: options.actor ?? "braincore-cli", + rejectionReason: decisionNotes(decision), + }) as any)}::jsonb, + updated_at = now() + WHERE memory_id = ${publishedMemory.memory_id} + AND tenant = ${tenant} + `; + } + await tx` + UPDATE preserve.review_queue + SET status = 'rejected'::preserve.review_status, + reviewer_notes = ${decisionNotes(decision)}, + resolved_at = now() + WHERE review_id = ${decision.reviewId} + `; + await tx` + UPDATE preserve.artifact + SET can_promote_memory = false, + preservation_state = 'archived'::preserve.preservation_state, + updated_at = now() + WHERE artifact_id = ${target.artifact_id} + AND tenant = ${tenant} + `; + rejected++; + continue; + } + + if (decision.decision !== "approved") throw new Error(`Unsupported decision for ${decision.reviewId}: ${decision.decision}`); + const title = requireUsefulText(decision.title, "title"); + const content = requireUsefulText(decision.content, "content"); + const materiality = requireUsefulText(decision.materiality, "materiality"); + const retrievalUseCase = requireUsefulText(decision.retrievalUseCase, "retrievalUseCase"); + + const facts = await tx` + SELECT + f.fact_id::text, + f.episode_id::text, + f.predicate, + f.object_value, + f.confidence::float, + f.priority, + f.created_at::text + FROM preserve.extraction_run er + JOIN preserve.fact f ON f.created_run_id = er.run_id + WHERE er.artifact_id = ${target.artifact_id} + AND f.tenant = ${tenant} + AND f.current_status = 'active' + ORDER BY f.priority ASC, f.created_at ASC + LIMIT 50 + `; + if (facts.length === 0) throw new Error(`Approved project doc review has no active facts: ${decision.reviewId}`); + + const scopePath = 
decision.scopePath?.trim() || target.scope_path || null; + const confidence = clampConfidence(Math.max(...facts.map((fact: any) => Number(fact.confidence ?? 0.85)))); + const narrative = `${content.trim()}\n\nMateriality: ${materiality}\n\nRetrieval use case: ${retrievalUseCase}`; + const tokenCount = estimateTokenCount(`${title}\n${narrative}`); + const fingerprint = sha256(`${tenant}|project-doc-review|${decision.reviewId}`); + const [memory] = await tx` + INSERT INTO preserve.memory ( + memory_type, project_entity_id, tenant, fingerprint, title, narrative, + support_count, contradiction_count, confidence, lifecycle_state, + pipeline_version, model_name, prompt_version, scope_path, priority, + last_supported_at, namespace, governance_status, source_class, trust_class, + salience, strength, stability, quality_score, token_count, governance_meta + ) VALUES ( + 'heuristic'::preserve.memory_type, + ${target.project_entity_id}, + ${tenant}, + ${fingerprint}, + ${title}, + ${narrative}, + ${facts.length}, + 0, + ${confidence}, + 'published'::preserve.lifecycle_state, + 'project-doc-review', + 'deterministic-project-doc', + 'project-doc-review-v1', + ${scopePath}, + 3, + now(), + 'semantic'::preserve.memory_namespace, + 'validated'::preserve.memory_governance_status, + 'imported_knowledge'::preserve.memory_source_class, + 'human_curated'::preserve.memory_trust_class, + 0.75, + 0.75, + 0.7, + ${confidence}, + ${tokenCount}, + ${tx.json(redactValue({ + projectDocReview: true, + reviewId: decision.reviewId, + sourceKey: target.source_key, + originalPath: target.original_path, + materiality, + retrievalUseCase, + reviewedBy: options.actor ?? 
"braincore-cli", + reviewedAt: new Date().toISOString(), + }) as any)} + ) + ON CONFLICT (tenant, fingerprint) DO UPDATE SET + title = EXCLUDED.title, + narrative = EXCLUDED.narrative, + support_count = EXCLUDED.support_count, + confidence = EXCLUDED.confidence, + lifecycle_state = EXCLUDED.lifecycle_state, + governance_status = EXCLUDED.governance_status, + trust_class = EXCLUDED.trust_class, + scope_path = EXCLUDED.scope_path, + quality_score = EXCLUDED.quality_score, + token_count = EXCLUDED.token_count, + governance_meta = COALESCE(preserve.memory.governance_meta, '{}'::jsonb) || EXCLUDED.governance_meta, + last_supported_at = now(), + updated_at = now() + RETURNING memory_id::text + `; + + await tx` + DELETE FROM preserve.memory_support + WHERE memory_id = ${memory.memory_id} + AND notes = 'project doc value review' + `; + for (const fact of facts as any[]) { + await tx` + INSERT INTO preserve.memory_support (memory_id, fact_id, episode_id, support_type, notes) + VALUES (${memory.memory_id}, ${fact.fact_id}, ${fact.episode_id}, 'supporting', 'project doc value review') + `; + } + await tx` + UPDATE preserve.review_queue + SET status = 'approved'::preserve.review_status, + reviewer_notes = ${decisionNotes(decision)}, + resolved_at = now() + WHERE review_id = ${decision.reviewId} + `; + await tx` + UPDATE preserve.artifact + SET can_promote_memory = true, + preservation_state = 'published'::preserve.preservation_state, + updated_at = now() + WHERE artifact_id = ${target.artifact_id} + AND tenant = ${tenant} + `; + approved++; + memories.push(memory.memory_id); + } + }); + + return { approved, rejected, memories }; +} diff --git a/src/memory/shadow-eval.ts b/src/memory/shadow-eval.ts new file mode 100644 index 0000000..c2f50b0 --- /dev/null +++ b/src/memory/shadow-eval.ts @@ -0,0 +1,98 @@ +import type postgres from "postgres"; +import { buildBrainCoreSnapshot, type BrainCoreSnapshotOptions } from "./snapshot"; + +export interface ShadowEvalCase { + name: string; 
/**
 * Runs a set of shadow-evaluation cases against the snapshot builder and
 * aggregates the outcome.
 *
 * Each case builds a snapshot in `"shadow"` mode (token budget defaulting to
 * 800) and is scored against the lower-cased snapshot Markdown:
 * - `missingExpected`: expected terms that do not appear.
 * - `matchedForbidden`: forbidden terms that do appear.
 * - `useful`: for expect-empty cases, no prompt package and no forbidden hit;
 *   otherwise a non-empty prompt package with no missing expected terms.
 * - `badRecall`: any forbidden hit, or a prompt package on an expect-empty case.
 *
 * The run passes when there is at least one case, the useful rate is at least
 * 0.7, the bad-recall rate is below 0.1, and the truncation rate is below 0.25.
 * The useful rate is measured over non-expect-empty cases when any exist, else
 * over all cases.
 *
 * @param sql Database handle forwarded to the snapshot builder.
 * @param cases Cases to evaluate; they run sequentially in the given order.
 * @returns Aggregate counts, rates, pass flag, and per-case results.
 */
export async function runBrainCoreShadowEval(sql: postgres.Sql, cases: ShadowEvalCase[]): Promise<ShadowEvalResult> {
  const caseResults: ShadowEvalCaseResult[] = [];

  for (const evalCase of cases) {
    const request: BrainCoreSnapshotOptions = {
      cwd: evalCase.cwd,
      gitRoot: evalCase.gitRoot,
      prompt: evalCase.prompt,
      maxTokens: evalCase.maxTokens ?? 800,
      mode: "shadow",
    };
    const snapshot = await buildBrainCoreSnapshot(sql, request);

    const renderedLower = snapshot.markdown.toLowerCase();
    const appears = (term: string): boolean => renderedLower.includes(term.toLowerCase());
    const missingExpected = (evalCase.expectedTerms ?? []).filter((term) => !appears(term));
    const matchedForbidden = (evalCase.forbiddenTerms ?? []).filter((term) => appears(term));

    const expectEmpty = evalCase.expectEmpty === true;
    const promptEligible = snapshot.recall.promptPackage.length;
    const hasPromptPackage = promptEligible > 0;

    let useful: boolean;
    if (expectEmpty) {
      useful = !hasPromptPackage && matchedForbidden.length === 0;
    } else {
      const expectedCount = evalCase.expectedTerms?.length ?? 0;
      useful = hasPromptPackage && (expectedCount === 0 || missingExpected.length === 0);
    }
    const badRecall = matchedForbidden.length > 0 || (expectEmpty && hasPromptPackage);

    caseResults.push({
      name: evalCase.name,
      useful,
      badRecall,
      injected: snapshot.recall.injected,
      promptEligible,
      retrieved: snapshot.recall.results.length,
      tokenEstimate: snapshot.tokenEstimate,
      truncated: snapshot.truncated,
      missingExpected,
      matchedForbidden,
      expectEmpty,
    });
  }

  const countWhere = (predicate: (entry: ShadowEvalCaseResult) => boolean): number =>
    caseResults.filter(predicate).length;

  const total = caseResults.length;
  const useful = countWhere((entry) => entry.useful);
  const badRecall = countWhere((entry) => entry.badRecall);
  const truncated = countWhere((entry) => entry.truncated);
  const empty = countWhere((entry) => entry.promptEligible === 0);
  const positiveTotal = countWhere((entry) => !entry.expectEmpty);
  const positiveUseful = countWhere((entry) => !entry.expectEmpty && entry.useful);

  let usefulRate = 0;
  if (positiveTotal) {
    usefulRate = positiveUseful / positiveTotal;
  } else if (total) {
    usefulRate = useful / total;
  }
  const badRecallRate = total ? badRecall / total : 0;
  const truncationRate = total ? truncated / total : 0;
  const passed = total > 0 && usefulRate >= 0.7 && badRecallRate < 0.1 && truncationRate < 0.25;

  return {
    total,
    useful,
    badRecall,
    truncated,
    empty,
    usefulRate,
    badRecallRate,
    truncationRate,
    passed,
    cases: caseResults,
  };
}
Math.min(maxTokens, profileBudget) : maxTokens; +} + +export async function buildBrainCoreSnapshot( + sql: postgres.Sql, + options: BrainCoreSnapshotOptions, +): Promise { + const domains = resolveSnapshotDomains(options.cwd, options.gitRoot, options.prompt); + const cues = tokenCues(options.prompt).slice(0, 12); + const scope = domains[0] ? `project:${domains[0]}` : undefined; + const maxTokens = resolveSnapshotBudget(options.profile, options.maxTokens); + const recallMaxTokens = options.profile ? maxTokens * 4 : maxTokens; + const profileCues = options.profile === "risk" + ? [...cues, "feedback_pre_push_gate_is_load_bearing", "feedback_codex_review_before_approve", "pre-push", "codex", "review", "github"] + : cues; + let recall = await recallForContext(sql, { + trigger: "braincore_snapshot", + tenant: options.tenant, + goal: options.prompt, + cues: profileCues, + scope, + maxTokens: recallMaxTokens, + injectionMode: options.mode ?? "shadow", + limit: options.limit ?? 20, + relevanceReason: "braincore-runtime-snapshot", + actor: "braincore-snapshot", + route: "braincore snapshot build", + }); + if (scope && recall.promptPackage.length === 0 && cues.length > 0) { + recall = await recallForContext(sql, { + trigger: "braincore_snapshot_scope_fallback", + tenant: options.tenant, + scope, + maxTokens: recallMaxTokens, + injectionMode: options.mode ?? "shadow", + limit: options.limit ?? 20, + relevanceReason: "braincore-runtime-snapshot-scope-fallback", + actor: "braincore-snapshot", + route: "braincore snapshot build fallback", + }); + } + const rendered = renderBrainCoreSnapshot({ ...options, mode: options.mode ?? "shadow" }, domains, recall, options.profile); + const budgeted = enforceSnapshotBudget(rendered, maxTokens); + return { markdown: budgeted.markdown, domains, recall, tokenEstimate: budgeted.tokenEstimate, truncated: budgeted.truncated, profile: options.profile ?? 
/**
 * Derives a project domain from a path using operator-configured marker directories.
 *
 * Marker names are read from the comma-separated `BRAINCORE_PROJECT_DOMAIN_MARKERS`
 * environment variable (entries are trimmed, empty entries ignored). Markers are
 * tried in configured order; for the first one whose first occurrence as a path
 * segment is followed by another segment, that following segment is returned
 * unmodified.
 *
 * @param inputPath Filesystem path to inspect.
 * @returns The segment after the matching marker, or `null` when no markers are
 *   configured or none of them match.
 */
function findProjectMarkerDomain(inputPath: string): string | null {
  const markers = (process.env.BRAINCORE_PROJECT_DOMAIN_MARKERS ?? "")
    .split(",")
    .map((value) => value.trim())
    .filter(Boolean);
  if (markers.length === 0) return null;

  // Split on both "/" and "\" so Windows-style paths are segmented too.
  const segments = inputPath.split(/[\\/]+/).filter(Boolean);
  for (const marker of markers) {
    const index = segments.indexOf(marker);
    const next = index >= 0 ? segments[index + 1] : undefined;
    if (next) return next;
  }
  return null;
}
"legacy"}`); + lines.push(`Mode: ${recall.mode}`); + lines.push(`Injected: ${recall.injected ? "yes" : "no"}`); + lines.push(`Prompt-eligible memories: ${promptPackage.length}`); + lines.push(`Retrieved memories: ${recall.results.length}`); + lines.push(""); + + if (promptPackage.length === 0) { + lines.push("## No Prompt-Eligible BrainCore Memories"); + lines.push(""); + lines.push("BrainCore returned no reviewed memories for this snapshot. Imported assistant memories remain gated until explicitly approved."); + } else { + lines.push("## Memories"); + lines.push(""); + for (const item of promptPackage) { + lines.push(`### ${item.section}`); + lines.push(`- Memory ID: ${item.memoryId}`); + lines.push(`- Reason: ${item.reason}`); + lines.push(`- Tokens: ${item.tokenCount}`); + if (item.governanceStatus) lines.push(`- Governance: ${item.governanceStatus}`); + lines.push(""); + lines.push(profileConfig ? compactMemoryContent(item.content, profileConfig.bodyTokens) : item.content); + lines.push(""); + } + } + + if (recall.omitted.length > 0) { + lines.push("## Omitted"); + lines.push(""); + for (const omitted of recall.omitted) { + lines.push(`- ${omitted.memoryId}: ${omitted.reason}`); + } + } + + return lines.join("\n").trimEnd() + "\n"; +} + +function selectProfilePromptPackage(recall: ContextRecallResult, profileConfig: SnapshotProfileConfig): ContextRecallResult["promptPackage"] { + const selected: ContextRecallResult["promptPackage"] = []; + let genericTokens = 0; + let genericCards = 0; + const ranked = [...recall.promptPackage].sort((left, right) => safetyPriority(right) - safetyPriority(left)); + for (const item of ranked) { + if (selected.length >= profileConfig.maxCards) break; + const generic = isGenericFallback(item); + const tokenCost = Math.min(item.tokenCount, profileConfig.bodyTokens + 80); + if (generic) { + if (profileConfig.genericCardCap !== null && genericCards >= profileConfig.genericCardCap) continue; + if (profileConfig.genericTokenCap !== null 
/**
 * Trims a rendered snapshot so its estimated token count fits within `maxTokens`.
 *
 * When the budget is not a positive finite number, or the snapshot already fits,
 * the markdown is returned untouched. Otherwise the content is cut to roughly
 * four characters per budgeted token, a fixed "Budget Notice" section is
 * appended, and the content is shortened further in 64-character steps (down to
 * a 16-character floor) until the estimate fits.
 *
 * @param markdown Rendered snapshot text.
 * @param maxTokens Token budget to enforce.
 * @param estimate Token estimator; defaults to the module's `estimateTokens`.
 * @returns The possibly shortened markdown, its token estimate, and whether
 *   truncation happened.
 */
function enforceSnapshotBudget(
  markdown: string,
  maxTokens: number,
  estimate: (value: string) => number = estimateTokens,
): { markdown: string; tokenEstimate: number; truncated: boolean } {
  const tokenEstimate = estimate(markdown);
  if (!Number.isFinite(maxTokens) || maxTokens <= 0 || tokenEstimate <= maxTokens) {
    return { markdown, tokenEstimate, truncated: false };
  }

  const suffix = "\n\n## Budget Notice\n\nSnapshot truncated to respect the configured token budget.\n";
  const minContentLength = 16;
  const shrinkStep = 64;

  // Reserve room for the notice, then take a first cut of the content.
  const contentBudget = Math.max(1, maxTokens - estimate(suffix));
  let content = markdown.slice(0, Math.max(1, contentBudget * 4)).trimEnd();
  let truncated = content + suffix;

  // Shrink only the content and re-append the notice intact. Slicing the
  // combined string would cut into the notice and then add it back in full,
  // so the text would never get shorter.
  while (estimate(truncated) > maxTokens && content.length > minContentLength) {
    content = content.slice(0, Math.max(minContentLength, content.length - shrinkStep)).trimEnd();
    truncated = content + suffix;
  }

  return { markdown: truncated, tokenEstimate: estimate(truncated), truncated: true };
}