From f3ac9b3a73e4e6bc41559a5ca433ffaec60c6e94 Mon Sep 17 00:00:00 2001 From: Lu Nelson Date: Wed, 13 May 2026 15:22:51 +0200 Subject: [PATCH 01/16] add new ln-disambiguate skill --- .agents/skills/ln-consult/SKILL.md | 3 +- .agents/skills/ln-disambiguate/SKILL.md | 144 ++++++++++++++++++++++++ 2 files changed, 146 insertions(+), 1 deletion(-) create mode 100644 .agents/skills/ln-disambiguate/SKILL.md diff --git a/.agents/skills/ln-consult/SKILL.md b/.agents/skills/ln-consult/SKILL.md index e66527a7..96af4b67 100644 --- a/.agents/skills/ln-consult/SKILL.md +++ b/.agents/skills/ln-consult/SKILL.md @@ -53,7 +53,7 @@ Presume **structural** on a fresh thread when the work touches workflow closure, Default rule: -`ln-grill → ln-spec → ln-plan → [ln-design] → [ln-oracles] → ln-scope → [ln-spike] → ln-build → ln-review → [ln-refactor] → [ln-sync]` +`ln-grill` or `ln-disambiguate` → `ln-spec` → `ln-plan` → optional `ln-design` / `ln-oracles` → `ln-scope` → optional `ln-spike` → `ln-build` → `ln-review` → optional `ln-refactor` / `ln-sync` Bounded exception: @@ -80,6 +80,7 @@ Only recommend the bounded serial exception when those same conditions hold and | Situation | Work type | Suggest | | --- | --- | --- | | Idea is vague, needs fleshing out | structural | `ln-grill` | +| Plausible interpretations diverge; examples would clarify faster than open-ended questioning | structural | `ln-disambiguate` | | Understanding exists, needs a written spec | structural | `ln-spec` | | Spec exists, needs work sequencing | structural | `ln-plan` | | Verification strategy is the main uncertainty | structural | `ln-oracles` | diff --git a/.agents/skills/ln-disambiguate/SKILL.md b/.agents/skills/ln-disambiguate/SKILL.md new file mode 100644 index 00000000..349c5b42 --- /dev/null +++ b/.agents/skills/ln-disambiguate/SKILL.md @@ -0,0 +1,144 @@ +--- +name: ln-disambiguate +description: "Collapse meaningful ambiguity by generating concrete divergent interpretations and asking the user to 
classify examples, counterexamples, edge cases, or candidate outcomes. Use when a plan/design has several plausible meanings, requirements feel vague, examples would clarify intent faster than open-ended grilling, or the user asks to disambiguate, find ambiguity, use behavioral kernels, or ask contrastive questions." +--- + +# Ln Disambiguate + +Collapse ambiguity by asking the smallest concrete question whose answer separates plausible interpretations. + +Users are often better at recognizing intent in examples than authoring abstract predicates. Do not start with “what are the requirements?” if a concrete classification would answer faster. Generate cases where plausible meanings diverge, ask the user to classify the case, and translate the answer into candidate durable conclusions. + +This is an alternative entry point to `ln-grill`: use `ln-grill` when the idea needs broad Socratic pressure; use `ln-disambiguate` when the work already has enough shape that the useful move is resolving ambiguous meanings, behaviors, boundaries, or examples. + +Do not create or edit planning artifacts here. Durable conclusions promote into `memory/SPEC.md` or `memory/PLAN.md` through the next routed skill. + +## Grounding + +If local context can resolve the ambiguity, inspect it instead of asking. Otherwise read only what helps you form precise contrasts: + +1. `memory/SPEC.md` if present — lexicon, live requirements, assumptions, decisions, invariants, and verification stance. +2. `memory/PLAN.md` if the ambiguity concerns sequencing or frontier scope. +3. Relevant design docs when `memory/SPEC.md` points to them. + +Use the current lexicon. If ambiguous language reveals a missing or overloaded term, name the competing meanings explicitly. + +## Method + +For each ambiguity: + +1. **Name the ambiguous claim** — the term, behavior, boundary, decision, requirement, invariant, or criterion that has multiple plausible meanings. +2. 
**Generate competing interpretations** — usually 2–4. Include the boring/default interpretation, the stricter interpretation, and any interpretation likely to cause a bug if implemented silently. +3. **Find the divergence point** — the smallest concrete scenario where those interpretations produce different outcomes. +4. **Ask a contrastive question** — have the user classify the scenario or choose the expected outcome. +5. **Translate the answer** into candidate durable conclusions: + - `decision` — a chosen option over named alternatives, with rationale. + - `invariant` — a preservation rule that must keep holding. + - `constraint` — a boundary or non-goal that rules out interpretations. + - `assumption` — a material belief that remains unvalidated. + - `example` — a concrete positive, edge-case, trace, or not-relevant case. + - `counterexample` — a rejected case or outcome that rules out an interpretation. + - `criterion` — an observation, test shape, or manual review that would witness the claim. + - `unresolved ambiguity` — a named ambiguity intentionally deferred. +6. **Repeat only while it buys clarity** — stop when the remaining ambiguity is either collapsed, explicitly deferred, or ready for `ln-spec`. + +Prefer one high-yield question at a time. Multiple-choice is good when options are real; forced-choice is bad when it hides a likely fifth answer. 
Always allow “other / depends — explain.” + +## Good question shapes + +Prefer concrete classification: + +- “In this exact case, which outcome is correct?” +- “Is this inside or outside the commitment?” +- “Would this count as a bug?” +- “Which option should be rejected?” +- “Does this example witness the rule, contradict it, or sit outside scope?” +- “If we implemented interpretation A, what important case would break?” + +Avoid broad prompts unless no contrastive case is available: + +- “How should permissions work?” +- “What are all the requirements?” +- “Tell me more about edge cases.” + +When asking, include your recommended answer if you have enough context, and say why. The user should be able to accept, reject, or refine your classification quickly. + +## Behavioral kernels + +Use kernels as hidden interviewer machinery for generating high-yield contrasts. Do not make the user learn the kernel taxonomy unless it helps them reason. + +Activate at most the top 2–3 relevant kernels from language and context: + +| Kernel | Looks for | Typical artifact | +| --- | --- | --- | +| Identity & reference | ids, references, links, uniqueness | entity / reference invariant | +| Containment & topology | parent/child, folders, ordering, graphs | membership / topology invariant | +| Validation & normalization | valid/invalid input, canonical forms | parser or validation contract | +| State & lifecycle | states, transitions, terminal states | state-machine invariant | +| Temporal history | undo, redo, audit, expiration | history / timeline invariant | +| Optimization & preference | best, preferred, tie-breaks | ranking or objective rule | +| Authority & capability | roles, permissions, delegation | authorization predicate | +| Concurrency & collaboration | offline, stale, conflict, merge | conflict-resolution semantics | +| Transactions & atomicity | all-or-nothing multi-object updates | transaction invariant | +| Resource accounting | balances, quotas, capacity, limits | 
conservation / bounds invariant | +| Derived data & views | counts, filters, projections, caches | view consistency invariant | +| Error & recovery | retry, rollback, compensation | failure / recovery contract | +| External effects | APIs, queues, webhooks, clocks | boundary / adapter contract | +| Change & migration | legacy, compatibility, upgrade | migration / refinement invariant | +| Observability & evidence | logs, traces, explanations, audit | trace / provenance invariant | + +Kernel move: generate a concrete scenario where plausible policies diverge, then ask the user to classify it. The answer should become a weaker-but-useful checkable artifact: example, counterexample, invariant, criterion, or explicit ambiguity. + +## Example + +Instead of asking: + +> How should project deletion work? + +Ask: + +> A project is deleted while it still has tasks. Which behavior is correct? +> +> A. Delete the tasks too. +> B. Archive the tasks and keep them readable. +> C. Move tasks to an unassigned pool. +> D. Block deletion until tasks are reassigned or deleted. +> E. Other / depends. +> +> My recommendation is B if historical traceability matters more than cleanup, because it preserves references and gives us a clear data-integrity invariant. + +Then translate the answer, for example: + +- decision: “Deleted projects archive their tasks rather than deleting or reassigning them.” +- invariant: “Archived tasks retain a tombstone reference to the deleted project.” +- positive example: “Deleting a project with open tasks makes those tasks archived and readable.” +- counterexample: “Tasks silently disappearing after project deletion is rejected.” +- criterion: “A deletion test verifies task archival and readable tombstone references.” + +## Stop conditions + +Stop when one of these is true: + +- The user selected an interpretation and the durable consequences are clear. +- The ambiguity is explicitly deferred and named. 
+- More questioning would be generic grilling rather than ambiguity collapse. +- The next correct step is to record, plan, or scope. + +## Routing + +When the ambiguity pass is complete, present these options to the user. If `tool-ask-question` is available, use it; otherwise use a numbered list. + +| # | Label | Target | Why | +| --- | --- | --- | --- | +| 1 | Write/update spec | `ln-spec` | Durable conclusions should enter `memory/SPEC.md` | +| 2 | Plan frontier | `ln-plan` | The meaning is clear but work needs sequencing | +| 3 | Scope one slice | `ln-scope` | One implementation slice is now obvious | +| 4 | Grill further | `ln-grill` | The ambiguity pass exposed broader design uncertainty | + +Recommended: choose `ln-spec` when decisions, invariants, assumptions, lexicon, examples, or criteria changed. + +## References + +- `docs/design/INTENT_GRAPH_SEMANTICS.md` — typed claims, examples/counterexamples, negative edges, progressive checkability. +- `docs/design/BEHAVIORAL_KERNELS.md` — kernel taxonomy and contrastive question patterns. +- `docs/archive/design/INTENT_SPEC_EVOLUTION.md` §6 — ambiguity-targeted disambiguation. From 02b3f7fcef5d071e314bd467ae2e20090df97616 Mon Sep 17 00:00:00 2001 From: Lu Nelson Date: Wed, 13 May 2026 15:26:29 +0200 Subject: [PATCH 02/16] distill the disambiguation skill --- .agents/skills/ln-disambiguate/SKILL.md | 97 +++++++------------------ 1 file changed, 26 insertions(+), 71 deletions(-) diff --git a/.agents/skills/ln-disambiguate/SKILL.md b/.agents/skills/ln-disambiguate/SKILL.md index 349c5b42..f755a269 100644 --- a/.agents/skills/ln-disambiguate/SKILL.md +++ b/.agents/skills/ln-disambiguate/SKILL.md @@ -1,73 +1,47 @@ --- name: ln-disambiguate -description: "Collapse meaningful ambiguity by generating concrete divergent interpretations and asking the user to classify examples, counterexamples, edge cases, or candidate outcomes. 
Use when a plan/design has several plausible meanings, requirements feel vague, examples would clarify intent faster than open-ended grilling, or the user asks to disambiguate, find ambiguity, use behavioral kernels, or ask contrastive questions." +description: "Collapse meaningful ambiguity with contrastive examples. Use when a plan/design has several plausible meanings, requirements feel vague, examples would clarify intent faster than grilling, or the user asks to disambiguate, find ambiguity, use behavioral kernels, or ask contrastive questions." --- # Ln Disambiguate -Collapse ambiguity by asking the smallest concrete question whose answer separates plausible interpretations. +Generate cases where plausible interpretations diverge; ask the user to classify the case. -Users are often better at recognizing intent in examples than authoring abstract predicates. Do not start with “what are the requirements?” if a concrete classification would answer faster. Generate cases where plausible meanings diverge, ask the user to classify the case, and translate the answer into candidate durable conclusions. +Users recognize intent in concrete examples faster than they author abstract predicates. Use the TiCoder move generalized beyond tests: produce examples, counterexamples, edge cases, and candidate outcomes that separate meanings. Then translate the answer into typed conclusions. -This is an alternative entry point to `ln-grill`: use `ln-grill` when the idea needs broad Socratic pressure; use `ln-disambiguate` when the work already has enough shape that the useful move is resolving ambiguous meanings, behaviors, boundaries, or examples. +Use this instead of `ln-grill` when the work has enough shape that ambiguity collapse is the next move. Use `ln-grill` when the idea still needs broad Socratic pressure. -Do not create or edit planning artifacts here. Durable conclusions promote into `memory/SPEC.md` or `memory/PLAN.md` through the next routed skill. 
- -## Grounding - -If local context can resolve the ambiguity, inspect it instead of asking. Otherwise read only what helps you form precise contrasts: - -1. `memory/SPEC.md` if present — lexicon, live requirements, assumptions, decisions, invariants, and verification stance. -2. `memory/PLAN.md` if the ambiguity concerns sequencing or frontier scope. -3. Relevant design docs when `memory/SPEC.md` points to them. - -Use the current lexicon. If ambiguous language reveals a missing or overloaded term, name the competing meanings explicitly. - -## Method +If local context can answer the question, inspect it instead of asking. Read only the context needed to form precise contrasts: `memory/SPEC.md`, `memory/PLAN.md`, and files they explicitly point to. Use the current lexicon; when terms are overloaded, name the competing meanings. For each ambiguity: -1. **Name the ambiguous claim** — the term, behavior, boundary, decision, requirement, invariant, or criterion that has multiple plausible meanings. -2. **Generate competing interpretations** — usually 2–4. Include the boring/default interpretation, the stricter interpretation, and any interpretation likely to cause a bug if implemented silently. -3. **Find the divergence point** — the smallest concrete scenario where those interpretations produce different outcomes. -4. **Ask a contrastive question** — have the user classify the scenario or choose the expected outcome. -5. **Translate the answer** into candidate durable conclusions: - - `decision` — a chosen option over named alternatives, with rationale. - - `invariant` — a preservation rule that must keep holding. - - `constraint` — a boundary or non-goal that rules out interpretations. - - `assumption` — a material belief that remains unvalidated. - - `example` — a concrete positive, edge-case, trace, or not-relevant case. - - `counterexample` — a rejected case or outcome that rules out an interpretation. 
- - `criterion` — an observation, test shape, or manual review that would witness the claim. - - `unresolved ambiguity` — a named ambiguity intentionally deferred. -6. **Repeat only while it buys clarity** — stop when the remaining ambiguity is either collapsed, explicitly deferred, or ready for `ln-spec`. - -Prefer one high-yield question at a time. Multiple-choice is good when options are real; forced-choice is bad when it hides a likely fifth answer. Always allow “other / depends — explain.” +1. Name the ambiguous claim. +2. Generate 2–4 competing interpretations. +3. Find the smallest scenario where they produce different outcomes. +4. Ask one contrastive classification question. +5. Translate the answer into candidate durable conclusions: + - `decision` + - `invariant` + - `constraint` + - `assumption` + - `example` + - `counterexample` + - `criterion` + - `unresolved ambiguity` -## Good question shapes +Prefer one high-yield question at a time. Multiple choice is useful when options are real; forced choice is harmful when it hides the likely fifth answer. Always allow “other / depends — explain.” -Prefer concrete classification: +Ask questions like: - “In this exact case, which outcome is correct?” - “Is this inside or outside the commitment?” - “Would this count as a bug?” - “Which option should be rejected?” - “Does this example witness the rule, contradict it, or sit outside scope?” -- “If we implemented interpretation A, what important case would break?” - -Avoid broad prompts unless no contrastive case is available: - -- “How should permissions work?” -- “What are all the requirements?” -- “Tell me more about edge cases.” - -When asking, include your recommended answer if you have enough context, and say why. The user should be able to accept, reject, or refine your classification quickly. -## Behavioral kernels +Include your recommended answer when you have enough context, and explain why. 
-Use kernels as hidden interviewer machinery for generating high-yield contrasts. Do not make the user learn the kernel taxonomy unless it helps them reason. - -Activate at most the top 2–3 relevant kernels from language and context: +Use behavioral kernels as hidden interviewer machinery. Activate at most the top 2–3 relevant kernels: | Kernel | Looks for | Typical artifact | | --- | --- | --- | @@ -87,15 +61,7 @@ Activate at most the top 2–3 relevant kernels from language and context: | Change & migration | legacy, compatibility, upgrade | migration / refinement invariant | | Observability & evidence | logs, traces, explanations, audit | trace / provenance invariant | -Kernel move: generate a concrete scenario where plausible policies diverge, then ask the user to classify it. The answer should become a weaker-but-useful checkable artifact: example, counterexample, invariant, criterion, or explicit ambiguity. - -## Example - -Instead of asking: - -> How should project deletion work? - -Ask: +Example: > A project is deleted while it still has tasks. Which behavior is correct? > @@ -107,7 +73,7 @@ Ask: > > My recommendation is B if historical traceability matters more than cleanup, because it preserves references and gives us a clear data-integrity invariant. -Then translate the answer, for example: +Translate the answer: - decision: “Deleted projects archive their tasks rather than deleting or reassigning them.” - invariant: “Archived tasks retain a tombstone reference to the deleted project.” @@ -115,14 +81,9 @@ Then translate the answer, for example: - counterexample: “Tasks silently disappearing after project deletion is rejected.” - criterion: “A deletion test verifies task archival and readable tombstone references.” -## Stop conditions +Stop when the ambiguity is collapsed, explicitly deferred, or ready for `ln-spec`. -Stop when one of these is true: - -- The user selected an interpretation and the durable consequences are clear. 
-- The ambiguity is explicitly deferred and named. -- More questioning would be generic grilling rather than ambiguity collapse. -- The next correct step is to record, plan, or scope. +Do not create or edit planning artifacts here. Durable conclusions promote into `memory/SPEC.md` or `memory/PLAN.md` through the next routed skill. ## Routing @@ -136,9 +97,3 @@ When the ambiguity pass is complete, present these options to the user. If `tool | 4 | Grill further | `ln-grill` | The ambiguity pass exposed broader design uncertainty | Recommended: choose `ln-spec` when decisions, invariants, assumptions, lexicon, examples, or criteria changed. - -## References - -- `docs/design/INTENT_GRAPH_SEMANTICS.md` — typed claims, examples/counterexamples, negative edges, progressive checkability. -- `docs/design/BEHAVIORAL_KERNELS.md` — kernel taxonomy and contrastive question patterns. -- `docs/archive/design/INTENT_SPEC_EVOLUTION.md` §6 — ambiguity-targeted disambiguation. From 7f7c8203391957e72732cb12752c614a6c81a41d Mon Sep 17 00:00:00 2001 From: Lu Nelson Date: Wed, 13 May 2026 15:27:45 +0200 Subject: [PATCH 03/16] new spec structure, and updates to corresponding skills --- .agents/skills/ln-spec/SKILL.md | 41 +- .../skills/ln-spec/assets/spec-template.md | 170 ++-- .agents/skills/ln-sync/SKILL.md | 45 +- AGENTS.md | 2 +- memory/PLAN.md | 6 +- memory/SPEC.md | 912 +++++++----------- memory/SPEC_RESTRUCTURE.md | 175 ---- 7 files changed, 484 insertions(+), 867 deletions(-) delete mode 100644 memory/SPEC_RESTRUCTURE.md diff --git a/.agents/skills/ln-spec/SKILL.md b/.agents/skills/ln-spec/SKILL.md index 3ef76204..89bcba06 100644 --- a/.agents/skills/ln-spec/SKILL.md +++ b/.agents/skills/ln-spec/SKILL.md @@ -30,35 +30,54 @@ The feature or problem: $ARGUMENTS Write or update `memory/SPEC.md` following the [spec template](assets/spec-template.md). If the file already exists, read it first — preserve existing content, evolve sections that need change. 
+### SPEC shape + +Use the mature SPEC shape unless the existing project clearly predates it and the user only asked for a narrow patch: + +- **Product Contract** — concept, constraints / non-goals, grouped capability requirements. +- **Live Architecture Register** — open assumptions, active decisions, critical invariants. +- **Future Direction Register** — directional bets that shape sequencing but are not current product contract. +- Compact model / architecture sections only when they still serve as SPEC authority. +- Lexicon and Verification Design. + +SPEC is a live register, not an archive. Keep stable product contract separate from live architectural uncertainty and future direction. Prefer short guardrails plus links to PLAN/design docs over long design-doc-scale prose. + ### Verification Design boundary ln-spec owns the **inner loop** of verification design: verification commands, verification policy, and inner-loop oracle items (type checks, fast unit tests, linting). Middle and outer loop oracle strategy, diagnostic assessment, and blind spots are owned by `ln-oracles`. Not every scoped slice requires a full oracle-design pass, but frontier items or slices involving LLM behavior, visual rendering, or compositional/system-level claims should route through `ln-oracles` before implementation. When writing or updating §Verification Design, preserve any content written by ln-oracles (§Verification Stance, §Diagnostic Assessment, §Oracle Strategy middle/outer tiers, §Design notes, §Acknowledged Blind Spots). ### Traceability -If `memory/PLAN.md` exists, verify that changed assumptions and decisions still align with affected frontier items. If it does not exist yet, close the reference chain as far as current artifacts allow: assumptions should still name dependent decisions and validation approaches, and frontier links can be added later by `ln-plan`. 
+If `memory/PLAN.md` exists, verify that changed requirements, assumptions, decisions, and invariants still align with affected frontier items. If it does not exist yet, close the reference chain as far as current artifacts allow: assumptions should still name dependent decisions and validation approaches, and frontier links can be added later by `ln-plan`. ### Weight management -Use the same unit-of-record rules as `ln-build` §Same-item tests. Before adding a row, compare against nearby items in the same feature area. Prefer **update** or **merge** over **add** when the seam is the same. +Use the same unit-of-record rules as `ln-build` §Same-item tests. Before adding a row, compare against nearby items in the same feature area. Prefer **update**, **merge**, or **omit** over **add** when the seam is the same. **Units of record:** -- **Assumption** = one unresolved question at one seam -- **Decision** = one committed choice between alternatives at one seam -- **Invariant** = one seam-level structural property protected by tests +- **Requirement** = stable product capability or externally observable contract. +- **Assumption** = one unresolved question at one seam that still shapes work. +- **Decision** = one committed spine choice between alternatives at one seam. +- **Invariant** = one seam-level structural property protected by tests or an explicit planned oracle. +- **Future direction** = a directional bet that influences sequencing but is not yet product contract. + +**Validated assumptions retire by default.** If evidence settles an assumption, do not leave it live just as history. Either remove it during `ln-sync`, or promote the durable residue into Product Contract, Active Decisions, Critical Invariants, Lexicon, or PLAN traceability if it still constrains active work. 
-**These are not new rows** — they are updates or merges to existing rows: +**These are not new rows** — they are updates, merges, links, or no-ops: - confidence changes, validation narratives, added evidence - helper names, file layout, or implementation mechanics - one more branch/state/kind/phase/action example of an existing rule - one implementation step under an already-recorded decision +- detailed rationale better held by a design doc +- future acceptance criteria better held by PLAN until the work is active **Smell checks before adding:** - The sentence starts with "for this slice" or names a temporary cutover step → probably an update, not a new item - The difference is only approve/reject, confirm/force-close, or kind/phase/state variants of one shared rule → merge into the seam-level row -- The item would stop making sense once the code ships and no alternative remains live → probably a decision that should not be tracked +- The item would stop making sense once the code ships and no alternative remains live → probably not a tracked decision - The item is an implementation mechanic inside an already-chosen boundary → no-op +- The row mainly names test files or records implementation history → probably belongs in code/tests or should merge into a broader invariant Large cleanup is `ln-sync` work. When writing or patching, keep the touched area coherent; do not attempt a risky whole-document consolidation. @@ -66,9 +85,11 @@ Large cleanup is `ln-sync` work. When writing or patching, keep the touched area Every amendment must close its reference chain as far as the current lifecycle stage allows. 
After editing, verify: -- **New assumption** → has: dependent decision(s), validation approach, and implicated frontier item(s) in `memory/PLAN.md` **if `memory/PLAN.md` already exists** -- **New decision** → has: dependent assumption(s), supersession note -- **New invariant** → has: establishing frontier item in `memory/PLAN.md` **if known** (or scoped slice if already defined), protecting test (or `manual (outer loop)`), proved decision +- **New requirement** → has: product capability area and PLAN/frontier references if it changes upcoming work +- **New assumption** → has: dependent decision(s) or invariant(s), validation approach, and implicated frontier item(s) in `memory/PLAN.md` **if `memory/PLAN.md` already exists** +- **New decision** → has: dependent assumption(s) where relevant, supersession note, and enough rationale to identify the chosen seam +- **New invariant** → has: establishing frontier item in `memory/PLAN.md` **if known** (or scoped slice if already defined), protecting test/oracle (or `planned` / `manual (outer loop)`), proved decision or requirement +- **New future direction** → has: PLAN frontier/horizon pointer or design-doc pointer; not full acceptance detail unless already active - **New constraint** → has: rationale for exclusion - **New inner-loop oracle item** → names the invariant(s) it protects diff --git a/.agents/skills/ln-spec/assets/spec-template.md b/.agents/skills/ln-spec/assets/spec-template.md index cf142dd4..83164183 100644 --- a/.agents/skills/ln-spec/assets/spec-template.md +++ b/.agents/skills/ln-spec/assets/spec-template.md @@ -1,122 +1,138 @@ - + When re-running ln-spec: read this file first, preserve existing authority, + and evolve only the touched area. SPEC is not an implementation diary. + Together with PLAN.md, this is the only canonical planning state; do not + create sidecar spec ledgers without explicit permission. 
--> # [Project Name] -## Concept & Goal +## Product Contract - +### Concept -## Constraints & Non-goals + - +### Constraints & Non-goals -## Requirements + - +### Capability Requirements + + + +#### [Capability area] 1. [Requirement] 2. ... -## Assumptions +## Live Architecture Register + + + +### Open Assumptions + Keep only assumptions that are unresolved or still shape named frontier work. + Validated assumptions retire by default during ln-sync unless they still constrain + an active frontier; promote only durable product facts to Product Contract, + Decisions, Invariants, or Lexicon. --> + +| # | Assumption | Confidence | Status | Depends on | Validation approach | +| --- | --- | --- | --- | --- | --- | +| A1 | [hypothesis] | low/medium/high | open | [D# / I# / Requirement #] | [how to falsify] | + +### Active Decisions + + + +1. **[Decision]** — [rationale]. Depends on: [A1]. Supersedes: [—|D#]. + +### Critical Invariants + + -| # | Assumption | Confidence | Status | Dependent decisions | Implicated frontier items | Validation approach | -| --- | ------------ | --------------- | -------------------------- | ------------------- | ----------------- | ------------------- | -| A1 | [hypothesis] | low/medium/high | open/validated/invalidated | [→ §Decisions #N] | [→ PLAN.md frontier id] | [how to falsify] | +| # | Invariant | Protected by | Proves | +| --- | --- | --- | --- | +| I1 | [property] | [test/manual oracle/planned oracle] | [Requirement # / D#] | -## Decisions +## Future Direction Register - + -1. **[Decision]** — [rationale]. Depends on: [A1, A2]. Supersedes: [—|#N]. 
+### [Direction area] -## Invariants +- [Future direction, linked to PLAN/design docs] - +## Interaction Stream Model -| # | Invariant | Established by | Protected by | Proves | -| --- | -------------- | -------------- | ------------ | ----------------- | -| I1 | [property] | [slice/spike] | [test file] | [→ §Decisions #N] | + + +## Layout Architecture + + ## Lexicon - - -| Term | Definition | -| --------------- | --------------------------------------------------------------------------------------------- | -| **assumption** | A falsifiable belief accepted as true; tracked with confidence and status, linked to decisions and frontier items / scoped slices | -| **decision** | A recorded choice that resolves a question; ordered, with supersession chain | -| **invariant** | A structural property proven by implementation and protected by tests; must not regress | -| **requirement** | A capability the system must provide | -| **slice** | A thin end-to-end tracer-bullet path through all integration layers | -| **spike** | A time-boxed throwaway investigation to answer one hard question | -| **phase** | A temporal grouping of frontier items / scoped slices and spikes in PLAN.md | -| **[Term]** | [Definition] | + + +| Term | Definition | +| --- | --- | +| **[Term]** | [Definition] | ## Verification Design - + ### Verification Commands - - -| Step | Check | Command | -| ---- | -------------- | ----------- | -| 1 | Type checking | [command] | -| 2 | Unit tests | [command] | -| 3 | Build | [command] | +| Step | Check | Command | +| --- | --- | --- | +| 1 | Type checking | [command] | +| 2 | Unit tests | [command] | +| 3 | Build | [command] | +| all | Full gate | [command] | ### Verification Policy - + - - ### Current Coverage +--> - - -| File | Tests | Protects | -| ------------- | ----- | -------- | -| [test file] | [N] | [I#] | - -## Acceptance Criteria (exit conditions) +### Acceptance Criteria - + 1. [Criterion] 2. ... 
diff --git a/.agents/skills/ln-sync/SKILL.md b/.agents/skills/ln-sync/SKILL.md index 9fd8c993..8d34ee40 100644 --- a/.agents/skills/ln-sync/SKILL.md +++ b/.agents/skills/ln-sync/SKILL.md @@ -24,7 +24,7 @@ Prefer `ln-sync` at these moments: | File | Authority | Keep live | | --- | --- | --- | -| `memory/SPEC.md` | what and why | active assumptions, current decisions, critical invariants, live constraints | +| `memory/SPEC.md` | what and why | product contract, live architecture register, future direction pointers, lexicon, verification stance | | `memory/PLAN.md` | what's next | sequencing, frontier definitions, near-horizon items, recent completions | | `docs/archive/PLAN_HISTORY.md` | historical ledger | older completed phases and retired plan history | | `HANDOFF.md` | derivative volatile transfer | only unfinished chat state not yet reconciled | @@ -41,27 +41,39 @@ If either `memory/SPEC.md` or `memory/PLAN.md` is missing, route to `ln-spec` or Ask whether each file is still serving re-entry. -- If `memory/SPEC.md` is carrying embedded truths, old implementation detail, or closed historical debates, prune it. +- If `memory/SPEC.md` is carrying embedded truths, old implementation detail, closed historical debates, or validated assumptions that no longer shape frontier work, prune it. - If `memory/PLAN.md` is mostly completed history, collapse it to a rolling frontier and archive the rest. - If `HANDOFF.md`, `memory/CARDS.md`, or `memory/REFACTOR.md` no longer carry live temporary state, delete them. ### 3. SPEC pass — keep only live architecture +Use the mature SPEC shape as the target unless the project has an explicit alternate shape: + +- **Product Contract** — concept, constraints / non-goals, grouped capability requirements. +- **Live Architecture Register** — open assumptions, active decisions, critical invariants. +- **Future Direction Register** — directional bets with PLAN/design-doc pointers. 
+- Compact model / architecture sections only while they still serve as SPEC authority. +- Lexicon and Verification Design. + For each item in `memory/SPEC.md`, choose one: -- **keep** — still unresolved or still constrains future work +- **keep live** — still unresolved or still constrains future work - **update** — wording / evidence / scope changed -- **remove** — embedded, moot, superseded, or redundant +- **compress / merge** — overlaps another live row or carries too much rationale +- **retire embedded** — fully shipped and now protected by code/tests/design docs +- **move rationale** — valuable context, but too detailed for SPEC; keep a short guardrail and link to a design doc +- **future direction** — not current product contract; move under Future Direction Register or ensure PLAN owns it +- **remove** — moot, superseded, redundant, or implementation diary #### Keep in SPEC -- concept and goal +- stable product contract - constraints and non-goals -- requirements -- live assumptions only -- current decisions only -- durable seam-defining decisions even when implemented +- capability requirements +- open assumptions only +- current spine decisions and durable seam-defining decisions - critical seam-level invariants only +- future direction pointers that shape sequencing - lexicon - verification stance / commands / blind spots @@ -69,8 +81,12 @@ For each item in `memory/SPEC.md`, choose one: - implementation diary entries - historical completion notes already reflected in code or tests -- micro-variant decisions / invariants that are now embedded in a larger seam +- micro-variant decisions / invariants that are embedded in a larger seam - validated assumptions that no longer change future work +- detailed design-doc prose, card styling minutiae, or exhaustive test inventories +- future acceptance criteria that PLAN should own until the work is active + +Validated assumptions retire by default. 
Promote the durable residue only when it still constrains active work: product facts go to Product Contract, architectural authority goes to Active Decisions / Critical Invariants, vocabulary goes to Lexicon, and sequencing implications go to PLAN. Do **not** remove durable seam rationale merely because code and tests now exist. Prune micro-decisions, not the architectural spine. @@ -112,7 +128,7 @@ Scan recent code / commits for: - new domain concepts not reflected in the lexicon - durable decisions not reflected in `memory/SPEC.md` - active work not represented in `memory/PLAN.md` sequencing or frontier definitions -- stale references between `memory/PLAN.md` and `memory/SPEC.md` +- stale references between `memory/PLAN.md` and `memory/SPEC.md`, especially PLAN links to retired assumptions / decisions / invariants - equivalent facts that should merge instead of coexisting - prepared cards in `memory/CARDS.md` that should be retired, re-scoped, or reconciled into the next thread's live state - stale derivative artifacts that should be deleted after reconciliation @@ -134,7 +150,7 @@ Produce a concise sync report and make the edits. ## Sync Report ### Pruned -- [items removed and why] +- [items removed, merged, or moved and why] ### Archived - [history moved to PLAN_HISTORY.md] @@ -143,7 +159,10 @@ Produce a concise sync report and make the edits. 
- [temporary artifacts deleted and why] ### Drift fixed -- [concept / decision / frontier updates made] +- [concept / decision / frontier / traceability updates made] + +### Retirement assessment +- [whether embedded items were sufficiently retired, or whether a stronger protocol / follow-up frontier is needed] ### Remaining live items - [important assumptions or frontier work that still matter] diff --git a/AGENTS.md b/AGENTS.md index f0ec17a8..d2e07d67 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -58,7 +58,7 @@ Traceability: assumptions in SPEC.md link to decisions and frontier items in PLA The `/ln-*` skills at `.agents/skills/` follow this flow: -- **Knowledge**: /ln-grill → /ln-spec → /ln-plan → /ln-oracles +- **Knowledge**: /ln-grill or /ln-disambiguate → /ln-spec → /ln-plan → /ln-oracles - **Execution**: /ln-scope → /ln-spike (optional) → /ln-build - **Quality**: /ln-review → /ln-refactor (optional) → /ln-sync - **Process**: /ln-consult (triage), /ln-handoff (state capture), /ln-design (interface exploration) diff --git a/memory/PLAN.md b/memory/PLAN.md index 2a8f3d41..a5607a82 100644 --- a/memory/PLAN.md +++ b/memory/PLAN.md @@ -80,7 +80,7 @@ The May 2026 intent-spec, multi-chat, changeset-ledger, prompt/context, and agen - **Why now / unlocks:** Prompt/context and graph-review probes need realistic graph/transcript fixtures, but hand-authoring those fixtures is chicken-and-egg. A JSONL capability adapter lets an external LLM-as-user drive the real lifecycle through the same mutation authority future agents must use, pressure-testing tool-call vocabulary, chat readiness, resource identity, fixture curation, and import-boundary discipline. Pi comparison remains FE-635 after this seam has a real Brunch use case to compare against. 
- **Acceptance:** Server-owned capability contracts and JSONL protocol/session code are integrated; the probe runner uses only the JSONL client/process boundary; fixture-candidate artifacts preserve scenario briefs, model policy, generated transcripts, and workspace-state inspection without becoming Brunch authority. - **Verification:** Contract/dispatcher tests, JSONL protocol/session tests, import-boundary tests, fake process tests, opt-in real-provider smoke, and fixture-candidate structure/readiness checks. -- **Traceability:** Requirement 43; A89; D143, D147; I114. Also protects Requirements 40, 41, 42 by making prompt/context and mutation-surface probes executable through a real adapter. +- **Traceability:** Requirement 43; A89; D143, D147; I115. Also protects Requirements 40, 41, 42 by making prompt/context and mutation-surface probes executable through a real adapter. - **Design docs:** `docs/design/AGENT_MUTATION_SURFACE.md`; `docs/archive/design/INTENT_SPEC_EVOLUTION.md`; FE-705 branch artifacts until rebased. ### intent-graph-semantics @@ -171,7 +171,7 @@ The May 2026 intent-spec, multi-chat, changeset-ledger, prompt/context, and agen - **Why now / unlocks:** Extends the same phase-agnostic preface-card model to external research, but should wait for prompt/context scenario substrate proof so web research does not become an ad hoc tool surface. - **Acceptance:** Research tools are invoked through interviewer context gathering, outputs render as provisional preface cards paired with questions, and observer capture treats the validated full turn as atomic. - **Verification:** Prompt/context scenario probes for query framing and tool-output summarization, plus manual review of provisional-context handling. -- **Traceability:** Requirements 20, 21, 40, 41; D99, D112, D139, D142. +- **Traceability:** Requirements 20, 21, 40, 41; D125, D139, D140, D142. 
- **Design docs:** FE-698 prompt/context scenario substrate references; future productized research notes if needed. ### relation-first-observer-enrichment @@ -223,7 +223,7 @@ The May 2026 intent-spec, multi-chat, changeset-ledger, prompt/context, and agen - **Why now / unlocks:** Deprioritized below continuous workspace and semantic/generative substrate. Phase 1 substrate already ships schema support; the remaining decision is the anchor model (`chat` row anchor fields vs deferred `chat_focus` table). - **Acceptance:** Side-chat sessions survive remount/reload and remain coherent with graph truth without introducing a second workflow model. - **Verification:** Persistence/reload tests and manual side-chat walkthroughs. -- **Traceability:** Requirement 39; A82, A83; D138. +- **Traceability:** Requirement 39; D138; I111. - **Design docs:** `docs/design/MULTI_CHAT.md` §10 Phase 2; `docs/design/SIDE_CHAT.md` §9 V4 row. ### side-chat-v4b-item-versioning diff --git a/memory/SPEC.md b/memory/SPEC.md index 34a38bce..d5ac20a1 100644 --- a/memory/SPEC.md +++ b/memory/SPEC.md @@ -1,663 +1,399 @@ + Brunch the built thing. It is not the product-layer ontology that + Brunch users produce while building their own intent graphs. The + dev-workflow trajectory lives in docs/design/ln-skills/EVOLUTION.md. --> # Brunch v2 — Spec Elicitation Tool -## Concept & Goal +## Product Contract -Brunch is an AI-guided spec elicitation tool that turns natural-language goals into structured specifications through a four-phase interview: - -- **grounding** — goals, terms, context, constraints -- **design** — commitments and tradeoffs -- **requirements** — capability review and gap-finding -- **criteria** — verification coverage +### Concept -An interviewer agent conducts the conversation. A separate observer agent extracts typed intent items from each answered turn and links them into an intent graph. 
The interviewer may also invoke context-gathering capabilities when it lacks enough orientation for the next move; their visible outputs appear in the stream as preface cards. The workspace stream is turn-centered rather than message-shaped: durable conversational turns provide the branch-bearing lineage spine, while projected control cards, phase markers, and activity cards frame them. An open phase should always bottom out in one visible next action — a projected kickoff card, actionable frontier turn, visible generation state, projected recovery card, or closed-phase handoff / completion control. +Brunch is an AI-guided spec elicitation tool that turns natural-language goals into structured specifications through a four-phase interview: -Brunch is strongest while certainty is still being formed: when the real work is clarifying the target, surfacing commitments, and making unresolvedness legible before downstream implementation decomposition takes over. Its output is a calibrated handoff, not fake closure — a truthful starting point for implementation that makes visible what is known, chosen, constrained, required, and still open. Export is therefore built from the active path's accepted review outputs plus reviewed knowledge, not from laundering unresolved uncertainty into a prematurely final document. +1. **grounding** — goals, terms, context, constraints +2. **design** — commitments and tradeoffs +3. **requirements** — capability review and gap-finding +4. **criteria** — verification coverage -The product direction is from **planning specs** toward **intent specs**. Planning and downstream work sequencing remain useful projections, but Brunch's source artifact should preserve meaning first: what the user commits to, what properties define correctness, which examples or counterexamples disambiguate the intent, which assumptions remain open, what evidence has been accepted, and where ambiguity is explicitly unresolved. 
Because future agent features and post-spec handoff flows should consume the graph rather than a single transcript, Brunch needs explicit prompt/context engineering: scenario-specific graph context packs, reusable prompt doctrines, and lightweight prompt probes before UI surfaces are committed. +An interviewer agent conducts the conversation. A separate observer agent extracts typed intent items from answered turns and links them into an intent graph. The interviewer may invoke context-gathering capabilities when it lacks orientation; visible outputs appear as provisional preface cards paired with question cards inside the same turn. -Brunch operates inside a **workspace**: the cwd-backed software context whose local `.brunch/` directory stores one or more specifications. Grounding supports two strategies: **elicitation-first** for greenfield work and **analysis-first** for brownfield work. Brownfield grounding begins with read-only workspace analysis that produces a visible preface card (grounding brief), and the interviewer may gather more context via preface cards in any phase when it needs orientation. +Brunch's output is a calibrated handoff, not fake closure. The product direction is from **planning specs** toward **intent specs**: the durable source artifact should preserve meaning first — commitments, correctness properties, examples and counterexamples, assumptions, accepted evidence, and unresolved ambiguity. Planning and downstream sequencing remain useful projections from that source truth. -Post-launch, Brunch should support specification work across two axes rather than one: `greenfield <> brownfield` and `end-to-end build <> incremental feature`. That means the interview cannot assume one long whole-product drill-down. 
It should be able to start broad, deepen recursively where needed, synthesize candidate directions when the user wants help filling in the gaps, and let the intent graph itself become a working surface for refinement instead of only a sidebar summary. +Brunch operates inside a **workspace**: the cwd-backed software context whose local `.brunch/` directory stores one or more specifications. Grounding supports **elicitation-first** greenfield work and **analysis-first** brownfield work. The interview must also support whole-product work and partial-scope / incremental feature elicitation. -## Constraints & Non-goals +### Constraints & Non-goals -- Anthropic direct is the current runtime implementation; near-term provider work may add OpenRouter or provider-neutral routing, but Brunch remains user-supplied-key / no hosted inference account for now. +- Anthropic direct is the current runtime implementation; provider work may add OpenRouter or provider-neutral routing, but Brunch remains user-supplied-key / no hosted inference account for now. - No collaborative editing. - No explicit document-ingestion UX in V1. -- No hard turn-tree branching UX in V1; revisit operates through knowledge-graph edit mode + secondary threads instead. +- No hard turn-tree branching UX in V1; refinement and revisit operate through graph edit mode, multi-chat, and reconciliation surfaces. - No automatic cascade deletion; downstream effects are surfaced and re-resolved explicitly. -- No task-planning surface; Brunch elicits specs, it does not plan implementation work for the user. -- No downstream execution-management workflow in V1; Brunch ends at the handoff boundary rather than owning implementation after export. Verification-aware decomposition and orchestration are a future product frontier to probe through agent-harness experiments before any UI commitment. 
-- No general-purpose inline document editor in review phases; requirements and criteria review stay recommendation-led with lightweight user comments for revision. +- No task-planning or downstream execution-management surface in V1; Brunch elicits specs and stops at the handoff/export boundary. +- No general-purpose inline document editor in review phases; requirements and criteria review stay recommendation-led with lightweight comments. - No offline-first or multi-tab sync layer; the current system stays server-authoritative and local-first. -## Requirements +### Capability Requirements + +#### Runtime & persistence 1. `npx brunch` in a project directory with configured supported LLM provider credentials opens a working app in the browser with state in local `.brunch/`. -2. Starting a new specification asks only for the specification name before entering the workspace; greenfield / brownfield grounding strategy is then chosen through grounding entry states inside the specification workspace. -3. Brownfield grounding can use read-only workspace analysis to ground the opening flow and the first substantive question. -4. Structured responses support turn-appropriate option selections or explicit action submissions, an explicit `none of the above` path where relevant, and one attached response note. The interviewer autonomously chooses whether to include options on each question based on conversational trajectory; grounding accepts either a free-text response or one-or-more selected options, with the response note optional when an option is selected and required only for the `none of the above` path. Design preserves the current selection-required gate with a structural "none of the above" path. A single turn may carry multiple assistant-part artifacts (e.g. a preface card followed by a question card, or a revision card followed by a review set) rendered as stacked cards with one unified response submission. -5. 
Users can see thinking, tool usage, and streaming progress in real time; if live-only artifacts are shown, replay keeps concise durable activity metadata (at minimum elapsed thinking time plus a coarse tool-use summary / placeholder seam) instead of dropping them completely. -6. The observer extracts typed intent items and intent edges from answered turns. -7. The accumulated knowledge layer and readiness state stay visible during the interview. -8. Each workflow mode has deterministic closeability plus a separate readiness signal. -9. Phase close records summary text and closure basis. -10. Users can revisit knowledge through edit mode, cascade preview, and a secondary thread. -11. Requirements review synthesizes a candidate requirement set from the knowledge layer, presents stable item reference codes, supports per-item commenting through an inline comment toggle on each item, and resolves through explicit `accept review` / `request changes` submission with per-item comments plus one optional global review note. -12. Criteria review synthesizes a candidate verification set from accepted requirements plus the knowledge layer, presents stable item reference codes, and supports the same per-item commenting and full-set review seam. -13. Export is available only when workflow closure, accepted review outputs, and staleness rules are satisfied. 14. Closing and reopening the browser resumes the specification from persisted state. 15. The dashboard shows multiple specifications / elicitation runs within one `.brunch/` directory. -16. Partial-scope elicitation works for a feature or bounded sub-area, not just whole-workspace greenfield specs. -17. Each phase exposes an explicit kickoff, frontier, recovery, handoff, or completion affordance; the UI must not strand the user with a bare generic composer as the only visible action. -18. 
Open interview phases default to a projected kickoff card, the current frontier turn, a visible generation state, or a projected recovery affordance when the frontier is missing, and closed phases terminate in a projected handoff or completion artifact at the bottom of the workspace stream. -19. The first phase is grounding in both product language and canonical workflow identifiers. -20. The interviewer may invoke context-gathering capabilities such as workspace analysis in any phase when the workspace directory is available; their outputs appear as visible preface cards paired with question cards within the same turn. -21. Preface cards are provisional context rendered as turn-internal artifacts paired with a question card within the same turn, so the observer captures from the whole validated unit (preface context + question + user response) rather than from unvalidated provisional content alone. -22. Grounding and elicitation persist only the durable exploration ontology (`goal`, `term`, `context`, `constraint`, `decision`, `assumption`); `non-goal` is represented as a `constraint` subtype, and requirements / criteria become durable only through accepted review outputs. -23. The knowledge ontology is defined once and projected consistently through schema, shared registries, observer prompts, API types, fixtures, and UI copy so kind semantics do not drift across layers. -24. Each phase section in the workspace stream opens with a phase section header that states the phase purpose and what kinds of knowledge are captured there, projected from workflow state rather than persisted as a turn. -25. When a user requests changes on a review set, the interviewer regenerates the full set as a successor review turn; revisions stack in the turn lineage but visually only the current revision renders live with a version badge, while prior revisions collapse to compact answered-turn summaries. 
A revision card (changelog + version badge) renders above the review set card within the same successor turn. -26. The homepage surfaces workspace (CWD) binding so the user understands that listed specifications and the "new specification" affordance are scoped to the current project directory. -27. The grounding interviewer prompt uses a hint-guided priority-ordered topic list (concept, users/audience, existing constraints, scope boundaries) with example question shapes rather than generating questions from scratch, keeping thinking budget low and generation lightweight. -28. Observer capture treats the full turn — including any turn-internal preface card or revision card plus the question or review set plus the user response — as one atomic validated unit for knowledge extraction. -29. Grounding captures both workspace novelty (`greenfield` / `brownfield`) and delivery posture (`end-to-end build` / `incremental feature`), and interviewer behavior adapts to any point in that matrix rather than assuming a whole-product greenfield interview. -30. Observer extraction treats typed relationships as first-class across the ontology and records them whenever they can be reasonably traced from a turn or accepted review state, while abstaining when support is weak. Relationship extraction must stay prompt-budgeted: existing entities should be presented as compact identity anchors, not full Markdown inventories or graph dumps. -31. Users can request a turn-owned candidate-spec set during grounding or design instead of only skipping the remainder of a phase; each candidate direction includes implications, tradeoffs, likely generated knowledge, and what it rules out, and the user can accept a direction, request refinement, reject, or regenerate candidates. Accepting a candidate direction may steer the next interview move and materialize intent items, but does not itself close the phase. -32. 
Interview detail can proceed as a progressive broad-pass-to-detail flow with explicit `next level of detail` actions, rather than only as one monolithic linear drill-down. -33. Graph view is a first-class alternative to chat view, accessed as a peer route, and projects the intent graph as a navigable workspace with visible relationship topology and supports launching refinement side-chats from graph selections. The first ship is a structured-list layout; a spatial canvas layout follows as a layout switch inside graph mode. -34. First-run setup detects missing expected LLM provider credentials before the user starts a specification, makes the missing-key state visible on the dashboard, and offers a guided setup path rather than requiring README / shell-env debugging. +34. First-run setup detects missing expected LLM provider credentials before the user starts a specification, makes the missing-key state visible on the dashboard, and offers a guided setup path. 35. If Brunch accepts an API key through the UI, it stores credentials outside the project workspace in XDG-compliant user auth/config state; project `.env` files and `.brunch/` never become the default secret-storage target. -36. LLM provider configuration is owned by a shared AI runtime provider seam, so interviewer and observer model creation do not encode direct provider imports or environment-variable reads as product truth. That seam must preserve provider-specific capabilities such as Anthropic thinking / reasoning options or degrade them explicitly. -37. Workspace hygiene detects whether the local `.brunch/` directory is git-ignored and, with explicit user confirmation, can add an idempotent `.gitignore` entry, creating `.gitignore` when absent. -38. 
The product ontology should expand beyond the current exploration + review kinds to support `invariant` and `example` as first-class durable knowledge kinds, with observer prompts and promotion rules that distinguish descriptive context, constraints, decisions, assumptions, requirements, invariants, criteria, and examples without treating every answer as a decision. -39. Specifications can own multiple durable chat containers below the specification, with turns gradually moving toward chat ownership while preserving current spec-scoped compatibility during transition. The same substrate records directed `reconciliation_need` process debt when changed intent items may affect other graph truth; semantic intent edges remain separate (currently persisted as `knowledge_edge` rows during transition). -40. Prompt and context engineering are first-class server subsystems: prompts and reusable policy doctrines live as inspectable markdown assets, while typed context-pack builders derive scenario-specific intent-graph renderings for interviewer, observer, research, candidate synthesis, behavioral kernels, reconciliation, architect, and downstream decomposition probes. -41. Agent-heavy future capabilities can be tested before product UI exists through a lightweight scenario substrate that runs prompt/context packs against seeded graphs or transcript fixtures, captures raw and structured outputs, and supports harness comparison. Scenario execution may use the existing Anthropic API key or fake adapters for probes, but first-run provider setup, credential storage, OpenRouter defaulting, and the shared production AI runtime seam belong to the provider setup frontier. Pi may be evaluated as a lower-level agent harness, especially for tool experiments and pre-UI probes, but Brunch product authority over durable workflow, replay, graph mutation, and reconciliation remains explicit. -42. 
Agent-originated mutations of Brunch data use one typed server-owned mutation surface regardless of caller. Internal interviewer/observer flows, scenario probes, CLI/TUI harnesses, Pi or other harness adapters, and future external agents may not mutate durable Brunch state by calling the ORM directly; they must invoke stable mutation handlers with input/output schemas, authority metadata, replay policy, and reconciliation/changeset-ledger semantics. Read-only capability contracts may share the same registry shape, but the hard invariant is single-entry mutation authority. -43. A local agent capability CLI can expose Brunch-owned capability contracts over long-lived JSONL stdin/stdout so an external probe runner or harness can drive the real specification flow without privileged ORM access. The CLI is an adapter over capability contracts, not a separate product API: calls carry explicit resource identifiers, read commands distinguish structured `get` / `list` data from agent-facing `read` projections with affordance hints, and mutating commands stay small and procedural around spec lifecycle requests, chat readiness, and turn response submission. The LLM-as-user scenario brief, model choice, fixture curation, and probe artifacts belong to an external probe runner that talks to the CLI like any other agent. -44. Specifications can evolve through multiple chat-local strategies rather than one global interviewer mode. A chat's first frontier turn may offer or declare its strategy (`step_by_step`, `scenario_options`, `targeted_cases`, `graph_review`, `reconciliation`), and every active/resumable chat should have at most one open assistant/system-first frontier turn waiting for a user completion action. Proposal turns use normalized completion semantics (`accept`, `reject`, `revise`, `ask_followup`, `defer`, `regenerate`); only acceptance of a proposal turn may apply that proposal's semantic changeset. 
Mid-interview acceleration should branch into a side-chat / strategy chat that completes the current direction from context-packed graph truth, while graph-review critique remains the internal oracle for judging and repairing generated candidate bundles. - -## Assumptions - - - -| # | Assumption | Confidence | Status | Depends on | Validation approach | -| --- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ---------- | ------ | ------------------------ | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| A15 | The LLM can offer useful coarse readiness and closure recommendations, but closure authority must remain explainable and user-legible rather than model-owned. | medium | open | D65, D66 | Manual comparison of model recommendations vs user judgment across varied projects. | -| A20 | Users experience observer capture as responsive when every eligible answered turn enters one turn-owned background capture backlog instead of blocking chat stream completion, while replay still attaches capture status and results to the originating turn. | medium | open | D22, D96, D113, D123 | Measure stream completion timing, backlog draining, and replay clarity across grounding, design, requirements, and criteria turns. | -| A48 | Knowledge-graph edges are sufficient to drive accurate cascade preview for revisit work. 
| medium | open | D50, D80 | Structural cascade tests plus manual judgment about scope. | -| A49 | A modal secondary thread can resolve revisit implications without forcing a full interview restart. | medium | open | D80 | Manual revisit walkthrough once the thread lifecycle lands. | -| A51 | Grounding plus design remain legible if the primary input surface is the workspace-owned card family — durable turn cards for substantive elicitation plus projected control cards for structural affordances — rather than a persistent global composer. | medium | open | D89, D93, D94, D110 | Manual walkthroughs on grounding, design, and resumed states plus story review of entry / handoff patterns. | -| A53 | Concise durable activity summaries are sufficient to preserve transcript trust for live thinking/tool artifacts without persisting hidden reasoning or raw tool results. | medium | open | D93, D112 | Manual replay/reload walkthroughs on streamed turns once transcript activity summaries land. | -| A54 | An open phase can reliably project a kickoff control card, current frontier turn, visible generation state, or projected recovery card on first render without requiring the user to bootstrap the phase by typing into a generic composer. | medium | open | D89, D94, D95, D110 | Manual walkthroughs on kickoff-ready, design-active, review-active, and recovery states. | -| A55 | Trailing observer capture remains trustworthy if waiting/applying state stays attached to the answered turn and deferred completion writes back through that turn's identity rather than the current frontier. | medium | open | D96, D113, D123 | Manual timing walkthroughs plus reload/resume tests on seeded turns with known deferred observer work. 
| -| A57 | A specification-scoped lifecycle seam — whether implemented as a lightweight runtime supervisor, router-integrated service, or chart-backed helper — can own duplicate-safe automatic phase entry / continue, late-event suppression, and route-independent in-flight operation identity without introducing a second durable workflow model or a general runtime-operations ledger. | medium | open | D113 | Prototype the lifecycle seam on auto-present / recovery / force-close edges; if duplicate-submit or restart truth remains ambiguous, revisit whether the seam needs stronger runtime machinery or more durable coordination. | -| A58 | A cumulative workspace can preserve phase legibility and workflow honesty if realized sections stay visible as historical record, future sections do not render until reachable, and section focus remains navigation-only state rather than redefining durable workflow truth, reachability, or the single actionable frontier. | medium | open | D86, D110, D113, D114 | Prototype the cumulative workspace against future-phase deep-link redirects, scroll/focus transitions, close-to-next-phase motion, and resume/reload walkthroughs; if unrealized-phase routing or single-frontier clarity drifts, keep the current per-phase rendering boundary. | -| A59 | Interviewer-autonomous question format — where the model chooses whether to include options based on conversational trajectory rather than rigid phase rules — produces better grounding conversations than mandating free-text-only, because the interviewer naturally starts open-ended and adds suggestive options as the user's thinking narrows. The observer can interpret option selections phase-appropriately (resonance in grounding, commitment in design) without schema changes. 
| medium | open | D89, D110, Requirement 4 | Manual walkthroughs across greenfield and brownfield grounding comparing interviewer-chosen format vs phase-mandated format; check whether observer captures stay coherent when the same selection structure carries different semantic weight by phase. | -| A60 | A concise phase section header (purpose + captured knowledge kinds) is sufficient to orient the user at phase entry without requiring a longer onboarding flow or tutorial card. | medium | open | D116 | Manual walkthroughs on fresh specifications; check whether users understand what the phase expects of them. | -| A63 | Hint-guided grounding prompts produce meaningfully adapted questions rather than degenerating into rote template output across different projects. | medium | open | Requirement 27 | Manual greenfield walkthroughs across varied project types; compare question quality against the current unconstrained prompt. | -| A64 | Replacing coarse `router.invalidate()` with query-owned invalidation boundaries eliminates the scroll-jank cascade without introducing coordination complexity or stale-data bugs; the near-term boundary may be one specification bundle domain plus a separate entities domain rather than a fake finer split. | medium | open | D121 | Prototype the staged bundle + entities decomposition and measure scroll stability plus data freshness during observer updates. | -| A65 | The interviewer can adapt usefully to the full `greenfield <> brownfield` by `end-to-end build <> incremental feature` matrix without making kickoff feel bureaucratic or over-parameterized. | medium | open | D124 | Manual walkthroughs across all four corners of the matrix, including partial brownfield feature work. | -| A66 | Relation-first observer capture will improve revisit, export grounding, and graph-view utility without flooding the graph with speculative or low-value edges. 
| medium | open | D50, D125 | Run post-FE-639 observer corpus probes plus manual graph/export review focused on edge precision, coverage, and visible usefulness. | -| A67 | Users who are tired, rushed, or under-informed will converge faster by reacting to synthesized candidate directions than by continuing a long direct interview or force-closing early. | medium | open | D126, D127 | Manual user-flow comparison between direct questioning, skip-close, and candidate-spec reaction flows. | -| A68 | Broad-pass interviewing followed by explicit deepen-detail actions will preserve coherence better than a single depth-first drill-down while still producing export-worthy specifications. | medium | open | D127 | Prototype broad-pass-first flows and compare resulting knowledge completeness and user comprehension. | -| A69 | A graph-centric refinement surface can launch side-chats without splitting durable specification truth, so chat view and graph view stay two projections over one evolving graph. | medium | open | D128, D114 | Prototype graph-launched refinement with reload/resume checks to ensure side-chat state and graph state stay coherent. | -| A70 | The structured-list graph-view layout provides standalone enumeration value beyond relationship density: users benefit from seeing all intent items grouped by kind even when most have no edges yet, and graceful degradation (collapse the relations footer when zero edges) keeps the view honest while relation-first observer capture matures. | medium | open | A66, D128, D129 | Manual walkthroughs at low and high edge density once the structured list ships; check whether the layout still feels valuable when most items have empty relations footers, and whether observer-density growth visibly improves the view over time. 
| -| A71 | Semantic mutations will eventually need a changeset-ledger history distinct from conversational turn ancestry, but the first implementation should prove chat containers and reconciliation needs before committing the full ledger shape. | medium | open | D135 | Build chat containers plus reconciliation needs first; revisit whether turn-linked provenance remains sufficient before adding full semantic changesets. | -| A72 | Intent items can carry version history without breaking the active-path durable-truth contract: each version is the result of an applied semantic mutation, prior versions are queryable for diff / comparison / audit, and the active-path projection always reflects the latest version for each item. | low | future | A71, D135 | Prototype item versioning behind the changeset ledger; verify that revisit cascades, span-anchored annotations, and soft-edit audit trails behave correctly across versions. | -| A73 | Autonomous architect / generator loops can propose useful graph mutations only after human-driven multi-chat and reconciliation surfaces prove the shared mutation pipeline. | low | future | A71, D135 | Run architect proposals in shadow mode after multi-chat / reconciliation seams stabilize, then compare proposed changes against user-driven edits. | -| A74 | OpenRouter may reduce first-run friction for Brunch's likely users compared with requiring direct Anthropic keys, but model capability parity and AI SDK support need proof before making it the default provider path. This is provider-setup work, not a default FE-698 prompt/context substrate task. | medium | open | D130, D131 | In the first-run provider setup frontier, spike provider configuration against interviewer/observer calls, especially model naming, structured output, tool use, and reasoning/thinking support. 
| -| A75 | XDG-compliant user-scoped auth/config storage is acceptable for UI-entered API keys and safer than writing secrets to the project workspace, while environment variables remain useful for automation and CI. | medium | open | D130, D132 | Prototype key save/load/delete precedence and inspect OS/XDG paths; manual first-run walkthrough verifies users understand where the key is stored. | -| A76 | Users will accept Brunch editing `.gitignore` when the action is explicit, previewable, and idempotent; doing so should reduce accidental commits of `.brunch/` without feeling like surprising repo mutation. | high | open | D133 | Unit-test ignore detection / append behavior and manual dashboard walkthrough with absent, present, and already-covering `.gitignore` states. | -| A77 | Progressive checkability will improve generated specs more than a binary "formal / not formal" framing, because the weakest sufficient witness may be prose, example, test, runtime contract, invariant, proof obligation, or explicit unresolved ambiguity depending on the intent item. | medium | open | D134 | Prototype intent-item-to-witness review on a small corpus and compare whether users can validate meaning without being forced into formal-methods terminology. | -| A78 | Adding `invariant` and `example` as product ontology candidates will make intent drift easier to detect without overwhelming early interviews, provided examples carry subtypes such as positive, negative / counterexample, edge-case, and not-relevant rather than expanding into many top-level kinds. | medium | open | D134 | Run transcript probes for examples, counterexamples, not-relevant cases, and state/transition rules; check whether items improve export and review quality or create noisy capture. | -| A79 | Once semantic truth can change through graph edits, side-chats, reconciliation, verifier feedback, or implementation feedback, turn ancestry alone will be insufficient as the semantic history spine. 
| medium | open | D135 | Prototype chat containers and reconciliation needs before full patch history; revisit if turn-linked provenance remains enough for first-class graph editing. | -| A80 | Behavioral kernels can generate higher-yield disambiguating questions than generic elicitation prompts, but only if kernels stay as interviewer / architect / wizard machinery that emits checkable artifacts rather than user-visible formalism. | low | open | D134 | Try state/lifecycle and containment/topology prototypes first, and compare question value against current prompt-only interviewing. | -| A81 | Knowledge edges can carry intent semantics without becoming too noisy only if relation policy distinguishes semantic relations from reconciliation needs, and distinguishes display edges, cascade-participating edges, export-relevant edges, staleness-producing edges, and low-confidence suggestions. | medium | open | D137 | Design relation-policy semantics before broad observer edge expansion; test low- and high-density graphs for user trust and operational noise. | -| A82 | A soft dual-pointer migration can introduce chat containers without destabilizing current spec-scoped reads: `turn.specification_id` and `specification.active_turn_id` can remain temporarily while `turn.chat_id`, `specification.primary_chat_id`, and `chat.active_turn_id` become the future ownership path. A separate `active_chat_id` is deferred until multiple active chat surfaces need an explicit UI-level pointer. | medium | validated | D138 | Validated by FE-697: `chat-substrate.test.ts` proves read-path equivalence (`spec.active_turn_id === spec.primary_chat → chat.active_turn_id`) and parent-chat consistency; `npm run verify` shows no regression in the existing interview flow. 
| -| A83 | A minimal item-to-item `reconciliation_need` table is enough for the first queue if it carries narrow kind/status values plus nullable provenance placeholders, and if future relation targets / changeset provenance can extend the shape without renaming the concept. | medium | validated | D137, D138 | Validated by FE-697 for the Phase 1 substrate: `reconciliation-need.test.ts` proves the partial unique index, lifecycle, cascade, and multi-kind-per-pair behaviors. Forward extensibility (changeset provenance, relation-targeted needs) remains untested until the changeset ledger lands. | -| A88 | Path 1 deterministic enumeration over existing `knowledge_edge` rows incident on a changed knowledge item produces a useful cascade preview without requiring the reconciliation agent. Mechanical grouping by `reconciliation_need.kind` plus relation type is enough for the user to walk a hard-edit cascade in V3.0; agent-grouped resolution (auto-confirm / auto-edit / substantive) is V3.1 work that can ship later without reshaping the queue. | medium | open | D135, D137, D138, D146 | Manual hard-edit walkthroughs across the side-chat V3.0 fixture matrix (leaf, 2-downstream, 5+-downstream, in-active-review-set, mixed `supersedes` / `needs_confirmation`). Check whether the mechanical grouping reads as actionable or whether substantive items get lost. If users skip needs without resolving, V3.1 agent work moves up the priority list. | -| A84 | Scenario-specific graph context packs can replace transcript-as-default prompt context without losing conversational nuance, provided packs preserve authority, provenance, unresolvedness, relation neighborhoods, and recency where relevant. | medium | open | D139, D140 | Build prompt/context probes over seeded graphs and compare generated observer, interviewer, candidate, and oracle/decomposition outputs against transcript-heavy baselines. 
| -| A85 | A lightweight prompt scenario substrate will let Brunch validate LLM-heavy product directions faster than building UI first, if it captures rendered prompts, context packs, model settings, raw outputs, structured parses, and human review notes as repeatable artifacts. | medium | open | D139 | Run multi-scenario prompt probes for observer ontology, behavioral kernels, candidate-spec assist, and downstream oracle/decomposition before productizing their UI. | -| A86 | Pi can serve as a useful pre-UI agent harness or tool-spike backend without forcing Brunch to adopt Pi as its production agent runtime, as long as integration remains adapter-shaped and Brunch-owned authority/replay/mutation semantics stay outside the harness. | low | open | D142 | Spike Pi SDK or RPC with in-memory sessions, custom tools, controlled prompts, and Brunch graph context packs; evaluate event capture, tool ergonomics, provider handling, packaging, and isolation. | -| A87 | Verification-aware post-spec decomposition can be explored as agent scenarios before it is a Brunch product surface: intent graph truth plus progressive checkability can feed design alternatives, oracle strategy, execution slices, and orchestration constraints. | low | future | D141 | Prototype decomposition and oracle-design probes inspired by `ln-design` and `ln-oracles`; compare outputs for traceability to requirements, invariants, examples, criteria, and blind spots. | -| A89 | A long-lived local JSONL agent capability CLI can drive the real Brunch interview flow well enough for external LLM-as-user probes to produce credible completed specification fixtures, while keeping product resources explicit in every call and using ambient process state only for runtime plumbing such as DB handles, provider config, and in-flight generation bookkeeping. 
| medium | open | D143, D147, Requirement 43 | Prototype the minimal `brunch agent` JSONL loop over capability contracts, then run small LLM-as-user scenarios end-to-end through `chat.ensureReady`, `chat.read`, `turn.submitResponse`, `spec.requestPhaseClosure`, and `spec.requestExport`. Validate that probe logs are replayable, no probe code imports DB/product handlers directly, and no durable operation ledger is needed for the first readiness semantics. | -| A90 | Users who ask to speed up a long interview will prefer a side-chat that generates 2–3 reviewed scenario options completing the current direction over continuing the primary drilldown, provided existing accepted graph truth is treated as fixed premise by default. | medium | open | D126, D148, D151, Requirement 44 | Probe scenario-options against drilldown fixtures and run manual flow review: do users understand the tradeoff profiles, preserve trust in prior answers, and return to the primary interview when generated options disappoint? | -| A91 | Graph-review critique can make scenario-generated candidate bundles safe enough for product use without requiring perfect one-shot generation, if candidate readiness distinguishes `reviewed_clean`, `reviewed_with_issues`, and `blocked`, and if accepted-with-issues immediately opens durable follow-on review work. | medium | open | D151, D152, Requirement 44 | Run candidate bundle probes with graph-review scoring and human review; verify accepted-with-issues flows create a graph-review frontier or appropriate reconciliation needs rather than hiding defects. | -| A92 | A conservative global staleness rule for open proposal turns — stale when `specification.latest_changeset_id` differs from `turn.opened_at_changeset_id` — is acceptable before neighborhood-level staleness calculation exists. 
| medium | open | D149, I116 | Exercise multi-chat proposal flows where another chat applies a changeset while a proposal remains open; check whether regeneration prompts feel safe rather than noisy. | -| A93 | Relation-policy directionality lookup is safer than trying to force all useful intent-edge verbs into one dependency direction, because graph edges must serve display, context packs, export trace, reconciliation, critique, verification, candidate generation, and explanation. | medium | open | D137, D150 | In FE-700, define canonical/inverse sentences and source/target change behavior for each relation; test direct-edit and hard-impact cascade against mixed-direction relations. | - -## Decisions - - - -22. **Observer-result sync is turn-owned and background by default** — eligible answered turns enter one turn-owned observer capture backlog after durable turn finalization, and chat stream completion must not wait on extraction. Capture may still surface results through the existing turn-owned `data-observer-result` artifact once available, but the runtime path should be normalized around the `/api/specifications/:id/turns/:turnId/observer-capture` seam rather than split between inline stream-blocking capture and deferred capture. This preserves one durable workflow model: durable truth remains the answered turn plus any persisted observer result part, not a separate workflow store or ledger. - -50. **Knowledge relationships live behind one typed graph seam** — persisted graph edges are first-class and drive dependency, derivation, and revisit behavior. -65. **Phase outcomes are explicit durable records** — workflow status, closeability, readiness, and closure provenance project from durable phase outcomes on the active path. -66. **Interviewer-recommended and user-forced closes share one transcript-friendly seam** — one phase-close transport handles both paths, with explicit closure basis. -80. 
**Intent-graph revisit replaces hard turn-tree branching for V1** — revisit starts from edit mode on intent items, traces cascade through intent edges, and resolves through a secondary thread. **Updated 2026-05-07 (D135):** the older modal secondary-thread and side-chat V2/V3 persistence shapes are superseded by the multi-chat + reconciliation-need direction; the user-facing revisit/cascade goal remains live. **Chat-level branching note:** the no-turn-tree-branching invariant remains in force at the *turn* level, but multiple chats per spec are explicitly allowed at the *chat* level once the multi-chat substrate lands. Branching at the chat level is not user-surfaced as a generic `branch this thread` affordance by default; it manifests through graph-anchored refinement / reconciliation surfaces. -86. **The client is organized by phase-addressable routing and three concentric layout shells** — AppLayout, SpecificationWorkspaceLayout, and ViewLayout own the user-facing route structure. Interview phases remain router-addressable for deep links, gating, and sibling route composition even if the center pane later renders them inside one continuous workspace surface. -87. **Layout-level data ownership partitions invalidation** — the specification bundle and entity collections subscribe through separately owned query domains / route surfaces instead of one monolithic refresh boundary, so entity refreshes do not remount or tear down the transcript-owning surface. -89. **Primary grounding/design input is workspace-owned and card-owned** — substantive elicitation in grounding and design proceeds through durable turn cards inside the workspace stream, while structural phase-entry, recovery, and handoff affordances project as control cards in that same stream; the global bottom composer is not the canonical input seam. Preface cards accept optional comment + continue, while question cards collect substantive answers. Depends on: A51. Supersedes: —. -93. 
**Replay for elicitation phases is turn-centered, not message-shaped** — completed interview turns collapse into answered-turn records that summarize the offer, the structured user response, and the capture status, while phase markers, projected control cards, and activity cards render as stream elements around those turns rather than as ordinary chat bubbles. Depends on: A51, A53, D110. Supersedes: —. -94. **Phase progression is frontier-anchored** — every open phase bottoms out in exactly one visible next action: a projected kickoff card, actionable frontier turn, visible generation state, or projected recovery card. Accepting a frontier turn durably creates its successor turn, successor generation avoids closed-without-frontier gaps, and recovery is a structural fallback that appears whenever an open phase lacks a valid frontier rather than another generative turn that must itself be created. Closure proposals remain durable proposal-shaped turns on the active path; accepting one confirms phase closure and opens the next phase into its projected entry state, while rejecting one keeps the phase open and requires a same-phase successor frontier. If a phase is closed, the stream bottoms out in a handoff or completion control. Depends on: A51, A54. Supersedes: —. -95. **Structural control affordances project from workflow state rather than masquerading as ordinary turns** — kickoff, recovery, and end-of-phase affordances derive from workflow state, phase outcomes, and neighboring turn anchors instead of from incidental copy or mandatory durable turn rows. Any durable implementation seam used to help project them must be treated as transitional and must not redefine their product meaning as authored conversational turns. Depends on: D65, D94, D110. Supersedes: `why`-based kickoff/recovery sentinels and the earlier persisted-turn-kind framing. -96. 
**Observer capture trails interviewer progression through one turn-owned backlog** — interviewer completion may unlock the next turn before observer capture finishes, and that should be the default lifecycle for all eligible phases rather than a grounding/design exception. Any trailing observer state remains attached to the just-answered turn card instead of surfacing as a free-floating transcript row; observer-result transport carries the originating turn identity so late capture can hydrate back into that same card. Depends on: A20, A53, A55. Supersedes: inline stream-blocking observer capture as a normal completion path. - -110. **The workspace stream is a merged read model, not identical to the turn tree** — active-path durable conversational turns remain the only branch-bearing lineage spine; durable non-turn workflow facts such as phase outcomes anchor themselves to turn ids for provenance, ordering, and invalidation; projected control cards, phase markers, and activity cards derive from workflow state plus nearby anchors instead of requiring their own turn rows. Depends on: D65, D89, D93, D94, D96. Supersedes: the implicit equivalence between rendered cards and persisted turns. - -111. **The app is seed-first and migration-light until the data model settles** — prioritize one truthful read-model contract plus up-to-date seeded scenarios over compatibility for legacy local rows. Durable authority comes from active-path substantive turns, `phaseOutcome`, workflow state, and the current canonical record/phase identifiers; projected kickoff / recovery / handoff affordances must be derived from those facts rather than preserved as canonical control-turn rows. Transitional seams may survive briefly as internal submit plumbing, but new server reads, client renders, fixtures, and happy-path tests must not depend on legacy aliases or adaptation layers as product truth. 
When a naming or persistence cutover lands — including `project` → `specification` and `scope` → `grounding` — destructive reseed is preferred over spending time on migration logic for unstable local data. Depends on: D95, D110. Supersedes: the implicit bias toward preserving legacy control-row compatibility during the cutover. - -112. **Turn-artifact persistence is server-owned and interviewer-shaped** — durable review-set, preface-card, activity-summary, and phase-summary artifacts materialize from interviewer output through one server helper, so the chat-runtime finalize path acts as orchestration glue instead of reconstructing artifact semantics ad hoc. Replay, accepted-review materialization, and seeded walkthroughs therefore consume the same persisted artifact contract the interviewer produced. Depends on: D93, D96, D110. Supersedes: the ownership split where runtime finalization re-derived grounding/review artifacts outside one authoritative persistence seam. - -113. **Phase lifecycle side effects are specification-scoped, not route-scoped** — durable workflow truth, landing reconciliation, and routed read-model projection remain authoritative; they do **not** move into a second client-side workflow store. The router continues to own navigation, loader/query subscription, and rendering of the derived read model. A separate specification-scoped lifecycle seam owns only the ephemeral process concerns that routes are poor at holding correctly: one-shot automatic phase entry / continue, in-flight operation identity, duplicate-submit suppression, cancellation, stale-event rejection, and capture-backlog reseeding after hydration. That seam may be implemented as a lightweight runtime supervisor, router-integrated service, or chart-backed helper, but its implementation is intentionally left open; what is decided here is the ownership boundary, not a mandatory framework. 
Constraints: (1) no second durable workflow model or general runtime-operations ledger by default, (2) no independent client authority over phase status, landing truth, or handoff/completion semantics, (3) no route-local `useEffect` or remount-tied behavior as the trusted owner of lifecycle effects like auto-present, and (4) any lifecycle helper must consume durable truth and emit idempotent, ignorable side effects rather than redefine product state. Depends on: D87, D94, D95, D96, D110, D112. Supersedes: route-local auto-present / continue effects as a trusted lifecycle seam. - -116. **Each phase section opens with a projected phase section header** — a non-turn, non-durable stream artifact that states the phase purpose and what kinds of knowledge are captured there. The header is projected from workflow state and phase metadata (similar to phase markers) and re-projects on hydration. Content is phase-specific: grounding explains goals/terms/context/constraints, elicitation explains design decisions, requirements explains review, criteria explains verification. Depends on: A60, D110. Supersedes: —. - -121. **Client data ownership migrates from coarse loader invalidation to query-owned domains** — the near-term authoritative boundary is one specification bundle seam for workflow state, landing state, and turns, plus a separately invalidable entities domain scoped to the specification. Mutations and SSE events invalidate only the owned query key. `ask_question` tool execution persists the frontier question/options, advances the active head, and returns the acknowledged turn id; the client may then patch the bundle query cache from that tool output while the following bundle fetch remains authoritative reconciliation. The router loader becomes a thin shell that primes or guards those domains instead of owning the read model, and finer core/turn split work waits for a real server ownership boundary rather than a fake cache-key split over one payload. 
Depends on: A64, D87. Supersedes: monolithic `router.invalidate()` after every mutation. - -123. **Runtime proving uses a lightweight lifecycle seam with observer backlog, not a second workflow store** — every eligible answered turn should be able to unlock successor interactivity as soon as interviewer generation is durably ready, while observer capture for the answered turn runs afterward through a turn-owned `/api/specifications/:id/turns/:turnId/observer-capture` seam. The client lifecycle may keep only ephemeral capture state (`waiting`, `applying`, `retry/backlog identity`) and reseed unfinished capture from durable turns after hydration/reload; durable authority remains the persisted turn plus its observer result part. Current constraint: server-side dedupe is process-local, so restart recovery depends on reseeding from turns that still need observer capture rather than on a durable runtime-operations ledger. Depends on: D22, D96, D113. Supersedes: the mixed inline/deferred observer finish boundary. - -114. **Continuous workspace rendering and phase addressability are separate concerns** — the interview center pane may render one cumulative workspace stream whose realized grounding, design, requirements, and criteria sections remain visible as the workflow advances, while the router continues to preserve deep links, gating, and sibling-route composition. A workspace-level controller may own one chat session, cross-section projection, focus / scroll behavior, and close-to-next-phase motion without turning focus state into a second durable workflow model. Phase routes act as focus addresses into that shared surface rather than distinct transcript owners: navigating to a realized phase focuses and scrolls to its section, while direct navigation to an unrealized future phase redirects to the current reachable phase instead of rendering placeholder content. 
Constraints: (1) one chat runtime per specification, not one per rendered phase, (2) only realized sections render in the cumulative center pane, so future phases do not project empty shells before they become reachable, (3) exactly one actionable frontier remains at the bottom of the current reachable section while prior sections are replay-only record, (4) focused section state must not redefine durable workflow truth or landing truth, (5) graph view is a peer route to the cumulative workspace surface — phase-independent, accessed via direct navigation, but rendered inside the outer specification shell so phase-sidebar continuity and top-bar consistency remain, and (6) output remains a separate route because it is not part of the interview timeline. Depends on: A58, D86, D87, D110, D113. Supersedes: the assumption that each phase route must own a distinct rendered transcript surface. - -124. **Interview framing is two-axis, not novelty-only** — the interviewer should orient itself with both workspace novelty (`greenfield` / `brownfield`) and delivery posture (`end-to-end build` / `incremental feature`). Partial-scope work is therefore a first-class interview shape rather than just a greenfield/brownfield footnote. Depends on: A65. Supersedes: the implicit single-axis framing around grounding strategy plus partial-scope support. - -125. **Observer capture is a prompt-budgeted graph-delta seam** — `runObserver()` remains the public capture boundary, but its internal output includes a generic graph delta: per-kind item collections plus a compact top-level relationship-candidate set that can reference existing entities by `knowledge_item.id` and same-turn provisional items by `{ kind, index }`. Existing-entity identity is the database id; any kind metadata in prompts is only display/validation context, never part of the lookup key. 
The server owns provisional-reference resolution after persistence, validates candidate edges through one typed relation-policy registry, and writes only supported edges, preferring abstention over speculative edge inflation. Accepted review sets reuse the same relation policy when materializing requirements or criteria so review-authoritative entities can add edges without a second durable graph model. Observer prompts avoid full entity tables and existing graph topology by default; future enrichment should use compact anchor inventories, phase/relevance filtering, and corpus/manual graph review before adding more context. Depends on: A66, D50, D112, D123. Supersedes: the decision/assumption-only relationship extraction instruction in the current observer seam. - -126. **Recognition-first assists synthesize proposals through turn-owned candidate direction sets** — grounding, design, and future architect / wizard-style modes may project user actions like `fill in the rest for me`, compare broad directions, or propose typologies. These invoke interviewer-authored candidate direction set artifacts on ordinary durable turns rather than extending force-close semantics. Each set presents concrete options with implications, tradeoffs, likely generated knowledge, and what each direction rules out. The user responds through a structured reaction seam (`accept-direction`, `refine`, `reject`, or `regenerate`). Accepting a direction may materialize goals, constraints, assumptions, decisions, requirements, invariants, and examples as accepted or proposed-from-selection; rejecting a direction may still create intent evidence through negative / not-relevant examples, `non_goal` constraints, or `rules_out` relations. Criteria are generated later unless the selected bundle includes concrete witness cases. This reuses the existing turn-artifact / workflow model instead of adding a second durable workflow machine. Depends on: A67, A77, A78, D66, D94, D112, D134. 
Supersedes: skip-only close as the sole user-legible fallback for low-patience or low-information phases, and supersedes treating candidate-spec assist as only a phase-shortening tool. +36. LLM provider configuration is owned by a shared AI runtime provider seam, so interviewer and observer model creation do not encode direct provider imports or environment-variable reads as product truth. +37. Workspace hygiene detects whether local `.brunch/` is git-ignored and, with explicit user confirmation, can add an idempotent `.gitignore` entry, creating `.gitignore` when absent. -127. **Interview detail should flow through a turn-owned breadth skeleton and detail-focus reaction** — grounding and design may pair an ordinary question turn with an interviewer-authored breadth-skeleton artifact that makes the current broad-pass map visible, marks which areas are already sufficient for now, and offers explicit `next level of detail` affordances. The user reacts by choosing whether to deepen one named area now, continue the broad pass, or leave an area sufficient-for-now; that reaction steers the next same-phase frontier turn without creating a second durable workflow or topic-tree store. Recursive follow-up remains ordinary focused turns on the same active path, and any future chat or graph affordance should emit the same detail-focus intent against shared specification truth rather than inventing a parallel deepening model. Depends on: A67, A68, D94, D112, D113. Supersedes: the implicit assumption that every frontier turn advances by the same depth granularity. +#### Interview workflow -128. **Graph view becomes an actionable workspace mode through a projection-first, intent-emitting seam** — graph mode should project shared entity truth into a relationship-aware scene with visible edge topology and own only ephemeral graph-local interaction state such as viewport, selection, focus, and path highlighting. It must not create a second durable workflow or edit-state model. 
Node-level actions emit intents into the existing workspace lifecycle so refinement side-chats, revisit, and future edit flows still materialize through turn-owned artifacts, projected control cards, and the same durable specification truth used by chat view. The common-case interaction should optimize for `select node -> inspect -> launch focused refinement`, with broader multi-select or edit overlays layered on later. The first ship layout is a **structured list**: kind-grouped item rows with a relations footer (Outgoing / Incoming subsections of relation chips), `referenceCode`-based hash anchors for cross-item navigation, hover-card previews on chips, soft-truncation at 6 chips per direction, and a per-row action rail reserving one disabled `chat-with` placeholder for future intent emission. A **spatial canvas** layout follows as a layout switch inside graph mode without changing the projection seam or action contract. The empty state is a minimal orientation card linking to the current reachable phase rather than empty kind sections; an explicit `Back to chat` affordance returns to the user's last phase route. Depends on: A69, A70, D50, D80, D113, D114. Supersedes: graph view as a placeholder grouped list surface, and graph view as a sibling layout mode under `_view`. - -129. **Graph view's structured-list scope decouples data fetching from default render** — graph view always fetches the `whole-spec` entities so chip targets always resolve, but defaults the rendered row set to active-path items so toggling between chat view and graph view does not silently widen the user's working scope. A `Show all` toggle expands the rendered set to the full data already in memory; no second fetch and no scope-dependent loading. 
**Phased shipping:** the structured-list ship lands the whole-spec fetch portion of this contract first; the active-path render filter and `Show all` toggle ship in a follow-up frontier item (see `memory/PLAN.md` Horizon: *Graph view active-path filter + scope toggle*) once per-item active-path membership is exposed in the entities API or derived through a stable client-side seam. Until then the structured list renders all `whole-spec` items by default, so the toggle would be a no-op and is not surfaced. Depends on: A70, D87, D121, D128. Supersedes: render scope and fetch scope coupled through the existing `mode=active-path | project-wide` query parameter. +2. Starting a new specification asks only for the specification name before entering the workspace; greenfield / brownfield grounding strategy is chosen through grounding entry states inside the specification workspace. +3. Brownfield grounding can use read-only workspace analysis to ground the opening flow and first substantive question. +4. Structured responses support turn-appropriate option selections or explicit action submissions, an explicit `none of the above` path where relevant, and one attached response note. One turn may carry multiple assistant-part artifacts rendered as stacked cards with one unified response submission. +5. Users can see thinking, tool usage, and streaming progress in real time; replay keeps concise durable activity metadata for live-only artifacts instead of dropping them. +8. Each workflow mode has deterministic closeability plus a separate readiness signal. +9. Phase close records summary text and closure basis. +16. Partial-scope elicitation works for a feature or bounded sub-area, not just whole-workspace greenfield specs. +17. Each phase exposes an explicit kickoff, frontier, recovery, handoff, or completion affordance; the UI must not strand the user with a bare generic composer as the only visible action. +18. 
Open interview phases default to a projected kickoff card, current frontier turn, visible generation state, or projected recovery affordance; closed phases terminate in a projected handoff or completion artifact. +19. The first phase is grounding in both product language and canonical workflow identifiers. +20. The interviewer may invoke context-gathering capabilities such as workspace analysis in any phase when the workspace directory is available; outputs appear as visible preface cards paired with question cards. +21. Preface cards are provisional context rendered as turn-internal artifacts, so observer capture uses the whole validated unit: preface context + question + user response. +24. Each phase section opens with a projected header that states phase purpose and captured knowledge kinds. +25. Review revisions stack in turn lineage but visually render only the current revision live with a version badge; prior revisions collapse to compact answered-turn summaries. +27. Grounding prompts use hint-guided, priority-ordered topics with example question shapes rather than generating every question from scratch. +28. Observer capture treats the full turn — including preface/revision artifacts, offer, and user response — as one atomic validated unit. +29. Grounding captures both workspace novelty (`greenfield` / `brownfield`) and delivery posture (`end-to-end build` / `incremental feature`). +31. Users can request a turn-owned candidate-spec set during grounding or design; accepting a direction may steer the next interview move and materialize intent items, but does not itself close the phase. +32. Interview detail can proceed as a progressive broad-pass-to-detail flow with explicit `next level of detail` actions. +44. Specifications can evolve through multiple chat-local strategies rather than one global interviewer mode. Each active/resumable chat has at most one open assistant/system-first frontier turn waiting for user completion. 
Proposal turns use normalized completion semantics; only proposal acceptance may apply semantic changes. + +#### Knowledge / intent graph -130. **First-run setup becomes a product surface, not README-only configuration** — the dashboard should expose provider credential status before specification creation and route users toward setup when no supported key is available. CLI logs and README env instructions can remain, but they are no longer the only supported onboarding path. Depends on: A74, A75. Supersedes: `ANTHROPIC_API_KEY` in project `.env` as the sole user-facing setup contract. -131. **Provider access moves behind one AI runtime provider seam** — interviewer and observer model construction should consume a shared provider/model resolver instead of importing Anthropic directly and reading model environment variables in each caller. The seam may keep Anthropic as the current implementation while testing OpenRouter as the preferred onboarding provider, but provider choice must be explicit and testable. Depends on: A74. Supersedes: direct Anthropic imports in interviewer/observer code as product truth. -132. **UI-entered credentials are user-scoped auth state, not workspace state** — if the app collects an API key, it writes to an XDG-compliant user auth/config location, never to `.brunch/` or the project `.env` by default. Existing environment-variable configuration remains supported as an override path for scripted use. Depends on: A75. Supersedes: project-local `.env` as the only persistent setup mechanism. -133. **`.brunch/` gitignore support is confirm-gated deterministic workspace mutation** — Brunch may inspect the workspace repository and offer to add `.brunch/` to `.gitignore`, but it must not mutate repository files without explicit confirmation. The mutation should be idempotent, preserve existing file content, and create `.gitignore` only when the user accepts. Depends on: A76. 
Supersedes: relying solely on user memory / docs to ignore the generated workspace directory. +6. The observer extracts typed intent items and intent edges from answered turns. +7. The accumulated knowledge layer and readiness state stay visible during the interview. +10. Users can revisit knowledge through edit mode, cascade preview, and reconciliation / secondary-thread surfaces. +22. Grounding and elicitation persist only the durable exploration ontology (`goal`, `term`, `context`, `constraint`, `decision`, `assumption`); `non-goal` is represented as a `constraint` subtype, and requirements / criteria become durable only through accepted review outputs. +23. The knowledge/intent ontology is defined once and projected consistently through schema, shared registries, observer prompts, API types, fixtures, and UI copy. +30. Observer extraction treats typed relationships as first-class across the ontology and records them when reasonably supported while abstaining when support is weak. +38. The product ontology should expand beyond current exploration + review kinds to support `invariant` and `example` as first-class durable knowledge kinds. +39. Specifications can own multiple durable chat containers below the specification, with turns moving toward chat ownership while preserving temporary spec-scoped compatibility. Reconciliation needs remain process debt, separate from semantic intent edges. -134. **Brunch specs evolve toward recognition-first intent graphs with progressive checkability** — the product direction is to preserve meaning as typed intent items, semantic edges, examples / counterexamples, verification witnesses, unresolved ambiguity, and user validation status rather than treating the spec as a planning document or prose inventory. Requirements and criteria remain distinct product items for now: a requirement is a commitment and a criterion is an oracle / witness. 
`invariant` and `example` should become first-class product ontology kinds, with positive, negative / counterexample, edge-case, and not-relevant examples represented as subtypes rather than separate top-level kinds. A shared `Property`-like intent primitive remains a design candidate rather than a committed storage or UI surface. Behavioral kernels are hidden interviewer / architect / wizard machinery for surfacing latent state, containment, authority, concurrency, migration, and evidence questions while emitting the weakest useful checkable artifact for the intent item. Depends on: A77, A78, A80, D50, D125, Requirement 38. Supersedes: the implicit framing that requirements / criteria review is the terminal semantic model of product intent. +#### Review & export -135. **Semantic mutation history should split from conversational turn history when graph editing becomes first-class** — turns remain conversational provenance and replay; the intent graph remains current semantic truth; a future changeset ledger records semantic mutation history; and reconciliation needs record semantic debt caused by changes that may stale existing graph truth. The first implementation should follow the multi-chat substrate in D138: chat containers plus durable reconciliation needs before a full changeset ledger, keeping turn-linked provenance and legacy spec-scoped pointers as compatibility while making room for changeset-backed provenance later. User-direct-edit mode should be allowed to land a committed group of intent-item changes immediately, synchronously create reconciliation needs from existing dependency and historical relations, then queue an asynchronous observer pass that may immediately add newly implied intent edges and additional reconciliation needs as a later interpretive-structure changeset. That observer pass may not silently rewrite, retire, or weaken existing accepted intent; content changes that require judgment go through reconciliation review. 
This explicitly reshapes the older revisit-session draft: revisit / cascade remains a product capability, but `revisit_session` is no longer the preferred persistence foundation once multiple chats, direct graph edits, and reconciliation review sets are in scope. Depends on: A71, A79, D80, D110, D112, D125, D128, D134, D138. Supersedes: turn ancestry as the only plausible semantic history spine, and the `docs/archive/design/REVISIT_MODULE.md` table shape as canonical persistence design. +11. Requirements review synthesizes a candidate requirement set from the knowledge layer, presents stable item reference codes, supports per-item comments, and resolves through explicit `accept review` / `request changes` submission. +12. Criteria review synthesizes a candidate verification set from accepted requirements plus the knowledge layer, presents stable item reference codes, and supports the same per-item commenting and full-set review seam. +13. Export is available only when workflow closure, accepted review outputs, and staleness rules are satisfied. -136. **Observer ontology should classify intent items by modality, not answer shape** — observer capture should distinguish value / outcome items (`goal`), descriptive items (`context`), boundary items (`constraint`), uncertainty items (`assumption`), choice items (`decision`), obligation items (`requirement`), preservation items (`invariant`), oracle items (`criterion`), and concrete witness items (`example`). `Decision` should narrow to chosen directions among plausible alternatives with durable consequences; `constraint` should remain top-level but gain subtypes such as `non_goal`, `scope`, `technical`, `policy`, `resource`, `compatibility`, and `environmental`. 
Generic `context` should be promoted when the content carries stronger semantics: success condition -> requirement or invariant, solution boundary -> constraint, uncertain material belief -> assumption, chosen alternative -> decision, mere interpretation aid -> context. Depends on: D134, Requirement 38. Supersedes: treating all user commitments or selected options as decisions by default. +#### Workspace / graph UI + +26. The homepage surfaces workspace (CWD) binding so the user understands listed specifications and the new-spec affordance are scoped to the current project directory. +33. Graph view is a first-class alternative to chat view, accessed as a peer route, and projects the intent graph as a navigable workspace with visible relationship topology and graph-launched refinement. The first ship is a structured-list layout; a spatial canvas follows as a layout switch inside graph mode. + +#### Provider / agent substrate + +40. Prompt and context engineering are first-class server subsystems: prompts and reusable policy doctrines live as inspectable markdown assets, while typed context-pack builders derive scenario-specific intent-graph renderings. +41. Agent-heavy future capabilities can be tested before product UI exists through a lightweight scenario substrate that runs prompt/context packs against seeded graphs or transcript fixtures, captures outputs, and supports harness comparison. +42. Agent-originated mutations of Brunch data use one typed server-owned mutation surface regardless of caller; agents and harnesses may not mutate durable Brunch state by calling the ORM directly. +43. A local agent capability CLI can expose Brunch-owned capability contracts over long-lived JSONL stdin/stdout so an external probe runner or harness can drive the real specification flow without privileged ORM access. 
+ +## Live Architecture Register + +### Open Assumptions + + + +| # | Assumption | Confidence | Status | Depends on | Validation approach | +| --- | --- | --- | --- | --- | --- | +| A15 | LLM readiness and closure recommendations can be useful, but closure authority must remain explainable and user-legible rather than model-owned. | medium | open | D65, D66 | Manual comparison of model recommendations vs user judgment across varied projects. | +| A20 | Users experience observer capture as responsive when every eligible answered turn enters one turn-owned background capture backlog instead of blocking chat stream completion. | medium | open | D22, D113, I108 | Measure stream completion timing, backlog draining, and replay clarity. | +| A48 | Intent graph edges are sufficient to drive accurate cascade preview for revisit work. | medium | open | D50, D137, D146 | Structural cascade tests plus manual judgment about scope. | +| A49 | A modal or chat-shaped secondary thread can resolve revisit implications without forcing a full interview restart. | medium | open | D80, D138 | Manual revisit walkthrough once the thread lifecycle lands. | +| A57 | A specification-scoped lifecycle seam can own duplicate-safe automatic phase entry/continue, late-event suppression, and route-independent in-flight operation identity without introducing a second durable workflow model. | medium | open | D113 | Prototype lifecycle edges; revisit if restart or duplicate-submit truth remains ambiguous. | +| A58 | A cumulative workspace can preserve phase legibility if realized sections stay visible, future sections stay unreachable, and section focus remains navigation-only state. | medium | open | D86, D110, D113, D114 | Prototype continuous workspace deep links, scroll/focus transitions, close-to-next-phase motion, and resume/reload. 
| +| A64 | Query-owned invalidation boundaries can eliminate scroll-jank cascades without stale-data bugs; the near-term boundary may be one specification bundle plus one entities domain. | medium | open | D87, I110 | Prototype bundle/entities decomposition and measure scroll stability plus data freshness during observer updates. | +| A65 | The interviewer can adapt usefully to the full `greenfield <> brownfield` by `end-to-end build <> incremental feature` matrix without making kickoff feel bureaucratic. | medium | open | D124 | Manual walkthroughs across all four corners of the matrix. | +| A66 | Relation-first observer capture will improve revisit, export grounding, and graph-view utility without flooding the graph with speculative or low-value edges. | medium | open | D50, D125 | Observer corpus probes plus manual graph/export review focused on edge precision, coverage, and usefulness. | +| A67 | Users who are tired, rushed, or under-informed will converge faster by reacting to synthesized candidate directions than by continuing a long direct interview or force-closing early. | medium | open | D126, D127 | Manual comparison between direct questioning, skip-close, and candidate-spec reaction flows. | +| A68 | Broad-pass interviewing followed by explicit deepen-detail actions will preserve coherence better than a single depth-first drill-down while still producing export-worthy specifications. | medium | open | D127 | Prototype broad-pass-first flows and compare knowledge completeness and user comprehension. | +| A69 | A graph-centric refinement surface can launch side-chats without splitting durable specification truth. | medium | open | D128, D114 | Prototype graph-launched refinement with reload/resume checks. | +| A70 | Structured-list graph view remains valuable even when edge density is low, provided relation footers gracefully collapse. | medium | open | A66, D128, D129 | Manual walkthroughs at low and high edge density. 
| +| A71 | Semantic mutations will eventually need a changeset-ledger history distinct from conversational turn ancestry, but the first implementation should prove chat containers and reconciliation needs first. | medium | open | D135 | Revisit after chat containers plus reconciliation needs stabilize. | +| A72 | Intent items can carry version history without breaking the active-path durable-truth contract. | low | future | A71, D135 | Prototype item versioning behind the changeset ledger. | +| A73 | Autonomous architect/generator loops can propose useful graph mutations only after human-driven multi-chat and reconciliation surfaces prove the shared mutation pipeline. | low | future | A71, D135 | Run architect proposals in shadow mode after multi-chat/reconciliation seams stabilize. | +| A74 | OpenRouter may reduce first-run friction, but capability parity and AI SDK support need proof before making it the default provider path. | medium | open | D130, D131 | Spike provider configuration against interviewer/observer calls. | +| A75 | XDG-compliant user-scoped auth/config storage is acceptable for UI-entered API keys and safer than writing secrets to project workspace. | medium | open | D130, D132 | Prototype key save/load/delete precedence and inspect OS/XDG paths. | +| A76 | Users will accept Brunch editing `.gitignore` when the action is explicit, previewable, and idempotent. | high | open | D133 | Unit-test ignore detection/append behavior and manual dashboard walkthroughs. | +| A77 | Progressive checkability will improve generated specs more than a binary formal/not-formal framing. | medium | open | D134 | Prototype intent-item-to-witness review on a small corpus. | +| A78 | Adding `invariant` and `example` as product ontology candidates will make intent drift easier to detect without overwhelming early interviews. | medium | open | D134 | Run transcript probes for examples, counterexamples, not-relevant cases, and state/transition rules. 
| +| A79 | Once semantic truth can change through graph edits, side-chats, reconciliation, verifier feedback, or implementation feedback, turn ancestry alone will be insufficient as the semantic history spine. | medium | open | D135 | Revisit after chat containers and reconciliation needs. | +| A80 | Behavioral kernels can generate higher-yield disambiguating questions than generic elicitation prompts if they emit checkable artifacts rather than user-visible formalism. | low | open | D134 | Try state/lifecycle and containment/topology prototypes first. | +| A81 | Knowledge/intent edges can carry semantics without becoming noisy only if relation policy distinguishes semantic relations from reconciliation needs and operational participation. | medium | open | D137 | Design relation-policy semantics before broad observer edge expansion. | +| A84 | Scenario-specific graph context packs can replace transcript-as-default prompt context without losing conversational nuance. | medium | open | D139, D140 | Build prompt/context probes over seeded graphs and compare outputs against transcript-heavy baselines. | +| A85 | A lightweight prompt scenario substrate will validate LLM-heavy directions faster than UI-first development if it captures rendered prompts, context packs, model settings, raw outputs, parses, and review notes. | medium | open | D139 | Run multi-scenario prompt probes before productizing UI. | +| A86 | Pi can serve as a useful pre-UI agent harness or tool-spike backend without forcing Brunch to adopt Pi as production runtime. | low | open | D142 | Spike Pi SDK/RPC with controlled prompts, tools, and graph context packs. | +| A87 | Verification-aware post-spec decomposition can be explored as agent scenarios before it is a Brunch product surface. | low | future | D141 | Prototype decomposition and oracle-design probes. 
| +| A88 | Deterministic enumeration over existing intent edges incident on a changed item can produce a useful cascade preview without requiring the reconciliation agent. | medium | open | D135, D137, D138, D146 | Manual hard-edit walkthroughs across side-chat V3.0 fixture matrix. | +| A89 | A long-lived local JSONL agent capability CLI can drive the real Brunch interview flow well enough for external LLM-as-user probes to produce credible completed-spec fixtures. | medium | open | D143, D147, Requirement 43 | Prototype the minimal JSONL loop and run LLM-as-user scenarios end-to-end. | +| A90 | Users who ask to speed up a long interview will prefer a side-chat that generates 2–3 reviewed scenario options completing the current direction. | medium | open | D126, D148, D151, Requirement 44 | Probe scenario-options against drilldown fixtures and run manual flow review. | +| A91 | Graph-review critique can make scenario-generated candidate bundles safe enough for product use if readiness states and follow-on review work are explicit. | medium | open | D151, D152, Requirement 44 | Run candidate bundle probes with graph-review scoring and human review. | +| A92 | A conservative global staleness rule for open proposal turns is acceptable before neighborhood-level staleness calculation exists. | medium | open | D149, I117 | Exercise multi-chat proposal flows where another chat applies a changeset while a proposal remains open. | +| A93 | Relation-policy directionality lookup is safer than forcing all useful intent-edge verbs into one dependency direction. | medium | open | D137, D150 | Define canonical/inverse sentences and source/target change behavior for each relation. | + +### Active Decisions + + + +#### Workflow runtime and workspace projection + +22. 
**Observer-result sync is turn-owned and background by default** — eligible answered turns enter one turn-owned observer capture backlog after durable turn finalization, and chat stream completion must not wait on extraction. +65. **Phase outcomes are explicit durable records** — workflow status, closeability, readiness, and closure provenance project from durable phase outcomes on the active path. +66. **Interviewer-recommended and user-forced closes share one transcript-friendly seam** — one phase-close transport handles both paths, with explicit closure basis. +86. **The client is organized by phase-addressable routing and three concentric layout shells** — AppLayout, SpecificationWorkspaceLayout, and ViewLayout own route structure; phases remain router-addressable for links, gating, and sibling composition. +87. **Layout-level data ownership partitions invalidation** — the specification bundle and entity collections subscribe through separately owned query domains / route surfaces instead of one monolithic refresh boundary. +110. **The workspace stream is a merged read model, not identical to the turn tree** — active-path durable turns are the lineage spine; anchored workflow facts and projected control/activity/phase elements derive from workflow state plus nearby anchors. +111. **The app is seed-first and migration-light until the data model settles** — prefer one truthful read-model contract and current seeded scenarios over compatibility for unstable local rows. +113. **Phase lifecycle side effects are specification-scoped, not route-scoped** — durable workflow truth stays server/read-model authoritative; an ephemeral lifecycle seam owns auto-entry, continuation, duplicate-submit suppression, stale-event rejection, and capture-backlog reseeding. +114. 
**Continuous workspace rendering and phase addressability are separate concerns** — the center pane may render one cumulative realized-section stream while the router preserves focus addresses, gating, and sibling routes. +124. **Interview framing is two-axis, not novelty-only** — interviewer orientation uses workspace novelty and delivery posture. -137. **Intent edges are semantic relations, while reconciliation needs are process debt** — intent-item kinds say what semantic units exist; intent-edge kinds say how items justify, constrain, depend on, refine, illustrate, and verify one another. A negative example is intent content; a boundary relation such as `rules_out`, `excludes`, or `counterexample_for` is intent semantics; a `reconciliation_need` is directed process obligation saying existing semantic truth may require renewed judgment because a change, contradiction, verifier result, or historical premise may affect it. The observer and future graph tools should provide edge-local neighborhoods around active intent items, but not every inferred edge should drive cascade, staleness, export explanation, criteria generation, or reconciliation. Relation policy should classify edge support (`explicit`, strong inference, weak candidate) and operational participation before relation-first capture broadens beyond today's limited edge set. Observer-created interpretive structure may land immediately when it adds supported edges, examples, or reconciliation needs; rewriting accepted intent remains reconciliation-review work. Depends on: A66, A81, D50, D125, D128, D134, D135, D138. Supersedes: treating graph edges as only display infrastructure, and also supersedes treating every visible edge as equally authoritative process truth or work queue state. +#### Intent graph, semantic mutation, and review -138. 
**Multi-chat substrate is the first concrete persistence slice before the full changeset ledger** — add `chat`, nullable `turn.chat_id`, `specification.primary_chat_id`, mirrored `chat.active_turn_id`, and a minimal `reconciliation_need` table while keeping legacy `turn.specification_id` and `specification.active_turn_id` during transition. Do not add `active_chat_id` in phase one; `primary_chat_id -> chat.active_turn_id` covers the interview head until multiple active chat surfaces need their own pointer. New writes populate both legacy and chat pointers; application assertions preserve same-spec and same-chat ancestry; later cleanup can make chat ownership canonical and remove the legacy pointers. `reconciliation_need` uses directed item-to-item source / target fields, narrow `kind` and `status`, free-text reason, immediate `caused_by_turn_id`, and nullable `caused_by_changeset_id` as a future changeset-ledger placeholder. This supersedes older side-chat substrate assumptions and makes `docs/design/MULTI_CHAT.md` the concrete phase-one design while `docs/design/PATCH_LEDGER.md` remains historical deeper semantic mutation history. Depends on: A71, A82, A83, D135, D137, Requirement 39. Supersedes: implementing multi-chat by preserving an in-memory-only side-chat patch list as the durable substrate, and supersedes naming the process-debt table `reconciliation_edge`. -139. **Prompt/context scenario substrate is a first-class foundation** — Brunch should externalize server-side prompts and reusable agent doctrines into inspectable markdown assets, load and compose them through a typed server seam, and introduce context-pack builders that render the current intent graph for a specific generative scenario rather than letting each call site hand-roll prompt context. The same substrate should support lightweight prompt probes over seeded graphs and transcripts before UI surfaces are built. 
A prompt scenario composes prompt + context pack + model settings + capability inventory + captured output/review for evaluation; it must not become the owner of prompt doctrine, context semantics, credential UX, or the shared production AI runtime. Depends on: A84, A85, D134, D136, D137, Requirement 40, Requirement 41. Supersedes: scattered TypeScript prompt strings and transcript-dump context as the default mechanism for new agent features. -140. **Intent graph context packs are scenario-specific semantic briefings** — a context pack is an explicit rendering of graph truth, workflow state, relevant provenance, unresolved ambiguity, relation neighborhoods, and authority labels for one agent task. Packs should exist for observer capture, next-question generation, candidate-spec synthesis, criteria/witness generation, web research query framing, reconciliation review, architect proposals, and downstream decomposition/oracle probes. They should be bounded, ranked, and typed rather than raw graph dumps. Depends on: A84, D125, D134, D137, D138, Requirement 40. Supersedes: assuming the active chat transcript is the canonical prompt context after multi-chat. -141. **Post-spec decomposition remains a probe frontier, not a committed Brunch UI** — the next-after-spec direction is to derive design alternatives, oracle strategy, execution slices, and verification-aware orchestration constraints from the intent graph and its checkability implications. This should first run through the prompt/context scenario substrate, borrowing cognitive patterns from `ln-design` and `ln-oracles`, before deciding whether it belongs inside Brunch or a successor product. Depends on: A87, D134, D139, D140, Requirement 41. Supersedes: treating export prose as the only meaningful handoff target. -142. 
**Pi is a candidate harness adapter, not current product runtime truth** — Pi may be evaluated via SDK or RPC as the first lower-level agent harness for prompt probes, web/tool experiments, and future decomposition scenarios because it already provides sessions, custom tools, provider support, event streams, and embedding modes. Brunch should not assume Pi owns product workflow, durable replay, intent-graph mutation authority, reconciliation review, or credential UX unless a later spike proves and explicitly adopts those boundaries. Depends on: A86, D139, Requirement 41. Supersedes: deciding the web-research tool spike only at the individual tool API level. -143. **Brunch owns the agent mutation surface; harnesses adapt it as tools** — Any mutation of durable Brunch data initiated by an agent must route through Brunch-owned mutation handlers, not direct ORM access or harness-specific tool implementations. Those handlers define the product operation: stable id, input/output schemas, description, authority class, replay policy, and reconciliation/changeset-ledger behavior. AI SDK, Pi, CLI/TUI, or future adapters may expose the handlers as tools, but adapters only translate transport and tool shape; they do not define mutation authority. Read-only capabilities can use the same contract registry for consistency, but the binding rule is that agent-originated writes enter through one server-owned surface. Depends on: Requirement 42, D138, D139, D142. Supersedes: defining separate mutating tool surfaces inside each agent harness or letting agent flows bypass application handlers to call the ORM. -144. **Intent graph vocabulary supersedes knowledge graph vocabulary** — Canonical product vocabulary is `intent graph`, made of `intent items` and `intent edges`. 
Current schema/code may still use `knowledge_item` and `knowledge_edge` as implementation names during transition, but new planning, agent capability contracts, context packs, operation ids, and user-facing design should prefer intent vocabulary unless referring to current persistence/API names. `Claim` may remain an explanatory generic for natural-language content, but it is not a product/schema noun. Depends on: D134, D136, D137. Supersedes: using `knowledge graph`, `knowledge item`, `knowledge edge`, or `claim` as future-facing product nouns. -145. **Changeset/change supersedes patch/patch_change** — Semantic mutation history uses `changeset` for one submitted semantic mutation bundle and `change` for one atomic mutation inside it. `Patch` and `patch_change` remain historical design-doc vocabulary and may appear in older file names, but new schema, capability contracts, operation ids, and planning language should use `changeset` / `change` unless this decision is explicitly reversed. Depends on: D135, D138, D143. Supersedes: treating naming as open between patch and changeset. -146. **Hard-impact edit cascade reads from the `reconciliation_need` queue, not from REVISIT walk state** — when a hard-impact `propose_edit` patch applies, the server enumerates `knowledge_edge` rows incident on the changed item under typed relation policy and opens one `reconciliation_need` row per affected pair (Path 1 from `docs/design/MULTI_CHAT.md` §5.1). The patch list overlay is the canonical resolution surface: open needs render as a `Pending review` section alongside staged patches, with per-row accept-on-target / edit-target / dismiss actions. The V2 `deferred: true` apply response and the "Hard impact — coming in V3 cascade preview" banner are removed at V3.0 ship. V3.0 groups needs mechanically by `kind` and relation type; agent-grouped resolution (auto-confirm / auto-edit / substantive) is V3.1 work and does not block V3.0. 
Side-chat thread persistence is not a V3.0 prerequisite — threads stay in-memory until MULTI_CHAT.md Phase 2. Depends on: A71, A83, A88, D80, D135, D137, D138. Supersedes: hard-edit deferral with a placeholder banner, the modal secondary-thread walk in `docs/archive/design/REVISIT_MODULE.md`, and the SIDE_CHAT.md V3 prose that pre-dated the multi-chat substrate. -147. **The local agent CLI is a long-lived JSONL adapter over Brunch capability contracts** — CLI-addressability should first ship as a `brunch agent`-style local process that speaks request/response JSONL over stdin/stdout, dispatches Brunch-owned capability contracts, and keeps all product resources explicit in input payloads. The adapter may hold ambient runtime plumbing such as a DB connection, provider config, and in-flight interviewer / observer generation bookkeeping, but it must not hold hidden selected spec/chat/turn handles as command semantics. Read capabilities use `list` / `get` for structured read-model data and `read` for agent-facing projections with allowed response shapes and next-command hints. Mutations stay capability-first and surface-lazy: add only contracts needed by real probe/tool use, with an initial surface around `spec.create`, `spec.getStatus`, `spec.requestPhaseClosure(specId, phaseId?)`, `spec.requestExport`, `chat.getPrimary`, `chat.ensureReady(chatId?, timeoutMs?)`, `chat.read`, `turn.get`, and `turn.submitResponse(chatId, turnId?, response)`. `chat.ensureReady` is the idempotent synchronization/recovery command: it may trigger continuation when a chat lacks a usable next turn, can block up to a bounded timeout, and returns a derived state such as `generating`, `awaiting_response`, `idle_no_frontier`, `closed`, or `error` without requiring a durable runtime-operations ledger in the first cut. 
The LLM-as-user probe runner is a separate client of this JSONL adapter, not part of the capability server and not allowed to import DB/product handlers directly; its scenario briefs, model policy, generated transcripts, and curated golden fixture bundles are probe artifacts, not Brunch authority. Suggested module boundary: `src/server/capabilities.ts` plus `src/server/capabilities/` own contracts, schemas, handlers, and dispatch; `src/server/agent-jsonl.ts` plus `src/server/agent-jsonl/` own only protocol/session/transport; `scripts/agent-probes/` owns the outer LLM-as-user loop and artifact writing as development harness code through a JSONL client. Depends on: Requirement 43, A89, D139, D140, D143. Supersedes: treating the CLI as hand-written route wrappers, direct ORM scripts, a one-shot TUI with hidden ambient selection, or a probe runner that bypasses the same mutation surface future agents must use. -148. **Spec evolution strategies are chat-local, turn-mediated process state** — strategy is not specification-level semantic truth. A chat may be established through a first assistant/system frontier turn that offers or declares a strategy such as `step_by_step`, `scenario_options`, `targeted_cases`, `graph_review`, or `reconciliation`; globally triggered flows may create/reuse a pre-strategized chat whose first turn is the procedure kickoff. A chat can technically change strategy through later turns, but explicit switch UX is deferred. Tactical sub-strategies are allowed inside a chat, but broad mid-interview acceleration should branch into a side-chat/strategy chat rather than mutate the primary interview chat in place. Depends on: Requirement 39, Requirement 44, D138. Supersedes: treating the interviewer as one global mode per specification. -149. 
**Changesets are the atomic semantic mutation boundary, while proposal turns are not mutations until accepted** — a graph-review finding, candidate bundle, or reconciliation suggestion is the assistant/system half of an open frontier turn until the user responds. Only `accept` applies a proposal turn's semantic changeset; `revise`, `ask_followup`, `regenerate`, `defer`, and ordinary `reject` produce successor/process state rather than direct graph mutation. A changeset is the smallest semantic mutation unit that preserves coherence, and may record direct edits, candidate acceptance, reconciliation resolutions, opened reconciliation needs, or future verifier/import results. Turns should stamp the latest applied changeset id at creation (`opened_at_changeset_id` / `base_changeset_id`) so open proposals can be conservatively marked stale when the specification advances. Depends on: A71, A79, A92, D135, D145, Requirement 44. Supersedes: treating agent proposals or review findings as durable semantic truth before user/action acceptance. -150. **Relation policy owns operational directionality for intent edges** — relation names should be semantically clear, but code must not infer cascade or reconciliation behavior from raw edge source/target direction. Each relation kind declares a canonical sentence, inverse display sentence, operational-axis participation, and source-change / target-change behavior. Direct edit and hard-impact cascade enumerate incident accepted edges, then ask relation policy which endpoint, if any, receives a `reconciliation_need`. FE-700 may break current `depends_on` / `derived_from` / `constrains` / `verifies` records while expanding the ontology, but should not force every useful edge verb into one dependency direction at the expense of display, prompt context, export trace, critique, verification, candidate generation, or explanation. Depends on: A81, A88, A93, D137, D146. 
Supersedes: assuming outgoing edges from the changed item are the cascade direction. -151. **Scenario-options acceleration is product-facing, but graph review is its safety oracle** — the first user-visible alternative to long drilldown should likely be a first-turn strategy choice or mid-interview `speed this up` side-chat that generates 2–3 candidate bundles completing the current direction from context-packed accepted graph truth. Candidate bundles present named tradeoff profiles and are accepted as coherent units, not item-by-item pick lists. Fast gates (parse/schema/fixed-premise/no-obvious-conflict/tradeoff summary) can run before display; deeper graph review, coverage, checkability, provenance, and repair/refinement can run asynchronously. Depends on: A67, A84, A85, A90, A91, D126, D139, D140, D148, Requirement 31, Requirement 44. Supersedes: treating candidate-spec assist as a skip/force-close helper or as one-shot generation that can be committed without critique. -152. **Graph review and reconciliation are separate graph operations** — reconciliation is repair-oriented process debt from a known disturbance (`reconciliation_need`), while graph review is quality-oriented critique over any graph for weakness, genericity, low support, missing coverage, weak checkability, poor provenance, or maturity gaps. Broader review findings start as turn-owned structured artifacts; `reconciliation_need` remains the only first-class problem table until review issues require independent querying, filtering, badges, assignment, or lifecycle. Candidates may be accepted with represented issues if accepting also opens a graph-review frontier or appropriate process-debt records. Depends on: A91, D137, D149, D151, Requirement 44. Supersedes: overloading reconciliation as the umbrella for all graph intelligence or blocking useful imperfect specs until every review issue is repaired. +50. 
**Knowledge relationships live behind one typed graph seam** — persisted graph edges are first-class and drive dependency, derivation, and revisit behavior. +80. **Intent-graph revisit replaces hard turn-tree branching for V1** — revisit starts from graph edit/refinement surfaces and resolves cascades through reconciliation-oriented flows rather than generic turn-tree branching. +125. **Observer capture is a prompt-budgeted graph-delta seam** — observer output includes per-kind item collections plus relationship candidates resolved and validated by the server through relation policy. +126. **Recognition-first assists synthesize proposals through turn-owned candidate direction sets** — grounding/design/future wizard modes can present candidate direction artifacts for structured user reaction. +127. **Interview detail flows through turn-owned breadth skeletons and detail-focus reactions** — broad-pass maps and next-detail affordances steer ordinary successor turns without creating a second topic-tree store. +128. **Graph view becomes an actionable workspace mode through a projection-first, intent-emitting seam** — graph mode projects shared entity truth, owns only ephemeral graph-local interaction, and emits intents into the workspace lifecycle. +129. **Graph view fetch scope and render scope are decoupled** — graph view fetches whole-spec entities while the intended default render scope trends to active-path items with a later `Show all` toggle. +134. **Brunch specs evolve toward recognition-first intent graphs with progressive checkability** — the product direction is typed intent items, semantic edges, examples/counterexamples, witnesses, unresolved ambiguity, and validation status rather than prose inventory alone. +135. 
**Semantic mutation history should split from conversational turn history when graph editing becomes first-class** — turns remain conversational provenance, the intent graph remains current truth, future changesets record semantic mutation history, and reconciliation needs record semantic debt. +136. **Observer ontology should classify intent items by modality, not answer shape** — goal/context/constraint/assumption/decision/requirement/invariant/criterion/example semantics should be distinct, with decisions narrowed to durable choices among alternatives. +137. **Intent edges are semantic relations, while reconciliation needs are process debt** — relation policy decides which edges participate in display, cascade, export, staleness, reconciliation, criteria help, or weak suggestions. +138. **Multi-chat substrate is the first concrete persistence slice before the full changeset ledger** — chat containers and minimal reconciliation needs precede canonical changeset history while legacy spec-scoped pointers remain transitional. +144. **Intent graph vocabulary supersedes knowledge graph vocabulary** — planning, product language, capability contracts, and context packs should prefer intent item/edge vocabulary; knowledge item/edge remains implementation language during transition. +145. **Changeset/change supersedes patch/patch_change** — new semantic mutation history vocabulary is changeset/change; patch vocabulary is historical. +146. **Hard-impact edit cascade reads from the `reconciliation_need` queue, not from REVISIT walk state** — direct hard edits enumerate incident relations, open reconciliation needs, and resolve through the patch-list/Pending-review surface. +148. **Spec evolution strategies are chat-local, turn-mediated process state** — strategy belongs to chats/turns, not specification-level semantic truth; broad acceleration should branch into strategy chats rather than mutate the primary interview chat in place. +149. 
**Changesets are the atomic semantic mutation boundary, while proposal turns are not mutations until accepted** — proposal actions other than accept create successor/process state rather than graph truth. +150. **Relation policy owns operational directionality for intent edges** — cascade/reconciliation behavior is declared per relation, not inferred from raw source/target edge direction. +151. **Scenario-options acceleration is product-facing, but graph review is its safety oracle** — generated candidate bundles may become the user-facing alternative to long drilldown only with fixed-premise, tradeoff, checkability, provenance, and graph-review safeguards. +152. **Graph review and reconciliation are separate graph operations** — reconciliation repairs known disturbance debt; graph review critiques graph quality and starts as turn-owned structured artifacts unless independent lifecycle needs emerge. + +#### Provider, prompt/context, and agent substrate + +130. **First-run setup becomes a product surface, not README-only configuration** — dashboard/provider setup replaces project `.env` docs as the only user-facing path. +131. **Provider access moves behind one AI runtime provider seam** — interviewer and observer construction consume a shared provider/model resolver. +132. **UI-entered credentials are user-scoped auth state, not workspace state** — UI-entered keys go to XDG-compliant user auth/config, not `.brunch/` or project `.env` by default. +133. **`.brunch/` gitignore support is confirm-gated deterministic workspace mutation** — repository mutation is previewable, idempotent, and user-confirmed. +139. **Prompt/context scenario substrate is a first-class foundation** — prompts/doctrines are markdown assets, context packs are typed server builders, and prompt scenarios produce repeatable probe artifacts. +140. 
**Intent graph context packs are scenario-specific semantic briefings** — packs render bounded graph truth, workflow state, provenance, unresolved ambiguity, relation neighborhoods, and authority labels for one agent task. +141. **Post-spec decomposition remains a probe frontier, not a committed Brunch UI** — decomposition/oracle probes should run through the scenario substrate before product commitment. +142. **Pi is a candidate harness adapter, not current product runtime truth** — Pi may be evaluated for probes/tools, but Brunch owns workflow, replay, mutation authority, reconciliation, and credential UX. +143. **Brunch owns the agent mutation surface; harnesses adapt it as tools** — agent-originated writes route through Brunch-owned mutation handlers; adapters translate transport/tool shape only. +147. **The local agent CLI is a long-lived JSONL adapter over Brunch capability contracts** — `brunch agent` exposes capability contracts with explicit resource ids; the LLM-as-user probe runner remains an external client. + +### Critical Invariants + + + +Each invariant is a formalization candidate: the property is stated in human language, protected today by tests/manual oracles, and may later graduate to stronger runtime/model oracles. + +| # | Invariant | Protected by | Proves | +| --- | --- | --- | --- | +| I4 | Vite proxy routing and the runtime backend-port seam stay aligned through one explicit configuration path. | `runtime-config.test.ts` | Requirement 1 | +| I17 | Data Part schema validation remains confined to true LLM / HTTP boundaries rather than mirrored internal seams. | `parts.test.ts` | Requirement 4 | +| I24 | The routed interview surface preserves hydration, stream projection, lifecycle orchestration, mutation transport, phase projection, successor-frontier continuity, activity summaries, projected controls, preface/revision artifacts, and trailing observer attachment.
| `InterviewView.test.tsx`, workspace stream / controller / app tests | D86, D87, D110, D113, D114 | +| I44 | Structured turn responses round-trip through persistence, hydration, projection, and UI affordance state without collapsing back to scalar semantics. | `turn-response.test.ts`, `context.test.ts`, `InterviewView.test.tsx` | Requirement 4 | +| I48 | Canonical intent/knowledge kinds persist with provenance and project through typed entity collections, stable reference codes, turn-linked capture projection, and graph edges without ontology drift. | `db.test.ts`, `core.test.ts`, `knowledge.test.ts`, sidebar/graph tests | D50, Requirements 22, 23 | +| I54 | Phase-aware capture preserves the ontology boundary: grounding/design persist exploration knowledge, accepted review outputs materialize requirements/criteria, and both seams survive persistence and replay. | `observer.test.ts`, `context.test.ts`, `app.test.ts`, `InterviewView.test.tsx` | Requirements 22, 23 | +| I72 | Explicit phase outcomes project shared workflow status, closeability, readiness, closure basis, and closed-phase markers through one durable seam. | `phase-close.test.ts`, `db.test.ts`, `app.test.ts` | D65, D66, D110 | +| I87 | Requirements and criteria review persist interviewer-owned review metadata on the review turn, project stable reference codes, submit semantic review replies, and carry accepted outputs downstream without dead frontier states. | `interview.test.ts`, `db.test.ts`, `app.test.ts`, `InterviewView.test.tsx` | Requirements 11, 12; D110 | +| I100 | Local-first distribution keeps `.brunch/` workspace resolution, package-bin startup, built-client serving, bound URL reporting, runtime ownership, and JSON request limits correct. 
| `project.test.ts`, `launcher.test.ts`, `cli.test.ts`, `runtime-config.test.ts`, `app.test.ts` | Requirement 1 | +| I102 | File-route generation, directory-based nesting, three-shell route architecture, and phase addressability remain the runtime routing source of truth; graph view stays code-split. | `router.test.tsx`, `build-boundary.test.ts`, `GraphView.test.tsx` | D86, Requirement 33 | +| I106 | Provider credential discovery, precedence, dashboard status, and model-provider resolution stay explicit without exposing raw secret values through `/api/config`, logs, persisted specification state, or client-visible payloads. | planned: config/app/dashboard tests | Requirements 34, 35, 36; D130, D131, D132 | +| I107 | `.brunch/` gitignore hygiene is idempotent and confirmation-gated. | planned: project-gitignore/app/dashboard tests | Requirement 37; D133 | +| I108 | Observer capture does not block chat stream completion for eligible answered turns; backlog state is re-derived from durable turns and persists results to the originating turn. | planned: app/controller tests | D22, D113 | +| I109 | Observer prompts remain compact as relation extraction widens; candidates resolve only through validated existing ids or same-turn provisional references, and accepted reviews reuse relation policy. | `context.test.ts`, `observer.test.ts`, `db.test.ts`, `app.test.ts` | Requirement 30; D50, D125 | +| I110 | Workflow read truth and write truth stay behind named seams instead of transport handlers owning workflow semantics. | workflow projector / transition / phase-close tests | D110, D113 | +| I111 | Multi-chat substrate preserves primary-chat active-head equivalence during transition, same-spec/chat ancestry, and reconciliation-need dedupe without conflating process debt with semantic edges. 
| `chat-substrate.test.ts`, `reconciliation-need.test.ts`, `db.test.ts` | Requirement 39; D137, D138 | +| I112 | Prompt/context scenarios render from packaged markdown prompts and typed context-pack builders, with deterministic fingerprints and reviewable golden coverage. | prompt loader/build/golden, context-pack, scenario-runner tests | Requirements 40, 41; D139, D140 | +| I113 | Hard-impact direct edit opens reconciliation needs for affected relation-policy endpoints, records provenance, deduplicates idempotently, and no longer returns deferred placeholder responses. | planned: edit-applier/reconciliation/overlay/app tests | Requirement 10; A88; D146, D150 | +| I114 | The reconciliation classifier lifecycle is explicit and recoverable; labels are constrained, failures persist parser/thrown errors, and proposals are never auto-applied. | reconciliation-agent tests | Requirement 10; A88; D139 | +| I115 | The agent capability CLI remains an adapter over Brunch capability contracts: calls validate explicit resource ids/schemas, mutating calls dispatch through server-owned handlers, and probes exercise only the JSONL boundary. | planned: capabilities, agent-jsonl, probe-runner tests | Requirements 42, 43; A89; D143, D147 | +| I116 | Each active/resumable chat has at most one open assistant/system-first frontier turn; user responses complete it through normalized semantics, and strategy is chat-local process state. | planned: chat/transition/capability tests | Requirement 44; D138, D148 | +| I117 | Open proposal turns are stamped with the latest applied changeset id at creation and conservatively stale when the specification's latest changeset advances before completion. | planned: changeset/transition/app tests | A92; D149 | +| I118 | Reconciliation/direct-edit cascade never infers affected endpoints from raw edge direction alone; it consults relation policy source-change / target-change behavior. 
| planned: relation-policy/edit-impact/reconciliation tests | A93; D137, D150 | +| I119 | Scenario-option candidate bundles can become canonical only by accepting a coherent bundle changeset; accepted-with-issues candidates also create durable follow-on review/process debt. | planned: scenario-runner, turn-artifacts, changeset tests | A90, A91; D151, D152 | + +## Future Direction Register + +### Semantic / generative substrate + +- Intent graph semantics, relation policy, examples/invariants, checkability, and witness strength are the next semantic substrate focus. See `docs/design/INTENT_GRAPH_SEMANTICS.md` and PLAN item `intent-graph-semantics`. +- Changeset/change history is the future semantic mutation spine. See `docs/design/PATCH_LEDGER.md` and PLAN item `changeset-ledger`. +- Graph review and scenario-options acceleration remain probe-first until graph semantics and changeset acceptance are safe. See PLAN items `graph-review-scenario-options` and `productized-scenario-options`. + +### Agent capability substrate + +- Prompt/context packs and scenario runners are the pre-UI harness for LLM-heavy features. See `docs/design/AGENT_MUTATION_SURFACE.md` and PLAN item `agent-fixture-substrate`. +- Pi and other harnesses are adapters over Brunch-owned capability contracts, not product authority. + +### Provider / workspace hardening + +- Provider setup, XDG key storage, AI runtime provider resolution, and `.gitignore` assist are independent near-horizon hardening frontiers. ## Interaction Stream Model -The center column is a **merged stream projection** over multiple artifact families. The turn tree remains the authority for conversational lineage and branching, but the rendered stream is intentionally richer than the tree itself. +The center column is a **merged stream projection** over multiple artifact families. Only conversational turns participate in branch-bearing lineage; other artifacts anchor to that lineage or project from workflow state. 
-| Artifact family | Durable | Branch-bearing | Current examples | Ordering / invalidation rule | -| ------------------------- | ------- | -------------- | -------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------- | -| Conversational turn cards | yes | yes | grounding question, design question, review proposal, closure proposal, answered-turn replay | Ordered by the active-path turn chain; branch membership comes from `parent_turn_id`. | -| Anchored workflow facts | yes | no | phase outcome | Stored outside the turn table but anchored to turn ids for provenance; if an anchor falls off the active path, the fact is superseded or hidden. | -| Projected control cards | no | no | kickoff, recovery, proceed / go-to-frontier affordances | Derived from workflow state plus nearby anchors; they re-project on hydration and may disappear / reappear without needing their own durable row. | -| Activity cards | mixed | no | visible generation state, persisted activity summary, trailing observer state | Derived from runtime state or replay summaries adjacent to a turn or control boundary; they do not become branch nodes. | -| Phase markers | no | no | phase start, phase closed | Projected from workflow position and anchored workflow facts such as phase outcomes; they annotate the stream without entering the turn tree. | -| Phase section headers | no | no | grounding purpose + knowledge kinds | Projected from workflow state and phase metadata at the top of each phase section; re-project on hydration. | - -This model is deliberately asymmetric: only conversational turns participate in the linked-list lineage model, while the other artifact families either anchor to that lineage or project from it. 
A rendered card therefore does not imply a persisted turn row, and a persisted durable record does not need to masquerade as a turn to belong in the stream. - -The ordering rule is: active-path turns provide the spine, anchored workflow facts attach to points on that spine, and projected control / activity / phase-marker elements are injected relative to workflow state and those anchors. The invalidation rule is: if a durable non-turn record is anchored to a turn that leaves the active path, the record must be superseded or hidden rather than left floating as if it still belonged to the trusted branch. +| Artifact family | Durable | Branch-bearing | Examples | Rule | +| --- | --- | --- | --- | --- | +| Conversational turn cards | yes | yes | questions, review proposals, closure proposals, answered-turn replay | Ordered by active-path turn chain. | +| Anchored workflow facts | yes | no | phase outcome | Anchored to turn ids; hidden/superseded if anchor leaves active path. | +| Projected control cards | no | no | kickoff, recovery, proceed/handoff | Derived from workflow state plus nearby anchors. | +| Activity cards | mixed | no | generation state, activity summary, trailing observer state | Adjacent to a turn/control boundary; not branch nodes. | +| Phase markers | no | no | phase start/closed | Projected from workflow position and phase outcomes. | +| Phase section headers | no | no | grounding purpose + captured knowledge kinds | Projected at the top of each realized phase section. | ## Layout Architecture -### Top Bar - -| Element | Content | Position | -| ------------------ | ---------------------------- | --------------------- | -| Logo | Placeholder (TBD) | left | -| App name + version | "Brunch v{version}" | left, after logo | -| Separator | Pipe character | left, after version | -| Tagline | "AI-guided spec elicitation" | left, after separator | -| Working directory | `cwd` in mono | right-aligned | - -Height: `h-10` (40px). 
Version injected at build time from `package.json`. - -### Three-Pane Layout - -Below the top bar, three vertical panes fill the remaining viewport height. Each pane has a sticky-positioned header and a scrollable body using ScrollArea. - -#### Left Pane — Specification Navigation Sidebar +Brunch uses three route/layout shells: AppLayout, SpecificationWorkspaceLayout, and ViewLayout. The top bar identifies Brunch, version, tagline, and working directory. The specification workspace presents: -**Sticky header:** -- "< Back to Workspace" navigation link -- Read-only specification name (set at creation, not editable) +- **Left pane:** back-to-workspace link, read-only specification name, phase stepper/section navigator, status/readiness/turn-count metadata, and conditional output access. +- **Center pane:** a continuous or phase-focused workspace stream with a sticky header, realized phase sections, phase headers, one actionable bottom artifact for the current reachable phase, activity cards, durable turn cards, and projected recovery/handoff/completion controls. +- **Right pane:** intent graph sidebar grouped by kind, showing item counts and relationship cues. +- **Graph view:** a peer route inside the specification shell, initially a structured list over the intent graph; spatial canvas is deferred. -**Body — Phase stepper / section navigator:** -A vertical timeline with connecting line (blue for completed segments, gray for future). It remains strictly sequential for workflow truth, but it may behave as a section-jump / scroll-spy surface inside one continuous workspace transcript. 
Each phase item shows: - -| Phase | Internal key | Label | -| ----- | -------------- | ------------------- | -| 1 | `grounding` | Grounding | -| 2 | `design` | Elicitation | -| 3 | `requirements` | Requirements | -| 4 | `criteria` | Acceptance Criteria | -| 5 | *(route only)* | Output | - -Per-phase metadata: status (colored: Closed / In-Progress / Unstarted), readiness band (when in-progress), turn count. Closed phases and the current reachable phase are selectable; future phases may remain visible but locked. Output appears conditionally when all phases are closed. - -#### Center Pane — Chat Transcript - -**Sticky header:** -- "Phase N/M – [Phase Name]" for the currently focused section or current reachable phase — positional progress label -- Status text (colored) -- Turn count -- Readiness band (when in-progress) -- Close Phase button (right-aligned, in-progress only, gated by closeability, triggers confirmation) -- Status badge replaces button when phase is closed - -**Body (chat view):** -- One continuous workspace scroll surface that may be segmented into phase sections rather than remounted per phase -- Each phase section opens with a projected phase section header stating the phase purpose and captured knowledge kinds -- Closed phases replay their phase markers and answered / compacted turn cards as prior sections -- The current reachable phase owns the only actionable bottom artifact -- Activity cards and visible generation state stay attached to their section / turn anchors while the next generative turn is being created -- Active bottom artifact: projected kickoff control card, durable frontier turn card (grounding/question/review/closure proposal), or projected recovery card -- Artifact-specific controls - -**Body (closed phase):** -- Answered question cards -- Phase-closure marker plus any activity cards -- "Proceed to [next phase]" or equivalent handoff control card at bottom - -Scroll container: ChatScroll (ScrollArea + useStickToBottom). 
- -#### Right Pane — Knowledge Graph Sidebar - -**Sticky header:** -- "Knowledge Graph" title -- Item count + connection count - -**Body — Grouped intent items:** - -| Group label | Kinds | Visible | -| ----------------------- | -------------------------------------------------------- | ------- | -| Goals | goal, context, constraint (including `non-goal` subtype) | yes | -| Assumptions & Decisions | assumption, decision | yes | -| Requirements | requirement | yes | -| Acceptance Criteria | criterion | yes | -| *(hidden)* | term | no | - -Items render as compact DrawerCard instances: code + content in header, edge/dependency reference codes as drawer-peek summary when edges exist, plain card otherwise. - -### Design Tokens - -**Typography scale** (11px–16px, no sizes outside this range): - -| Token | Size | Usage | -| -------------- | ---- | -------------------------------------- | -| `text-xxs` | 11px | Impact badges, tag labels | -| `text-xs` | 12px | Secondary text, metadata | -| `text-xs-plus` | 13px | Secondary body, explanatory text | -| `text-sm` | 14px | Body text | -| `text-sm-plus` | 15px | Card headings, collapsed question text | -| `text-base` | 16px | Section headings | - -Question card titles use arbitrary `text-[17px]` above the scale for emphasis. - -**Font weights**: normal (400), medium (500), semibold (600). No bold (700+). - -**Color tokens**: - -| Token | Hex | Usage | -| ------ | ------- | ------------------------------- | -| `ink` | #202020 | Primary text | -| `sub` | #5b5b5b | Subtitles, secondary text | -| `hint` | #a6a6a6 | Placeholders, inactive elements | -| `rule` | #e3e3e3 | Borders, dividers | -| `wash` | #f0f0f0 | Ghost fills, tracks | -| `tint` | #fafafa | Subtle background | - -**Accent blue** (interactive elements, recommendations, progress): -- Primary: `#2070e6` -- Gradient top: `#3484fa` -- Ring/border: `#1060d6` - -**Shadow tokens**: `--shadow-card`, `--shadow-ring`, `--shadow-card-ring`. 
- -**Card structure pattern** (DrawerCard): outer `rounded-xl border border-rule bg-tint` shell, inner white header with `-m-px` border overlap trick and `shadow-card`, tinted drawer body below. - -## Critical Invariants - - - -Each row in this table is a **formalization candidate** ascending the progressive-checkability ladder: the `Invariant` column states the property in human-readable form, `Protected by` names the *current oracle* (its present rung on the ladder — typically a regression test today), and `Proves` ties the property back to the requirements or decisions it preserves. Stronger oracles (state-machine model, runtime contract, proof obligation) are deliberate future moves recorded in `docs/design/INTENT_GRAPH_SEMANTICS.md` rather than expanded inline here. - -| # | Invariant | Protected by | Proves | -| ---- | --------- | ------------ | ------ | -| I4 | Vite proxy routing and the runtime backend-port seam stay aligned through one explicit configuration path. | `runtime-config.test.ts` | Requirement 1 | -| I17 | Data Part schema validation remains confined to true LLM / HTTP boundaries rather than mirrored internal seams. | `parts.test.ts` | Requirement 4 | -| I24 | Interview hydration, streaming projection, controller orchestration, mutation transport, phase-scoped rendering, and successor-frontier continuity remain stable through the routed interview surface, including concise durable activity summaries for replay, projected kickoff/recovery/handoff controls, preface-card replay and continue affordances, landing-only grounding-strategy kickoff submission, turn-owned submit/interviewer-processing, visible generation states, anchored phase-boundary projection, and trailing observer attachment. 
| `InterviewView.test.tsx`, `-workspace-stream-projector.test.ts`, `transcript-parity.test.tsx`, `-interview-data.test.ts`, `-interview-controller.test.tsx`, `app.test.ts`, `client-mutation.test.ts`, `task.test.tsx` | D86, D87, D93, D94, D95, D96, D110, D113 | -| I44 | Structured turn responses round-trip through persistence, hydration, projection, and UI affordance state without collapsing back to scalar semantics. | `turn-response.test.ts`, `context.test.ts`, `InterviewView.test.tsx` | Requirement 4 | -| I48 | Canonical knowledge kinds persist with provenance and project through typed entity collections, stable per-kind reference codes, turn-linked capture projection, and graph edges without ontology drift. | `db.test.ts`, `core.test.ts`, `knowledge.test.ts`, `EntitySidebar.test.tsx`, `InterviewView.test.tsx`, `GraphView.test.tsx` | D50, Requirements 22, 23 | -| I54 | Phase-aware capture preserves the committed ontology boundary: grounding / elicitation persist only durable exploration knowledge, accepted review outputs materialize durable requirements / criteria, and both seams survive persistence, turn-linked replay hydration, and UI refresh without breaking sync. | `observer.test.ts`, `context.test.ts`, `app.test.ts`, `InterviewView.test.tsx` | D95, D112, Requirements 22, 23 | -| I72 | Explicit phase outcomes project shared workflow status, closeability, readiness, closure basis, and closed-phase boundary markers through one durable seam. | `phase-close.test.ts`, `db.test.ts`, `app.test.ts` | D65, D66, D110 | -| I87 | Requirements and criteria review ground themselves in their respective inventories, persist interviewer-owned review metadata on the review turn itself, project stable review-set reference codes, submit lightweight full-set review replies by semantic action rather than assumed option order, and carry accepted review outputs into downstream workflow without leaving dead frontier states. 
| `interview.test.ts`, `db.test.ts`, `app.test.ts`, `InterviewView.test.tsx`, `project-state-turn.test.ts` | D94, D112 | -| I100 | `.brunch/` workspace resolution, compiled package-bin startup from the packed install artifact, built-client serving, actual bound URL reporting, same-workspace runtime ownership, chat-sized JSON request parsing, and JSON-shaped payload-too-large failures stay correct in local-first distribution. | `project.test.ts`, `launcher.test.ts`, `cli.test.ts`, `runtime-config.test.ts`, `app.test.ts` | Requirement 1 | -| I101 | Grounding strategy and workspace-backed context gathering persist through schema, API, interviewer configuration, and observer context; preface-card assistant metadata round-trips through persistence/projection, and preface cards stay provisional rather than directly mutating durable knowledge. | `db.test.ts`, `interview.test.ts`, `app.test.ts`, `context.test.ts`, `observer.test.ts`, `parts.test.ts`, `project-state-turn.test.ts`, `ProjectList.test.tsx` | D112, Requirements 3, 20, 21 | -| I102 | File-route generation, directory-based nesting, the three-shell route architecture, and phase addressability remain the runtime routing source of truth; graph view stays code-split. | `router.test.tsx`, `build-boundary.test.ts`, `GraphView.test.tsx` | D86 | -| I103 | Trusted fixture state comes only from TypeScript builders or direct DB setup; walkthrough seeds stay builder-owned, observer probes seed directly without a second fixture format, and seeded scenarios remain resumable/exportable through that one surviving fixture model. | `corpus.test.ts`, `walkthrough.test.ts`, `seed.test.ts` | Requirements 13, 14, 15 | -| I104 | Interviewer-owned turn artifacts materialize through one persistence seam, so runtime review metadata, preface cards, activity summaries, phase summaries, and seeded brownfield replay all round-trip without route-specific reconstruction drift. 
| `turn-artifacts.test.ts`, `app.test.ts`, `walkthrough.test.ts` | D93, D96, D112 | -| I105 | Grounding/design structured-response turns can unlock the next frontier before observer capture finishes, while deferred capture stays keyed to the answered turn, reseeds from durable turns after reload, and avoids stale completion attachment. | `-interview-controller.test.tsx`, `app.test.ts` | D96, D113, D123 | -| I106 | Provider credential discovery, precedence, dashboard status, and model-provider resolution stay explicit without exposing raw secret values through `/api/config`, logs, persisted specification state, or client-visible payloads. | planned: `runtime-config.test.ts`, `app.test.ts`, `ProjectList.test.tsx` | Requirements 34, 35, 36; D130, D131, D132 | -| I107 | `.brunch/` gitignore hygiene is idempotent and confirmation-gated: existing ignore coverage is detected, missing entries are appended only after user confirmation, and absent `.gitignore` files are created only through that same accepted action. | planned: `project-gitignore.test.ts`, `app.test.ts`, `ProjectList.test.tsx` | Requirement 37; D133 | -| I108 | Observer capture no longer blocks chat stream completion for any eligible answered turn; capture backlog state is re-derived from durable turns, drains through the turn-owned observer-capture endpoint, and persists results back onto the originating turn. | planned: `app.test.ts`, `-interview-controller.test.tsx` | D22, D96, D123 | -| I109 | Observer prompts remain compact as relation extraction widens: existing knowledge is passed as id/kind/content-preview anchors with bounded length, graph-delta candidates resolve only through validated `knowledge_item.id` or same-turn provisional references, and accepted review grounding refs reuse the same relation policy. 
| `context.test.ts`, `observer.test.ts`, `db.test.ts`, `app.test.ts` | Requirement 30; D50, D125 | -| I110 | Workflow read truth and workflow write truth stay behind named seams: durable snapshots project through `projectWorkflowState`, while turn-response, chat-route, phase-intent, and phase-close mutations apply through transition/runtime helpers instead of transport handlers owning workflow semantics. | `workflow-projector.test.ts`, `turn-response-transition.test.ts`, `chat-route-transition.test.ts`, `phase-close.test.ts`, `app.test.ts` | D110, D113, D123 | -| I111 | Multi-chat substrate preserves one interview chat per specification, keeps legacy and chat-derived active heads equivalent during transition, guarantees each turn's `chat_id` belongs to the same specification as its legacy `specification_id`, scopes parent turns to the same chat, and deduplicates simultaneously open reconciliation needs for the same source / target / kind without conflating them with semantic `knowledge_edge` rows. | `chat-substrate.test.ts`, `reconciliation-need.test.ts`, `db.test.ts` | Requirement 39; A82, A83; D137, D138 | -| I112 | Prompt/context scenarios render from packaged markdown prompts and typed context-pack builders rather than scattered inline prompt strings; probe artifacts include deterministic rendered prompt/context fingerprints, prompt asset packaging mirrors current source assets at build time, and production prompt text has reviewable golden coverage without requiring product UI. 
| `prompt-loader.test.ts`, `prompt-build-boundary.test.ts`, `prompt-golden.test.ts`, `context-pack.test.ts`, `scenario-runner.test.ts` | Requirements 40, 41; D139, D140 | -| I113 | Hard-impact `propose_edit` apply opens at least one `reconciliation_need` per existing typed dependency edge incident on the changed knowledge item (relations: `depends_on`, `derived_from`, `constrains`, `refines`, `verifies`), records `caused_by_turn_id` provenance, deduplicates against the partial unique index, and never returns `deferred: true` from the apply contract; resolutions transition `open → resolved` idempotently. | planned: `edit-applier.test.ts`, `reconciliation-need.test.ts`, `patch-list-overlay.test.tsx`, `app.test.ts` | Acceptance Criterion 7; A88; D135, D137, D138, D146 | -| I114 | The reconciliation classifier (V3.1 `run-agent` route + `classifyNeed`) walks every awaiting open `reconciliation_need` row through the lifecycle `null → queued → classifying → classified \| failed`, persists exactly one of `auto-confirm` / `auto-edit` / `substantive` into `agent_classification` on `classified`, and writes the parser error or thrown message into `agent_proposal` on `failed`; `agent_proposal` is text-only and is never auto-applied by the server (resolution actions remain user-initiated per slice 6), so an invalid label or hallucinated proposal stays recoverable via per-row Re-run. | `reconciliation-agent.test.ts`, `reconciliation-agent-route.test.ts`, `reconciliation-need.test.ts`, `reconciliation-needs-route.test.ts` | Requirement 10; A88; D139 | -| I115 | The agent capability CLI remains an adapter over Brunch capability contracts: JSONL calls validate explicit resource ids and schemas, mutating calls dispatch through server-owned capability handlers rather than ORM/route bypasses, `read` projections provide affordance hints without importing scenario briefs, and the probe runner exercises the surface only through a JSONL client. 
| planned: `capabilities/*.test.ts`, `agent-jsonl.test.ts`, `probe-runner.test.ts` | Requirements 42, 43; A89; D143, D147 | -| I116 | Each active/resumable chat has at most one open assistant/system-first frontier turn; user responses complete that turn through normalized proposal/response semantics, and strategy is chat-local process state rather than specification-level semantic truth. | planned: `chat-substrate.test.ts`, `turn-response-transition.test.ts`, `capabilities.test.ts` | Requirement 44; D138, D148 | -| I117 | Open proposal turns are stamped with the latest applied changeset id at creation and are conservatively stale when the specification's latest changeset advances before completion; stale proposals refresh/regenerate rather than applying against unknown graph state. | planned: `changeset.test.ts`, `turn-response-transition.test.ts`, `app.test.ts` | A92; D149 | -| I118 | Reconciliation/direct-edit cascade never infers affected endpoints from raw edge direction alone; it consults relation policy source-change / target-change behavior over incident accepted edges. | planned: `knowledge-relationship-policy.test.ts`, `edit-impact.test.ts`, `reconciliation-need.test.ts` | A93; D137, D146, D150 | -| I119 | Scenario-option candidate bundles can only become canonical by accepting a coherent bundle changeset; accepted-with-issues candidates must also create durable follow-on review/process debt so known weaknesses are not hidden. | planned: `scenario-runner.test.ts`, `turn-artifacts.test.ts`, `changeset.test.ts` | A90, A91; D151, D152 | +Detailed card styling, typography tokens, and legacy layout minutiae are implementation/design-system truth, not SPEC authority. ## Lexicon -### Core terms - | Term | Definition | -| ---- | ---------- | +| --- | --- | | **workspace** | The cwd-backed software context whose local `.brunch/` directory stores specifications and runtime state. 
| -| **prompt/context scenario substrate** | The server-side and test-harness foundation for loading markdown prompts, composing reusable doctrines, deriving typed intent-graph context packs, and running prompt probes before UI commitment. It is not the provider credential/setup system or shared production AI runtime. | -| **context pack** | A scenario-specific semantic briefing derived from intent graph truth, workflow state, provenance, unresolvedness, relation neighborhoods, and authority labels for one agent task. It is bounded and typed, not a raw graph or transcript dump. | -| **progressive checkability** | The discipline of representing intent items at the weakest useful witness level today — prose, example, counterexample, criterion, executable test, runtime invariant, state/transition property, or formal model — while preserving paths toward stronger witnesses where valuable. | -| **behavioral kernel** | Hidden interviewer / architect machinery that recognizes recurring correctness patterns such as lifecycle, containment, authority, concurrency, migration, and evidence, then elicits checkable artifacts without exposing formalism as product ceremony. | -| **scenario runner** | A lightweight pre-UI harness that runs a selected prompt scenario against fixtures, context packs, tools, and model settings, then records outputs for qualitative and structural review. Execution adapters translate this harness input into a concrete fake/model/harness call; they do not define Brunch semantics, credential UX, provider resolution, or mutation authority. | -| **agent mutation surface** | The Brunch-owned typed handler layer for any durable data mutation initiated by an agent, internal or external. It is the only write entry point agents may use; handlers own schemas, authority, replay behavior, and reconciliation/changeset-ledger semantics rather than letting agents call the ORM directly. 
| -| **agent capability contract** | A Brunch-owned typed contract addressable by agents or harnesses, with a stable id, description, input/output schemas, authority class, and replay policy. Read-only capabilities and mutating handlers can share this registry shape, but mutating contracts must route through the agent mutation surface. | -| **agent capability CLI** | A local machine-facing CLI adapter, initially a long-lived JSONL stdin/stdout process, that exposes Brunch-owned capability contracts to external agents and probe runners without defining its own product API or mutation authority. | -| **JSONL capability session** | The request/response transport between an external harness and `brunch agent`: every call includes an id, capability id, and explicit input resource identifiers; the process may keep DB/provider/in-flight runtime handles internally, but selected spec/chat/turn targets are not hidden ambient state. | -| **probe runner** | An external client of the agent capability CLI that supplies scenario briefs, calls an LLM-as-user, drives Brunch through capability calls, and writes generated transcript/spec/export/graph artifacts for human curation. It must not import Brunch DB or product handlers directly. | -| **tool adapter** | A provider- or harness-specific projection of an agent capability contract into a concrete tool format such as AI SDK tools, Pi tools, CLI/TUI commands, or a future external-agent API. Adapters translate shape and transport while preserving Brunch-owned authority semantics. | -| **authority class** | The contract metadata that says whether an agent capability is read-only, proposal-only, or commits durable product truth, and therefore which replay, reconciliation, and mutation boundaries govern it. | -| **AI runtime provider** | The shared server seam that resolves the configured LLM provider, model names, API-key source, and provider-specific options for interviewer and observer calls. 
| -| **provider credential status** | The app-visible setup state indicating whether a supported LLM key is available, which source supplied it, and what user action is needed, without exposing the secret value itself. | -| **XDG auth state** | User-scoped configuration / credential storage outside the project workspace, used for API keys entered through Brunch UI when implemented. | -| **workspace hygiene affordance** | A confirm-gated local repository action that helps keep generated Brunch state such as `.brunch/` out of version control without silently mutating the workspace. | -| **specification** | One elicitation run within a workspace. Browser routes, HTTP paths, shared transport contracts, and durable DB/storage should all use canonical `specification` terms. | -| **project** *(legacy term)* | A deprecated older name for a specification record. Remove it rather than preserving it as a long-term compatibility seam. | -| **workspace stream** | The merged center-column read model composed from active-path turns, anchored workflow facts, projected control cards, phase markers, and activity cards. | -| **specification runtime** | The live lifecycle owner for one specification: it reconciles durable truth into the current landing, owns in-flight interviewer / successor / capture orchestration, and rejects stale lifecycle outputs that routes must not treat as their own authority. | -| **turn** | One persisted authored conversational interaction, with typed offer/reply parts and parent linkage. Today the primary interview active path still provides the main lineage spine; the multi-chat substrate is moving turn ownership toward chat-scoped chains. Questions, review proposals, closure proposals, and future side-chat turns use this seam. | -| **turn kind** *(current internal seam)* | The current persisted implementation field on a turn (`question`, `kickoff`, `recovery`). 
It may help project control state today, but kickoff / recovery are product-level structural affordances rather than durable authored turn categories. | -| **turn card** | The user-facing rendering of a durable conversational turn inside the workspace stream. | -| **anchored workflow fact** | A durable non-turn record whose validity is anchored to one or more turns on the active path. `phaseOutcome` is the canonical current example. | -| **projected control card** | A workflow affordance derived from durable state rather than authored conversational content. Kickoff, recovery, and proceed / handoff controls live here. | -| **kickoff card** | A projected phase-entry control card that appears whenever an open phase is in entry-pending state and requires an explicit user action before substantive interviewer progression begins. | -| **frontier turn** | The single actionable durable conversational turn currently at the bottom of an open phase when the phase is in substantive elicitation rather than structural control. | -| **proposal turn** | An assistant/system-first frontier turn that offers a candidate bundle, graph-review finding, reconciliation suggestion, or other proposed action. It is not a semantic mutation until the user completes it, usually by accepting, revising, asking follow-up, deferring, regenerating, or rejecting. | -| **preface card** | A turn-internal artifact that presents provisional context from interviewer-invoked context gathering, rendered above a paired question card within the same turn. The observer captures from the whole turn (preface context + question + user response) as one validated unit rather than from the preface card alone. Available in any phase when the workspace directory is present. Implementation: `preface` / `PrefaceCard` / `present_preface` tool / `data-preface` part. Renders as a simple `bg-tint` rounded box with italic subdued text, not as a DrawerCard. 
| -| **question card** | A turn card that asks a structured interviewer question and expects a substantive user response. | -| **review turn** | A full-set requirements or criteria review interaction that offers a synthesized candidate list with stable reference codes, supports per-item commenting (inline comment toggle on each item) plus one optional global review note, and persists its own `reviewActions` / `reviewSet` metadata on the turn. On `request changes`, the successor review turn carries a revision card above the new review set. | -| **closure turn** | A durable proposal turn whose offer proposes closing a phase and whose reply explicitly accepts or rejects that proposal. Accepting it confirms the phase outcome on that same turn and advances the workflow into the next phase's projected entry state. | -| **recovery card** | A projected control card that appears whenever an open phase lacks a valid actionable frontier and offers the user a repair path without requiring a separately generated recovery turn. | -| **active turn** | The live frontier turn currently awaiting substantive user completion inside the workspace. Structural control cards such as kickoff and recovery are not active turns. | -| **answered-turn card** | The compact replay form of a completed elicitation turn, summarizing the offer, the structured response, and the turn-owned capture status. | -| **response note** | The single attached text field on a structured user response; it may explain selections, annotate a review, add missing context, or redirect the interviewer. | -| **grounding** | The first phase of a specification, aimed at establishing enough orientation to proceed into design. It is both the product term and the canonical workflow key. | -| **grounding strategy** | The method used to reach grounding sufficiency: elicitation-first (`greenfield`) or analysis-first (`brownfield`). 
| -| **delivery posture** | The second interview-orientation axis: `end-to-end build` for whole-system creation or reshaping, versus `incremental feature` for bounded change inside an existing or emerging system. | -| **grounding brief** | The concise visible summary surfaced on a preface card after context gathering during grounding. | -| **grounding sufficiency** | The threshold at which the interviewer has enough stable orientation to begin design. | -| **recognition-first elicitation** | The strategy of helping users converge by reacting to concrete possibilities, tradeoffs, examples, and ruled-out directions rather than requiring them to author intent from scratch. | -| **candidate direction** | An agent-synthesized possible specification direction offered when the user asks Brunch to fill in the rest, compare options, or react to proposed typologies. It includes rationale, implications, tradeoffs, likely generated knowledge, and what it rules out. | -| **candidate-spec set** | A turn-owned interviewer artifact in grounding or design that presents one or more candidate directions for reaction-driven refinement. It is analogous to a review set in being a persisted artifact on the turn, but it proposes possible directions rather than reviewing a synthesized inventory. | -| **candidate graph bundle** | The coherent commit/review unit produced by scenario-options flows: a named scenario with tradeoff profile, generated intent items and edges, required core items, optional/swappable items, known risks, graph-review findings, provenance labels, and commit preconditions. It should be accepted or revised as a bundle rather than item-by-item unless semantic closure can be proved. | -| **candidate-spec reaction** | The structured user response to a candidate-spec set, choosing whether to accept a direction, request refinement of one candidate, or regenerate a fresh set. It steers the next interview move without directly closing the phase. 
| -| **breadth skeleton** | A turn-owned interviewer artifact used during a progressive detail pass that summarizes the current broad-pass map, highlights areas that remain shallow, and offers explicit deepening targets. | -| **detail focus** | The selected area or lens for the next recursive follow-up pass. It scopes the next same-phase frontier turn without becoming a separate workflow state or durable topic tree. | -| **detail reaction** | The structured user response to a breadth skeleton, choosing whether to deepen a specific area now, continue broad coverage, or leave an area sufficient for now. | -| **progressive detail pass** | An interview shape that establishes broad structure first, then offers explicit `next level of detail` actions to deepen selected areas recursively rather than drilling to maximum depth immediately. | -| **review set** | A synthesized candidate list used in requirements or criteria review, presented with stable reference codes, supporting per-item commenting, and resolved through `accept review` or `request changes` with per-item comments plus one optional global review note. | -| **review revision** | A successor review set generated after `request changes`, carrying a revision card (changelog + version badge) as a turn-internal artifact above the new review set card. Prior revisions collapse to compact answered-turn summaries. | -| **revision card** | A turn-internal artifact on a review revision turn that summarizes what changed from the prior version and displays a version badge (v2, v3, etc.), paralleling how preface cards sit above question cards. | -| **per-item comment** | An inline comment placed on a specific item in a review set via a comment toggle, forming part of the structured change-request payload alongside the optional global review note. 
| -| **accepted review set** | The terminal accepted review output for a review phase; this is the authoritative carry-forward set for later review and export seams, and any accepted requirement / criterion items derive their authority from membership in this set. | -| **phase entry state** | The workspace state shown when a projected kickoff card is the current bottom-of-phase affordance. | -| **landing reconciliation** | The pure derivation from durable specification snapshot into the one truthful visible bottom artifact for hydration/restart, plus any pending capture backlog the runtime must re-seed. | -| **observer capture backlog** | The ephemeral specification-scoped queue of answered turns that still need deferred observer capture. It is re-derived from durable turns with a persisted response but no turn-owned observer result, then drained by the runtime lifecycle once a successor frontier exists. | -| **phase handoff state** | The workspace state shown when a phase is complete and a projected handoff / completion control card is the current bottom-of-phase affordance. | -| **control marker** | A transcript-visible workspace event such as interview start, resume, or confirmation that is not rendered as a normal user chat bubble. | -| **phase marker** | A projected boundary annotation in the workspace stream, such as phase start or phase closed, derived from workflow position or anchored workflow facts. | -| **turn capture status** | The per-turn state describing what the observer has captured already, is still capturing, or failed to capture from that answered turn. | -| **active path** | The trusted chain from HEAD to root in the primary interview chat. Side-chats are sibling chat chains under the same specification, not branches of this active path. | -| **phase / mode** | One workflow stage: `grounding` *(label: Grounding)*, `design` *(label: Elicitation)*, `requirements` *(label: Requirements)*, or `criteria` *(label: Acceptance Criteria)*. 
| +| **specification** | One elicitation run within a workspace. Canonical product term for what older code may still call a project. | +| **project** | Deprecated older name for a specification record; remove rather than preserve as long-term compatibility language. | +| **intent spec** | A specification optimized for preserving and validating meaning rather than sequencing downstream work. | +| **planning spec** | A specification optimized for downstream work sequencing; in Brunch it should be a projection from intent truth, not the source artifact. | +| **intent graph** | Canonical semantic substrate: typed intent items, intent edges, examples/counterexamples, validation status, and semantic mutation state. | +| **intent item** | One durable typed semantic unit in the intent graph. Current implementation may still persist `knowledge_item`. | +| **intent edge** | One durable typed semantic relation between intent items. Current implementation may still persist `knowledge_edge`. | +| **reconciliation need** | Durable process debt saying existing intent truth may require renewed judgment because related truth changed. Not an intent edge. | +| **changeset** | Future canonical term for one submitted semantic mutation bundle against the intent graph. Supersedes patch. | +| **change** | One atomic semantic mutation inside a changeset. Supersedes patch_change. | +| **chat** | A conversation container inside one specification; primary interview, side-chats, reconciliation chats, and review discussions may own turns without owning semantic truth directly. | +| **turn** | One persisted authored conversational interaction with typed offer/reply parts and parent linkage. | +| **frontier turn** | The single actionable durable conversational turn currently awaiting user completion. | +| **proposal turn** | An assistant/system-first frontier turn offering a candidate bundle, graph-review finding, reconciliation suggestion, or other proposed action. 
It is not semantic truth until accepted. | +| **workspace stream** | The merged center-column read model composed from active-path turns, anchored workflow facts, projected controls, phase markers, and activity cards. | +| **projected control card** | A workflow affordance derived from durable state rather than authored conversational content. | +| **preface card** | A turn-internal artifact presenting provisional context from context gathering, paired with a question card and captured only as part of the validated whole turn. | +| **review set** | A synthesized candidate requirements or criteria list with stable reference codes and per-item/full-set review actions. | | **phase outcome** | Durable closure artifact for a phase, including summary and closure basis. | -| **closure basis** | Whether a confirmed phase close came from interviewer recommendation or explicit user-forced closure. | | **closeability** | Deterministic minimum bar for whether the user may close a phase now. | -| **readiness band** | Coarse descriptive signal (`low`, `medium`, `high`) separate from closeability. | -| **review action** | The explicit submit path on a review turn: `accept review` or `request changes`; the action gives any attached review note its meaning. | -| **exploration knowledge** | Durable knowledge captured during grounding or elicitation: `goal`, `term`, `context`, `constraint`, `decision`, and `assumption`. | -| **context** | Descriptive situational truth, actors, workflows, repo facts, or bounded area under discussion that would remain true even if the specification paused tomorrow. Promote context when it carries stronger semantics: success condition -> requirement / invariant, solution boundary -> constraint, uncertain material belief -> assumption, chosen alternative -> decision. | -| **constraint** | A durable boundary on acceptable scope or solution space. 
Planned subtypes include `non_goal`, `scope`, `technical`, `policy`, `resource`, `compatibility`, and `environmental`. | -| **non-goal** | A `constraint` subtype expressing an explicit exclusion from the current specification scope. | -| **decision** | A chosen direction among plausible alternatives, with durable consequences for future design, implementation, or interpretation. Not every user answer or option selection is a decision. | -| **assumption** | A durable material belief supporting a direction or decision that could later prove false. | -| **intent graph** | Canonical product term for Brunch's semantic substrate: typed intent items, intent edges, examples / counterexamples, validation status, and semantic mutation state. Chat and graph views are projections over this truth; reconciliation needs are process state attached to the graph, not intent content. Supersedes `knowledge graph` as future-facing product vocabulary. | -| **intent item** | Canonical product term for one durable typed semantic unit in the intent graph. Current schema/code may still persist these as `knowledge_item` rows during transition. Use `knowledge item` only when referring to current implementation names. | -| **intent edge** | Canonical product term for one durable typed semantic relation between intent items. Current schema/code may still persist these as `knowledge_edge` rows during transition. Use `knowledge edge` only when referring to current implementation names. | -| **knowledge item / knowledge edge** | Legacy implementation names for current persistence/API records backing intent items and intent edges. Avoid these in new product concepts, capability contracts, and operation ids unless referring to existing code or database schema. 
| -| **progressive checkability** | The stance that each intent item should receive the weakest sufficient witness: human review, concrete example, counterexample, regression test, runtime contract, state-machine rule, invariant, proof obligation, or explicit unresolved ambiguity. | -| **property** *(candidate ontology)* | A normalized intent primitive that requirements could commit to and criteria could observe. It is a design candidate, not a committed storage or UI surface. | -| **invariant** *(planned ontology kind)* | A property that must remain true across relevant states, transitions, executions, versions, or semantic revisions. | -| **example** *(planned ontology kind)* | A concrete scenario, trace, input/output, edge case, approved example, rejected example, not-relevant label, or counterexample that disambiguates or witnesses intent. Expected subtypes include positive, negative / counterexample, edge-case, and not-relevant. | -| **edge-local neighborhood** | The focused relation context around one intent item: incoming and outgoing intent edges with nearby item summaries, support strength, and relation semantics. Used by interviewer / observer prompts and graph refinement instead of dumping all grouped knowledge. | -| **behavioral kernel** | Reusable interviewer machinery for one class of latent correctness question, such as state/lifecycle, containment, authority, concurrency, transactionality, migration, or evidence. Kernels are not user-facing formalism by default. | -| **intent spec** | The complementary framing to a planning spec: a specification optimized for preserving and validating meaning rather than sequencing downstream work. Carries typed intent items, examples and counterexamples, witness strength, unresolved ambiguity, and validation status. The intent graph is the durable substrate; an intent spec is the human-facing projection of that graph. Contrast with `planning spec`. 
| -| **planning spec** | A specification optimized for downstream work sequencing — what to build, what scope is in or out, which slices follow. Brunch's product direction is for planning to remain a useful projection from the intent graph rather than the source artifact. | -| **checkability** | A typed field on an intent item describing the strongest oracle that currently witnesses it, drawn from the progressive-checkability ladder: `human_review` / `example` / `counterexample` / `regression_test` / `runtime_contract` / `state_machine_rule` / `invariant` / `proof_obligation` / `unresolved_ambiguity`. The discipline is `progressive checkability`; the field is `checkability`. | -| **witness strength** | The breadth of an intent item's oracle coverage, distinct from which oracle exists. "Checked on three examples" and "proved for all reachable states" can both be `checkability: invariant`, but they have very different `strength`. The pairing forces honesty about what is actually verified. | -| **formalization candidate** | A Brunch-internal intent item that is worth promoting along the progressive-checkability ladder. Critical invariants are formalization candidates: each one states a property currently witnessed by a regression test, with stronger oracles (state-machine model, runtime contract, proof obligation) as deliberate future moves rather than implicit expectations. | -| **disambiguating example** | An `example` whose primary purpose is to settle ambiguity between plausible interpretations of a requirement, invariant, or decision. Linked through the `disambiguates` relation. Generalizes the TiCoder move beyond test cases: the interviewer generates cases where interpretations diverge, and the user's classification settles the meaning. | -| **spec drift** | A divergence between an intent item's recorded meaning and the artifact (criterion, generated requirement, candidate spec, export bundle, or downstream implementation behavior) meant to satisfy it. 
Surfaced in human terms — "original intent vs generated behavior vs potential mismatch" — so the user can validate meaning at the point where it could have changed, rather than after the divergence has been laundered into a final document. | -| **relation family** | One of five semantic groupings that organize the relation kinds in the intent graph: `justification`, `dependency`, `boundary`, `refinement`, and `verification`. Distinct from the relation `kind` itself; a single kind belongs to exactly one family. Drives prompt grouping, default policy, and observer classification heuristics. | -| **relation policy** | The per-relation, per-axis registry that decides whether each edge participates in `visible`, `cascade`, `export_trace`, `staleness`, `reconciliation`, `criteria_help`, or `weak_suggestion` capabilities. Replaces the implicit assumption that every edge is equally authoritative. Gated by edge `support` (`explicit` / `strong_inference` / `weak_candidate`) and `status` (`proposed` / `accepted` / `rejected` / `stale`). It also owns operational directionality: source-change and target-change behavior must be explicit rather than inferred from raw edge direction. | -| **graph-review finding** | A turn-owned structured artifact produced by graph review. It may later lead to a changeset if accepted, but it is not itself semantic truth or process debt unless represented through a follow-on turn, changeset, or reconciliation need. | -| **structured list** | The first-ship graph-view layout: kind-grouped item rows with a relations footer of Outgoing / Incoming relation chips. Item-first; relationships visible inline. It currently renders the whole-spec entity set because D129 ships the whole-spec fetch first; the intended default becomes active-path items over whole-spec data once the active-path membership seam and `Show all` toggle land. 
| -| **spatial canvas** | A deferred future graph-view layout where intent items render as nodes with visible edges in a 2D scene. Shares the projection seam and intent contract of D128 with the structured-list layout. | -| **relation chip** | A compact UI element representing one intent-edge endpoint inside a relations footer, carrying the target item's reference code and content snippet. Hover reveals a preview card; click navigates to the target item via hash anchor. | -| **relations footer** | The grouped Outgoing / Incoming subsections beneath an item row in the structured list, listing relation chips for that item's incoming and outgoing edges. Soft-truncates at 6 chips per direction with an inline `+N more` expander; collapses to nothing when an item has zero edges. | -| **action rail** | The per-row right-aligned slot in graph view's structured list reserved for node-level action affordances. Actions emit intents into the existing workspace lifecycle rather than owning their own state. The first ship reserves the slot with one disabled `chat-with` placeholder. | -| **secondary thread** | Modal revisit conversation anchored to a primary-path turn and used to resolve cascade implications. | -| **needs-revisit** | Flag meaning an item is affected by upstream invalidation and must be explicitly resolved before the specification is whole again. | -| **chat** *(planned persistence seam)* | A conversation container inside one specification. The primary interview, side-chats, reconciliation chats, verifier feedback, and review discussions may all own turns without owning semantic truth directly. Phase one adds the table and transitional pointers before making chat ownership canonical. | -| **changeset** *(future persistence seam)* | Canonical term for one submitted semantic mutation bundle against the intent graph. It records what changed and why, separate from the conversational turn that may have initiated it. 
A changeset is the smallest atomic unit that preserves graph coherence; proposals/findings become changesets only when accepted or otherwise acted on. Supersedes `patch` as the future-facing schema/contract noun. | -| **change** *(future persistence seam)* | Canonical term for one atomic semantic mutation inside a changeset, such as `intentItem.create`, `intentItem.updateContent`, `intentEdge.create`, or `intentEdge.delete`. Supersedes `patch_change`. | -| **patch / patch_change** | Historical design-doc vocabulary for changeset/change. Avoid in new schema, capability contracts, and operation ids unless referring to older docs or source-control-style analogy. | -| **reconciliation need** *(planned persistence seam)* | Durable semantic debt saying existing intent-graph truth may require renewed judgment because an upstream item, relation, verifier, contradiction, or historical premise changed. Phase one stores directed item-to-item needs with narrow kind/status and provenance placeholders; later phases may add relation targets and changeset-backed cause/resolution. It is process state, not an intent edge or intent content. | -| **DrawerCard** | Shared card primitive with header/summary/children slots that supports static, summary-peeking, and toggleable (minimized ↔ maximized) render modes. A `locked` prop disables toggle for controlled-state cards. | -| **ChatScroll** | Composite scroll container that wires Radix ScrollArea (custom scrollbar) with `useStickToBottom` (auto-scroll-to-bottom + scroll-down indicator). Used for the center pane transcript. | -| **phase stepper** | The vertical timeline navigation in the left sidebar showing phases as sequential steps with connecting line, status, readiness, and turn count. | -| **phase addressability** | The ability to deep-link, gate, and focus interview phases through router state even when the center pane renders one continuous sectioned workspace. 
| -| **knowledge group** | A display-level grouping of knowledge kinds for the sidebar, defined by a hard-coded registry that maps kinds to group labels and visibility. | -| **output view** | The terminal route available when all phases are closed, providing specification summary and markdown export. Not a workflow phase. | -| **activity card** | A projected runtime or replay artifact adjacent to a turn or phase boundary, such as visible generation state, coarse interviewer activity summary, or trailing observer status. It is not a branch-bearing conversational turn. | -| **activity placeholder** | The compact replayable presentation of an activity card between turn cards, showing elapsed thinking time and a coarse tool-use summary for the interviewer without exposing hidden reasoning or raw tool payloads. | -| **phase section header** | A projected, non-durable artifact at the top of each phase section that states the phase purpose and what kinds of knowledge are captured there. Re-projects from workflow state on hydration. | -| **grounding question** | A free-text-first question format used during grounding that presents the question, a why explanation, and a response note field without requiring option selections. Distinct from the option-selection format used in elicitation. | -| **turn-internal artifact** | An assistant-part artifact rendered as its own visual card within a turn but sharing the turn's single response submission. Preface cards and revision cards are turn-internal artifacts that render above their paired question or review set card. | -| **query domain** | An independently invalidable TanStack Query scope within a specification. The current live ownership target is one specification bundle domain (`workflow`, `landing`, `turns`) plus a separate entities domain; finer splits should follow real server ownership boundaries rather than outrunning them. 
| - -### Boundary terms - -| Term | Definition | -| -------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------- | -| **greenfield** | A grounding strategy for a new concept or under-specified area where the system grounds primarily through elicitation. | -| **brownfield** | A grounding strategy for work inside an existing codebase where the system grounds through analysis, then interrogation. | -| **end-to-end build** | A delivery posture where the specification is shaping a whole system, workflow, or major slice from entry to outcome. | -| **incremental feature** | A delivery posture where the specification is shaping a bounded change inside an existing or partially established system. | -| **context-gathering capability** | An interviewer-invoked capability such as workspace analysis or future web research that gathers provisional orientation for the next move. | -| **BrunchUIMessage** | Typed UI message contract spanning validation, persistence, SSE streaming, and hydration. | -| **Data Part** | Typed custom message part used for structured input and domain-specific assistant output. | -| **context builder** | Typed projection from specification state into inference context for interviewer, observer, or closure logic. | -| **walkthrough scenario** | Named trusted fixture scenario used to seed a resumable manual-inspection workspace. | +| **readiness band** | Coarse descriptive signal separate from closeability. | +| **exploration knowledge** | Durable grounding/design knowledge: `goal`, `term`, `context`, `constraint`, `decision`, and `assumption`. | +| **constraint** | A durable boundary on acceptable scope or solution space; `non-goal` is a subtype. | +| **decision** | A chosen direction among plausible alternatives, with durable consequences. | +| **assumption** | A material belief supporting a direction or decision that could later prove false. 
| +| **invariant** | Planned ontology kind for a property that must remain true across relevant states, transitions, executions, versions, or semantic revisions. | +| **example** | Planned ontology kind for concrete positive, negative/counterexample, edge-case, or not-relevant cases that disambiguate intent. | +| **progressive checkability** | Represent each intent item at the weakest useful witness level today while preserving paths toward stronger witnesses. | +| **checkability** | A typed field describing the strongest oracle currently witnessing an intent item. | +| **witness strength** | The breadth/confidence of an item's oracle coverage, distinct from the oracle kind. | +| **relation policy** | Per-relation registry deciding display, cascade, export, staleness, reconciliation, criteria-help, weak-suggestion participation, support/status semantics, and operational directionality. | +| **context pack** | A scenario-specific semantic briefing derived from intent graph truth, workflow state, provenance, unresolvedness, relation neighborhoods, and authority labels. | +| **prompt/context scenario substrate** | Foundation for markdown prompts, reusable doctrines, typed context packs, and repeatable prompt probes before UI commitment. | +| **agent mutation surface** | Brunch-owned typed handler layer for any durable data mutation initiated by an agent. | +| **agent capability contract** | Stable, typed read or mutation contract exposed to agents/harnesses with authority and replay metadata. | +| **agent capability CLI** | Local JSONL adapter exposing Brunch capability contracts without defining its own product API or mutation authority. | +| **AI runtime provider** | Shared server seam resolving configured LLM provider, model names, API-key source, and provider-specific options. | +| **XDG auth state** | User-scoped credential/config storage outside the project workspace. 
| +| **graph-review finding** | A turn-owned structured critique artifact; not itself semantic truth or reconciliation debt. | +| **candidate graph bundle** | Coherent scenario-options commit/review unit with tradeoffs, generated intent items/edges, provenance, risks, and preconditions. | +| **greenfield / brownfield** | Grounding strategies for new concepts vs existing-codebase work. | +| **end-to-end build / incremental feature** | Delivery postures for whole-system shaping vs bounded changes. | +| **output view** | Terminal route available when phases are closed; not a workflow phase. | ## Verification Design ### Verification Commands -| Step | Check | Command | -| ---- | ----------------- | ------------------- | -| 1 | Formatting | `npm run fmt:check` | -| 2 | Lint + type check | `npm run lint` | -| 3 | Unit tests | `npm run test` | -| 4 | Build | `npm run build` | -| all | Full gate | `npm run verify` | +| Step | Check | Command | +| --- | --- | --- | +| 1 | Formatting | `npm run fmt:check` | +| 2 | Lint + type check | `npm run lint` | +| 3 | Unit tests | `npm run test` | +| 4 | Build | `npm run build` | +| all | Full gate | `npm run verify` | ### Verification Policy -Every meaningful code change should pass `npm run fix` in the inner loop and `npm run verify` before commit. Slices that touch the user-facing boundary should also stay manually walkthrough-able via the local app. +Every meaningful code change should pass `npm run fix` in the inner loop and `npm run verify` before commit. Slices that touch user-facing boundaries should also stay manually walkthrough-able via the local app. ### Verification Stance -- Verification is first-class work; this wave stays **manual-heavy by deliberate choice**, not by accident. +- Verification is first-class work; current product work is manual-heavy by deliberate choice, not accident. - **Inner loop** proves structural validity, boundary safety, and non-destructive behavior. 
-- **Middle loop** proves replay, refresh-boundary ownership, and explicit state projection where cheap automated checks can remove bad degrees of freedom. -- **Outer loop** is the authority for brownfield grounding quality, transcript legibility, waiting-state clarity, and phase-layout differentiation. -- Outer-loop UI review uses a **dramaturgical see-and-inspect** posture: judge whether the product stages its state transitions legibly for a human, not just whether bytes round-trip. - -### Diagnostic Assessment - -| Dimension | Score | Notes | Change trigger | -| --------------- | ------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | --------------------------------------------------------------------------------------------------------- | -| Observability | partial | Persistence, DB state, TypeScript seed builders, and route seams are visible in text, but the most important failures in this wave still present as browser-visible transcript disappearance, waiting-state ambiguity, and layout legibility issues. | Promote instrumentation if manual browser inspection cannot explain refresh or lock behavior confidently. | -| Reproducibility | partial | TypeScript scenario builders and direct observer probes give a strong base, but brownfield kickoff quality still varies by repo shape and live refresh behavior is not yet represented by a canonical replay matrix. | Promote a stronger corpus or replay harness if ad hoc brownfield/manual checks stop being trustworthy. | -| Controllability | partial | The agent can iterate on fixtures, stories, and structural tests autonomously, but the core acceptance signals for this wave remain human judgment calls. | Raise controllability only if manual review becomes the bottleneck or repeated ambiguity blocks progress. 
| +- **Middle loop** proves replay, refresh-boundary ownership, explicit state projection, and corpus/golden stability where cheap automated checks remove bad degrees of freedom. +- **Outer loop** is the authority for brownfield grounding quality, transcript legibility, waiting-state clarity, graph/workspace staging, and qualitative generation trust. +- LLM-heavy features need layered oracles: schema/contract tests inside, fixture/golden/corpus probes in the middle, and human review outside. ### Oracle Strategy by Loop Tier -| Tier | Oracle families | What they prove | Main targets | -| ------ | ------------------------------------------------------------------------------------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------- | -| Inner | Schema validation, type-aware linting, focused unit/integration tests, negative-space regressions | Boundaries remain type-safe; persistence and transport seams do not silently collapse; obvious bad failures are caught cheaply. | I4, I17, I24, I44, I48, I54, I72, I87, I100, I101, I102, I103 | -| Middle | Round-trip / replay oracles for seeded projects, hydration, export, and resume | Seeded or persisted state can be loaded, projected, re-rendered, and exported without losing required semantic markers. | Requirements 13, 14, 15; I24, I44, I100, I103 | -| Middle | Route/query ownership integration oracles | Observer updates and response mutations refresh only their owned surfaces instead of tearing down unrelated transcript state. | Requirements 5, 7, 14; A20, A64; I24, I54, I102 | -| Middle | Explicit state-model oracles for in-flight UI states | Every major in-flight mode is named, projectable, and visibly representable instead of collapsing into one opaque loading bit. 
| Requirement 5; I24, I44 | -| Outer | Fixture-backed manual walkthroughs on seeded scenarios | Walkthrough fixtures are useful enough to inspect phase transitions, export output, resume behavior, and missing-view discovery. | Requirements 13, 14, 15; I100, I103 | -| Outer | Brownfield kickoff walkthroughs on real repos, evaluated qualitatively | Kickoff yields durable useful knowledge and a grounded first question for feature-area work, without needing a fully automated quality score. | Requirements 3, 16; A63; I101 | -| Outer | Dramaturgical story and transcript review | Phase differentiation, transcript artifact legibility, and waiting-state clarity are judged as staged user experience rather than just structural output. | Requirement 5; A15, A51, A53, A54 | -| Inner | Deterministic state-machine tests for `reconciliation_need.agent_status` transitions with a stubbed classifier function | The queue's lifecycle (`null → queued → classifying → classified` / `failed`) is enforced in code, classification rows never get orphaned, and `agent_classification` only takes one of three labels. The LLM call is the only swap point. | Requirement 10; A88; D139; I114 | -| Middle | Golden-fixture corpus of `(source change, target content, relation kind) → expected classification` tuples, evaluated against a non-mocked classifier behind a recorded-or-live model adapter | Classifier output is regressable run-to-run; structural failures (wrong label vocabulary, missing proposal on `auto-edit`, hallucinated proposal on `auto-confirm`) are caught before manual review; intentional drift is recorded as an updated golden. Seed corpus seeded by slice 4 lives at `src/server/__corpus__/reconciliation-classifier-seeds.json`; the runner that exercises it against the live AI SDK adapter is its own slice. 
| Requirement 10; A48, A88; I114 | -| Outer | Manual cascade walkthroughs on dense, real specifications (5+ downstream targets, mixed `supersedes` / `needs_confirmation`), comparing agent-grouped vs flat-list resolution | A88 is validated qualitatively: do users actually skip flat-list needs without resolving, and does agent grouping change that? The substantive note must read as useful context, not noise. | Requirement 10; A48, A88 | - -### Design Notes - -- **Legible replay fidelity beats exact replay fidelity for now** — hydrated transcripts may use placeholders or summary markers to indicate that reasoning or tool activity happened at a point in the conversation, even if the full original content is not persisted. -- **Turn-first replay now beats message-first replay** — for grounding/design, the replay unit should trend toward completed turns plus one live unresolved turn, not alternating assistant/user chat bubbles and stream markers. -- **Brownfield kickoff has a deliberately modest proof bar** — this wave only needs durable useful knowledge plus a grounded first question, not a fully proven grounding bundle before design can proceed. -- **Waiting states should become an explicit vocabulary in code** — the user-facing contract is that each major in-flight mode is visibly represented; deep lock/wait introspection is diagnostic scaffolding, not yet a product requirement. -- **Manual verification is intentionally lightweight** — no heavyweight scripted walkthrough protocol yet; use seeded scenarios and see-and-inspect review rather than bureaucratic checklists. -- **Kickoff strategy comparison stays qualitative unless proven insufficient** — if the brownfield mode fork remains ambiguous after manual repo comparisons, promote that question to a spike with a stronger comparison harness. 
-- **Graph-view fixture matrix is project-shareable infrastructure** — named scenario builders (`emptySpec`, `singleItemNoEdges`, `crossPhaseDecisionLink`, `denseGoalAnchor`, `activePathDivergence`, plus an explicit `compareLowVsHighEdgeDensity` for A70) underwrite both inner-loop component tests and outer-loop manual walkthroughs. Reusable beyond graph view as similar visualization slices land. -- **The V3.1 reconciliation classifier is verified as a structural state machine inside, a regressable corpus in the middle, and a qualitative cascade walkthrough outside** — the inside ring proves the queue can never get stuck or label-corrupt regardless of what the LLM returns; the middle ring proves classification stays stable as the prompt evolves; the outside ring is the only ring that can answer A88 ("does grouping actually help"). Each ring catches a different failure class — do not collapse them into one. The `agent_proposal` field is text-only and never auto-applied without an explicit user click, so a hallucinated proposal is recoverable by the user clicking Skip; that recoverability is what lets the inner/middle rings stay shallow. -- **The cascade-edit cards (snapshots + inline target edit) are inner-loop only** — Cards 1-3 in `memory/CARDS.md` are pure structural plumbing over settled seams (schema column, prop drilling, component composition over `` and the existing edit-route). Their failure modes are caught by extending existing route and component tests; no middle/outer oracle is added. If the inline-edit cascade re-entry surfaces unstable behavior under manual walkthrough, promote a route/query ownership integration oracle for the Pending review surface. +| Tier | Oracle families | What they prove | Main targets | +| --- | --- | --- | --- | +| Inner | Schema validation, type-aware linting, focused unit/integration tests, negative-space regressions | Boundaries remain type-safe; persistence/transport seams do not collapse. 
| I4, I17, I24, I44, I48, I54, I72, I87, I100–I119 | +| Middle | Round-trip/replay oracles for seeded projects, hydration, export, and resume | Seeded or persisted state can be loaded, projected, re-rendered, and exported without semantic loss. | Requirements 13–15; I24, I44, I100 | +| Middle | Route/query ownership and state-model oracles | Mutations refresh owned surfaces only; major in-flight modes are named and projectable. | Requirements 5, 7, 14; A20, A64; I24, I108, I110 | +| Middle | Prompt/context golden and classifier corpora | Prompt/context output remains inspectable and regressable as prompts evolve. | Requirements 40, 41; A84, A88; I112, I114 | +| Outer | Fixture-backed manual walkthroughs | Phase transitions, export, resume, graph view, and waiting states feel legible. | Requirements 5, 13–15, 33 | +| Outer | Brownfield and scenario-quality review | Generated questions/bundles are useful, grounded, honest about tradeoffs, and not overconfident. | Requirements 3, 16, 20; A67, A68, A90, A91 | +| Outer | Dense cascade/reconciliation walkthroughs | Users can understand and resolve downstream graph impact without skipping necessary judgment. 
| A48, A88, I113, I114 | ### Acknowledged Blind Spots -| Blind spot | Reason | Current mitigation | Revisit trigger | -| ------------------------------------------------------------------- | -------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------ | ----------------------------------------------------------------------------------------- | -| Qualitative interviewer and kickoff quality across many repo shapes | Chosen manual-first; no broad brownfield corpus or score harness yet | Manual brownfield walkthroughs on representative repos | Brownfield regressions recur or kickoff strategy debates cannot be resolved qualitatively | -| Transcript trust and readability after hydration | Exact replay of all reasoning/tool detail is intentionally deferred | Legible placeholders/summary markers plus manual transcript review | Users still cannot understand what happened after replay despite visible markers | -| Actual lock/wait causality in the UI | Instrumentation is not yet the primary investment | Require explicit visible in-flight states and inspect browser behavior manually | Manual inspection cannot explain a repeated perceived lock or disappearance bug | -| Story quality and phase differentiation | Design quality is not executable in a trustworthy way yet | Story variants reviewed against seeded walkthrough findings | Story/app drift grows or design disagreement blocks implementation | -| Observer latency and layout refresh freshness | No explicit latency budget or perf gate yet | Runtime observation during manual sessions | A20 shows recurring latency or coarse refresh pain | -| Revisit UX and secondary-thread adequacy | That seam is still future work | Keep structural coverage on graph/persistence seams only | Revisit work moves from horizon into the active frontier | -| Real browser scroll behavior under JSDOM | 
`scrollIntoView` is shimmed in JSDOM — component tests cannot prove real scroll happens after chip click | Outer-loop manual walkthrough explicitly checks scroll-into-view + highlight on chip click | Reports of chip click "doing nothing" or scroll behaving inconsistently across browsers | -| Hover-card timing and popover positioning feel | Animation delay and placement perception are not text-observable | Outer-loop manual review with shadcn defaults (~300ms open, ~150ms close) | Users report flicker, misplaced popovers, or unintended dismissal | -| Mobile / touch / keyboard-only ergonomics for relation chips | HoverCard pattern is mouse-biased; long-press fallback is designed but has no automated test surface | Manual walkthrough on touch device once per slice family | Touch users report missing or undiscoverable preview | -| Performance under large intent graphs | No render or memory budget yet; relation-first observer expansion (A66) will increase edge density | Defer until specs with hundreds of items + dense edges become common | Render lag visible on representative manual walkthroughs | -| Cross-session "Back to chat" target persistence | sessionStorage clears on tab close so the deep-linked entry to graph view has no remembered chat origin | Falls back to current reachable phase via workflow state | Users report "Back to chat" landing in the wrong phase after a fresh tab | -| Visual regression infrastructure | Manual-heavy stance accepted across the project; no Chromatic/Playwright-screenshot seam yet | Outer-loop manual walkthrough on the named graph-view fixture scenarios | Three or more visual regressions caught only after merge | -| V3.1 classifier label correctness on substantive items | The single highest-stakes failure mode (substantive misclassified as auto-confirm leading to silent stale truth) is not testable without ground-truth corpus, which does not yet exist | `agent_proposal` is never auto-applied; bulk Confirm-all surfaces what would be 
confirmed before the user commits; user can fall back to per-row Resolve at any time | A walkthrough produces a substantive item the agent labelled auto-confirm, OR a user reports clearing a need they didn't actually understand | -| V3.1 classifier multi-run determinism | First cut runs the LLM single-shot; no temperature pinning, no multi-run variance characterization, no confidence scoring | Re-run agent button exists; classification is per-need so a flaky run can be retried in place | Two consecutive Run-agent passes on the same fixture produce different labels for the same need | -| V3.1 cross-need correlation | Each need is classified independently; the agent cannot see that two needs share a root concept and should be grouped | Needs stay in the same Pending review section so the user sees them adjacent | Users repeatedly merge or hand-group classifier output, signalling missing cross-need context | -| Re-entrant cascade behavior under inline target edit | Card 3's Edit target may itself trigger a hard apply that opens new needs; the test surface for re-entry is shallow until manually walked | Inner-loop integration test covers one re-entry depth; query invalidation reuses the existing needs query | Manual walkthrough shows new needs failing to appear, double-render, or stale Resolve buttons after inline-edit re-entry | - -### Current Coverage - -| File | Protects | -| ----------------------------------------------------------------- | ------------------------------ | -| `db.test.ts` | I48, I72, I101 | -| `core.test.ts` | I48 | -| `app.test.ts` | I24, I54, I72, I87, I101, I104 | -| `context.test.ts` | I44, I54 | -| `observer.test.ts` | I48, I54 | -| `parts.test.ts` | I17, I101 | -| `project-state-turn.test.ts` | I24, I44, I87, I101 | -| `task.test.tsx` | I24 | -| `EntitySidebar.test.tsx` | I48 | -| `InterviewView.test.tsx` | I24, I44, I48, I54, I72 | -| `-interview-controller.test.tsx` | I24, I105 | -| `-workspace-stream-projector.test.ts` | I24 | -| 
`transcript-parity.test.tsx` | I24 | -| `interview.test.ts` | I87, I101 | -| `turn-artifacts.test.ts` | I104 | -| `phase-close.test.ts` | I72 | -| `router.test.tsx` | I102 | -| `GraphView.test.tsx` | I48, I102 | -| `project.test.ts` / `launcher.test.ts` / `runtime-config.test.ts` | I4, I100 | -| `corpus.test.ts` / `walkthrough.test.ts` / `seed.test.ts` | I103 | - -## Acceptance Criteria +| Blind spot | Current mitigation | Revisit trigger | +| --- | --- | --- | +| Qualitative interviewer and kickoff quality across many repo shapes | Manual brownfield walkthroughs on representative repos. | Brownfield regressions recur or kickoff strategy debates cannot resolve qualitatively. | +| Transcript trust after hydration | Legible placeholders/summaries plus manual transcript review. | Users cannot understand what happened after replay. | +| UI lock/wait causality | Explicit visible in-flight states and manual browser inspection. | Manual inspection cannot explain repeated lock/disappearance bugs. | +| Story quality and phase differentiation | Story variants reviewed against seeded walkthroughs. | Story/app drift grows or design disagreement blocks implementation. | +| Observer latency and layout refresh freshness | Runtime observation during manual sessions. | A20 shows recurring latency or coarse refresh pain. | +| Revisit/reconciliation UX adequacy | Structural coverage on graph/persistence seams plus manual cascade walkthroughs. | Revisit work moves active or users skip unresolved needs. | +| Real browser scroll/hover/touch behavior | Outer-loop manual graph-view walkthroughs. | Users report chip navigation/preview failures. | +| Performance under large intent graphs | Defer explicit budget until dense specs are common. | Render lag visible on representative walkthroughs. | +| Visual regression infrastructure | Manual-heavy stance accepted. | Three or more visual regressions are caught only after merge. 
| +| LLM classifier correctness and determinism | Proposals never auto-apply; re-run exists; corpora/goldens grow from failures. | Substantive items are mislabeled as auto-confirm or repeated runs diverge materially. | + +### Acceptance Criteria 1. `npx brunch` can start from a workspace directory with local-first persistence in `.brunch/`. 2. Greenfield and brownfield grounding both work, with brownfield able to start from workspace analysis and converge into the same grounding phase purpose. 3. Structured turns support rich responses without losing semantic fidelity. -4. The knowledge layer stays visible, typed, and linked through graph relationships. +4. The intent layer stays visible, typed, and linked through graph relationships. 5. Phase closeability, readiness, and closure provenance stay legible to the user. 6. Requirements and criteria review remain explicit, lightweight, durable at the turn level, and export-relevant. -7. Revisit can invalidate knowledge, surface cascade through the `reconciliation_need` queue, and re-resolve through the patch list — no separate modal or secondary-thread surface. -8. The routed UI stays stable across dashboard, phase views, sidebar knowledge, and graph view. +7. Revisit can invalidate intent, surface cascade through the `reconciliation_need` queue, and re-resolve without a separate modal-only substrate. +8. The routed UI stays stable across dashboard, phase views, sidebar intent graph, and graph view. 9. Resume works from persisted state. 10. The verification gate passes. -11. Grounding/design use workspace-owned turn cards for substantive elicitation, requirements/criteria use full-set review turns, and structural kickoff / recovery / handoff / completion affordances project without a bare generic composer. -12. 
Hydrated transcripts preserve interviewer-side structure plus stable durable activity summaries for any live-only artifacts that were shown during streaming, including elapsed thinking time and a coarse tool-use summary / placeholder seam. -13. Open phases bottom-load a projected kickoff card, the current frontier turn, a visible generation state, or a projected recovery card; completed elicitation turns replay as answered-turn records, and closed phases bottom-load a projected handoff or completion artifact. -14. Preface cards render as turn-internal artifacts paired with question cards, so the observer captures from the whole validated turn rather than from unvalidated provisional content alone. -15. Grounding and elicitation persist only the durable exploration ontology, with `non-goal` represented as a `constraint` subtype rather than a separate top-level kind. -16. Observer prompt, shared kind registry, schema / API types, fixtures, and UI copy describe the same ontology and accepted-review semantics without per-layer language drift. -17. The interview can orient itself anywhere in the `greenfield <> brownfield` by `end-to-end build <> incremental feature` matrix without forcing whole-project assumptions. -18. Observer capture records intent edges broadly enough that most durable intent items link to upstream or downstream context whenever that relation is reasonably traceable. -19. Users who cannot complete a long interview can request candidate directions with explained tradeoffs and refine by reacting to them. -20. The interview can stop at a broad pass and deepen selected areas incrementally through explicit next-detail actions. -21. Graph view renders the intent graph as a navigable workspace with visible edges and node-launched refinement flows, not just a grouped list. -22. First-run setup makes missing provider credentials visible and recoverable from the dashboard without requiring users to hand-edit project `.env` files. -23. 
Brunch can help users keep `.brunch/` out of version control through an explicit, idempotent `.gitignore` confirmation flow. +11. Structural kickoff / recovery / handoff / completion affordances project without a bare generic composer. +12. Hydrated transcripts preserve interviewer-side structure plus stable durable activity summaries for live-only artifacts. +13. Open phases bottom-load one visible next action; completed turns replay as answered-turn records; closed phases bottom-load handoff/completion artifacts. +14. Preface cards render as turn-internal artifacts paired with question cards, so observer capture uses the whole validated turn. +15. Grounding and elicitation persist only the durable exploration ontology, with `non-goal` represented as a `constraint` subtype. +16. Observer prompt, shared kind registry, schema/API types, fixtures, and UI copy describe the same ontology. +17. The interview can orient anywhere in the two-axis workspace novelty × delivery posture matrix. +18. Observer capture records useful intent edges while abstaining under weak support. +19. Users can request candidate directions with explained tradeoffs and refine by reacting to them. +20. The interview can stop at a broad pass and deepen selected areas incrementally. +21. Graph view renders the intent graph as a navigable workspace with visible edges and node-launched refinement flows. +22. First-run setup makes missing provider credentials visible and recoverable without hand-editing project `.env` files. +23. Brunch can help users keep `.brunch/` out of version control through explicit, idempotent `.gitignore` confirmation. diff --git a/memory/SPEC_RESTRUCTURE.md b/memory/SPEC_RESTRUCTURE.md deleted file mode 100644 index 8c159187..00000000 --- a/memory/SPEC_RESTRUCTURE.md +++ /dev/null @@ -1,175 +0,0 @@ -# SPEC Restructure Plan - -> Status: proposed one-off workflow doc. -> Created: 2026-05-13. 
-> Purpose: capture the intended cleanup for `memory/SPEC.md` before splitting this work into a separate branch / PR. Delete this file after the restructure is completed or explicitly abandoned. - -## Goal - -Make `memory/SPEC.md` lighter, more structurally resistant to branch conflicts, and clearer about what belongs in the live architecture register versus historical/product-embedded truth. - -The cleanup should preserve durable product/architecture authority while retiring rows that are already fully embedded in code, tests, or design docs. - -## Diagnosis - -`memory/SPEC.md` now mixes several kinds of truth in one long mutable document: - -1. **Stable product contract** — concept, non-goals, durable product requirements. -2. **Live uncertainty** — assumptions still awaiting validation or still shaping frontier work. -3. **Current architectural guardrails** — decisions and invariants that actively constrain near-term work. -4. **Historical embedded decisions** — shipped seams whose rationale is now code/test/design-doc truth. -5. **Future direction** — semantic/generative/agent/provider trajectories not yet productized. -6. **Verification policy and coverage** — useful, but partly over-detailed as implementation/test history. - -This creates churn because ordinary feature work edits the same numbered tables/sections, and because sequential IDs (`Requirement N`, `A##`, `D###`, `I###`) are collision-prone across branches. 
- -## Desired document shape - -Target structure, to be refined during the cleanup: - -```md -# Brunch v2 — Spec Elicitation Tool - -## Product Contract -### Concept -### Constraints & Non-goals -### Capability Requirements -#### Runtime & persistence -#### Interview workflow -#### Knowledge / intent graph -#### Review & export -#### Workspace / graph UI -#### Provider / agent substrate - -## Live Architecture Register -### Open Assumptions -### Active Decisions -### Critical Invariants - -## Future Direction Register -### Semantic / generative substrate -### Agent capability substrate -### Provider / workspace hardening - -## Interaction Stream Model -[keep if still actively useful, but compress or move details to design docs] - -## Layout Architecture -[compress; move design-level detail out if it is no longer needed as SPEC authority] - -## Lexicon - -## Verification Design -``` - -Principles: - -- Separate **stable product contract** from **live architecture register** from **future direction**. -- Keep `SPEC.md` as the authority for active constraints, not as the full archive of how each seam was built. -- Prefer short guardrails plus links to design docs over long design-doc-scale paragraphs. -- Do not renumber surviving tracked IDs unless the cleanup explicitly adopts a new ID scheme. -- Leave concise retirement comments for removed ID ranges when useful. 
- -## Assessment pass - -Classify each tracked row before editing: - -| Classification | Meaning | Action | -| --- | --- | --- | -| keep live | Still unresolved or actively constrains near-term work | Keep, possibly tighten wording | -| compress / merge | Overlaps another row or carries too much rationale | Merge into one active guardrail | -| retire embedded | Fully shipped and now protected by code/tests/design docs | Remove from live table; optionally note retired IDs in an HTML comment | -| move rationale | Valuable context but too detailed for SPEC | Keep a short SPEC guardrail and point to design doc | -| future direction | Not current product contract but shapes frontier work | Move under Future Direction Register or ensure PLAN owns it | - -### Assumptions to inspect first - -Strong candidates: - -- `A82`, `A83` — already validated; likely retire from live assumptions unless still needed as FE-701 constraints. - -Possible embedded/product-fact candidates: - -- `A51`, `A53`, `A54`, `A55` — workspace turn-card / activity / frontier projection assumptions may now be product facts or invariants. -- `A59`, `A60`, `A63` — prompt/question/header assumptions may be embedded or lower-priority watch items. -- `A64` — query invalidation may have become a concrete architectural decision/invariant if already built. -- `A66`–`A70` — graph/relation assumptions should be checked against shipped graph view and FE-700 direction. -- `A71`–`A73`, `A77`–`A81`, `A84`–`A91`, `A93` — likely still live future/semantic/generative assumptions; may move to Future Direction Register. - -### Decisions to inspect first - -Potential merge/compression clusters: - -- Runtime / stream / workflow cluster: - - `D22`, `D89`, `D93`, `D94`, `D95`, `D96`, `D110`, `D112`, `D113`, `D116`, `D121`, `D123`, `D114` - - Goal: compress overlapping turn-centered stream, projected controls, lifecycle, observer backlog, route/query ownership, and continuous workspace guardrails. 
- -- Graph / side-chat / semantic mutation cluster: - - `D80`, `D125`, `D134`, `D135`, `D136`, `D137`, `D138`, `D144`, `D145`, `D146`, `D149`, `D150`, `D152` - - Goal: keep current semantic direction and active changeset/reconciliation guardrails; retire or compress older side-chat/revisit wording superseded by multi-chat + reconciliation docs. - -- Prompt/context / agent capability cluster: - - `D139`, `D140`, `D141`, `D142`, `D143`, `D147` - - Goal: keep concise active guardrails for prompt/context substrate and Brunch-owned mutation surface; move implementation boundary detail to design docs where possible. - -- Candidate/scenario strategy cluster: - - `D126`, `D127`, `D148`, `D151` - - Goal: separate current product contract from future strategy/proposal direction. - -- Provider/workspace hardening cluster: - - `D130`, `D131`, `D132`, `D133` - - Goal: likely keep as active near-term frontier constraints; wording can be shorter. - -### Invariants to inspect first - -Keep only critical seam-level invariants live. - -Candidates to compress or retire: - -- Rows that primarily enumerate test filenames or implementation history rather than a reusable invariant. -- Older invariants whose protected behavior is fully covered by a broader newer invariant. -- Planned invariants for not-yet-built future work should be checked against `memory/PLAN.md`; if they only describe future acceptance criteria, PLAN may be the better home until implemented. - -Likely keep live: - -- Distribution/runtime startup invariants (`I4`, `I100`). -- Boundary/schema invariants (`I17`, `I48`, `I54`). -- Workflow/turn/lifecycle invariants (`I24`, `I72`, `I87`, `I104`, `I105`, `I108`, `I110`). -- Current frontier invariants for provider/gitignore/agent/changing semantic substrate (`I106` onward), if they still correspond to active PLAN frontier items. - -## Rewrite pass - -1. Create a branch specifically for SPEC restructuring. -2. 
Read `memory/SPEC.md`, `memory/PLAN.md`, and current design docs named by SPEC rows. -3. Classify rows using the assessment table above. -4. Rewrite `SPEC.md` into the target structure. -5. Preserve cross-reference integrity: - - `PLAN.md` frontier definitions still point at surviving SPEC requirements/assumptions/decisions/invariants. - - Retired IDs are not referenced by live PLAN frontier definitions unless intentionally historical. - - Design docs carry detailed rationale that SPEC no longer repeats. -6. Run link/reference checks if available, then `npm run fix` and `npm run verify` before PR. - -## Output expectations - -The completed PR should include: - -- `memory/SPEC.md` rewritten / pruned. -- Any necessary small updates to `memory/PLAN.md` traceability references caused by retired/merged SPEC rows. -- Optional updates to `ln-spec` / `ln-sync` instructions **only if** the restructure changes the intended SPEC shape. -- Deletion of this `memory/SPEC_RESTRUCTURE.md` file once its plan has been executed or superseded. - -## Non-goals - -- Do not change product behavior. -- Do not add new requirements just because there is a new section for them. -- Do not migrate to a structured generated spec registry in this pass; that remains `structured-development-spec-registry` horizon work. -- Do not rewrite design docs unless a SPEC row is moved there and the target doc needs a small anchor. -- Do not renumber surviving IDs casually. - -## Open design questions for the restructure branch - -1. Should requirements remain a single numbered sequence, or should they become grouped stable IDs by capability area? -2. Should assumptions/decisions/invariants stay as global tables/lists, or be grouped by subsystem to reduce edit conflicts? -3. Should validated assumptions be removed immediately, or retained for one release window with a retirement note? -4. 
How much of Interaction Stream Model and Layout Architecture still belongs in SPEC versus `docs/design/CONVERSATIONAL_WORKSPACE_RUNTIME.md` and related design docs? -5. Should future direction rows live in SPEC at all, or should SPEC only link to PLAN frontier definitions and design docs for unbuilt future work? From 9fbbb599cc7b08d61bf96c1ca05ca6e087fcc004 Mon Sep 17 00:00:00 2001 From: Lu Nelson Date: Wed, 13 May 2026 15:46:50 +0200 Subject: [PATCH 04/16] add documentation about the skills system --- AGENTS.md | 2 +- docs/praxis/ln-skills.md | 136 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 137 insertions(+), 1 deletion(-) create mode 100644 docs/praxis/ln-skills.md diff --git a/AGENTS.md b/AGENTS.md index d2e07d67..710936af 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -56,7 +56,7 @@ Traceability: assumptions in SPEC.md link to decisions and frontier items in PLA ### skills -The `/ln-*` skills at `.agents/skills/` follow this flow: +The `/ln-*` skills at `.agents/skills/` follow this flow. See `docs/praxis/ln-skills.md` for the colleague-facing reference, discretionary tools, and chooser table. - **Knowledge**: /ln-grill or /ln-disambiguate → /ln-spec → /ln-plan → /ln-oracles - **Execution**: /ln-scope → /ln-spike (optional) → /ln-build diff --git a/docs/praxis/ln-skills.md b/docs/praxis/ln-skills.md new file mode 100644 index 00000000..3f3d229b --- /dev/null +++ b/docs/praxis/ln-skills.md @@ -0,0 +1,136 @@ +# ln-* Skill System Reference + +This is the working guide for Brunch's project-local `ln-*` skills in `.agents/skills/`. + +The skills are a development workflow for keeping product intent, planning, implementation, verification, and handoff aligned. They do not replace judgment: choose the smallest skill that matches the current uncertainty. + +## Canonical state + +| File | Authority | +| --- | --- | +| `memory/SPEC.md` | What and why: product contract, live assumptions, decisions, invariants, lexicon, verification stance. 
| +| `memory/PLAN.md` | What's next: frontier items, sequencing, acceptance, verification notes. | +| `HANDOFF.md` | Temporary resumability state when a session ends or context is fragile. | +| `memory/CARDS.md` | Temporary multi-card execution queue inside one settled frontier item, when explicitly created. | +| `memory/REFACTOR.md` | Temporary refactor execution plan, when explicitly created. | + +Do not invent alternate planning stores. If a fact matters durably, promote it through `ln-spec`, `ln-plan`, or `ln-sync`. + +## Default flow + +```text +ln-consult + → ln-grill or ln-disambiguate + → ln-spec + → ln-plan + → ln-oracles + → ln-scope + → ln-spike (optional) + → ln-build + → ln-review + → ln-refactor (optional) + → ln-sync + → ln-handoff (when stopping or transferring) +``` + +The flow is not a checklist. Skip steps whose uncertainty is already retired. + +## Skill map + +### Triage and orientation + +| Skill | Use when | Produces | +| --- | --- | --- | +| `ln-consult` | You are unsure which `ln-*` skill applies, starting a fresh thread, or re-entering ambiguous work. | A short assessment and recommended next route. | +| `ln-handoff` | Ending a session, switching threads, nearing context limits, or preserving volatile state. | `HANDOFF.md` with current state and next action. | + +### Knowledge shaping + +| Skill | Use when | Produces | +| --- | --- | --- | +| `ln-grill` | The idea is fuzzy and needs broad Socratic pressure-testing. | Shared understanding; constraints, motivations, and lexicon pressure surfaced. | +| `ln-disambiguate` | Several plausible meanings exist and examples/counterexamples would clarify faster than open-ended questioning. | Collapsed ambiguity, typed candidate conclusions, or named unresolved ambiguity. | +| `ln-spec` | Understanding should become durable product truth, or requirements/assumptions/decisions/invariants changed. | Updates to `memory/SPEC.md`. | +| `ln-plan` | Product truth is clear enough to sequence frontier work. 
| Updates to `memory/PLAN.md`. | +| `ln-sync` | SPEC/PLAN are stale, overweight, drifted from code, or need mature reconciliation. | Refreshed canonical docs and retired stale derivative artifacts. | + +### Design and verification strategy + +| Skill | Use when | Produces | +| --- | --- | --- | +| `ln-design` | API shape, module boundary, ownership, or information hiding is uncertain. Use especially before committing to a public seam. | Competing module shapes, chosen direction, rejected tradeoffs. | +| `ln-oracles` | Verification strategy is uncertain or materially shapes implementation order, especially for LLM, visual, compositional, or multi-surface work. | Oracle strategy by loop tier, observability diagnosis, blind spots. | +| `ln-prototype` | A throwaway playable/model/UI probe would answer design questions faster than production work. | Disposable prototype evidence; no production commitment. | +| `ln-spike` | One hard technical question blocks a scoped slice or frontier item. | Spike verdict and recommendation; throwaway code unless explicitly promoted. | + +### Execution and quality + +| Skill | Use when | Produces | +| --- | --- | --- | +| `ln-scope` | A frontier item or next step needs a thin vertical slice with target behavior and acceptance criteria. | Scope card / slice definition. | +| `ln-build` | A scoped slice is ready for TDD implementation. | Code, tests, inner-loop verification, and PLAN updates when appropriate. | +| `ln-diagnose` | Something is broken, failing, flaky, slow, or nondeterministic. | Trusted repro loop, falsified hypotheses, regression oracle, route back to planning if needed. | +| `ln-review` | After implementation bursts, or when architecture/model hygiene needs an opinionated audit. | Quality findings and next-step recommendations. | +| `ln-refactor` | Working code needs restructuring without behavior change. | Refactor plan as tiny safe commits. 
| + +## Discretionary skills that are easy to miss + +These are not always visible in the shortest default path, but they are important. + +| Skill | Why it matters | +| --- | --- | +| `ln-grill` | Prevents premature specs by forcing motivations, constraints, and premises into the open. | +| `ln-disambiguate` | Prevents vague requirements by asking contrastive example/counterexample questions where interpretations diverge. | +| `ln-design` | Prevents shallow modules and accidental public APIs by exploring multiple shapes before implementation. | +| `ln-oracles` | Prevents fake confidence by designing the right evidence before build work. | +| `ln-prototype` | Retires UX/state/model uncertainty cheaply before the production seam hardens. | +| `ln-diagnose` | Keeps debugging scientific and routes durable lessons back into SPEC/PLAN. | +| `ln-review` | Catches domain-model erosion and agent-navigability problems after code lands. | +| `ln-sync` | Keeps canonical docs from becoming an append-only attic. | + +There is currently no project-local `ln-map` skill in `.agents/skills/`. If you mean milestone/topology mapping, use `ln-plan` for frontier sequencing, `ln-scope` for one slice, or create a new `ln-map` skill only after its boundary is distinct from those two. 
+ +## Choosing between similar skills + +| If you are asking… | Use | +| --- | --- | +| “What are we even trying to do?” | `ln-grill` | +| “Which interpretation is intended?” | `ln-disambiguate` | +| “What should the canonical truth say?” | `ln-spec` | +| “What work items should exist?” | `ln-plan` | +| “What is the smallest buildable slice?” | `ln-scope` | +| “Which module/API shape should we choose?” | `ln-design` | +| “How will we know this works?” | `ln-oracles` | +| “Can this technical approach work?” | `ln-spike` | +| “Can we make the idea tangible before committing?” | `ln-prototype` | +| “Why is this failing?” | `ln-diagnose` | +| “Is this code still conceptually clean?” | `ln-review` | +| “How do we restructure safely?” | `ln-refactor` | +| “Are the docs still true?” | `ln-sync` | + +## Branch and tracker boundary + +Plan-level frontier items in `memory/PLAN.md` are the unit of Linear issue and Graphite branch work. Scope-card slices do not get their own issue/branch by default. + +When starting a new frontier item, follow `AGENTS.md` and `docs/praxis/graphite-workflow.md`: create the Linear issue, create the Graphite stacked branch, then scope/build within that branch. 
+ +## Verification ownership + +| Layer | Owner | +| --- | --- | +| Verification commands and inner-loop policy | `ln-spec` | +| Middle/outer loop strategy and blind spots | `ln-oracles` | +| Per-slice application of oracle strategy | `ln-scope` | +| TDD and inner-loop execution | `ln-build` | +| Coverage audit after implementation | `ln-review` | + +Default commands: + +- Inner loop after meaningful edits: `npm run fix` +- Gate before commit: `npm run verify` + +## References + +- Runtime skill instructions: `.agents/skills/ln-*/SKILL.md` +- Repo protocol summary: `AGENTS.md` +- Dev-layer design rationale: `docs/design/ln-skills/EVOLUTION.md` From 0b26e1fcc76963ad8c0717002126e02afb77cb77 Mon Sep 17 00:00:00 2001 From: Lu Nelson Date: Wed, 13 May 2026 15:48:34 +0200 Subject: [PATCH 05/16] coordinate review and design skills, for a codebase improvement/deepening flow --- .agents/skills/ln-design/SKILL.md | 14 ++++++++++---- .agents/skills/ln-review/SKILL.md | 31 +++++++++++++++++++++++-------- 2 files changed, 33 insertions(+), 12 deletions(-) diff --git a/.agents/skills/ln-design/SKILL.md b/.agents/skills/ln-design/SKILL.md index b45172ca..c764fbff 100644 --- a/.agents/skills/ln-design/SKILL.md +++ b/.agents/skills/ln-design/SKILL.md @@ -6,7 +6,9 @@ argument-hint: "[module or API boundary to explore]" # Ln Design -Apply Ousterhout's "Design It Twice": generate **3+ radically different module shapes**, compare on depth, and synthesize. The goal is deep modules — small API surfaces hiding significant complexity. Do not implement; this is purely about the shape of the boundary. +Apply Ousterhout's "Design It Twice": generate **3+ radically different module shapes**, compare on depth, and synthesize. The goal is deep modules — small interfaces hiding significant complexity. Do not implement; this is purely about the shape of the seam. 
+ +Use `ln-design` as the deepening pathway from `ln-review`: when review surfaces a shallow module or weak seam, explore alternative deepened module shapes here before routing to `ln-scope` or `ln-refactor`. ## Input @@ -16,7 +18,9 @@ The module or API boundary: $ARGUMENTS ### 1. Gather requirements -Understand the problem, the callers, the key operations, constraints, and — crucially — what complexity should be hidden inside vs exposed. Skip steps you already know the answer to. +Understand the problem, the callers, the key operations, constraints, and — crucially — what complexity should be hidden inside vs exposed. If this design follows an `ln-review` deepening candidate, start from that candidate's files, problem, possible direction, and benefits. Skip steps you already know the answer to. + +Read `memory/SPEC.md` first when it exists. Use its lexicon for domain terms and respect its live assumptions, decisions, and invariants. Read `memory/PLAN.md` when the seam touches active or near-horizon work. ### 2. Generate designs (parallel sub-agents) @@ -27,13 +31,15 @@ Spawn 3+ sub-agents simultaneously. Each must produce a **radically different** - "Optimize for the most common case" - "Take inspiration from [specific paradigm or library]" -Each agent returns: **API signature** (types, methods, params), **usage example**, **what it hides**, and **trade-offs**. +Each agent returns: **interface** (types, methods, params, invariants, ordering constraints, error modes, required configuration, and performance characteristics), **usage example**, **what it hides**, **seam / adapter strategy** where relevant, and **trade-offs**. ### 3. 
Present and compare Show each design sequentially, then compare in prose on: -- **Depth** (Ousterhout's depth test): small surface hiding significant complexity (good) vs large surface with thin implementation (bad) +- **Depth** (Ousterhout's depth test): small interface hiding significant complexity (good) vs large interface with thin implementation (bad) +- **Locality**: whether change, bugs, knowledge, and verification concentrate behind the seam +- **Leverage**: what callers get per fact they must learn about the interface - **Ease of correct use** vs ease of misuse - **General-purpose vs specialized**: flexibility vs focus - **Implementation efficiency**: does the shape allow efficient internals? diff --git a/.agents/skills/ln-review/SKILL.md b/.agents/skills/ln-review/SKILL.md index 6fefc8fb..e3b826f8 100644 --- a/.agents/skills/ln-review/SKILL.md +++ b/.agents/skills/ln-review/SKILL.md @@ -18,14 +18,18 @@ If "recent" or unspecified, focus on recently modified files. ## What to look for +Read `memory/SPEC.md` first when it exists. Use its lexicon for domain terms, and treat the live architecture register as the current decision record. Read `memory/PLAN.md` for active frontier context when the reviewed area touches active or near-horizon work. If ADRs or design docs exist in the touched area, respect them as supporting context, but do not introduce ADRs or sidecar decision logs by default; durable updates reconcile through `memory/SPEC.md` / `memory/PLAN.md`. + Apply Ousterhout's depth test: modules should have small interfaces hiding significant complexity. Modules that move together should live together — clusters of small files always used in concert are a single deep module waiting to be extracted. Use the deletion test for suspected shallow modules: if deleting the module makes complexity vanish, it was pass-through structure; if the same complexity reappears across multiple callers, the module was earning its keep. 
Prefer depth as leverage/locality, not line-count ratio. -Treat the interface as the test surface. If callers or tests must reach past the interface to verify important behavior, the module shape is probably wrong. A good seam lets tests and callers cross the same public boundary. +Treat the interface as the test surface. The interface is everything callers must know to use the module correctly: types, invariants, ordering constraints, error modes, required configuration, and performance characteristics. If callers or tests must reach past the interface to verify important behavior, the module shape is probably wrong. A good seam lets tests and callers cross the same public boundary. Apply seam discipline: one adapter usually means a hypothetical seam; two adapters make a real seam. Flag indirection introduced only for imagined future variation, especially when it spreads configuration, mocks, or ordering knowledge into callers. +When a finding is a deepening opportunity, present it as a candidate rather than a detailed design. Name the current shallow module shape, the deepened module that might replace it, what complexity would move behind the seam, and why that would improve locality, leverage, and the test surface. Do **not** propose detailed interfaces in `ln-review`; route selected deepening candidates to `ln-design` before scoping or refactoring. + Check the functional core / imperative shell boundary (Gary Bernhardt, "Boundaries"). Pure functions should stay pure. Flag when a pure function has acquired side effects or a growing parameter list — it has drifted into shell territory. Make invalid states unrepresentable (Yaron Minsky). Split optional fields into distinct types. Use branded types for domain-distinct values. @@ -54,7 +58,7 @@ Collect misalignments as numbered findings (category: `naming`) with the canonic ## Output -Present findings as numbered candidates: +Present findings as numbered candidates. 
Use the compact form for ordinary findings: ```md ## Review: [area] @@ -65,19 +69,30 @@ Present findings as numbered candidates: 2. ... ``` +Use the deepening form when the finding is a shallow-module or weak-seam opportunity: + +```md +1. **[Deepening candidate]** — [category: depth|seam|coupling|testability] — [impact: low|medium|high] + **Files** — [modules/files involved] + **Problem** — [why the current module shape causes friction] + **Possible direction** — [plain English target shape; no detailed interface yet] + **Benefits** — [locality, leverage, and test-surface improvement] +``` + Recommend the highest-impact improvement. ## Routing After presenting findings, present these options to the user (use `tool-ask-question`): -| # | Label | Target | Why | -| --- | --------------- | ------------- | ------------------------------------------------ | -| 1 | Scope a fix | `ln-scope` | A finding warrants a planned slice | -| 2 | Plan a refactor | `ln-refactor` | Multiple findings need coordinated restructuring | -| 3 | Back to triage | `ln-consult` | Review complete, no immediate action needed | +| # | Label | Target | Why | +| --- | -------------------------- | ------------- | ------------------------------------------------ | +| 1 | Scope a fix | `ln-scope` | A finding warrants a planned slice | +| 2 | Explore a deepening design | `ln-design` | A selected candidate needs seam/interface design before scoping or refactoring | +| 3 | Plan a refactor | `ln-refactor` | Multiple findings need coordinated restructuring | +| 4 | Back to triage | `ln-consult` | Review complete, no immediate action needed | -Recommended: **1** if high-impact findings exist, **3** otherwise. +Recommended: **2** if the highest-impact finding is a deepening candidate, **1** if high-impact findings are concrete fixes, **4** otherwise. 
--- *Draws from [mattpocock/skills/improve-codebase-architecture](https://github.com/mattpocock/skills/tree/main/improve-codebase-architecture) and [theswerd/aicode/skills/self-documenting-code](https://github.com/theswerd/aicode/blob/main/skills/self-documenting-code/SKILL.md).* From ec81e6a6c9c9f2cf3124afb24727ecd40e605f5a Mon Sep 17 00:00:00 2001 From: Lu Nelson Date: Wed, 13 May 2026 16:51:20 +0200 Subject: [PATCH 06/16] Extract reconciliation store from db facade --- memory/CARDS.md | 295 ++++++++++++++++++++++++++ src/server/db.ts | 215 ++----------------- src/server/db/reconciliation-store.ts | 209 ++++++++++++++++++ 3 files changed, 523 insertions(+), 196 deletions(-) create mode 100644 memory/CARDS.md create mode 100644 src/server/db/reconciliation-store.ts diff --git a/memory/CARDS.md b/memory/CARDS.md new file mode 100644 index 00000000..05f522aa --- /dev/null +++ b/memory/CARDS.md @@ -0,0 +1,295 @@ +# Scope Cards — server-mini-library-compartmentalization / db.ts extraction + +Containing frontier: `server-mini-library-compartmentalization`. + +Execution posture: keep `src/server/db.ts` as the public persistence root while moving cohesive implementation regions into private `src/server/db/*-store.ts` modules. Preserve existing `./db.js` caller imports unless a later card explicitly scopes API redesign. + +## Card 1 — Reconciliation store extraction + +Status: done / uncommitted + +### Target Behavior + +`db.ts` remains the public persistence import surface while reconciliation-need persistence implementation lives in a private `src/server/db/reconciliation-store.ts` module. 
+ +### Boundary Crossings + +```txt +→ existing callers importing reconciliation helpers from ./db.js +→ public persistence root src/server/db.ts +→ private persistence implementation src/server/db/reconciliation-store.ts +→ Drizzle schema / SQLite rows +``` + +### Risks and Assumptions + +- RISK: import cycles emerge between `db.ts`, `schema.ts`, and the private store → MITIGATION: private store imports only `schema`, Drizzle helpers, and type-only public DB where needed. +- ASSUMPTION: reconciliation helpers are cohesive enough to extract first → VALIDATE: reconciliation tests pass with unchanged caller imports. + +### Acceptance Criteria + +```txt +✓ Reconciliation helpers/types are implemented outside db.ts and re-exported through db.ts. +✓ Existing callers continue importing reconciliation helpers/types from ./db.js. +✓ Reconciliation-related regression tests pass. +``` + +### Verification Approach + +- Inner: focused persistence/agent tests — `npm run test -- db reconciliation-need reconciliation-agent`. +- Gate: `npm run verify` when unrelated suite failures are resolved or acknowledged. + +## Card 2 — Annotation store extraction + +Status: next + +### Target Behavior + +`db.ts` remains the public persistence import surface while annotation persistence implementation lives in a private `src/server/db/annotation-store.ts` module. + +### Boundary Crossings + +```txt +→ annotation routes and tests importing annotation helpers from ./db.js +→ public persistence root src/server/db.ts +→ private persistence implementation src/server/db/annotation-store.ts +→ Drizzle schema / SQLite rows +``` + +### Risks and Assumptions + +- RISK: the annotation region is too small to pay for a subtree module → MITIGATION: keep the extraction mechanically simple and use it as the low-risk proof that small cohesive stores can live behind the facade. 
+- ASSUMPTION: annotation CRUD is independent of other db.ts private helpers → VALIDATE: private module imports only schema/Drizzle helpers plus `DB` type. + +### Acceptance Criteria + +```txt +✓ `createAnnotation`, `getAnnotationsForSpecification`, `getAnnotation`, and `deleteAnnotation` are implemented outside db.ts. +✓ Existing callers continue importing annotation helpers/types from ./db.js. +✓ Annotation route tests and db tests pass without behavior changes. +``` + +### Verification Approach + +- Inner: focused route/store tests — `npm run test -- annotation db`. +- Gate: `npm run check`; full `npm run verify` when unrelated suite failures are resolved or acknowledged. + +## Card 3 — Edit-impact query extraction + +Status: queued + +### Target Behavior + +`db.ts` remains the public persistence import surface while downstream edit-impact query implementation lives in a private `src/server/db/edit-impact-store.ts` module. + +### Boundary Crossings + +```txt +→ edit route / side-chat route importing edit-impact query helpers from ./db.js +→ public persistence root src/server/db.ts +→ private persistence implementation src/server/db/edit-impact-store.ts +→ knowledge_edge / knowledge_item / phase_outcome rows +``` + +### Risks and Assumptions + +- RISK: this store overlaps conceptually with the broader intent graph store → MITIGATION: extract only downstream impact queries first because they form a cohesive read-side seam used by edit-impact classification. +- ASSUMPTION: preserving current helper names avoids route churn → VALIDATE: `edit-route.ts` and `side-chat-route.ts` imports stay unchanged. + +### Acceptance Criteria + +```txt +✓ `getDownstreamItems`, `getDownstreamEdges`, and `isItemInActiveReviewSet` are implemented outside db.ts and re-exported through db.ts. +✓ Edit-impact callers continue importing from ./db.js. +✓ Focused edit-impact/edit-route/side-chat tests pass or only fail for known unrelated authorization flakes. 
+``` + +### Verification Approach + +- Inner: focused tests — `npm run test -- edit-impact side-chat-route edit-route`. +- Gate: `npm run check`; full `npm run verify` when unrelated suite failures are resolved or acknowledged. + +## Card 4 — Intent graph mutation store extraction + +Status: queued + +### Target Behavior + +`db.ts` remains the public persistence import surface while generic intent/knowledge item and edge mutation helpers live in a private `src/server/db/intent-graph-store.ts` module. + +### Boundary Crossings + +```txt +→ observer/edit/core tests and routes importing intent graph helpers from ./db.js +→ public persistence root src/server/db.ts +→ private persistence implementation src/server/db/intent-graph-store.ts +→ knowledge_item / turn_knowledge_item / knowledge_edge rows +``` + +### Risks and Assumptions + +- RISK: compatibility projection helpers (`createDecision`, `createAssumption`, parent helpers) obscure the canonical intent graph model → MITIGATION: move them as legacy-named facade exports over generic store internals without expanding compatibility language. +- RISK: this extraction may need shared reference-code/projection helpers from later read-model code → MITIGATION: keep mutation helpers separate from entity projection helpers; stop if extraction forces projection redesign. +- ASSUMPTION: mutation helpers form a real store seam independent of accepted review materialization → VALIDATE: observer/edit tests pass with unchanged public imports. + +### Acceptance Criteria + +```txt +✓ `createKnowledgeItem`, `getKnowledgeItem`, `linkKnowledgeItemToTurn`, `addKnowledgeRelationship`, `removeKnowledgeRelationship`, `updateKnowledgeItemContent`, and legacy decision/assumption helper exports are implemented outside db.ts. +✓ Existing callers continue importing from ./db.js. +✓ Observer/edit/db tests covering item and edge writes pass. +``` + +### Verification Approach + +- Inner: focused tests — `npm run test -- observer edit-route db`. 
+- Gate: `npm run check`; full `npm run verify` when unrelated suite failures are resolved or acknowledged. + +## Card 5 — Entity projection read-model extraction + +Status: queued + +### Target Behavior + +`db.ts` remains the public persistence import surface while entity projection/read-model implementation lives in a private `src/server/db/entity-projection-store.ts` module. + +### Boundary Crossings + +```txt +→ app/export/context/observer callers importing entity projection helpers from ./db.js +→ public persistence root src/server/db.ts +→ private persistence implementation src/server/db/entity-projection-store.ts +→ knowledge tables + active-path turn lineage rows +→ shared API entity projection types +``` + +### Risks and Assumptions + +- RISK: active-path filtering depends on turn lineage helpers currently local to db.ts → MITIGATION: either pass the small lineage primitive into the projection store or defer this card until chat/turn store extraction; do not duplicate lineage logic. +- RISK: product lexicon says intent graph, while implementation still says knowledge → MITIGATION: prefer intent/entity naming for new private helpers where possible, while preserving public compatibility exports. +- ASSUMPTION: read-model projection is separable from mutation helpers after Card 4 → VALIDATE: no circular import between intent graph mutation store and projection store. + +### Acceptance Criteria + +```txt +✓ `getEntitiesForSpecificationByMode`, `getEntitiesForSpecification`, `getEntitiesForSpecificationOnActivePath`, `getCapturedItemsForTurns`, accepted entity read helpers, and supporting projection helpers are implemented outside db.ts. +✓ App/export/context/observer callers continue importing from ./db.js. +✓ Entity projection, observer, export, context, and db tests pass. +``` + +### Verification Approach + +- Inner: focused tests — `npm run test -- db observer context export app`. 
+- Gate: `npm run check`; full `npm run verify` when unrelated suite failures are resolved or acknowledged. + +## Card 6 — Review materialization store extraction + +Status: queued + +### Target Behavior + +`db.ts` remains the public persistence import surface while accepted requirements/criteria review materialization lives in a private `src/server/db/review-materialization-store.ts` module. + +### Boundary Crossings + +```txt +→ interview/core/db tests importing review materialization helpers from ./db.js +→ public persistence root src/server/db.ts +→ private persistence implementation src/server/db/review-materialization-store.ts +→ review-set assistant parts parsing +→ knowledge_item / turn_knowledge_item / knowledge_edge rows +``` + +### Risks and Assumptions + +- RISK: review materialization shares too many helpers with entity projection and intent graph mutation stores → MITIGATION: run this after Cards 4–5 and import only private store helpers if a real shared seam exists; otherwise keep duplicated SQL localized rather than creating a premature common utility. +- ASSUMPTION: accepted review materialization is a cohesive write-side seam distinct from generic intent graph mutation → VALIDATE: requirements/criteria review tests pass unchanged. + +### Acceptance Criteria + +```txt +✓ `materializeAcceptedRequirementsReviewSet`, `materializeAcceptedCriteriaReviewSet`, and their private accepted-review helpers are implemented outside db.ts. +✓ Existing callers continue importing materialization helpers from ./db.js. +✓ Requirements/criteria review persistence tests pass. +``` + +### Verification Approach + +- Inner: focused tests — `npm run test -- db interview app`. +- Gate: `npm run check`; full `npm run verify` when unrelated suite failures are resolved or acknowledged. 
+ +## Card 7 — Workflow and phase outcome store extraction + +Status: queued + +### Target Behavior + +`db.ts` remains the public persistence import surface while phase outcome and workflow projection snapshot persistence lives in private `src/server/db/workflow-store.ts` and/or `src/server/db/phase-outcome-store.ts` modules. + +### Boundary Crossings + +```txt +→ core/chat transition/phase intent callers importing workflow helpers from ./db.js +→ public persistence root src/server/db.ts +→ private workflow persistence implementation +→ turn / option / phase_outcome / knowledge rows +→ workflow-projector read model +``` + +### Risks and Assumptions + +- RISK: this is the highest-coupling extraction because workflow snapshots read turns, outcomes, accepted knowledge counts, and structural artifact ids → MITIGATION: do it late, after entity/review extractions clarify which helpers should be imported vs passed in. +- RISK: moving this may accidentally alter I110 workflow read/write truth boundaries → MITIGATION: no behavior changes; preserve existing workflow projector interface and run transition/projector tests. +- ASSUMPTION: phase outcome CRUD and workflow snapshot reads can share one private module without becoming too broad → VALIDATE: module exports remain cohesive and smaller than the original db.ts region. + +### Acceptance Criteria + +```txt +✓ Phase outcome helpers and workflow snapshot/current-phase helpers are implemented outside db.ts and re-exported through db.ts. +✓ Workflow transition callers continue importing from ./db.js. +✓ Workflow projector, phase close, chat transition, app, and db tests pass. +``` + +### Verification Approach + +- Inner: focused tests — `npm run test -- workflow-projector phase-close chat-route-transition phase-intent app db`. +- Middle: route/workflow regression — ensure active path, closeability, and structural artifact projections still match fixtures. 
+- Gate: `npm run check`; full `npm run verify` when unrelated suite failures are resolved or acknowledged. + +## Card 8 — Specification/chat/turn store extraction + +Status: queued + +### Target Behavior + +`db.ts` remains the public persistence import surface while specification, chat, turn, option, and active-head persistence lives in private `src/server/db/specification-store.ts` and `src/server/db/chat-turn-store.ts` modules. + +### Boundary Crossings + +```txt +→ nearly all server callers importing specification/turn helpers from ./db.js +→ public persistence root src/server/db.ts +→ private specification/chat-turn persistence modules +→ specification / chat / turn / option rows +``` + +### Risks and Assumptions + +- RISK: this is the broadest and most central extraction, so earlier cards may reveal a better split → MITIGATION: run this last and revise before building if prior extractions expose a different boundary. +- RISK: primary-chat active-head equivalence and multi-chat transitional invariants could regress → MITIGATION: run chat-substrate, core, app, and transition tests. +- ASSUMPTION: preserving public exports avoids broad caller churn while still clarifying ownership → VALIDATE: no non-test caller import paths change. + +### Acceptance Criteria + +```txt +✓ Specification creation/list/read, chat ownership, turn CRUD, option CRUD, active path, and active-head helpers are implemented outside db.ts and re-exported through db.ts. +✓ Existing callers continue importing from ./db.js. +✓ Core/chat-substrate/transition/app/db tests pass. +✓ `db.ts` is reduced to connection setup, type facade exports, and curated re-exports from private stores. +``` + +### Verification Approach + +- Inner: focused tests — `npm run test -- db core chat-substrate chat-route-transition turn-response app`. +- Middle: persisted resume/projection regression via app tests. +- Gate: `npm run verify` or explicitly document unrelated failures before commit. 
diff --git a/src/server/db.ts b/src/server/db.ts index a95ce3fa..50a579d5 100644 --- a/src/server/db.ts +++ b/src/server/db.ts @@ -54,6 +54,25 @@ import { import * as schema from './schema.js'; import { projectWorkflowState, type WorkflowProjectionSnapshot } from './workflow-projector.js'; +export { + claimReconciliationNeedForClassification, + getCascadeRelationBetween, + getReconciliationNeed, + listOpenReconciliationNeeds, + listOpenReconciliationNeedsAwaitingClassification, + openReconciliationNeed, + openReconciliationNeedIfAbsent, + resolveReconciliationNeed, + updateReconciliationNeedAgentFields, +} from './db/reconciliation-store.js'; +export type { + OpenReconciliationNeedInput, + ReconciliationNeed, + ReconciliationNeedAgentClassification, + ReconciliationNeedAgentStatus, + ReconciliationNeedKind, +} from './db/reconciliation-store.js'; + export type DB = ReturnType>; export type Specification = InferSelectModel; export type Annotation = InferSelectModel; @@ -63,13 +82,6 @@ export type Turn = Omit & { }; export type Option = InferSelectModel; export type PhaseOutcome = InferSelectModel; -export type ReconciliationNeed = InferSelectModel; -export type ReconciliationNeedKind = ReconciliationNeed['kind']; -// V3.1 slice 4: lifecycle and label vocabulary derive from the schema enums -// (see I114). Re-exported here so route + agent code can stay typesafe -// without importing the schema module directly. 
-export type ReconciliationNeedAgentStatus = NonNullable; -export type ReconciliationNeedAgentClassification = NonNullable; export type Phase = Turn['phase']; export type Impact = NonNullable; export type PhaseOutcomeStatus = PhaseOutcome['status']; @@ -622,195 +634,6 @@ export function updateSpecificationMode(db: DB, specificationId: number, mode: S .run(); } -// --- Reconciliation need queue --- - -export interface OpenReconciliationNeedInput { - specificationId: number; - sourceItemId: number; - targetItemId: number; - kind: ReconciliationNeedKind; - reason?: string | null; - causedByTurnId?: number | null; - // V3.1 setup (card 1): nullable source content snapshots, frozen for the - // need's lifetime. The cascade producer (edit-route hard path) supplies - // both; direct callers (tests, future agent paths) may omit them. - sourcePreviousContent?: string | null; - sourceCurrentContent?: string | null; -} - -export function openReconciliationNeed(db: DB, input: OpenReconciliationNeedInput): ReconciliationNeed { - const sourceItem = getKnowledgeItem(db, input.sourceItemId); - const targetItem = getKnowledgeItem(db, input.targetItemId); - if ( - !sourceItem || - !targetItem || - sourceItem.specification_id !== input.specificationId || - targetItem.specification_id !== input.specificationId - ) { - throw new Error('Reconciliation need items must belong to specification'); - } - - return db - .insert(schema.reconciliationNeed) - .values({ - specification_id: input.specificationId, - source_item_id: input.sourceItemId, - target_item_id: input.targetItemId, - kind: input.kind, - reason: input.reason ?? null, - caused_by_turn_id: input.causedByTurnId ?? null, - source_previous_content: input.sourcePreviousContent ?? null, - source_current_content: input.sourceCurrentContent ?? null, - }) - .returning() - .get() as ReconciliationNeed; -} - -/** - * Open a reconciliation_need only if no matching open row exists. 
The - * (source, target, kind) partial unique index guarantees idempotence; this - * helper exposes the no-op as `null` so callers can report newly-opened ids - * separately from already-open ones. - */ -export function openReconciliationNeedIfAbsent( - db: DB, - input: OpenReconciliationNeedInput, -): ReconciliationNeed | null { - const existing = db.all(sql` - SELECT 1 - FROM reconciliation_need - WHERE specification_id = ${input.specificationId} - AND source_item_id = ${input.sourceItemId} - AND target_item_id = ${input.targetItemId} - AND kind = ${input.kind} - AND status = 'open' - LIMIT 1 - `); - if (existing.length > 0) return null; - return openReconciliationNeed(db, input); -} - -export function getReconciliationNeed(db: DB, needId: number): ReconciliationNeed | undefined { - return db.select().from(schema.reconciliationNeed).where(eq(schema.reconciliationNeed.id, needId)).get() as - | ReconciliationNeed - | undefined; -} - -export function resolveReconciliationNeed(db: DB, reconciliationNeedId: number): void { - db.update(schema.reconciliationNeed) - .set({ status: 'resolved', resolved_at: sql`datetime('now')` }) - .where( - and( - eq(schema.reconciliationNeed.id, reconciliationNeedId), - eq(schema.reconciliationNeed.status, 'open'), - ), - ) - .run(); -} - -export function listOpenReconciliationNeeds(db: DB, specificationId: number): ReconciliationNeed[] { - return db - .select() - .from(schema.reconciliationNeed) - .where( - and( - eq(schema.reconciliationNeed.specification_id, specificationId), - eq(schema.reconciliationNeed.status, 'open'), - ), - ) - .orderBy(schema.reconciliationNeed.id) - .all() as ReconciliationNeed[]; -} - -/** - * V3.1 slice 4: open needs that the run-agent route should pick up. Filters - * out anything already classified or in flight. Per-row Re-run (slice 5) - * resets agent_status to null so the row reappears in this query. 
- */ -export function listOpenReconciliationNeedsAwaitingClassification( - db: DB, - specificationId: number, -): ReconciliationNeed[] { - return db - .select() - .from(schema.reconciliationNeed) - .where( - and( - eq(schema.reconciliationNeed.specification_id, specificationId), - eq(schema.reconciliationNeed.status, 'open'), - sql`${schema.reconciliationNeed.agent_status} IS NULL`, - ), - ) - .orderBy(schema.reconciliationNeed.id) - .all() as ReconciliationNeed[]; -} - -export function claimReconciliationNeedForClassification(db: DB, needId: number): boolean { - const result = db - .update(schema.reconciliationNeed) - .set({ agent_status: 'queued' }) - .where( - and(eq(schema.reconciliationNeed.id, needId), sql`${schema.reconciliationNeed.agent_status} IS NULL`), - ) - .run(); - return result.changes === 1; -} - -/** - * V3.1 slice 4: partial update for the three agent_* columns. Used by the - * classifier loop to walk a row through the lifecycle (null → queued → - * classifying → classified | failed). Each call is one transition; callers - * are responsible for the order and for never re-classifying without first - * resetting agent_status to null. - */ -export function updateReconciliationNeedAgentFields( - db: DB, - needId: number, - fields: { - agent_status: ReconciliationNeedAgentStatus | null; - agent_classification?: ReconciliationNeedAgentClassification | null; - agent_proposal?: string | null; - }, -): void { - const setClause: Record = { - agent_status: fields.agent_status, - }; - if (Object.hasOwn(fields, 'agent_classification')) { - setClause.agent_classification = fields.agent_classification ?? null; - } - if (Object.hasOwn(fields, 'agent_proposal')) { - setClause.agent_proposal = fields.agent_proposal ?? null; - } - db.update(schema.reconciliationNeed).set(setClause).where(eq(schema.reconciliationNeed.id, needId)).run(); -} - -/** - * V3.1 slice 4: look up the typed dependency edge that caused a need's - * (source, target) pair. 
Cascade producer creates needs from edges where the - * target is the upstream (changed) item and the source of the edge is the - * downstream item; see cascade-producer.ts and getDownstreamEdges. Returns - * undefined for orphan needs (target deleted, edge removed) — classifier - * callers fall back to a relation-agnostic prompt in that case. - */ -export function getCascadeRelationBetween( - db: DB, - sourceItemId: number, - targetItemId: number, -): InferSelectModel['relation'] | undefined { - const row = db - .select({ relation: schema.knowledgeEdge.relation }) - .from(schema.knowledgeEdge) - .where( - and( - eq(schema.knowledgeEdge.from_item_id, targetItemId), - eq(schema.knowledgeEdge.to_item_id, sourceItemId), - ), - ) - .limit(1) - .get() as { relation: InferSelectModel['relation'] } | undefined; - return row?.relation; -} - // --- Entity persistence (generic knowledge items + compatibility projections) --- type PersistedKnowledgeItem = InferSelectModel; diff --git a/src/server/db/reconciliation-store.ts b/src/server/db/reconciliation-store.ts new file mode 100644 index 00000000..739cd530 --- /dev/null +++ b/src/server/db/reconciliation-store.ts @@ -0,0 +1,209 @@ +import { and, eq, sql, type InferSelectModel } from 'drizzle-orm'; + +import type { DB } from '../db.js'; +import * as schema from '../schema.js'; + +export type ReconciliationNeed = InferSelectModel; +export type ReconciliationNeedKind = ReconciliationNeed['kind']; +// V3.1 slice 4: lifecycle and label vocabulary derive from the schema enums +// (see I114). Re-exported through db.ts so route + agent code can stay typesafe +// without importing the schema module directly. 
+export type ReconciliationNeedAgentStatus = NonNullable; +export type ReconciliationNeedAgentClassification = NonNullable; + +type KnowledgeItemOwner = Pick, 'specification_id'>; + +export interface OpenReconciliationNeedInput { + specificationId: number; + sourceItemId: number; + targetItemId: number; + kind: ReconciliationNeedKind; + reason?: string | null; + causedByTurnId?: number | null; + // V3.1 setup (card 1): nullable source content snapshots, frozen for the + // need's lifetime. The cascade producer (edit-route hard path) supplies + // both; direct callers (tests, future agent paths) may omit them. + sourcePreviousContent?: string | null; + sourceCurrentContent?: string | null; +} + +function getKnowledgeItemOwner(db: DB, itemId: number): KnowledgeItemOwner | undefined { + return db + .select({ specification_id: schema.knowledgeItem.specification_id }) + .from(schema.knowledgeItem) + .where(eq(schema.knowledgeItem.id, itemId)) + .get() as KnowledgeItemOwner | undefined; +} + +export function openReconciliationNeed(db: DB, input: OpenReconciliationNeedInput): ReconciliationNeed { + const sourceItem = getKnowledgeItemOwner(db, input.sourceItemId); + const targetItem = getKnowledgeItemOwner(db, input.targetItemId); + if ( + !sourceItem || + !targetItem || + sourceItem.specification_id !== input.specificationId || + targetItem.specification_id !== input.specificationId + ) { + throw new Error('Reconciliation need items must belong to specification'); + } + + return db + .insert(schema.reconciliationNeed) + .values({ + specification_id: input.specificationId, + source_item_id: input.sourceItemId, + target_item_id: input.targetItemId, + kind: input.kind, + reason: input.reason ?? null, + caused_by_turn_id: input.causedByTurnId ?? null, + source_previous_content: input.sourcePreviousContent ?? null, + source_current_content: input.sourceCurrentContent ?? 
null, + }) + .returning() + .get() as ReconciliationNeed; +} + +/** + * Open a reconciliation_need only if no matching open row exists. The + * (source, target, kind) partial unique index guarantees idempotence; this + * helper exposes the no-op as `null` so callers can report newly-opened ids + * separately from already-open ones. + */ +export function openReconciliationNeedIfAbsent( + db: DB, + input: OpenReconciliationNeedInput, +): ReconciliationNeed | null { + const existing = db.all(sql` + SELECT 1 + FROM reconciliation_need + WHERE specification_id = ${input.specificationId} + AND source_item_id = ${input.sourceItemId} + AND target_item_id = ${input.targetItemId} + AND kind = ${input.kind} + AND status = 'open' + LIMIT 1 + `); + if (existing.length > 0) return null; + return openReconciliationNeed(db, input); +} + +export function getReconciliationNeed(db: DB, needId: number): ReconciliationNeed | undefined { + return db.select().from(schema.reconciliationNeed).where(eq(schema.reconciliationNeed.id, needId)).get() as + | ReconciliationNeed + | undefined; +} + +export function resolveReconciliationNeed(db: DB, reconciliationNeedId: number): void { + db.update(schema.reconciliationNeed) + .set({ status: 'resolved', resolved_at: sql`datetime('now')` }) + .where( + and( + eq(schema.reconciliationNeed.id, reconciliationNeedId), + eq(schema.reconciliationNeed.status, 'open'), + ), + ) + .run(); +} + +export function listOpenReconciliationNeeds(db: DB, specificationId: number): ReconciliationNeed[] { + return db + .select() + .from(schema.reconciliationNeed) + .where( + and( + eq(schema.reconciliationNeed.specification_id, specificationId), + eq(schema.reconciliationNeed.status, 'open'), + ), + ) + .orderBy(schema.reconciliationNeed.id) + .all() as ReconciliationNeed[]; +} + +/** + * V3.1 slice 4: open needs that the run-agent route should pick up. Filters + * out anything already classified or in flight. 
Per-row Re-run (slice 5) + * resets agent_status to null so the row reappears in this query. + */ +export function listOpenReconciliationNeedsAwaitingClassification( + db: DB, + specificationId: number, +): ReconciliationNeed[] { + return db + .select() + .from(schema.reconciliationNeed) + .where( + and( + eq(schema.reconciliationNeed.specification_id, specificationId), + eq(schema.reconciliationNeed.status, 'open'), + sql`${schema.reconciliationNeed.agent_status} IS NULL`, + ), + ) + .orderBy(schema.reconciliationNeed.id) + .all() as ReconciliationNeed[]; +} + +export function claimReconciliationNeedForClassification(db: DB, needId: number): boolean { + const result = db + .update(schema.reconciliationNeed) + .set({ agent_status: 'queued' }) + .where( + and(eq(schema.reconciliationNeed.id, needId), sql`${schema.reconciliationNeed.agent_status} IS NULL`), + ) + .run(); + return result.changes === 1; +} + +/** + * V3.1 slice 4: partial update for the three agent_* columns. Used by the + * classifier loop to walk a row through the lifecycle (null → queued → + * classifying → classified | failed). Each call is one transition; callers + * are responsible for the order and for never re-classifying without first + * resetting agent_status to null. + */ +export function updateReconciliationNeedAgentFields( + db: DB, + needId: number, + fields: { + agent_status: ReconciliationNeedAgentStatus | null; + agent_classification?: ReconciliationNeedAgentClassification | null; + agent_proposal?: string | null; + }, +): void { + const setClause: Record = { + agent_status: fields.agent_status, + }; + if (Object.hasOwn(fields, 'agent_classification')) { + setClause.agent_classification = fields.agent_classification ?? null; + } + if (Object.hasOwn(fields, 'agent_proposal')) { + setClause.agent_proposal = fields.agent_proposal ?? 
null; + } + db.update(schema.reconciliationNeed).set(setClause).where(eq(schema.reconciliationNeed.id, needId)).run(); +} + +/** + * V3.1 slice 4: look up the typed dependency edge that caused a need's + * (source, target) pair. Cascade producer creates needs from edges where the + * target is the upstream (changed) item and the source of the edge is the + * downstream item; see cascade-producer.ts and getDownstreamEdges. Returns + * undefined for orphan needs (target deleted, edge removed) — classifier + * callers fall back to a relation-agnostic prompt in that case. + */ +export function getCascadeRelationBetween( + db: DB, + sourceItemId: number, + targetItemId: number, +): InferSelectModel['relation'] | undefined { + const row = db + .select({ relation: schema.knowledgeEdge.relation }) + .from(schema.knowledgeEdge) + .where( + and( + eq(schema.knowledgeEdge.from_item_id, targetItemId), + eq(schema.knowledgeEdge.to_item_id, sourceItemId), + ), + ) + .limit(1) + .get() as { relation: InferSelectModel['relation'] } | undefined; + return row?.relation; +} From 2d99c5d180c105eb67da93cb9ee2d7d11b81ca26 Mon Sep 17 00:00:00 2001 From: Lu Nelson Date: Wed, 13 May 2026 16:51:55 +0200 Subject: [PATCH 07/16] Extract annotation store from db facade --- memory/CARDS.md | 4 +-- src/server/db.ts | 53 +++++-------------------------- src/server/db/annotation-store.ts | 48 ++++++++++++++++++++++++++++ 3 files changed, 58 insertions(+), 47 deletions(-) create mode 100644 src/server/db/annotation-store.ts diff --git a/memory/CARDS.md b/memory/CARDS.md index 05f522aa..c583885f 100644 --- a/memory/CARDS.md +++ b/memory/CARDS.md @@ -41,7 +41,7 @@ Status: done / uncommitted ## Card 2 — Annotation store extraction -Status: next +Status: done ### Target Behavior @@ -76,7 +76,7 @@ Status: next ## Card 3 — Edit-impact query extraction -Status: queued +Status: next ### Target Behavior diff --git a/src/server/db.ts b/src/server/db.ts index 50a579d5..f531f843 100644 --- a/src/server/db.ts +++ 
b/src/server/db.ts @@ -54,6 +54,14 @@ import { import * as schema from './schema.js'; import { projectWorkflowState, type WorkflowProjectionSnapshot } from './workflow-projector.js'; +export { + createAnnotation, + deleteAnnotation, + getAnnotation, + getAnnotationsForSpecification, +} from './db/annotation-store.js'; +export type { Annotation, CreateAnnotationInput } from './db/annotation-store.js'; + export { claimReconciliationNeedForClassification, getCascadeRelationBetween, @@ -75,7 +83,6 @@ export type { export type DB = ReturnType>; export type Specification = InferSelectModel; -export type Annotation = InferSelectModel; type PersistedTurn = InferSelectModel; export type Turn = Omit & { specification_id: number; @@ -1312,50 +1319,6 @@ export function getCapturedItemsForTurns( return capturedItemsByTurn; } -// --- Side-chat annotations (D133) --- - -export interface CreateAnnotationInput { - knowledgeItemId: number; - summary: string; - body: string; - selectionStart?: number | null; - selectionEnd?: number | null; -} - -export function createAnnotation(db: DB, specificationId: number, input: CreateAnnotationInput): Annotation { - return db - .insert(schema.annotation) - .values({ - specification_id: specificationId, - knowledge_item_id: input.knowledgeItemId, - summary: input.summary, - body: input.body, - selection_start: input.selectionStart ?? null, - selection_end: input.selectionEnd ?? 
null, - }) - .returning() - .get() as Annotation; -} - -export function getAnnotationsForSpecification(db: DB, specificationId: number): Annotation[] { - return db - .select() - .from(schema.annotation) - .where(eq(schema.annotation.specification_id, specificationId)) - .orderBy(schema.annotation.created_at, schema.annotation.id) - .all() as Annotation[]; -} - -export function getAnnotation(db: DB, annotationId: number): Annotation | undefined { - return db.select().from(schema.annotation).where(eq(schema.annotation.id, annotationId)).get() as - | Annotation - | undefined; -} - -export function deleteAnnotation(db: DB, annotationId: number): void { - db.delete(schema.annotation).where(eq(schema.annotation.id, annotationId)).run(); -} - // --- Edit-impact queries (Side-chat V2 / FE-673) --- export interface DownstreamItem { diff --git a/src/server/db/annotation-store.ts b/src/server/db/annotation-store.ts new file mode 100644 index 00000000..a11b120c --- /dev/null +++ b/src/server/db/annotation-store.ts @@ -0,0 +1,48 @@ +import { eq, type InferSelectModel } from 'drizzle-orm'; + +import type { DB } from '../db.js'; +import * as schema from '../schema.js'; + +export type Annotation = InferSelectModel; + +export interface CreateAnnotationInput { + knowledgeItemId: number; + summary: string; + body: string; + selectionStart?: number | null; + selectionEnd?: number | null; +} + +export function createAnnotation(db: DB, specificationId: number, input: CreateAnnotationInput): Annotation { + return db + .insert(schema.annotation) + .values({ + specification_id: specificationId, + knowledge_item_id: input.knowledgeItemId, + summary: input.summary, + body: input.body, + selection_start: input.selectionStart ?? null, + selection_end: input.selectionEnd ?? 
null, + }) + .returning() + .get() as Annotation; +} + +export function getAnnotationsForSpecification(db: DB, specificationId: number): Annotation[] { + return db + .select() + .from(schema.annotation) + .where(eq(schema.annotation.specification_id, specificationId)) + .orderBy(schema.annotation.created_at, schema.annotation.id) + .all() as Annotation[]; +} + +export function getAnnotation(db: DB, annotationId: number): Annotation | undefined { + return db.select().from(schema.annotation).where(eq(schema.annotation.id, annotationId)).get() as + | Annotation + | undefined; +} + +export function deleteAnnotation(db: DB, annotationId: number): void { + db.delete(schema.annotation).where(eq(schema.annotation.id, annotationId)).run(); +} From 4f4ba7531c68116dc2e52db8511ef3f17ce2a8c4 Mon Sep 17 00:00:00 2001 From: Lu Nelson Date: Wed, 13 May 2026 16:52:31 +0200 Subject: [PATCH 08/16] Extract edit impact store from db facade --- memory/CARDS.md | 4 +- src/server/db.ts | 69 ++---------------------------- src/server/db/edit-impact-store.ts | 67 +++++++++++++++++++++++++++++ 3 files changed, 72 insertions(+), 68 deletions(-) create mode 100644 src/server/db/edit-impact-store.ts diff --git a/memory/CARDS.md b/memory/CARDS.md index c583885f..189aa6bc 100644 --- a/memory/CARDS.md +++ b/memory/CARDS.md @@ -76,7 +76,7 @@ Status: done ## Card 3 — Edit-impact query extraction -Status: next +Status: done ### Target Behavior @@ -111,7 +111,7 @@ Status: next ## Card 4 — Intent graph mutation store extraction -Status: queued +Status: next ### Target Behavior diff --git a/src/server/db.ts b/src/server/db.ts index f531f843..4830bf18 100644 --- a/src/server/db.ts +++ b/src/server/db.ts @@ -62,6 +62,9 @@ export { } from './db/annotation-store.js'; export type { Annotation, CreateAnnotationInput } from './db/annotation-store.js'; +export { getDownstreamEdges, getDownstreamItems, isItemInActiveReviewSet } from './db/edit-impact-store.js'; +export type { DownstreamEdge, DownstreamItem } from 
'./db/edit-impact-store.js'; + export { claimReconciliationNeedForClassification, getCascadeRelationBetween, @@ -1319,72 +1322,6 @@ export function getCapturedItemsForTurns( return capturedItemsByTurn; } -// --- Edit-impact queries (Side-chat V2 / FE-673) --- - -export interface DownstreamItem { - id: number; - kind: string; - content: string; - kind_ordinal: number; -} - -/** Direct downstream items: items whose edges point TO the given item. */ -export function getDownstreamItems(db: DB, specificationId: number, itemId: number): DownstreamItem[] { - return db.all(sql` - SELECT ki.id, ki.kind, ki.content, ki.kind_ordinal - FROM knowledge_edge ke - JOIN knowledge_item ki ON ki.id = ke.from_item_id - WHERE ke.to_item_id = ${itemId} - AND ki.specification_id = ${specificationId} - ORDER BY ki.id - `) as DownstreamItem[]; -} - -export interface DownstreamEdge { - downstream_item_id: number; - relation: 'depends_on' | 'derived_from' | 'constrains' | 'verifies' | 'refines'; -} - -/** - * Like `getDownstreamItems` but preserves the edge relation alongside each - * downstream item id. V3.0 cascade enumeration uses this to map each downstream - * pair to a `reconciliation_need.kind`. The same (item_id, relation) tuple - * yields one row even if the same downstream item appears via multiple - * relations — the queue partial unique index dedupes by (source, target, kind). 
- */ -export function getDownstreamEdges(db: DB, specificationId: number, itemId: number): DownstreamEdge[] { - return db.all(sql` - SELECT ke.from_item_id AS downstream_item_id, ke.relation - FROM knowledge_edge ke - JOIN knowledge_item ki ON ki.id = ke.from_item_id - WHERE ke.to_item_id = ${itemId} - AND ki.specification_id = ${specificationId} - ORDER BY ke.from_item_id, ke.relation - `) as DownstreamEdge[]; -} - -/** - * An item is in an active review set if there is a `phase_outcome` with - * `status = 'proposed'` for requirements or criteria, AND the item has a - * `turn_knowledge_item` row linking it to that outcome's `proposal_turn_id` - * with relation `'reviewed'`. - */ -export function isItemInActiveReviewSet(db: DB, specificationId: number, itemId: number): boolean { - const rows = db.all(sql` - SELECT 1 - FROM phase_outcome po - JOIN turn_knowledge_item tki - ON tki.turn_id = po.proposal_turn_id - AND tki.item_id = ${itemId} - AND tki.relation = 'reviewed' - WHERE po.specification_id = ${specificationId} - AND po.status = 'proposed' - AND po.phase IN ('requirements', 'criteria') - LIMIT 1 - `); - return rows.length > 0; -} - export function updateKnowledgeItemContent( db: DB, itemId: number, diff --git a/src/server/db/edit-impact-store.ts b/src/server/db/edit-impact-store.ts new file mode 100644 index 00000000..f4c8f027 --- /dev/null +++ b/src/server/db/edit-impact-store.ts @@ -0,0 +1,67 @@ +import { sql } from 'drizzle-orm'; + +import type { DB } from '../db.js'; + +export interface DownstreamItem { + id: number; + kind: string; + content: string; + kind_ordinal: number; +} + +/** Direct downstream items: items whose edges point TO the given item. 
*/ +export function getDownstreamItems(db: DB, specificationId: number, itemId: number): DownstreamItem[] { + return db.all(sql` + SELECT ki.id, ki.kind, ki.content, ki.kind_ordinal + FROM knowledge_edge ke + JOIN knowledge_item ki ON ki.id = ke.from_item_id + WHERE ke.to_item_id = ${itemId} + AND ki.specification_id = ${specificationId} + ORDER BY ki.id + `) as DownstreamItem[]; +} + +export interface DownstreamEdge { + downstream_item_id: number; + relation: 'depends_on' | 'derived_from' | 'constrains' | 'verifies' | 'refines'; +} + +/** + * Like `getDownstreamItems` but preserves the edge relation alongside each + * downstream item id. V3.0 cascade enumeration uses this to map each downstream + * pair to a `reconciliation_need.kind`. The same (item_id, relation) tuple + * yields one row even if the same downstream item appears via multiple + * relations — the queue partial unique index dedupes by (source, target, kind). + */ +export function getDownstreamEdges(db: DB, specificationId: number, itemId: number): DownstreamEdge[] { + return db.all(sql` + SELECT ke.from_item_id AS downstream_item_id, ke.relation + FROM knowledge_edge ke + JOIN knowledge_item ki ON ki.id = ke.from_item_id + WHERE ke.to_item_id = ${itemId} + AND ki.specification_id = ${specificationId} + ORDER BY ke.from_item_id, ke.relation + `) as DownstreamEdge[]; +} + +/** + * An item is in an active review set if there is a `phase_outcome` with + * `status = 'proposed'` for requirements or criteria, AND the item has a + * `turn_knowledge_item` row linking it to that outcome's `proposal_turn_id` + * with relation `'reviewed'`. 
+ */ +export function isItemInActiveReviewSet(db: DB, specificationId: number, itemId: number): boolean { + const rows = db.all(sql` + SELECT 1 + FROM phase_outcome po + JOIN turn_knowledge_item tki + ON tki.turn_id = po.proposal_turn_id + AND tki.item_id = ${itemId} + AND tki.relation = 'reviewed' + WHERE po.specification_id = ${specificationId} + AND po.status = 'proposed' + AND po.phase IN ('requirements', 'criteria') + LIMIT 1 + `); + return rows.length > 0; +} From 11aa06463c8dce0189d2ebdc85d155a782e5f550 Mon Sep 17 00:00:00 2001 From: Lu Nelson Date: Wed, 13 May 2026 16:54:00 +0200 Subject: [PATCH 09/16] Extract intent graph mutation store --- memory/CARDS.md | 4 +- src/server/db.ts | 195 ++++------------------------ src/server/db/intent-graph-store.ts | 195 ++++++++++++++++++++++++++++ 3 files changed, 225 insertions(+), 169 deletions(-) create mode 100644 src/server/db/intent-graph-store.ts diff --git a/memory/CARDS.md b/memory/CARDS.md index 189aa6bc..778a5d77 100644 --- a/memory/CARDS.md +++ b/memory/CARDS.md @@ -111,7 +111,7 @@ Status: done ## Card 4 — Intent graph mutation store extraction -Status: next +Status: done ### Target Behavior @@ -147,7 +147,7 @@ Status: next ## Card 5 — Entity projection read-model extraction -Status: queued +Status: next ### Target Behavior diff --git a/src/server/db.ts b/src/server/db.ts index 4830bf18..eba4fb1a 100644 --- a/src/server/db.ts +++ b/src/server/db.ts @@ -10,9 +10,7 @@ const __dirname = dirname(fileURLToPath(import.meta.url)); const MIGRATIONS_FOLDER = join(__dirname, '..', '..', 'drizzle'); import type { - AssumptionEntity as SharedAssumption, CriterionEntity as SharedCriterionEntity, - DecisionEntity as SharedDecision, EntitiesData, EntityReference as SharedEntityReference, EntityRelationship as SharedEntityRelationship, @@ -65,6 +63,29 @@ export type { Annotation, CreateAnnotationInput } from './db/annotation-store.js export { getDownstreamEdges, getDownstreamItems, isItemInActiveReviewSet } from 
'./db/edit-impact-store.js'; export type { DownstreamEdge, DownstreamItem } from './db/edit-impact-store.js'; +import { + addKnowledgeRelationship, + createKnowledgeItem, + linkKnowledgeItemToTurn, +} from './db/intent-graph-store.js'; +import type { Assumption, Decision, KnowledgeItem, KnowledgeKind } from './db/intent-graph-store.js'; +export { + addAssumptionParentAssumption, + addDecisionParentAssumption, + addDecisionParentDecision, + addKnowledgeRelationship, + createAssumption, + createDecision, + createKnowledgeItem, + getKnowledgeItem, + linkAssumptionToTurn, + linkDecisionToTurn, + linkKnowledgeItemToTurn, + removeKnowledgeRelationship, + updateKnowledgeItemContent, +} from './db/intent-graph-store.js'; +export type { Assumption, Decision, KnowledgeItem, KnowledgeKind } from './db/intent-graph-store.js'; + export { claimReconciliationNeedForClassification, getCascadeRelationBetween, @@ -646,15 +667,7 @@ export function updateSpecificationMode(db: DB, specificationId: number, mode: S // --- Entity persistence (generic knowledge items + compatibility projections) --- -type PersistedKnowledgeItem = InferSelectModel; -export type KnowledgeItem = Omit & { - specification_id: number; -}; -export type KnowledgeKind = Extract; export type EntityCollection = KnowledgeEntityCollection; - -export type Decision = SharedDecision & { specification_id: number }; -export type Assumption = SharedAssumption & { specification_id: number }; export type EntityReference = SharedEntityReference; export type EntityRelationship = SharedEntityRelationship; export type RequirementEntity = SharedRequirementEntity & { kind_ordinal: number }; @@ -664,15 +677,12 @@ type GenericKnowledgeEntity = K extends 'require : K extends 'criterion' ? CriterionEntity : KnowledgeItem & { kind: K }; -type ProjectedKnowledgeEntity = K extends 'decision' - ? 
Decision & { kind_ordinal: number } - : Assumption & { kind_ordinal: number }; export type EntitiesForSpecification = EntitiesData; function projectKnowledgeItemEntity( item: KnowledgeItem, kind: K, -): ProjectedKnowledgeEntity { +): K extends 'decision' ? Decision & { kind_ordinal: number } : Assumption & { kind_ordinal: number } { const base = { id: item.id, specification_id: item.specification_id, @@ -684,129 +694,12 @@ function projectKnowledgeItemEntity( return { ...base, rationale: item.rationale, - } as unknown as ProjectedKnowledgeEntity; + } as K extends 'decision' ? Decision & { kind_ordinal: number } : Assumption & { kind_ordinal: number }; } - return base as unknown as ProjectedKnowledgeEntity; -} - -export function createDecision( - db: DB, - specificationId: number, - content: string, - rationale?: string | null, -): Decision { - return projectKnowledgeItemEntity( - db - .insert(schema.knowledgeItem) - .values({ - specification_id: specificationId, - kind: 'decision', - subtype: null, - content, - rationale: rationale ?? 
null, - kind_ordinal: sql`(SELECT COALESCE(MAX(kind_ordinal), 0) + 1 FROM knowledge_item WHERE specification_id = ${specificationId} AND kind = 'decision')`, - }) - .returning() - .get() as KnowledgeItem, - 'decision', - ); -} - -export function createAssumption(db: DB, specificationId: number, content: string): Assumption { - return projectKnowledgeItemEntity( - db - .insert(schema.knowledgeItem) - .values({ - specification_id: specificationId, - kind: 'assumption', - subtype: null, - content, - rationale: null, - kind_ordinal: sql`(SELECT COALESCE(MAX(kind_ordinal), 0) + 1 FROM knowledge_item WHERE specification_id = ${specificationId} AND kind = 'assumption')`, - }) - .returning() - .get() as KnowledgeItem, - 'assumption', - ); -} - -export function linkDecisionToTurn(db: DB, decisionId: number, turnId: number): void { - linkKnowledgeItemToTurn(db, decisionId, turnId); -} - -export function linkAssumptionToTurn(db: DB, assumptionId: number, turnId: number): void { - linkKnowledgeItemToTurn(db, assumptionId, turnId); -} - -export function createKnowledgeItem( - db: DB, - specificationId: number, - kind: KnowledgeKind, - content: string, - options?: { subtype?: string | null; rationale?: string | null }, -): KnowledgeItem { - return db - .insert(schema.knowledgeItem) - .values({ - specification_id: specificationId, - kind, - subtype: options?.subtype ?? null, - content, - rationale: options?.rationale ?? 
null, - kind_ordinal: sql`(SELECT COALESCE(MAX(kind_ordinal), 0) + 1 FROM knowledge_item WHERE specification_id = ${specificationId} AND kind = ${kind})`, - }) - .returning() - .get() as KnowledgeItem; -} - -export function getKnowledgeItem(db: DB, itemId: number): KnowledgeItem | undefined { - return db.select().from(schema.knowledgeItem).where(eq(schema.knowledgeItem.id, itemId)).get() as - | KnowledgeItem - | undefined; -} - -export function linkKnowledgeItemToTurn( - db: DB, - itemId: number, - turnId: number, - relation: InferSelectModel['relation'] = 'captured', -): void { - db.insert(schema.turnKnowledgeItem) - .values({ turn_id: turnId, item_id: itemId, relation }) - .onConflictDoNothing() - .run(); -} - -export function addKnowledgeRelationship( - db: DB, - fromItemId: number, - toItemId: number, - relation: InferSelectModel['relation'], -): boolean { - const inserted = db - .insert(schema.knowledgeEdge) - .values({ from_item_id: fromItemId, to_item_id: toItemId, relation }) - .onConflictDoNothing() - .returning({ fromItemId: schema.knowledgeEdge.from_item_id }) - .get(); - return inserted !== undefined; -} - -export function addDecisionParentDecision(db: DB, decisionId: number, parentDecisionId: number): void { - addKnowledgeRelationship(db, decisionId, parentDecisionId, 'depends_on'); -} - -export function addDecisionParentAssumption(db: DB, decisionId: number, parentAssumptionId: number): void { - addKnowledgeRelationship(db, decisionId, parentAssumptionId, 'depends_on'); -} - -export function addAssumptionParentAssumption( - db: DB, - assumptionId: number, - parentAssumptionId: number, -): void { - addKnowledgeRelationship(db, assumptionId, parentAssumptionId, 'depends_on'); + return base as K extends 'decision' + ? 
Decision & { kind_ordinal: number } + : Assumption & { kind_ordinal: number }; } function getKnowledgeItemsForSpecificationByKind( @@ -1321,35 +1214,3 @@ export function getCapturedItemsForTurns( return capturedItemsByTurn; } - -export function updateKnowledgeItemContent( - db: DB, - itemId: number, - updates: { content?: string; rationale?: string | null }, -): void { - const values: Record = {}; - if (updates.content !== undefined) values.content = updates.content; - if (updates.rationale !== undefined) values.rationale = updates.rationale; - if (Object.keys(values).length === 0) return; - db.update(schema.knowledgeItem).set(values).where(eq(schema.knowledgeItem.id, itemId)).run(); -} - -export function removeKnowledgeRelationship( - db: DB, - fromItemId: number, - toItemId: number, - relation: InferSelectModel['relation'], -): boolean { - const deleted = db - .delete(schema.knowledgeEdge) - .where( - and( - eq(schema.knowledgeEdge.from_item_id, fromItemId), - eq(schema.knowledgeEdge.to_item_id, toItemId), - eq(schema.knowledgeEdge.relation, relation), - ), - ) - .returning({ fromItemId: schema.knowledgeEdge.from_item_id }) - .get(); - return deleted !== undefined; -} diff --git a/src/server/db/intent-graph-store.ts b/src/server/db/intent-graph-store.ts new file mode 100644 index 00000000..282f9936 --- /dev/null +++ b/src/server/db/intent-graph-store.ts @@ -0,0 +1,195 @@ +import { and, eq, sql, type InferSelectModel } from 'drizzle-orm'; + +import type { + AssumptionEntity as SharedAssumption, + DecisionEntity as SharedDecision, +} from '@/shared/api-types.js'; +import type { KnowledgeKind as SharedKnowledgeKind } from '@/shared/knowledge.js'; + +import type { DB } from '../db.js'; +import * as schema from '../schema.js'; + +type PersistedKnowledgeItem = InferSelectModel; +export type KnowledgeItem = Omit & { + specification_id: number; +}; +export type KnowledgeKind = Extract; + +export type Decision = SharedDecision & { specification_id: number }; +export type 
Assumption = SharedAssumption & { specification_id: number }; + +type ProjectedKnowledgeEntity = K extends 'decision' + ? Decision & { kind_ordinal: number } + : Assumption & { kind_ordinal: number }; + +function projectKnowledgeItemEntity( + item: KnowledgeItem, + kind: K, +): ProjectedKnowledgeEntity { + const base = { + id: item.id, + specification_id: item.specification_id, + content: item.content, + kind_ordinal: item.kind_ordinal, + }; + + if (kind === 'decision') { + return { + ...base, + rationale: item.rationale, + } as unknown as ProjectedKnowledgeEntity; + } + + return base as unknown as ProjectedKnowledgeEntity; +} + +export function createDecision( + db: DB, + specificationId: number, + content: string, + rationale?: string | null, +): Decision { + return projectKnowledgeItemEntity( + db + .insert(schema.knowledgeItem) + .values({ + specification_id: specificationId, + kind: 'decision', + subtype: null, + content, + rationale: rationale ?? null, + kind_ordinal: sql`(SELECT COALESCE(MAX(kind_ordinal), 0) + 1 FROM knowledge_item WHERE specification_id = ${specificationId} AND kind = 'decision')`, + }) + .returning() + .get() as KnowledgeItem, + 'decision', + ); +} + +export function createAssumption(db: DB, specificationId: number, content: string): Assumption { + return projectKnowledgeItemEntity( + db + .insert(schema.knowledgeItem) + .values({ + specification_id: specificationId, + kind: 'assumption', + subtype: null, + content, + rationale: null, + kind_ordinal: sql`(SELECT COALESCE(MAX(kind_ordinal), 0) + 1 FROM knowledge_item WHERE specification_id = ${specificationId} AND kind = 'assumption')`, + }) + .returning() + .get() as KnowledgeItem, + 'assumption', + ); +} + +export function linkDecisionToTurn(db: DB, decisionId: number, turnId: number): void { + linkKnowledgeItemToTurn(db, decisionId, turnId); +} + +export function linkAssumptionToTurn(db: DB, assumptionId: number, turnId: number): void { + linkKnowledgeItemToTurn(db, assumptionId, 
turnId); +} + +export function createKnowledgeItem( + db: DB, + specificationId: number, + kind: KnowledgeKind, + content: string, + options?: { subtype?: string | null; rationale?: string | null }, +): KnowledgeItem { + return db + .insert(schema.knowledgeItem) + .values({ + specification_id: specificationId, + kind, + subtype: options?.subtype ?? null, + content, + rationale: options?.rationale ?? null, + kind_ordinal: sql`(SELECT COALESCE(MAX(kind_ordinal), 0) + 1 FROM knowledge_item WHERE specification_id = ${specificationId} AND kind = ${kind})`, + }) + .returning() + .get() as KnowledgeItem; +} + +export function getKnowledgeItem(db: DB, itemId: number): KnowledgeItem | undefined { + return db.select().from(schema.knowledgeItem).where(eq(schema.knowledgeItem.id, itemId)).get() as + | KnowledgeItem + | undefined; +} + +export function linkKnowledgeItemToTurn( + db: DB, + itemId: number, + turnId: number, + relation: InferSelectModel['relation'] = 'captured', +): void { + db.insert(schema.turnKnowledgeItem) + .values({ turn_id: turnId, item_id: itemId, relation }) + .onConflictDoNothing() + .run(); +} + +export function addKnowledgeRelationship( + db: DB, + fromItemId: number, + toItemId: number, + relation: InferSelectModel['relation'], +): boolean { + const inserted = db + .insert(schema.knowledgeEdge) + .values({ from_item_id: fromItemId, to_item_id: toItemId, relation }) + .onConflictDoNothing() + .returning({ fromItemId: schema.knowledgeEdge.from_item_id }) + .get(); + return inserted !== undefined; +} + +export function addDecisionParentDecision(db: DB, decisionId: number, parentDecisionId: number): void { + addKnowledgeRelationship(db, decisionId, parentDecisionId, 'depends_on'); +} + +export function addDecisionParentAssumption(db: DB, decisionId: number, parentAssumptionId: number): void { + addKnowledgeRelationship(db, decisionId, parentAssumptionId, 'depends_on'); +} + +export function addAssumptionParentAssumption( + db: DB, + assumptionId: number, 
+ parentAssumptionId: number, +): void { + addKnowledgeRelationship(db, assumptionId, parentAssumptionId, 'depends_on'); +} + +export function updateKnowledgeItemContent( + db: DB, + itemId: number, + updates: { content?: string; rationale?: string | null }, +): void { + const values: Record = {}; + if (updates.content !== undefined) values.content = updates.content; + if (updates.rationale !== undefined) values.rationale = updates.rationale; + if (Object.keys(values).length === 0) return; + db.update(schema.knowledgeItem).set(values).where(eq(schema.knowledgeItem.id, itemId)).run(); +} + +export function removeKnowledgeRelationship( + db: DB, + fromItemId: number, + toItemId: number, + relation: InferSelectModel['relation'], +): boolean { + const deleted = db + .delete(schema.knowledgeEdge) + .where( + and( + eq(schema.knowledgeEdge.from_item_id, fromItemId), + eq(schema.knowledgeEdge.to_item_id, toItemId), + eq(schema.knowledgeEdge.relation, relation), + ), + ) + .returning({ fromItemId: schema.knowledgeEdge.from_item_id }) + .get(); + return deleted !== undefined; +} From 46b06f5c711e40ae25e6305d6689d40f4ff39c75 Mon Sep 17 00:00:00 2001 From: Lu Nelson Date: Wed, 13 May 2026 16:56:48 +0200 Subject: [PATCH 10/16] Extract review materialization store --- memory/CARDS.md | 14 +- src/server/db.ts | 195 +--------------- src/server/db/review-materialization-store.ts | 217 ++++++++++++++++++ 3 files changed, 230 insertions(+), 196 deletions(-) create mode 100644 src/server/db/review-materialization-store.ts diff --git a/memory/CARDS.md b/memory/CARDS.md index 778a5d77..2cb17eb9 100644 --- a/memory/CARDS.md +++ b/memory/CARDS.md @@ -145,13 +145,13 @@ Status: done - Inner: focused tests — `npm run test -- observer edit-route db`. - Gate: `npm run check`; full `npm run verify` when unrelated suite failures are resolved or acknowledged. 
-## Card 5 — Entity projection read-model extraction +## Card 5 — Review materialization store extraction -Status: next +Status: done ### Target Behavior -`db.ts` remains the public persistence import surface while entity projection/read-model implementation lives in a private `src/server/db/entity-projection-store.ts` module. +`db.ts` remains the public persistence import surface while accepted requirements/criteria review materialization lives in a private `src/server/db/review-materialization-store.ts` module. ### Boundary Crossings @@ -165,7 +165,7 @@ Status: next ### Risks and Assumptions -- RISK: active-path filtering depends on turn lineage helpers currently local to db.ts → MITIGATION: either pass the small lineage primitive into the projection store or defer this card until chat/turn store extraction; do not duplicate lineage logic. +- RISK: active-path filtering and accepted-review visibility depend on workflow/turn helpers currently local to db.ts → MITIGATION: implement read-side SQL locally in the projection store for now; do not route through db.ts and create a cycle. - RISK: product lexicon says intent graph, while implementation still says knowledge → MITIGATION: prefer intent/entity naming for new private helpers where possible, while preserving public compatibility exports. - ASSUMPTION: read-model projection is separable from mutation helpers after Card 4 → VALIDATE: no circular import between intent graph mutation store and projection store. @@ -182,9 +182,9 @@ Status: next - Inner: focused tests — `npm run test -- db observer context export app`. - Gate: `npm run check`; full `npm run verify` when unrelated suite failures are resolved or acknowledged. 
-## Card 6 — Review materialization store extraction +## Card 6 — Entity projection read-model extraction -Status: queued +Status: next ### Target Behavior @@ -202,7 +202,7 @@ Status: queued ### Risks and Assumptions -- RISK: review materialization shares too many helpers with entity projection and intent graph mutation stores → MITIGATION: run this after Cards 4–5 and import only private store helpers if a real shared seam exists; otherwise keep duplicated SQL localized rather than creating a premature common utility. +- RISK: review materialization shares helper concepts with entity projection → MITIGATION: extract materialization first as a write-side seam; allow small local reference-code lookup duplication until the read model is extracted. - ASSUMPTION: accepted review materialization is a cohesive write-side seam distinct from generic intent graph mutation → VALIDATE: requirements/criteria review tests pass unchanged. ### Acceptance Criteria diff --git a/src/server/db.ts b/src/server/db.ts index eba4fb1a..67ab966e 100644 --- a/src/server/db.ts +++ b/src/server/db.ts @@ -14,7 +14,6 @@ import type { EntitiesData, EntityReference as SharedEntityReference, EntityRelationship as SharedEntityRelationship, - EdgeRelation, SpecificationMode, SpecificationStateTurn, ReadinessBand, @@ -24,7 +23,6 @@ import type { WorkflowPhaseStatus, WorkflowState as SharedWorkflowState, } from '@/shared/api-types.js'; -import { reviewSetSchema, type BrunchAssistantPart, type ReviewSetData } from '@/shared/chat.js'; import { createKnowledgeReferenceCode, genericKnowledgeKindRegistry, @@ -40,15 +38,8 @@ import { workflowPhaseOrder, type PhaseClosureBasis, } from '@/shared/phase-close.js'; -import { normalizeReviewSetForDisplay } from '@/shared/review-diffing.js'; -import { getPersistedReviewAction } from '@/shared/specification-state.js'; -import { supportsKnowledgeRelationship } from './knowledge-relationship-policy.js'; -import { - safeDeserializeAssistantParts, - 
safeDeserializeUserParts, - type DataConfirmationPart, -} from './parts.js'; +import { safeDeserializeUserParts, type DataConfirmationPart } from './parts.js'; import * as schema from './schema.js'; import { projectWorkflowState, type WorkflowProjectionSnapshot } from './workflow-projector.js'; @@ -86,6 +77,11 @@ export { } from './db/intent-graph-store.js'; export type { Assumption, Decision, KnowledgeItem, KnowledgeKind } from './db/intent-graph-store.js'; +export { + materializeAcceptedCriteriaReviewSet, + materializeAcceptedRequirementsReviewSet, +} from './db/review-materialization-store.js'; + export { claimReconciliationNeedForClassification, getCascadeRelationBetween, @@ -716,23 +712,6 @@ function getKnowledgeItemsForSpecificationByKind( .all() as KnowledgeItem[]; } -function findKnowledgeItemByReferenceCode( - db: DB, - specificationId: number, - referenceCode: string, -): KnowledgeItem | undefined { - for (const entry of knowledgeKindRegistry) { - const item = getKnowledgeItemsForSpecificationByKind(db, specificationId, entry.kind).find( - (candidate) => createKnowledgeReferenceCode(candidate.kind, candidate.kind_ordinal) === referenceCode, - ); - if (item) { - return item; - } - } - - return undefined; -} - function withReferenceCodes( items: readonly T[], ): Array { @@ -757,152 +736,6 @@ function getGenericKnowledgeEntitiesForSpecificationByKind>; } -function getPersistedReviewSetForTurn(turn: Pick | undefined): ReviewSetData | null { - const persistedReviewSet = safeDeserializeAssistantParts(turn?.assistant_parts).find( - (part): part is Extract => - part.type === 'data-review-set', - ); - if (!persistedReviewSet) { - return null; - } - - const parsedReviewSet = reviewSetSchema.safeParse(persistedReviewSet.data); - return parsedReviewSet.success ? 
parsedReviewSet.data : null; -} - -function findExistingKnowledgeItemForReviewSetItem( - db: DB, - specificationId: number, - kind: 'requirement' | 'criterion', - content: string, -): KnowledgeItem | undefined { - return db - .select() - .from(schema.knowledgeItem) - .where( - and( - eq(schema.knowledgeItem.specification_id, specificationId), - eq(schema.knowledgeItem.kind, kind), - eq(schema.knowledgeItem.content, content), - ), - ) - .orderBy(schema.knowledgeItem.id) - .get() as KnowledgeItem | undefined; -} - -function getTurnLineageToRoot(db: DB, turnId: number): Turn[] { - const lineage: Turn[] = []; - let currentTurn = getTurn(db, turnId); - - while (currentTurn) { - lineage.push(currentTurn); - currentTurn = currentTurn.parent_turn_id ? getTurn(db, currentTurn.parent_turn_id) : undefined; - } - - return lineage.reverse(); -} - -function getEffectiveAcceptedReviewSetForTurn( - db: DB, - turnId: number, - phase: 'requirements' | 'criteria', -): ReviewSetData | null { - let normalizedReviewSet: ReviewSetData | null = null; - - for (const turn of getTurnLineageToRoot(db, turnId)) { - if (turn.phase !== phase) { - continue; - } - - const reviewSet = getPersistedReviewSetForTurn(turn); - if (!reviewSet || reviewSet.phase !== phase) { - continue; - } - - if (turn.id !== turnId && !getPersistedReviewAction(turn)) { - continue; - } - - normalizedReviewSet = normalizedReviewSet - ? normalizeReviewSetForDisplay(reviewSet, normalizedReviewSet) - : reviewSet; - - if (turn.id === turnId) { - return normalizedReviewSet; - } - } - - return normalizedReviewSet; -} - -function persistReviewSetGroundingRelationships({ - db, - specificationId, - phase, - sourceItem, - grounding, -}: { - db: DB; - specificationId: number; - phase: 'requirements' | 'criteria'; - sourceItem: KnowledgeItem; - grounding: ReviewSetData['items'][number]['grounding']; -}): void { - for (const ref of grounding ?? 
[]) { - const targetItem = findKnowledgeItemByReferenceCode(db, specificationId, ref.code); - const relation: EdgeRelation = - phase === 'criteria' && targetItem?.kind === 'requirement' ? 'verifies' : 'derived_from'; - - if ( - !targetItem || - sourceItem.id === targetItem.id || - sourceItem.specification_id !== targetItem.specification_id || - !supportsKnowledgeRelationship(relation, sourceItem.kind, targetItem.kind) - ) { - continue; - } - - addKnowledgeRelationship(db, sourceItem.id, targetItem.id, relation); - } -} - -function materializeAcceptedReviewSetItems( - db: DB, - specificationId: number, - turnId: number, - phase: 'requirements' | 'criteria', -): number[] { - const reviewSet = getEffectiveAcceptedReviewSetForTurn(db, turnId, phase); - if (!reviewSet || reviewSet.phase !== phase) { - throw new Error( - `Cannot materialize accepted ${phase} review: persisted review set is missing or mismatched on turn ${turnId}`, - ); - } - - const kind = phase === 'requirements' ? 'requirement' : 'criterion'; - const itemIds: number[] = []; - - for (const item of reviewSet.items) { - const existingItem = findExistingKnowledgeItemForReviewSetItem(db, specificationId, kind, item.content); - const materializedItem = - existingItem ?? - createKnowledgeItem(db, specificationId, kind, item.content, { - rationale: item.rationale ?? 
null, - }); - linkKnowledgeItemToTurn(db, materializedItem.id, turnId, 'reviewed'); - persistReviewSetGroundingRelationships({ - db, - specificationId, - phase, - sourceItem: materializedItem, - grounding: item.grounding, - }); - itemIds.push(materializedItem.id); - } - - return itemIds; -} - export function getAcceptedRequirementEntitiesForSpecification( db: DB, specificationId: number, @@ -931,22 +764,6 @@ export function getAcceptedCriterionEntitiesForSpecification( ); } -export function materializeAcceptedRequirementsReviewSet( - db: DB, - specificationId: number, - turnId: number, -): number[] { - return materializeAcceptedReviewSetItems(db, specificationId, turnId, 'requirements'); -} - -export function materializeAcceptedCriteriaReviewSet( - db: DB, - specificationId: number, - turnId: number, -): number[] { - return materializeAcceptedReviewSetItems(db, specificationId, turnId, 'criteria'); -} - export function getGroundingBundleForSpecification(db: DB, specificationId: number) { return { goals: getKnowledgeItemsForSpecificationByKind(db, specificationId, 'goal'), diff --git a/src/server/db/review-materialization-store.ts b/src/server/db/review-materialization-store.ts new file mode 100644 index 00000000..db85333d --- /dev/null +++ b/src/server/db/review-materialization-store.ts @@ -0,0 +1,217 @@ +import { and, eq, type InferSelectModel } from 'drizzle-orm'; + +import type { EdgeRelation } from '@/shared/api-types.js'; +import { reviewSetSchema, type BrunchAssistantPart, type ReviewSetData } from '@/shared/chat.js'; +import { createKnowledgeReferenceCode, knowledgeKindRegistry } from '@/shared/knowledge.js'; +import { normalizeReviewSetForDisplay } from '@/shared/review-diffing.js'; +import { getPersistedReviewAction } from '@/shared/specification-state.js'; + +import type { DB } from '../db.js'; +import { supportsKnowledgeRelationship } from '../knowledge-relationship-policy.js'; +import { safeDeserializeAssistantParts } from '../parts.js'; +import * as 
schema from '../schema.js'; +import { + addKnowledgeRelationship, + createKnowledgeItem, + linkKnowledgeItemToTurn, + type KnowledgeItem, +} from './intent-graph-store.js'; + +type Turn = InferSelectModel; + +function getTurn(db: DB, turnId: number): Turn | undefined { + return db.select().from(schema.turn).where(eq(schema.turn.id, turnId)).get() as Turn | undefined; +} + +function getKnowledgeItemsForSpecificationByKind( + db: DB, + specificationId: number, + kind: (typeof knowledgeKindRegistry)[number]['kind'], +): KnowledgeItem[] { + return db + .select() + .from(schema.knowledgeItem) + .where( + and(eq(schema.knowledgeItem.specification_id, specificationId), eq(schema.knowledgeItem.kind, kind)), + ) + .all() as KnowledgeItem[]; +} + +function findKnowledgeItemByReferenceCode( + db: DB, + specificationId: number, + referenceCode: string, +): KnowledgeItem | undefined { + for (const entry of knowledgeKindRegistry) { + const item = getKnowledgeItemsForSpecificationByKind(db, specificationId, entry.kind).find( + (candidate) => createKnowledgeReferenceCode(candidate.kind, candidate.kind_ordinal) === referenceCode, + ); + if (item) { + return item; + } + } + + return undefined; +} + +function getPersistedReviewSetForTurn(turn: Pick | undefined): ReviewSetData | null { + const persistedReviewSet = safeDeserializeAssistantParts(turn?.assistant_parts).find( + (part): part is Extract => + part.type === 'data-review-set', + ); + if (!persistedReviewSet) { + return null; + } + + const parsedReviewSet = reviewSetSchema.safeParse(persistedReviewSet.data); + return parsedReviewSet.success ? 
parsedReviewSet.data : null; +} + +function findExistingKnowledgeItemForReviewSetItem( + db: DB, + specificationId: number, + kind: 'requirement' | 'criterion', + content: string, +): KnowledgeItem | undefined { + return db + .select() + .from(schema.knowledgeItem) + .where( + and( + eq(schema.knowledgeItem.specification_id, specificationId), + eq(schema.knowledgeItem.kind, kind), + eq(schema.knowledgeItem.content, content), + ), + ) + .orderBy(schema.knowledgeItem.id) + .get() as KnowledgeItem | undefined; +} + +function getTurnLineageToRoot(db: DB, turnId: number): Turn[] { + const lineage: Turn[] = []; + let currentTurn = getTurn(db, turnId); + + while (currentTurn) { + lineage.push(currentTurn); + currentTurn = currentTurn.parent_turn_id ? getTurn(db, currentTurn.parent_turn_id) : undefined; + } + + return lineage.reverse(); +} + +function getEffectiveAcceptedReviewSetForTurn( + db: DB, + turnId: number, + phase: 'requirements' | 'criteria', +): ReviewSetData | null { + let normalizedReviewSet: ReviewSetData | null = null; + + for (const turn of getTurnLineageToRoot(db, turnId)) { + if (turn.phase !== phase) { + continue; + } + + const reviewSet = getPersistedReviewSetForTurn(turn); + if (!reviewSet || reviewSet.phase !== phase) { + continue; + } + + if (turn.id !== turnId && !getPersistedReviewAction(turn)) { + continue; + } + + normalizedReviewSet = normalizedReviewSet + ? normalizeReviewSetForDisplay(reviewSet, normalizedReviewSet) + : reviewSet; + + if (turn.id === turnId) { + return normalizedReviewSet; + } + } + + return normalizedReviewSet; +} + +function persistReviewSetGroundingRelationships({ + db, + specificationId, + phase, + sourceItem, + grounding, +}: { + db: DB; + specificationId: number; + phase: 'requirements' | 'criteria'; + sourceItem: KnowledgeItem; + grounding: ReviewSetData['items'][number]['grounding']; +}): void { + for (const ref of grounding ?? 
[]) { + const targetItem = findKnowledgeItemByReferenceCode(db, specificationId, ref.code); + const relation: EdgeRelation = + phase === 'criteria' && targetItem?.kind === 'requirement' ? 'verifies' : 'derived_from'; + + if ( + !targetItem || + sourceItem.id === targetItem.id || + sourceItem.specification_id !== targetItem.specification_id || + !supportsKnowledgeRelationship(relation, sourceItem.kind, targetItem.kind) + ) { + continue; + } + + addKnowledgeRelationship(db, sourceItem.id, targetItem.id, relation); + } +} + +function materializeAcceptedReviewSetItems( + db: DB, + specificationId: number, + turnId: number, + phase: 'requirements' | 'criteria', +): number[] { + const reviewSet = getEffectiveAcceptedReviewSetForTurn(db, turnId, phase); + if (!reviewSet || reviewSet.phase !== phase) { + throw new Error( + `Cannot materialize accepted ${phase} review: persisted review set is missing or mismatched on turn ${turnId}`, + ); + } + + const kind = phase === 'requirements' ? 'requirement' : 'criterion'; + const itemIds: number[] = []; + + for (const item of reviewSet.items) { + const existingItem = findExistingKnowledgeItemForReviewSetItem(db, specificationId, kind, item.content); + const materializedItem = + existingItem ?? + createKnowledgeItem(db, specificationId, kind, item.content, { + rationale: item.rationale ?? 
null, + }); + linkKnowledgeItemToTurn(db, materializedItem.id, turnId, 'reviewed'); + persistReviewSetGroundingRelationships({ + db, + specificationId, + phase, + sourceItem: materializedItem, + grounding: item.grounding, + }); + itemIds.push(materializedItem.id); + } + + return itemIds; +} + +export function materializeAcceptedRequirementsReviewSet( + db: DB, + specificationId: number, + turnId: number, +): number[] { + return materializeAcceptedReviewSetItems(db, specificationId, turnId, 'requirements'); +} + +export function materializeAcceptedCriteriaReviewSet( + db: DB, + specificationId: number, + turnId: number, +): number[] { + return materializeAcceptedReviewSetItems(db, specificationId, turnId, 'criteria'); +} From f5aef58dcd68f7705d42fbb7400b0c89def5117d Mon Sep 17 00:00:00 2001 From: Lu Nelson Date: Wed, 13 May 2026 16:57:55 +0200 Subject: [PATCH 11/16] Extract entity projection store --- memory/CARDS.md | 4 +- src/server/db.ts | 468 ++-------------------- src/server/db/entity-projection-store.ts | 484 +++++++++++++++++++++++ 3 files changed, 508 insertions(+), 448 deletions(-) create mode 100644 src/server/db/entity-projection-store.ts diff --git a/memory/CARDS.md b/memory/CARDS.md index 2cb17eb9..afb5cce0 100644 --- a/memory/CARDS.md +++ b/memory/CARDS.md @@ -184,7 +184,7 @@ Status: done ## Card 6 — Entity projection read-model extraction -Status: next +Status: done ### Target Behavior @@ -220,7 +220,7 @@ Status: next ## Card 7 — Workflow and phase outcome store extraction -Status: queued +Status: next ### Target Behavior diff --git a/src/server/db.ts b/src/server/db.ts index 67ab966e..1db581a3 100644 --- a/src/server/db.ts +++ b/src/server/db.ts @@ -10,29 +10,14 @@ const __dirname = dirname(fileURLToPath(import.meta.url)); const MIGRATIONS_FOLDER = join(__dirname, '..', '..', 'drizzle'); import type { - CriterionEntity as SharedCriterionEntity, - EntitiesData, - EntityReference as SharedEntityReference, - EntityRelationship as 
SharedEntityRelationship, SpecificationMode, - SpecificationStateTurn, ReadinessBand, TurnKind, - RequirementEntity as SharedRequirementEntity, WorkflowPhaseState as SharedWorkflowPhaseState, WorkflowPhaseStatus, WorkflowState as SharedWorkflowState, } from '@/shared/api-types.js'; -import { - createKnowledgeReferenceCode, - genericKnowledgeKindRegistry, - knowledgeEntityCollectionByKind, - knowledgeKindRegistry, - type GenericKnowledgeCollectionKey, - type GenericKnowledgeKind, - type KnowledgeEntityCollection, - type KnowledgeKind as SharedKnowledgeKind, -} from '@/shared/knowledge.js'; +import {} from '@/shared/knowledge.js'; import { parsePhaseClosureCommand, workflowPhaseOrder, @@ -82,6 +67,27 @@ export { materializeAcceptedRequirementsReviewSet, } from './db/review-materialization-store.js'; +import { countAcceptedKnowledgeItemsForPhase } from './db/entity-projection-store.js'; +export { + getAcceptedCriterionEntitiesForSpecification, + getAcceptedKnowledgeItemIdsForPhase, + getAcceptedRequirementEntitiesForSpecification, + getCapturedItemsForTurns, + getEntitiesForSpecification, + getEntitiesForSpecificationByMode, + getEntitiesForSpecificationOnActivePath, + getGroundingBundleForSpecification, +} from './db/entity-projection-store.js'; +export type { + CriterionEntity, + EntitiesForSpecification, + EntityCollection, + EntityProjectionMode, + EntityReference, + EntityRelationship, + RequirementEntity, +} from './db/entity-projection-store.js'; + export { claimReconciliationNeedForClassification, getCascadeRelationBetween, @@ -487,65 +493,6 @@ function getClosureBasisForOutcome(outcome: PhaseOutcome | undefined): ClosureBa return outcome.closure_basis ?? 
null; } -function findConfirmedPhaseOutcomeOnActivePath( - db: DB, - specificationId: number, - phase: Phase, -): PhaseOutcome | undefined { - const activeTurnIds = new Set(getActivePath(db, specificationId).map((turn) => turn.id)); - if (activeTurnIds.size === 0) { - return undefined; - } - - return listPhaseOutcomesForSpecification(db, specificationId).find( - (outcome) => - outcome.phase === phase && - outcome.status === 'confirmed' && - activeTurnIds.has(outcome.proposal_turn_id), - ); -} - -function getAcceptedKnowledgeItemIdsForPhase( - db: DB, - specificationId: number, - phase: 'requirements' | 'criteria', - kind: 'requirement' | 'criterion', -): Set { - const confirmationTurnId = findConfirmedPhaseOutcomeOnActivePath( - db, - specificationId, - phase, - )?.confirmation_turn_id; - if (!confirmationTurnId) { - return new Set(); - } - - const rows = db - .select({ itemId: schema.turnKnowledgeItem.item_id }) - .from(schema.turnKnowledgeItem) - .innerJoin(schema.knowledgeItem, eq(schema.knowledgeItem.id, schema.turnKnowledgeItem.item_id)) - .where( - and( - eq(schema.knowledgeItem.specification_id, specificationId), - eq(schema.knowledgeItem.kind, kind), - eq(schema.turnKnowledgeItem.turn_id, confirmationTurnId), - eq(schema.turnKnowledgeItem.relation, 'reviewed'), - ), - ) - .all() as Array<{ itemId: number }>; - - return new Set(rows.map((row) => row.itemId)); -} - -function countAcceptedKnowledgeItemsForPhase( - db: DB, - specificationId: number, - phase: 'requirements' | 'criteria', - kind: 'requirement' | 'criterion', -): number { - return getAcceptedKnowledgeItemIdsForPhase(db, specificationId, phase, kind).size; -} - export function readWorkflowProjectionSnapshot(db: DB, specificationId: number): WorkflowProjectionSnapshot { const activePath = getActivePath(db, specificationId); const activeTurnIds = new Set(activePath.map((turn) => turn.id)); @@ -660,374 +607,3 @@ export function updateSpecificationMode(db: DB, specificationId: number, mode: S 
.where(eq(schema.specification.id, specificationId)) .run(); } - -// --- Entity persistence (generic knowledge items + compatibility projections) --- - -export type EntityCollection = KnowledgeEntityCollection; -export type EntityReference = SharedEntityReference; -export type EntityRelationship = SharedEntityRelationship; -export type RequirementEntity = SharedRequirementEntity & { kind_ordinal: number }; -export type CriterionEntity = SharedCriterionEntity & { kind_ordinal: number }; -type GenericKnowledgeEntity = K extends 'requirement' - ? RequirementEntity - : K extends 'criterion' - ? CriterionEntity - : KnowledgeItem & { kind: K }; -export type EntitiesForSpecification = EntitiesData; - -function projectKnowledgeItemEntity( - item: KnowledgeItem, - kind: K, -): K extends 'decision' ? Decision & { kind_ordinal: number } : Assumption & { kind_ordinal: number } { - const base = { - id: item.id, - specification_id: item.specification_id, - content: item.content, - kind_ordinal: item.kind_ordinal, - }; - - if (kind === 'decision') { - return { - ...base, - rationale: item.rationale, - } as K extends 'decision' ? Decision & { kind_ordinal: number } : Assumption & { kind_ordinal: number }; - } - - return base as K extends 'decision' - ? 
Decision & { kind_ordinal: number } - : Assumption & { kind_ordinal: number }; -} - -function getKnowledgeItemsForSpecificationByKind( - db: DB, - specificationId: number, - kind: GenericKnowledgeKind | 'decision' | 'assumption', -): KnowledgeItem[] { - return db - .select() - .from(schema.knowledgeItem) - .where( - and(eq(schema.knowledgeItem.specification_id, specificationId), eq(schema.knowledgeItem.kind, kind)), - ) - .all() as KnowledgeItem[]; -} - -function withReferenceCodes( - items: readonly T[], -): Array { - return items - .slice() - .sort((left, right) => left.id - right.id) - .map((item) => ({ - ...item, - referenceCode: createKnowledgeReferenceCode(item.kind, item.kind_ordinal), - })); -} - -function getGenericKnowledgeEntitiesForSpecificationByKind( - db: DB, - specificationId: number, - kind: K, -): Array> { - return getKnowledgeItemsForSpecificationByKind(db, specificationId, kind).map((item) => ({ - ...item, - specification_id: item.specification_id, - kind, - })) as unknown as Array>; -} - -export function getAcceptedRequirementEntitiesForSpecification( - db: DB, - specificationId: number, -): RequirementEntity[] { - const acceptedIds = getAcceptedKnowledgeItemIdsForPhase(db, specificationId, 'requirements', 'requirement'); - if (acceptedIds.size === 0) { - return []; - } - - return getGenericKnowledgeEntitiesForSpecificationByKind(db, specificationId, 'requirement').filter( - (item) => acceptedIds.has(item.id), - ); -} - -export function getAcceptedCriterionEntitiesForSpecification( - db: DB, - specificationId: number, -): CriterionEntity[] { - const acceptedIds = getAcceptedKnowledgeItemIdsForPhase(db, specificationId, 'criteria', 'criterion'); - if (acceptedIds.size === 0) { - return []; - } - - return getGenericKnowledgeEntitiesForSpecificationByKind(db, specificationId, 'criterion').filter((item) => - acceptedIds.has(item.id), - ); -} - -export function getGroundingBundleForSpecification(db: DB, specificationId: number) { - return { - goals: 
getKnowledgeItemsForSpecificationByKind(db, specificationId, 'goal'), - terms: getKnowledgeItemsForSpecificationByKind(db, specificationId, 'term'), - contexts: getKnowledgeItemsForSpecificationByKind(db, specificationId, 'context'), - constraints: getKnowledgeItemsForSpecificationByKind(db, specificationId, 'constraint'), - }; -} - -function getKnowledgeItemIdsLinkedToActivePath(db: DB, specificationId: number): Set { - const activeTurnIds = getActivePath(db, specificationId).map((turn) => turn.id); - if (activeTurnIds.length === 0) { - return new Set(); - } - - const rows = db - .select({ itemId: schema.turnKnowledgeItem.item_id }) - .from(schema.turnKnowledgeItem) - .innerJoin(schema.knowledgeItem, eq(schema.knowledgeItem.id, schema.turnKnowledgeItem.item_id)) - .where( - and( - eq(schema.knowledgeItem.specification_id, specificationId), - inArray(schema.turnKnowledgeItem.turn_id, activeTurnIds), - ), - ) - .all() as Array<{ itemId: number }>; - - return new Set(rows.map((row) => row.itemId)); -} - -export type EntityProjectionMode = 'project-wide' | 'active-path'; - -function getSpecificationWideEntitiesForSpecification( - db: DB, - specificationId: number, -): EntitiesForSpecification { - const genericKnowledgeCollections = Object.fromEntries( - genericKnowledgeKindRegistry.map((entry) => [ - entry.collectionKey, - withReferenceCodes( - getGenericKnowledgeEntitiesForSpecificationByKind(db, specificationId, entry.kind), - ).map(({ kind_ordinal: _, ...item }) => item), - ]), - ) as Pick; - const decisions = withReferenceCodes( - getKnowledgeItemsForSpecificationByKind(db, specificationId, 'decision') - .map((item) => projectKnowledgeItemEntity(item, 'decision')) - .map((decision) => ({ - ...decision, - kind: 'decision' as const, - })), - ).map(({ kind: _, kind_ordinal: __, ...decision }) => decision); - const assumptions = withReferenceCodes( - getKnowledgeItemsForSpecificationByKind(db, specificationId, 'assumption') - .map((item) => 
projectKnowledgeItemEntity(item, 'assumption')) - .map((assumption) => ({ - ...assumption, - kind: 'assumption' as const, - })), - ).map(({ kind: _, kind_ordinal: __, ...assumption }) => assumption); - const relationships = db.all(sql` - SELECT - edge.relation AS type, - source.kind AS source_kind, - source.id AS source_id, - target.kind AS target_kind, - target.id AS target_id - FROM knowledge_edge edge - JOIN knowledge_item source ON source.id = edge.from_item_id - JOIN knowledge_item target ON target.id = edge.to_item_id - WHERE - source.specification_id = ${specificationId} - AND target.specification_id = ${specificationId} - ORDER BY - CASE source.kind WHEN 'decision' THEN 0 WHEN 'assumption' THEN 1 ELSE 2 END, - source.id, - CASE target.kind WHEN 'decision' THEN 0 WHEN 'assumption' THEN 1 ELSE 2 END, - target.id - `) as Array<{ - type: EntityRelationship['type']; - source_kind: EntityReference['kind']; - source_id: number; - target_kind: EntityReference['kind']; - target_id: number; - }>; - - return { - ...genericKnowledgeCollections, - decisions, - assumptions, - relationships: relationships.map((relationship) => ({ - type: relationship.type, - source: { - collection: knowledgeEntityCollectionByKind[relationship.source_kind], - kind: relationship.source_kind, - id: relationship.source_id, - }, - target: { - collection: knowledgeEntityCollectionByKind[relationship.target_kind], - kind: relationship.target_kind, - id: relationship.target_id, - }, - })), - }; -} - -function filterGenericKnowledgeCollectionsToActivePath( - entities: EntitiesForSpecification, - activeItemIds: ReadonlySet, - options?: { - acceptedRequirementIds?: ReadonlySet; - acceptedCriterionIds?: ReadonlySet; - }, -): Pick { - return Object.fromEntries( - genericKnowledgeKindRegistry.map((entry) => { - const acceptedIds = - entry.kind === 'requirement' - ? options?.acceptedRequirementIds - : entry.kind === 'criterion' - ? 
options?.acceptedCriterionIds - : undefined; - const visibleItems = - acceptedIds && acceptedIds.size > 0 - ? entities[entry.collectionKey].filter((item) => acceptedIds.has(item.id)) - : entities[entry.collectionKey].filter((item) => activeItemIds.has(item.id)); - return [entry.collectionKey, visibleItems]; - }), - ) as Pick; -} - -function filterEntitiesToActivePath( - entities: EntitiesForSpecification, - activeItemIds: ReadonlySet, - options?: { - acceptedRequirementIds?: ReadonlySet; - acceptedCriterionIds?: ReadonlySet; - }, -): EntitiesForSpecification { - const genericKnowledgeCollections = filterGenericKnowledgeCollectionsToActivePath( - entities, - activeItemIds, - options, - ); - const decisions = entities.decisions.filter((item) => activeItemIds.has(item.id)); - const assumptions = entities.assumptions.filter((item) => activeItemIds.has(item.id)); - - const visibleIdsByCollection = { - knowledge_item: new Set([ - ...genericKnowledgeKindRegistry.flatMap((entry) => - genericKnowledgeCollections[entry.collectionKey].map((item) => item.id), - ), - ...decisions.map((item) => item.id), - ...assumptions.map((item) => item.id), - ]), - } satisfies Record>; - - return { - ...genericKnowledgeCollections, - decisions, - assumptions, - relationships: entities.relationships.filter( - (relationship) => - visibleIdsByCollection[relationship.source.collection].has(relationship.source.id) && - visibleIdsByCollection[relationship.target.collection].has(relationship.target.id), - ), - }; -} - -export function getEntitiesForSpecificationByMode( - db: DB, - specificationId: number, - mode: EntityProjectionMode, -): EntitiesForSpecification { - const projectWideEntities = getSpecificationWideEntitiesForSpecification(db, specificationId); - if (mode === 'project-wide') { - return projectWideEntities; - } - - return filterEntitiesToActivePath( - projectWideEntities, - getKnowledgeItemIdsLinkedToActivePath(db, specificationId), - { - acceptedRequirementIds: 
getAcceptedKnowledgeItemIdsForPhase( - db, - specificationId, - 'requirements', - 'requirement', - ), - acceptedCriterionIds: getAcceptedKnowledgeItemIdsForPhase(db, specificationId, 'criteria', 'criterion'), - }, - ); -} - -export function getEntitiesForSpecification(db: DB, specificationId: number): EntitiesForSpecification { - return getEntitiesForSpecificationByMode(db, specificationId, 'project-wide'); -} - -export function getEntitiesForSpecificationOnActivePath( - db: DB, - specificationId: number, -): EntitiesForSpecification { - return getEntitiesForSpecificationByMode(db, specificationId, 'active-path'); -} - -export function getCapturedItemsForTurns( - db: DB, - specificationId: number, - turnIds: readonly number[], -): Map> { - const capturedItemsByTurn = new Map>(); - if (turnIds.length === 0) { - return capturedItemsByTurn; - } - - const projectWideEntities = getEntitiesForSpecification(db, specificationId); - const itemsById = new Map[number]>(); - - for (const entry of knowledgeKindRegistry) { - const items = projectWideEntities[entry.collectionKey] as ReadonlyArray<{ - id: number; - content: string; - referenceCode?: string; - kind?: SharedKnowledgeKind; - }>; - for (const item of items) { - itemsById.set(item.id, { - collection: entry.entityCollection, - kind: item.kind ?? 
entry.kind, - id: item.id, - content: item.content, - referenceCode: item.referenceCode, - }); - } - } - - const rows = db - .select({ - turnId: schema.turnKnowledgeItem.turn_id, - itemId: schema.turnKnowledgeItem.item_id, - }) - .from(schema.turnKnowledgeItem) - .innerJoin(schema.knowledgeItem, eq(schema.knowledgeItem.id, schema.turnKnowledgeItem.item_id)) - .where( - and( - eq(schema.knowledgeItem.specification_id, specificationId), - eq(schema.turnKnowledgeItem.relation, 'captured'), - inArray(schema.turnKnowledgeItem.turn_id, [...turnIds]), - ), - ) - .all() as Array<{ turnId: number; itemId: number }>; - - rows.sort((left, right) => left.turnId - right.turnId || left.itemId - right.itemId); - - for (const row of rows) { - const item = itemsById.get(row.itemId); - if (!item) { - continue; - } - - const currentTurnItems = capturedItemsByTurn.get(row.turnId) ?? []; - currentTurnItems.push(item); - capturedItemsByTurn.set(row.turnId, currentTurnItems); - } - - return capturedItemsByTurn; -} diff --git a/src/server/db/entity-projection-store.ts b/src/server/db/entity-projection-store.ts new file mode 100644 index 00000000..6a322a13 --- /dev/null +++ b/src/server/db/entity-projection-store.ts @@ -0,0 +1,484 @@ +import { and, desc, eq, inArray, sql, type InferSelectModel } from 'drizzle-orm'; + +import type { + CriterionEntity as SharedCriterionEntity, + EntitiesData, + EntityReference as SharedEntityReference, + EntityRelationship as SharedEntityRelationship, + SpecificationStateTurn, + RequirementEntity as SharedRequirementEntity, +} from '@/shared/api-types.js'; +import { + createKnowledgeReferenceCode, + genericKnowledgeKindRegistry, + knowledgeEntityCollectionByKind, + knowledgeKindRegistry, + type GenericKnowledgeCollectionKey, + type GenericKnowledgeKind, + type KnowledgeEntityCollection, + type KnowledgeKind as SharedKnowledgeKind, +} from '@/shared/knowledge.js'; + +import type { DB } from '../db.js'; +import * as schema from '../schema.js'; +import type { 
Assumption, Decision, KnowledgeItem } from './intent-graph-store.js'; + +type Turn = InferSelectModel; +type PhaseOutcome = InferSelectModel; +type Phase = Turn['phase']; + +function getActivePath(db: DB, specificationId: number): Turn[] { + const project = db + .select({ active_turn_id: schema.specification.active_turn_id }) + .from(schema.specification) + .where(eq(schema.specification.id, specificationId)) + .get(); + if (!project?.active_turn_id) return []; + + const rows = db.all(sql` + WITH RECURSIVE path AS ( + SELECT * FROM turn WHERE id = ${project.active_turn_id} + UNION ALL + SELECT t.* FROM turn t JOIN path p ON t.id = p.parent_turn_id + ) + SELECT * FROM path ORDER BY id ASC + `); + return rows as Turn[]; +} + +function listPhaseOutcomesForSpecification(db: DB, specificationId: number): PhaseOutcome[] { + return db + .select() + .from(schema.phaseOutcome) + .where(eq(schema.phaseOutcome.specification_id, specificationId)) + .orderBy(desc(schema.phaseOutcome.id)) + .all() as PhaseOutcome[]; +} + +function findConfirmedPhaseOutcomeOnActivePath( + db: DB, + specificationId: number, + phase: Phase, +): PhaseOutcome | undefined { + const activeTurnIds = new Set(getActivePath(db, specificationId).map((turn) => turn.id)); + if (activeTurnIds.size === 0) { + return undefined; + } + + return listPhaseOutcomesForSpecification(db, specificationId).find( + (outcome) => + outcome.phase === phase && + outcome.status === 'confirmed' && + activeTurnIds.has(outcome.proposal_turn_id), + ); +} + +export function getAcceptedKnowledgeItemIdsForPhase( + db: DB, + specificationId: number, + phase: 'requirements' | 'criteria', + kind: 'requirement' | 'criterion', +): Set { + const confirmationTurnId = findConfirmedPhaseOutcomeOnActivePath( + db, + specificationId, + phase, + )?.confirmation_turn_id; + if (!confirmationTurnId) { + return new Set(); + } + + const rows = db + .select({ itemId: schema.turnKnowledgeItem.item_id }) + .from(schema.turnKnowledgeItem) + 
.innerJoin(schema.knowledgeItem, eq(schema.knowledgeItem.id, schema.turnKnowledgeItem.item_id)) + .where( + and( + eq(schema.knowledgeItem.specification_id, specificationId), + eq(schema.knowledgeItem.kind, kind), + eq(schema.turnKnowledgeItem.turn_id, confirmationTurnId), + eq(schema.turnKnowledgeItem.relation, 'reviewed'), + ), + ) + .all() as Array<{ itemId: number }>; + + return new Set(rows.map((row) => row.itemId)); +} + +export function countAcceptedKnowledgeItemsForPhase( + db: DB, + specificationId: number, + phase: 'requirements' | 'criteria', + kind: 'requirement' | 'criterion', +): number { + return getAcceptedKnowledgeItemIdsForPhase(db, specificationId, phase, kind).size; +} + +export type EntityCollection = KnowledgeEntityCollection; +export type EntityReference = SharedEntityReference; +export type EntityRelationship = SharedEntityRelationship; +export type RequirementEntity = SharedRequirementEntity & { kind_ordinal: number }; +export type CriterionEntity = SharedCriterionEntity & { kind_ordinal: number }; +type GenericKnowledgeEntity = K extends 'requirement' + ? RequirementEntity + : K extends 'criterion' + ? CriterionEntity + : KnowledgeItem & { kind: K }; +export type EntitiesForSpecification = EntitiesData; + +function projectKnowledgeItemEntity( + item: KnowledgeItem, + kind: K, +): K extends 'decision' ? Decision & { kind_ordinal: number } : Assumption & { kind_ordinal: number } { + const base = { + id: item.id, + specification_id: item.specification_id, + content: item.content, + kind_ordinal: item.kind_ordinal, + }; + + if (kind === 'decision') { + return { + ...base, + rationale: item.rationale, + } as K extends 'decision' ? Decision & { kind_ordinal: number } : Assumption & { kind_ordinal: number }; + } + + return base as K extends 'decision' + ? 
Decision & { kind_ordinal: number } + : Assumption & { kind_ordinal: number }; +} + +function getKnowledgeItemsForSpecificationByKind( + db: DB, + specificationId: number, + kind: GenericKnowledgeKind | 'decision' | 'assumption', +): KnowledgeItem[] { + return db + .select() + .from(schema.knowledgeItem) + .where( + and(eq(schema.knowledgeItem.specification_id, specificationId), eq(schema.knowledgeItem.kind, kind)), + ) + .all() as KnowledgeItem[]; +} + +function withReferenceCodes( + items: readonly T[], +): Array { + return items + .slice() + .sort((left, right) => left.id - right.id) + .map((item) => ({ + ...item, + referenceCode: createKnowledgeReferenceCode(item.kind, item.kind_ordinal), + })); +} + +function getGenericKnowledgeEntitiesForSpecificationByKind( + db: DB, + specificationId: number, + kind: K, +): Array> { + return getKnowledgeItemsForSpecificationByKind(db, specificationId, kind).map((item) => ({ + ...item, + specification_id: item.specification_id, + kind, + })) as unknown as Array>; +} + +export function getAcceptedRequirementEntitiesForSpecification( + db: DB, + specificationId: number, +): RequirementEntity[] { + const acceptedIds = getAcceptedKnowledgeItemIdsForPhase(db, specificationId, 'requirements', 'requirement'); + if (acceptedIds.size === 0) { + return []; + } + + return getGenericKnowledgeEntitiesForSpecificationByKind(db, specificationId, 'requirement').filter( + (item) => acceptedIds.has(item.id), + ); +} + +export function getAcceptedCriterionEntitiesForSpecification( + db: DB, + specificationId: number, +): CriterionEntity[] { + const acceptedIds = getAcceptedKnowledgeItemIdsForPhase(db, specificationId, 'criteria', 'criterion'); + if (acceptedIds.size === 0) { + return []; + } + + return getGenericKnowledgeEntitiesForSpecificationByKind(db, specificationId, 'criterion').filter((item) => + acceptedIds.has(item.id), + ); +} + +export function getGroundingBundleForSpecification(db: DB, specificationId: number) { + return { + goals: 
getKnowledgeItemsForSpecificationByKind(db, specificationId, 'goal'), + terms: getKnowledgeItemsForSpecificationByKind(db, specificationId, 'term'), + contexts: getKnowledgeItemsForSpecificationByKind(db, specificationId, 'context'), + constraints: getKnowledgeItemsForSpecificationByKind(db, specificationId, 'constraint'), + }; +} + +function getKnowledgeItemIdsLinkedToActivePath(db: DB, specificationId: number): Set { + const activeTurnIds = getActivePath(db, specificationId).map((turn) => turn.id); + if (activeTurnIds.length === 0) { + return new Set(); + } + + const rows = db + .select({ itemId: schema.turnKnowledgeItem.item_id }) + .from(schema.turnKnowledgeItem) + .innerJoin(schema.knowledgeItem, eq(schema.knowledgeItem.id, schema.turnKnowledgeItem.item_id)) + .where( + and( + eq(schema.knowledgeItem.specification_id, specificationId), + inArray(schema.turnKnowledgeItem.turn_id, activeTurnIds), + ), + ) + .all() as Array<{ itemId: number }>; + + return new Set(rows.map((row) => row.itemId)); +} + +export type EntityProjectionMode = 'project-wide' | 'active-path'; + +function getSpecificationWideEntitiesForSpecification( + db: DB, + specificationId: number, +): EntitiesForSpecification { + const genericKnowledgeCollections = Object.fromEntries( + genericKnowledgeKindRegistry.map((entry) => [ + entry.collectionKey, + withReferenceCodes( + getGenericKnowledgeEntitiesForSpecificationByKind(db, specificationId, entry.kind), + ).map(({ kind_ordinal: _, ...item }) => item), + ]), + ) as Pick; + const decisions = withReferenceCodes( + getKnowledgeItemsForSpecificationByKind(db, specificationId, 'decision') + .map((item) => projectKnowledgeItemEntity(item, 'decision')) + .map((decision) => ({ + ...decision, + kind: 'decision' as const, + })), + ).map(({ kind: _, kind_ordinal: __, ...decision }) => decision); + const assumptions = withReferenceCodes( + getKnowledgeItemsForSpecificationByKind(db, specificationId, 'assumption') + .map((item) => 
projectKnowledgeItemEntity(item, 'assumption')) + .map((assumption) => ({ + ...assumption, + kind: 'assumption' as const, + })), + ).map(({ kind: _, kind_ordinal: __, ...assumption }) => assumption); + const relationships = db.all(sql` + SELECT + edge.relation AS type, + source.kind AS source_kind, + source.id AS source_id, + target.kind AS target_kind, + target.id AS target_id + FROM knowledge_edge edge + JOIN knowledge_item source ON source.id = edge.from_item_id + JOIN knowledge_item target ON target.id = edge.to_item_id + WHERE + source.specification_id = ${specificationId} + AND target.specification_id = ${specificationId} + ORDER BY + CASE source.kind WHEN 'decision' THEN 0 WHEN 'assumption' THEN 1 ELSE 2 END, + source.id, + CASE target.kind WHEN 'decision' THEN 0 WHEN 'assumption' THEN 1 ELSE 2 END, + target.id + `) as Array<{ + type: EntityRelationship['type']; + source_kind: EntityReference['kind']; + source_id: number; + target_kind: EntityReference['kind']; + target_id: number; + }>; + + return { + ...genericKnowledgeCollections, + decisions, + assumptions, + relationships: relationships.map((relationship) => ({ + type: relationship.type, + source: { + collection: knowledgeEntityCollectionByKind[relationship.source_kind], + kind: relationship.source_kind, + id: relationship.source_id, + }, + target: { + collection: knowledgeEntityCollectionByKind[relationship.target_kind], + kind: relationship.target_kind, + id: relationship.target_id, + }, + })), + }; +} + +function filterGenericKnowledgeCollectionsToActivePath( + entities: EntitiesForSpecification, + activeItemIds: ReadonlySet, + options?: { + acceptedRequirementIds?: ReadonlySet; + acceptedCriterionIds?: ReadonlySet; + }, +): Pick { + return Object.fromEntries( + genericKnowledgeKindRegistry.map((entry) => { + const acceptedIds = + entry.kind === 'requirement' + ? options?.acceptedRequirementIds + : entry.kind === 'criterion' + ? 
options?.acceptedCriterionIds + : undefined; + const visibleItems = + acceptedIds && acceptedIds.size > 0 + ? entities[entry.collectionKey].filter((item) => acceptedIds.has(item.id)) + : entities[entry.collectionKey].filter((item) => activeItemIds.has(item.id)); + return [entry.collectionKey, visibleItems]; + }), + ) as Pick; +} + +function filterEntitiesToActivePath( + entities: EntitiesForSpecification, + activeItemIds: ReadonlySet, + options?: { + acceptedRequirementIds?: ReadonlySet; + acceptedCriterionIds?: ReadonlySet; + }, +): EntitiesForSpecification { + const genericKnowledgeCollections = filterGenericKnowledgeCollectionsToActivePath( + entities, + activeItemIds, + options, + ); + const decisions = entities.decisions.filter((item) => activeItemIds.has(item.id)); + const assumptions = entities.assumptions.filter((item) => activeItemIds.has(item.id)); + + const visibleIdsByCollection = { + knowledge_item: new Set([ + ...genericKnowledgeKindRegistry.flatMap((entry) => + genericKnowledgeCollections[entry.collectionKey].map((item) => item.id), + ), + ...decisions.map((item) => item.id), + ...assumptions.map((item) => item.id), + ]), + } satisfies Record>; + + return { + ...genericKnowledgeCollections, + decisions, + assumptions, + relationships: entities.relationships.filter( + (relationship) => + visibleIdsByCollection[relationship.source.collection].has(relationship.source.id) && + visibleIdsByCollection[relationship.target.collection].has(relationship.target.id), + ), + }; +} + +export function getEntitiesForSpecificationByMode( + db: DB, + specificationId: number, + mode: EntityProjectionMode, +): EntitiesForSpecification { + const projectWideEntities = getSpecificationWideEntitiesForSpecification(db, specificationId); + if (mode === 'project-wide') { + return projectWideEntities; + } + + return filterEntitiesToActivePath( + projectWideEntities, + getKnowledgeItemIdsLinkedToActivePath(db, specificationId), + { + acceptedRequirementIds: 
getAcceptedKnowledgeItemIdsForPhase( + db, + specificationId, + 'requirements', + 'requirement', + ), + acceptedCriterionIds: getAcceptedKnowledgeItemIdsForPhase(db, specificationId, 'criteria', 'criterion'), + }, + ); +} + +export function getEntitiesForSpecification(db: DB, specificationId: number): EntitiesForSpecification { + return getEntitiesForSpecificationByMode(db, specificationId, 'project-wide'); +} + +export function getEntitiesForSpecificationOnActivePath( + db: DB, + specificationId: number, +): EntitiesForSpecification { + return getEntitiesForSpecificationByMode(db, specificationId, 'active-path'); +} + +export function getCapturedItemsForTurns( + db: DB, + specificationId: number, + turnIds: readonly number[], +): Map> { + const capturedItemsByTurn = new Map>(); + if (turnIds.length === 0) { + return capturedItemsByTurn; + } + + const projectWideEntities = getEntitiesForSpecification(db, specificationId); + const itemsById = new Map[number]>(); + + for (const entry of knowledgeKindRegistry) { + const items = projectWideEntities[entry.collectionKey] as ReadonlyArray<{ + id: number; + content: string; + referenceCode?: string; + kind?: SharedKnowledgeKind; + }>; + for (const item of items) { + itemsById.set(item.id, { + collection: entry.entityCollection, + kind: item.kind ?? 
entry.kind, + id: item.id, + content: item.content, + referenceCode: item.referenceCode, + }); + } + } + + const rows = db + .select({ + turnId: schema.turnKnowledgeItem.turn_id, + itemId: schema.turnKnowledgeItem.item_id, + }) + .from(schema.turnKnowledgeItem) + .innerJoin(schema.knowledgeItem, eq(schema.knowledgeItem.id, schema.turnKnowledgeItem.item_id)) + .where( + and( + eq(schema.knowledgeItem.specification_id, specificationId), + eq(schema.turnKnowledgeItem.relation, 'captured'), + inArray(schema.turnKnowledgeItem.turn_id, [...turnIds]), + ), + ) + .all() as Array<{ turnId: number; itemId: number }>; + + rows.sort((left, right) => left.turnId - right.turnId || left.itemId - right.itemId); + + for (const row of rows) { + const item = itemsById.get(row.itemId); + if (!item) { + continue; + } + + const currentTurnItems = capturedItemsByTurn.get(row.turnId) ?? []; + currentTurnItems.push(item); + capturedItemsByTurn.set(row.turnId, currentTurnItems); + } + + return capturedItemsByTurn; +} From cdf6c2731dd2be48d8886fcb60729edbd65aaea9 Mon Sep 17 00:00:00 2001 From: Lu Nelson Date: Wed, 13 May 2026 17:00:14 +0200 Subject: [PATCH 12/16] Extract workflow store from db facade --- memory/CARDS.md | 4 +- src/server/db.ts | 270 +++--------------------------- src/server/db/workflow-store.ts | 280 ++++++++++++++++++++++++++++++++ 3 files changed, 308 insertions(+), 246 deletions(-) create mode 100644 src/server/db/workflow-store.ts diff --git a/memory/CARDS.md b/memory/CARDS.md index afb5cce0..5e5fbc4f 100644 --- a/memory/CARDS.md +++ b/memory/CARDS.md @@ -220,7 +220,7 @@ Status: done ## Card 7 — Workflow and phase outcome store extraction -Status: next +Status: done ### Target Behavior @@ -258,7 +258,7 @@ Status: next ## Card 8 — Specification/chat/turn store extraction -Status: queued +Status: next ### Target Behavior diff --git a/src/server/db.ts b/src/server/db.ts index 1db581a3..1a56c2dc 100644 --- a/src/server/db.ts +++ b/src/server/db.ts @@ -9,24 +9,10 @@ import { 
migrate } from 'drizzle-orm/better-sqlite3/migrator'; const __dirname = dirname(fileURLToPath(import.meta.url)); const MIGRATIONS_FOLDER = join(__dirname, '..', '..', 'drizzle'); -import type { - SpecificationMode, - ReadinessBand, - TurnKind, - WorkflowPhaseState as SharedWorkflowPhaseState, - WorkflowPhaseStatus, - WorkflowState as SharedWorkflowState, -} from '@/shared/api-types.js'; +import type { SpecificationMode, TurnKind } from '@/shared/api-types.js'; import {} from '@/shared/knowledge.js'; -import { - parsePhaseClosureCommand, - workflowPhaseOrder, - type PhaseClosureBasis, -} from '@/shared/phase-close.js'; -import { safeDeserializeUserParts, type DataConfirmationPart } from './parts.js'; import * as schema from './schema.js'; -import { projectWorkflowState, type WorkflowProjectionSnapshot } from './workflow-projector.js'; export { createAnnotation, @@ -67,7 +53,6 @@ export { materializeAcceptedRequirementsReviewSet, } from './db/review-materialization-store.js'; -import { countAcceptedKnowledgeItemsForPhase } from './db/entity-projection-store.js'; export { getAcceptedCriterionEntitiesForSpecification, getAcceptedKnowledgeItemIdsForPhase, @@ -88,6 +73,31 @@ export type { RequirementEntity, } from './db/entity-projection-store.js'; +import { reconcilePhaseOutcomesForSpecification } from './db/workflow-store.js'; +export { + confirmPhaseOutcome, + createConfirmedPhaseOutcome, + createPhaseOutcome, + findPhaseOutcomeForTurn, + findProposedPhaseOutcomeByTurn, + getCurrentPhase, + getCurrentWorkflowState, + getStructuralArtifactTurnIds, + listPhaseOutcomesForSpecification, + readWorkflowProjectionSnapshot, + supersedePhaseOutcome, +} from './db/workflow-store.js'; +export type { + ClosureBasis, + CreatePhaseOutcomeInput, + PhaseOutcome, + PhaseOutcomeStatus, + ReadinessBand, + WorkflowPhaseState, + WorkflowPhaseStatus, + WorkflowState, +} from './db/workflow-store.js'; + export { claimReconciliationNeedForClassification, getCascadeRelationBetween, @@ -114,23 
+124,8 @@ export type Turn = Omit & { specification_id: number; }; export type Option = InferSelectModel; -export type PhaseOutcome = InferSelectModel; export type Phase = Turn['phase']; export type Impact = NonNullable; -export type PhaseOutcomeStatus = PhaseOutcome['status']; -export type { WorkflowPhaseStatus, ReadinessBand }; -export type ClosureBasis = PhaseClosureBasis | null; - -export type WorkflowPhaseState = SharedWorkflowPhaseState; -export type WorkflowState = SharedWorkflowState; - -export interface CreatePhaseOutcomeInput { - specificationId?: number; - phase: Phase; - proposal_turn_id: number; - summary: string; -} - export interface CreateTurnInput { parent_turn_id?: number | null; phase: Phase; @@ -342,219 +337,6 @@ export function getActivePath(db: DB, specificationId: number): Turn[] { return rows as Turn[]; } -export function listPhaseOutcomesForSpecification(db: DB, specificationId: number): PhaseOutcome[] { - return db - .select() - .from(schema.phaseOutcome) - .where(eq(schema.phaseOutcome.specification_id, specificationId)) - .orderBy(desc(schema.phaseOutcome.id)) - .all() as PhaseOutcome[]; -} - -function reconcilePhaseOutcomesForSpecification(db: DB, specificationId: number): void { - const activeTurnIds = new Set(getActivePath(db, specificationId).map((turn) => turn.id)); - const outcomesToSupersede = listPhaseOutcomesForSpecification(db, specificationId).filter( - (outcome) => - (outcome.status === 'proposed' || outcome.status === 'confirmed') && - !activeTurnIds.has(outcome.proposal_turn_id), - ); - - for (const outcome of outcomesToSupersede) { - db.update(schema.phaseOutcome) - .set({ - status: 'superseded', - superseded_at: sql`datetime('now')`, - }) - .where(eq(schema.phaseOutcome.id, outcome.id)) - .run(); - } -} - -export function createPhaseOutcome(db: DB, input: CreatePhaseOutcomeInput): PhaseOutcome { - const { specificationId } = input; - if (!specificationId) { - throw new Error('createPhaseOutcome requires specificationId'); 
- } - - const result = db - .insert(schema.phaseOutcome) - .values({ - specification_id: specificationId, - phase: input.phase, - proposal_turn_id: input.proposal_turn_id, - summary: input.summary, - status: 'proposed', - }) - .returning() - .get(); - return result as PhaseOutcome; -} - -function getClosureBasisForConfirmationTurn(db: DB, confirmationTurnId: number): PhaseClosureBasis { - const confirmationTurn = getTurn(db, confirmationTurnId); - const confirmationPart = safeDeserializeUserParts(confirmationTurn?.user_parts).find( - (part): part is DataConfirmationPart => part.type === 'data-confirmation', - ); - const phaseClosureCommand = confirmationPart ? parsePhaseClosureCommand(confirmationPart.data) : null; - - return phaseClosureCommand?.closureBasis ?? 'interviewer_recommended'; -} - -export function confirmPhaseOutcome(db: DB, phaseOutcomeId: number, confirmationTurnId: number): void { - db.update(schema.phaseOutcome) - .set({ - status: 'confirmed', - closure_basis: getClosureBasisForConfirmationTurn(db, confirmationTurnId), - confirmation_turn_id: confirmationTurnId, - confirmed_at: sql`datetime('now')`, - }) - .where(eq(schema.phaseOutcome.id, phaseOutcomeId)) - .run(); -} - -export function supersedePhaseOutcome(db: DB, phaseOutcomeId: number): void { - db.update(schema.phaseOutcome) - .set({ - status: 'superseded', - superseded_at: sql`datetime('now')`, - }) - .where(eq(schema.phaseOutcome.id, phaseOutcomeId)) - .run(); -} - -export function createConfirmedPhaseOutcome( - db: DB, - input: CreatePhaseOutcomeInput & { confirmation_turn_id: number }, -): PhaseOutcome { - const { specificationId } = input; - if (!specificationId) { - throw new Error('createConfirmedPhaseOutcome requires specificationId'); - } - - const result = db - .insert(schema.phaseOutcome) - .values({ - specification_id: specificationId, - phase: input.phase, - proposal_turn_id: input.proposal_turn_id, - summary: input.summary, - status: 'confirmed', - closure_basis: 
getClosureBasisForConfirmationTurn(db, input.confirmation_turn_id), - confirmation_turn_id: input.confirmation_turn_id, - confirmed_at: sql`datetime('now')`, - }) - .returning() - .get(); - return result as PhaseOutcome; -} - -export function findProposedPhaseOutcomeByTurn( - db: DB, - specificationId: number, - proposalTurnId: number, -): PhaseOutcome | undefined { - return db - .select() - .from(schema.phaseOutcome) - .where( - and( - eq(schema.phaseOutcome.specification_id, specificationId), - eq(schema.phaseOutcome.proposal_turn_id, proposalTurnId), - eq(schema.phaseOutcome.status, 'proposed'), - ), - ) - .orderBy(desc(schema.phaseOutcome.id)) - .get() as PhaseOutcome | undefined; -} - -export function findPhaseOutcomeForTurn( - db: DB, - specificationId: number, - proposalTurnId: number, -): PhaseOutcome | undefined { - return db - .select() - .from(schema.phaseOutcome) - .where( - and( - eq(schema.phaseOutcome.specification_id, specificationId), - eq(schema.phaseOutcome.proposal_turn_id, proposalTurnId), - ), - ) - .orderBy(desc(schema.phaseOutcome.id)) - .get() as PhaseOutcome | undefined; -} - -function getClosureBasisForOutcome(outcome: PhaseOutcome | undefined): ClosureBasis { - if (!outcome || outcome.status !== 'confirmed' || !outcome.confirmation_turn_id) { - return null; - } - - return outcome.closure_basis ?? 
null; -} - -export function readWorkflowProjectionSnapshot(db: DB, specificationId: number): WorkflowProjectionSnapshot { - const activePath = getActivePath(db, specificationId); - const activeTurnIds = new Set(activePath.map((turn) => turn.id)); - const turns = activePath.map((turn) => ({ - phase: turn.phase, - question: turn.question, - answer: turn.answer, - optionCount: getOptionsForTurn(db, turn.id).length, - })) satisfies WorkflowProjectionSnapshot['turns']; - const phaseOutcomes = listPhaseOutcomesForSpecification(db, specificationId).map((outcome) => ({ - phase: outcome.phase, - status: outcome.status, - proposalTurnId: outcome.proposal_turn_id, - summary: outcome.summary, - closureBasis: getClosureBasisForOutcome(outcome), - onActivePath: activeTurnIds.has(outcome.proposal_turn_id), - })) satisfies WorkflowProjectionSnapshot['phaseOutcomes']; - - return { - turns, - phaseOutcomes, - acceptedReviewItemCounts: { - requirements: countAcceptedKnowledgeItemsForPhase(db, specificationId, 'requirements', 'requirement'), - criteria: countAcceptedKnowledgeItemsForPhase(db, specificationId, 'criteria', 'criterion'), - }, - }; -} - -export function getCurrentWorkflowState(db: DB, specificationId: number): WorkflowState { - return projectWorkflowState(readWorkflowProjectionSnapshot(db, specificationId)); -} - -export function getStructuralArtifactTurnIds(db: DB, specificationId: number): number[] { - const activePath = getActivePath(db, specificationId); - const activeTurnIds = new Set(activePath.map((turn) => turn.id)); - const ids = new Set(); - - // Phase outcome anchors: proposal and confirmation turns - for (const outcome of listPhaseOutcomesForSpecification(db, specificationId)) { - if (activeTurnIds.has(outcome.proposal_turn_id)) { - ids.add(outcome.proposal_turn_id); - } - if (outcome.confirmation_turn_id && activeTurnIds.has(outcome.confirmation_turn_id)) { - ids.add(outcome.confirmation_turn_id); - } - } - - // Legacy transitional: kickoff/recovery turn rows 
(D95 marks these as transitional) - for (const turn of activePath) { - if (turn.turn_kind === 'kickoff' || turn.turn_kind === 'recovery' || turn.is_resolution) { - ids.add(turn.id); - } - } - - return [...ids]; -} - -export function getCurrentPhase(db: DB, specificationId: number): Phase { - const workflow = getCurrentWorkflowState(db, specificationId); - return workflowPhaseOrder.find((phase) => workflow.phases[phase].status !== 'closed') ?? 'criteria'; -} - export function getOptionsForTurn(db: DB, turnId: number): Option[] { return db .select() diff --git a/src/server/db/workflow-store.ts b/src/server/db/workflow-store.ts new file mode 100644 index 00000000..320ea811 --- /dev/null +++ b/src/server/db/workflow-store.ts @@ -0,0 +1,280 @@ +import { and, desc, eq, sql, type InferSelectModel } from 'drizzle-orm'; + +import type { + ReadinessBand, + WorkflowPhaseState as SharedWorkflowPhaseState, + WorkflowPhaseStatus, + WorkflowState as SharedWorkflowState, +} from '@/shared/api-types.js'; +import { + parsePhaseClosureCommand, + workflowPhaseOrder, + type PhaseClosureBasis, +} from '@/shared/phase-close.js'; + +import type { DB } from '../db.js'; +import { safeDeserializeUserParts, type DataConfirmationPart } from '../parts.js'; +import * as schema from '../schema.js'; +import { projectWorkflowState, type WorkflowProjectionSnapshot } from '../workflow-projector.js'; +import { countAcceptedKnowledgeItemsForPhase } from './entity-projection-store.js'; + +type PersistedTurn = InferSelectModel; +type Turn = Omit & { + specification_id: number; +}; +export type Phase = Turn['phase']; +export type PhaseOutcome = InferSelectModel; +export type PhaseOutcomeStatus = PhaseOutcome['status']; +export type { WorkflowPhaseStatus, ReadinessBand }; +export type WorkflowPhaseState = SharedWorkflowPhaseState; +export type WorkflowState = SharedWorkflowState; +export type ClosureBasis = PhaseClosureBasis | null; + +export interface CreatePhaseOutcomeInput { + specificationId?: number; 
+ phase: Phase; + proposal_turn_id: number; + summary: string; +} + +function getTurn(db: DB, turnId: number): Turn | undefined { + return db.select().from(schema.turn).where(eq(schema.turn.id, turnId)).get() as Turn | undefined; +} + +function getActivePath(db: DB, specificationId: number): Turn[] { + const project = db + .select({ active_turn_id: schema.specification.active_turn_id }) + .from(schema.specification) + .where(eq(schema.specification.id, specificationId)) + .get(); + if (!project?.active_turn_id) return []; + + const rows = db.all(sql` + WITH RECURSIVE path AS ( + SELECT * FROM turn WHERE id = ${project.active_turn_id} + UNION ALL + SELECT t.* FROM turn t JOIN path p ON t.id = p.parent_turn_id + ) + SELECT * FROM path ORDER BY id ASC + `); + return rows as Turn[]; +} + +function getOptionsForTurn(db: DB, turnId: number): Array> { + return db + .select() + .from(schema.option) + .where(eq(schema.option.turn_id, turnId)) + .orderBy(schema.option.position) + .all() as Array>; +} + +export function listPhaseOutcomesForSpecification(db: DB, specificationId: number): PhaseOutcome[] { + return db + .select() + .from(schema.phaseOutcome) + .where(eq(schema.phaseOutcome.specification_id, specificationId)) + .orderBy(desc(schema.phaseOutcome.id)) + .all() as PhaseOutcome[]; +} + +export function reconcilePhaseOutcomesForSpecification(db: DB, specificationId: number): void { + const activeTurnIds = new Set(getActivePath(db, specificationId).map((turn) => turn.id)); + const outcomesToSupersede = listPhaseOutcomesForSpecification(db, specificationId).filter( + (outcome) => + (outcome.status === 'proposed' || outcome.status === 'confirmed') && + !activeTurnIds.has(outcome.proposal_turn_id), + ); + + for (const outcome of outcomesToSupersede) { + db.update(schema.phaseOutcome) + .set({ + status: 'superseded', + superseded_at: sql`datetime('now')`, + }) + .where(eq(schema.phaseOutcome.id, outcome.id)) + .run(); + } +} + +export function createPhaseOutcome(db: DB, 
input: CreatePhaseOutcomeInput): PhaseOutcome { + const turn = getTurn(db, input.proposal_turn_id); + const specificationId = input.specificationId ?? turn?.specification_id; + if (!specificationId) { + throw new Error('Cannot create phase outcome without a specification id'); + } + + return db + .insert(schema.phaseOutcome) + .values({ + specification_id: specificationId, + phase: input.phase, + proposal_turn_id: input.proposal_turn_id, + summary: input.summary, + status: 'proposed', + }) + .returning() + .get() as PhaseOutcome; +} + +function getClosureBasisForConfirmationTurn(db: DB, confirmationTurnId: number): PhaseClosureBasis { + const confirmationTurn = getTurn(db, confirmationTurnId); + const confirmationPart = safeDeserializeUserParts(confirmationTurn?.user_parts).find( + (part): part is DataConfirmationPart => part.type === 'data-confirmation', + ); + const phaseClosureCommand = confirmationPart ? parsePhaseClosureCommand(confirmationPart.data) : null; + + return phaseClosureCommand?.closureBasis ?? 'interviewer_recommended'; +} + +export function confirmPhaseOutcome(db: DB, phaseOutcomeId: number, confirmationTurnId: number): void { + db.update(schema.phaseOutcome) + .set({ + status: 'confirmed', + confirmation_turn_id: confirmationTurnId, + closure_basis: getClosureBasisForConfirmationTurn(db, confirmationTurnId), + confirmed_at: sql`datetime('now')`, + }) + .where(eq(schema.phaseOutcome.id, phaseOutcomeId)) + .run(); +} + +export function supersedePhaseOutcome(db: DB, phaseOutcomeId: number): void { + db.update(schema.phaseOutcome) + .set({ status: 'superseded', superseded_at: sql`datetime('now')` }) + .where(eq(schema.phaseOutcome.id, phaseOutcomeId)) + .run(); +} + +export function createConfirmedPhaseOutcome( + db: DB, + input: CreatePhaseOutcomeInput & { confirmation_turn_id: number }, +): PhaseOutcome { + const turn = getTurn(db, input.proposal_turn_id); + const specificationId = input.specificationId ?? 
turn?.specification_id; + if (!specificationId) { + throw new Error('Cannot create phase outcome without a specification id'); + } + + return db + .insert(schema.phaseOutcome) + .values({ + specification_id: specificationId, + phase: input.phase, + proposal_turn_id: input.proposal_turn_id, + summary: input.summary, + status: 'confirmed', + closure_basis: getClosureBasisForConfirmationTurn(db, input.confirmation_turn_id), + confirmation_turn_id: input.confirmation_turn_id, + confirmed_at: sql`datetime('now')`, + }) + .returning() + .get() as PhaseOutcome; +} + +export function findProposedPhaseOutcomeByTurn( + db: DB, + specificationId: number, + proposalTurnId: number, +): PhaseOutcome | undefined { + return db + .select() + .from(schema.phaseOutcome) + .where( + and( + eq(schema.phaseOutcome.specification_id, specificationId), + eq(schema.phaseOutcome.proposal_turn_id, proposalTurnId), + eq(schema.phaseOutcome.status, 'proposed'), + ), + ) + .orderBy(desc(schema.phaseOutcome.id)) + .get() as PhaseOutcome | undefined; +} + +export function findPhaseOutcomeForTurn( + db: DB, + specificationId: number, + proposalTurnId: number, +): PhaseOutcome | undefined { + return db + .select() + .from(schema.phaseOutcome) + .where( + and( + eq(schema.phaseOutcome.specification_id, specificationId), + eq(schema.phaseOutcome.proposal_turn_id, proposalTurnId), + ), + ) + .orderBy(desc(schema.phaseOutcome.id)) + .get() as PhaseOutcome | undefined; +} + +function getClosureBasisForOutcome(outcome: PhaseOutcome | undefined): ClosureBasis { + if (!outcome || outcome.status !== 'confirmed' || !outcome.confirmation_turn_id) { + return null; + } + + return outcome.closure_basis ?? 
null; +} + +export function readWorkflowProjectionSnapshot(db: DB, specificationId: number): WorkflowProjectionSnapshot { + const activePath = getActivePath(db, specificationId); + const activeTurnIds = new Set(activePath.map((turn) => turn.id)); + const turns = activePath.map((turn) => ({ + phase: turn.phase, + question: turn.question, + answer: turn.answer, + optionCount: getOptionsForTurn(db, turn.id).length, + })) satisfies WorkflowProjectionSnapshot['turns']; + const phaseOutcomes = listPhaseOutcomesForSpecification(db, specificationId).map((outcome) => ({ + phase: outcome.phase, + status: outcome.status, + proposalTurnId: outcome.proposal_turn_id, + summary: outcome.summary, + closureBasis: getClosureBasisForOutcome(outcome), + onActivePath: activeTurnIds.has(outcome.proposal_turn_id), + })) satisfies WorkflowProjectionSnapshot['phaseOutcomes']; + + return { + turns, + phaseOutcomes, + acceptedReviewItemCounts: { + requirements: countAcceptedKnowledgeItemsForPhase(db, specificationId, 'requirements', 'requirement'), + criteria: countAcceptedKnowledgeItemsForPhase(db, specificationId, 'criteria', 'criterion'), + }, + }; +} + +export function getCurrentWorkflowState(db: DB, specificationId: number): WorkflowState { + return projectWorkflowState(readWorkflowProjectionSnapshot(db, specificationId)); +} + +export function getStructuralArtifactTurnIds(db: DB, specificationId: number): number[] { + const activePath = getActivePath(db, specificationId); + const activeTurnIds = new Set(activePath.map((turn) => turn.id)); + const ids = new Set(); + + // Phase outcome anchors: proposal and confirmation turns + for (const outcome of listPhaseOutcomesForSpecification(db, specificationId)) { + if (activeTurnIds.has(outcome.proposal_turn_id)) { + ids.add(outcome.proposal_turn_id); + } + if (outcome.confirmation_turn_id && activeTurnIds.has(outcome.confirmation_turn_id)) { + ids.add(outcome.confirmation_turn_id); + } + } + + // Legacy transitional: kickoff/recovery turn rows 
(D95 marks these as transitional) + for (const turn of activePath) { + if (turn.turn_kind === 'kickoff' || turn.turn_kind === 'recovery' || turn.is_resolution) { + ids.add(turn.id); + } + } + + return [...ids]; +} + +export function getCurrentPhase(db: DB, specificationId: number): Phase { + const workflow = getCurrentWorkflowState(db, specificationId); + return workflowPhaseOrder.find((phase) => workflow.phases[phase].status !== 'closed') ?? 'criteria'; +} From 460988d1b4a11daed2feeb098b1961c0fb80a172 Mon Sep 17 00:00:00 2001 From: Lu Nelson Date: Wed, 13 May 2026 17:02:42 +0200 Subject: [PATCH 13/16] Extract specification store from db facade --- memory/CARDS.md | 295 --------------------------- memory/PLAN.md | 8 +- src/server/db.ts | 293 +++----------------------- src/server/db/specification-store.ts | 268 ++++++++++++++++++++++++ 4 files changed, 299 insertions(+), 565 deletions(-) delete mode 100644 memory/CARDS.md create mode 100644 src/server/db/specification-store.ts diff --git a/memory/CARDS.md b/memory/CARDS.md deleted file mode 100644 index 5e5fbc4f..00000000 --- a/memory/CARDS.md +++ /dev/null @@ -1,295 +0,0 @@ -# Scope Cards — server-mini-library-compartmentalization / db.ts extraction - -Containing frontier: `server-mini-library-compartmentalization`. - -Execution posture: keep `src/server/db.ts` as the public persistence root while moving cohesive implementation regions into private `src/server/db/*-store.ts` modules. Preserve existing `./db.js` caller imports unless a later card explicitly scopes API redesign. - -## Card 1 — Reconciliation store extraction - -Status: done / uncommitted - -### Target Behavior - -`db.ts` remains the public persistence import surface while reconciliation-need persistence implementation lives in a private `src/server/db/reconciliation-store.ts` module. 
- -### Boundary Crossings - -```txt -→ existing callers importing reconciliation helpers from ./db.js -→ public persistence root src/server/db.ts -→ private persistence implementation src/server/db/reconciliation-store.ts -→ Drizzle schema / SQLite rows -``` - -### Risks and Assumptions - -- RISK: import cycles emerge between `db.ts`, `schema.ts`, and the private store → MITIGATION: private store imports only `schema`, Drizzle helpers, and type-only public DB where needed. -- ASSUMPTION: reconciliation helpers are cohesive enough to extract first → VALIDATE: reconciliation tests pass with unchanged caller imports. - -### Acceptance Criteria - -```txt -✓ Reconciliation helpers/types are implemented outside db.ts and re-exported through db.ts. -✓ Existing callers continue importing reconciliation helpers/types from ./db.js. -✓ Reconciliation-related regression tests pass. -``` - -### Verification Approach - -- Inner: focused persistence/agent tests — `npm run test -- db reconciliation-need reconciliation-agent`. -- Gate: `npm run verify` when unrelated suite failures are resolved or acknowledged. - -## Card 2 — Annotation store extraction - -Status: done - -### Target Behavior - -`db.ts` remains the public persistence import surface while annotation persistence implementation lives in a private `src/server/db/annotation-store.ts` module. - -### Boundary Crossings - -```txt -→ annotation routes and tests importing annotation helpers from ./db.js -→ public persistence root src/server/db.ts -→ private persistence implementation src/server/db/annotation-store.ts -→ Drizzle schema / SQLite rows -``` - -### Risks and Assumptions - -- RISK: the annotation region is too small to pay for a subtree module → MITIGATION: keep the extraction mechanically simple and use it as the low-risk proof that small cohesive stores can live behind the facade. 
-- ASSUMPTION: annotation CRUD is independent of other db.ts private helpers → VALIDATE: private module imports only schema/Drizzle helpers plus `DB` type. - -### Acceptance Criteria - -```txt -✓ `createAnnotation`, `getAnnotationsForSpecification`, `getAnnotation`, and `deleteAnnotation` are implemented outside db.ts. -✓ Existing callers continue importing annotation helpers/types from ./db.js. -✓ Annotation route tests and db tests pass without behavior changes. -``` - -### Verification Approach - -- Inner: focused route/store tests — `npm run test -- annotation db`. -- Gate: `npm run check`; full `npm run verify` when unrelated suite failures are resolved or acknowledged. - -## Card 3 — Edit-impact query extraction - -Status: done - -### Target Behavior - -`db.ts` remains the public persistence import surface while downstream edit-impact query implementation lives in a private `src/server/db/edit-impact-store.ts` module. - -### Boundary Crossings - -```txt -→ edit route / side-chat route importing edit-impact query helpers from ./db.js -→ public persistence root src/server/db.ts -→ private persistence implementation src/server/db/edit-impact-store.ts -→ knowledge_edge / knowledge_item / phase_outcome rows -``` - -### Risks and Assumptions - -- RISK: this store overlaps conceptually with the broader intent graph store → MITIGATION: extract only downstream impact queries first because they form a cohesive read-side seam used by edit-impact classification. -- ASSUMPTION: preserving current helper names avoids route churn → VALIDATE: `edit-route.ts` and `side-chat-route.ts` imports stay unchanged. - -### Acceptance Criteria - -```txt -✓ `getDownstreamItems`, `getDownstreamEdges`, and `isItemInActiveReviewSet` are implemented outside db.ts and re-exported through db.ts. -✓ Edit-impact callers continue importing from ./db.js. -✓ Focused edit-impact/edit-route/side-chat tests pass or only fail for known unrelated authorization flakes. 
-``` - -### Verification Approach - -- Inner: focused tests — `npm run test -- edit-impact side-chat-route edit-route`. -- Gate: `npm run check`; full `npm run verify` when unrelated suite failures are resolved or acknowledged. - -## Card 4 — Intent graph mutation store extraction - -Status: done - -### Target Behavior - -`db.ts` remains the public persistence import surface while generic intent/knowledge item and edge mutation helpers live in a private `src/server/db/intent-graph-store.ts` module. - -### Boundary Crossings - -```txt -→ observer/edit/core tests and routes importing intent graph helpers from ./db.js -→ public persistence root src/server/db.ts -→ private persistence implementation src/server/db/intent-graph-store.ts -→ knowledge_item / turn_knowledge_item / knowledge_edge rows -``` - -### Risks and Assumptions - -- RISK: compatibility projection helpers (`createDecision`, `createAssumption`, parent helpers) obscure the canonical intent graph model → MITIGATION: move them as legacy-named facade exports over generic store internals without expanding compatibility language. -- RISK: this extraction may need shared reference-code/projection helpers from later read-model code → MITIGATION: keep mutation helpers separate from entity projection helpers; stop if extraction forces projection redesign. -- ASSUMPTION: mutation helpers form a real store seam independent of accepted review materialization → VALIDATE: observer/edit tests pass with unchanged public imports. - -### Acceptance Criteria - -```txt -✓ `createKnowledgeItem`, `getKnowledgeItem`, `linkKnowledgeItemToTurn`, `addKnowledgeRelationship`, `removeKnowledgeRelationship`, `updateKnowledgeItemContent`, and legacy decision/assumption helper exports are implemented outside db.ts. -✓ Existing callers continue importing from ./db.js. -✓ Observer/edit/db tests covering item and edge writes pass. -``` - -### Verification Approach - -- Inner: focused tests — `npm run test -- observer edit-route db`. 
-- Gate: `npm run check`; full `npm run verify` when unrelated suite failures are resolved or acknowledged. - -## Card 5 — Review materialization store extraction - -Status: done - -### Target Behavior - -`db.ts` remains the public persistence import surface while accepted requirements/criteria review materialization lives in a private `src/server/db/review-materialization-store.ts` module. - -### Boundary Crossings - -```txt -→ app/export/context/observer callers importing entity projection helpers from ./db.js -→ public persistence root src/server/db.ts -→ private persistence implementation src/server/db/entity-projection-store.ts -→ knowledge tables + active-path turn lineage rows -→ shared API entity projection types -``` - -### Risks and Assumptions - -- RISK: active-path filtering and accepted-review visibility depend on workflow/turn helpers currently local to db.ts → MITIGATION: implement read-side SQL locally in the projection store for now; do not route through db.ts and create a cycle. -- RISK: product lexicon says intent graph, while implementation still says knowledge → MITIGATION: prefer intent/entity naming for new private helpers where possible, while preserving public compatibility exports. -- ASSUMPTION: read-model projection is separable from mutation helpers after Card 4 → VALIDATE: no circular import between intent graph mutation store and projection store. - -### Acceptance Criteria - -```txt -✓ `getEntitiesForSpecificationByMode`, `getEntitiesForSpecification`, `getEntitiesForSpecificationOnActivePath`, `getCapturedItemsForTurns`, accepted entity read helpers, and supporting projection helpers are implemented outside db.ts. -✓ App/export/context/observer callers continue importing from ./db.js. -✓ Entity projection, observer, export, context, and db tests pass. -``` - -### Verification Approach - -- Inner: focused tests — `npm run test -- db observer context export app`. 
-- Gate: `npm run check`; full `npm run verify` when unrelated suite failures are resolved or acknowledged. - -## Card 6 — Entity projection read-model extraction - -Status: done - -### Target Behavior - -`db.ts` remains the public persistence import surface while accepted requirements/criteria review materialization lives in a private `src/server/db/review-materialization-store.ts` module. - -### Boundary Crossings - -```txt -→ interview/core/db tests importing review materialization helpers from ./db.js -→ public persistence root src/server/db.ts -→ private persistence implementation src/server/db/review-materialization-store.ts -→ review-set assistant parts parsing -→ knowledge_item / turn_knowledge_item / knowledge_edge rows -``` - -### Risks and Assumptions - -- RISK: review materialization shares helper concepts with entity projection → MITIGATION: extract materialization first as a write-side seam; allow small local reference-code lookup duplication until the read model is extracted. -- ASSUMPTION: accepted review materialization is a cohesive write-side seam distinct from generic intent graph mutation → VALIDATE: requirements/criteria review tests pass unchanged. - -### Acceptance Criteria - -```txt -✓ `materializeAcceptedRequirementsReviewSet`, `materializeAcceptedCriteriaReviewSet`, and their private accepted-review helpers are implemented outside db.ts. -✓ Existing callers continue importing materialization helpers from ./db.js. -✓ Requirements/criteria review persistence tests pass. -``` - -### Verification Approach - -- Inner: focused tests — `npm run test -- db interview app`. -- Gate: `npm run check`; full `npm run verify` when unrelated suite failures are resolved or acknowledged. 
- -## Card 7 — Workflow and phase outcome store extraction - -Status: done - -### Target Behavior - -`db.ts` remains the public persistence import surface while phase outcome and workflow projection snapshot persistence lives in private `src/server/db/workflow-store.ts` and/or `src/server/db/phase-outcome-store.ts` modules. - -### Boundary Crossings - -```txt -→ core/chat transition/phase intent callers importing workflow helpers from ./db.js -→ public persistence root src/server/db.ts -→ private workflow persistence implementation -→ turn / option / phase_outcome / knowledge rows -→ workflow-projector read model -``` - -### Risks and Assumptions - -- RISK: this is the highest-coupling extraction because workflow snapshots read turns, outcomes, accepted knowledge counts, and structural artifact ids → MITIGATION: do it late, after entity/review extractions clarify which helpers should be imported vs passed in. -- RISK: moving this may accidentally alter I110 workflow read/write truth boundaries → MITIGATION: no behavior changes; preserve existing workflow projector interface and run transition/projector tests. -- ASSUMPTION: phase outcome CRUD and workflow snapshot reads can share one private module without becoming too broad → VALIDATE: module exports remain cohesive and smaller than the original db.ts region. - -### Acceptance Criteria - -```txt -✓ Phase outcome helpers and workflow snapshot/current-phase helpers are implemented outside db.ts and re-exported through db.ts. -✓ Workflow transition callers continue importing from ./db.js. -✓ Workflow projector, phase close, chat transition, app, and db tests pass. -``` - -### Verification Approach - -- Inner: focused tests — `npm run test -- workflow-projector phase-close chat-route-transition phase-intent app db`. -- Middle: route/workflow regression — ensure active path, closeability, and structural artifact projections still match fixtures. 
-- Gate: `npm run check`; full `npm run verify` when unrelated suite failures are resolved or acknowledged. - -## Card 8 — Specification/chat/turn store extraction - -Status: next - -### Target Behavior - -`db.ts` remains the public persistence import surface while specification, chat, turn, option, and active-head persistence lives in private `src/server/db/specification-store.ts` and `src/server/db/chat-turn-store.ts` modules. - -### Boundary Crossings - -```txt -→ nearly all server callers importing specification/turn helpers from ./db.js -→ public persistence root src/server/db.ts -→ private specification/chat-turn persistence modules -→ specification / chat / turn / option rows -``` - -### Risks and Assumptions - -- RISK: this is the broadest and most central extraction, so earlier cards may reveal a better split → MITIGATION: run this last and revise before building if prior extractions expose a different boundary. -- RISK: primary-chat active-head equivalence and multi-chat transitional invariants could regress → MITIGATION: run chat-substrate, core, app, and transition tests. -- ASSUMPTION: preserving public exports avoids broad caller churn while still clarifying ownership → VALIDATE: no non-test caller import paths change. - -### Acceptance Criteria - -```txt -✓ Specification creation/list/read, chat ownership, turn CRUD, option CRUD, active path, and active-head helpers are implemented outside db.ts and re-exported through db.ts. -✓ Existing callers continue importing from ./db.js. -✓ Core/chat-substrate/transition/app/db tests pass. -✓ `db.ts` is reduced to connection setup, type facade exports, and curated re-exports from private stores. -``` - -### Verification Approach - -- Inner: focused tests — `npm run test -- db core chat-substrate chat-route-transition turn-response app`. -- Middle: persisted resume/projection regression via app tests. -- Gate: `npm run verify` or explicitly document unrelated failures before commit. 
diff --git a/memory/PLAN.md b/memory/PLAN.md index a5607a82..b64f4d11 100644 --- a/memory/PLAN.md +++ b/memory/PLAN.md @@ -205,11 +205,11 @@ The May 2026 intent-spec, multi-chat, changeset-ledger, prompt/context, and agen - **Name:** Server mini-library compartmentalization - **Linear:** unassigned in this plan snapshot - **Kind:** refactor -- **Status:** horizon +- **Status:** in-progress opportunistically on FE-705 lane; `db.ts` persistence facade extraction complete, broader server roots remain horizon. - **Objective:** Refactor growing server seams into plural public roots with same-named private subtrees where FE-698 / FE-705 pressure has made boundaries too implicit. -- **Why now / unlocks:** Near-term refactor candidate after FE-705 integration, not product roadmap work. -- **Acceptance:** Candidate seams such as `fixtures.ts`, `context-packs.ts`, `prompts.ts`, `scenario-runner.ts`, `entity-apis.ts`, and `agent-apis.ts` hide private implementation subtrees behind stable public roots where real pressure exists. -- **Verification:** Existing test suite plus import-boundary review. +- **Why now / unlocks:** Near-term refactor candidate after FE-705 integration, not product roadmap work. The persistence facade now proves the pattern: `db.ts` owns connection setup and curated public exports while private `src/server/db/*-store.ts` modules own cohesive persistence implementation. +- **Acceptance:** Candidate seams such as `db.ts`, `fixtures.ts`, `context-packs.ts`, `prompts.ts`, `scenario-runner.ts`, `entity-apis.ts`, and `agent-apis.ts` hide private implementation subtrees behind stable public roots where real pressure exists. +- **Verification:** Existing test suite plus import-boundary review; for the completed `db.ts` slice, focused store/route/workflow tests, `npm run check`, and `npm run build` pass. - **Traceability:** code organization convention in `AGENTS.md`. - **Design docs:** none. 
diff --git a/src/server/db.ts b/src/server/db.ts index 1a56c2dc..3ef6d1ec 100644 --- a/src/server/db.ts +++ b/src/server/db.ts @@ -2,14 +2,12 @@ import { dirname, join } from 'node:path'; import { fileURLToPath } from 'node:url'; import Database from 'better-sqlite3'; -import { and, desc, eq, inArray, sql, type InferSelectModel } from 'drizzle-orm'; import { drizzle } from 'drizzle-orm/better-sqlite3'; import { migrate } from 'drizzle-orm/better-sqlite3/migrator'; const __dirname = dirname(fileURLToPath(import.meta.url)); const MIGRATIONS_FOLDER = join(__dirname, '..', '..', 'drizzle'); -import type { SpecificationMode, TurnKind } from '@/shared/api-types.js'; import {} from '@/shared/knowledge.js'; import * as schema from './schema.js'; @@ -73,7 +71,6 @@ export type { RequirementEntity, } from './db/entity-projection-store.js'; -import { reconcilePhaseOutcomesForSpecification } from './db/workflow-store.js'; export { confirmPhaseOutcome, createConfirmedPhaseOutcome, @@ -117,35 +114,34 @@ export type { ReconciliationNeedKind, } from './db/reconciliation-store.js'; -export type DB = ReturnType>; -export type Specification = InferSelectModel; -type PersistedTurn = InferSelectModel; -export type Turn = Omit & { - specification_id: number; -}; -export type Option = InferSelectModel; -export type Phase = Turn['phase']; -export type Impact = NonNullable; -export interface CreateTurnInput { - parent_turn_id?: number | null; - phase: Phase; - turn_kind?: TurnKind; - question: string; - why?: string | null; - impact?: Impact | null; - answer?: string | null; - is_resolution?: boolean; - user_parts?: string | null; - assistant_parts?: string | null; -} - -export interface CreateOptionInput { - position: number; - content: string; - is_recommended?: boolean; - is_selected?: boolean; -} +export { + advanceHead, + applyTurnResponseSelections, + createOption, + createSpecification, + createTurn, + getActivePath, + getOptionsForTurn, + getOrCreateSpecification, + 
getSpecification, + getTurn, + listSpecifications, + updateSpecificationMode, + updateTurn, +} from './db/specification-store.js'; +export type { + CreateOptionInput, + CreateSpecificationOptions, + CreateTurnInput, + Impact, + Option, + Phase, + Specification, + Turn, + UpdateTurnInput, +} from './db/specification-store.js'; +export type DB = ReturnType>; export function createDb(path: string = ':memory:'): DB { const sqlite = new Database(path); sqlite.pragma('journal_mode = WAL'); @@ -154,238 +150,3 @@ export function createDb(path: string = ':memory:'): DB { migrate(db, { migrationsFolder: MIGRATIONS_FOLDER }); return db; } - -export function getOrCreateSpecification(db: DB, name = 'default'): Specification { - const existing = db - .select() - .from(schema.specification) - .orderBy(desc(schema.specification.created_at)) - .limit(1) - .get(); - if (existing) return existing as Specification; - return insertSpecificationWithInterviewChat(db, { name }); -} - -export function listSpecifications(db: DB): Specification[] { - return db - .select() - .from(schema.specification) - .orderBy(desc(schema.specification.updated_at)) - .all() as Specification[]; -} - -export interface CreateSpecificationOptions { - mode?: SpecificationMode; -} - -export function createSpecification( - db: DB, - name: string, - options?: CreateSpecificationOptions, -): Specification { - return insertSpecificationWithInterviewChat(db, { - name, - ...(options?.mode ? 
{ mode: options.mode } : {}), - }); -} - -function insertSpecificationWithInterviewChat( - db: DB, - values: { name: string; mode?: SpecificationMode }, -): Specification { - return db.transaction((tx) => { - const inserted = tx.insert(schema.specification).values(values).returning().get() as Specification; - const chatRow = tx - .insert(schema.chat) - .values({ specification_id: inserted.id, kind: 'interview' }) - .returning({ id: schema.chat.id }) - .get(); - const updated = tx - .update(schema.specification) - .set({ primary_chat_id: chatRow.id }) - .where(eq(schema.specification.id, inserted.id)) - .returning() - .get(); - return updated as Specification; - }); -} - -function getInterviewChatIdForSpecification(db: DB, specificationId: number): number { - const spec = db - .select({ primary_chat_id: schema.specification.primary_chat_id }) - .from(schema.specification) - .where(eq(schema.specification.id, specificationId)) - .get(); - if (!spec?.primary_chat_id) { - throw new Error(`Specification ${specificationId} has no primary_chat_id; substrate invariant violated`); - } - return spec.primary_chat_id; -} - -export function getSpecification(db: DB, id: number): Specification | undefined { - return db.select().from(schema.specification).where(eq(schema.specification.id, id)).get() as - | Specification - | undefined; -} - -export function getTurn(db: DB, turnId: number): Turn | undefined { - return db.select().from(schema.turn).where(eq(schema.turn.id, turnId)).get() as Turn | undefined; -} - -export function createTurn(db: DB, specificationId: number, input: CreateTurnInput): Turn { - const chatId = getInterviewChatIdForSpecification(db, specificationId); - - if (input.parent_turn_id != null) { - const parent = db - .select({ chat_id: schema.turn.chat_id }) - .from(schema.turn) - .where(eq(schema.turn.id, input.parent_turn_id)) - .get(); - if (!parent) { - throw new Error(`Parent turn ${input.parent_turn_id} not found`); - } - if (parent.chat_id !== chatId) { - 
throw new Error( - `Parent turn ${input.parent_turn_id} lives in chat ${parent.chat_id}, ` + - `not chat ${chatId} — parent_turn_id must share chat_id with the new turn`, - ); - } - } - - const result = db - .insert(schema.turn) - .values({ - specification_id: specificationId, - chat_id: chatId, - parent_turn_id: input.parent_turn_id ?? null, - phase: input.phase, - turn_kind: input.turn_kind ?? 'question', - question: input.question, - why: input.why ?? null, - impact: input.impact ?? null, - answer: input.answer ?? null, - is_resolution: input.is_resolution ?? false, - user_parts: input.user_parts ?? null, - assistant_parts: input.assistant_parts ?? null, - }) - .returning() - .get(); - return result as Turn; -} - -export interface UpdateTurnInput { - question?: string; - answer?: string; - why?: string | null; - impact?: Impact | null; - user_parts?: string | null; - assistant_parts?: string | null; -} - -export function updateTurn(db: DB, turnId: number, updates: UpdateTurnInput): void { - if ( - updates.question === undefined && - updates.answer === undefined && - updates.why === undefined && - updates.impact === undefined && - updates.user_parts === undefined && - updates.assistant_parts === undefined - ) - return; - const values: Record = {}; - if (updates.question !== undefined) values.question = updates.question; - if (updates.answer !== undefined) values.answer = updates.answer; - if (updates.why !== undefined) values.why = updates.why; - if (updates.impact !== undefined) values.impact = updates.impact; - if (updates.user_parts !== undefined) values.user_parts = updates.user_parts; - if (updates.assistant_parts !== undefined) values.assistant_parts = updates.assistant_parts; - db.update(schema.turn).set(values).where(eq(schema.turn.id, turnId)).run(); -} - -export function createOption(db: DB, turnId: number, input: CreateOptionInput): Option { - const result = db - .insert(schema.option) - .values({ - turn_id: turnId, - position: input.position, - 
content: input.content, - is_recommended: input.is_recommended ?? false, - is_selected: input.is_selected ?? false, - }) - .returning() - .get(); - return result as Option; -} - -export function getActivePath(db: DB, specificationId: number): Turn[] { - const project = db - .select({ active_turn_id: schema.specification.active_turn_id }) - .from(schema.specification) - .where(eq(schema.specification.id, specificationId)) - .get(); - if (!project?.active_turn_id) return []; - - // Recursive CTE — raw SQL via Drizzle's sql tag - const rows = db.all(sql` - WITH RECURSIVE path AS ( - SELECT * FROM turn WHERE id = ${project.active_turn_id} - UNION ALL - SELECT t.* FROM turn t JOIN path p ON t.id = p.parent_turn_id - ) - SELECT * FROM path ORDER BY id ASC - `); - return rows as Turn[]; -} - -export function getOptionsForTurn(db: DB, turnId: number): Option[] { - return db - .select() - .from(schema.option) - .where(eq(schema.option.turn_id, turnId)) - .orderBy(schema.option.position) - .all() as Option[]; -} - -export function applyTurnResponseSelections(db: DB, turnId: number, selectedPositions: number[]): void { - const uniquePositions = [...new Set(selectedPositions)]; - - // Clear any previous selection for this turn. - db.update(schema.option).set({ is_selected: false }).where(eq(schema.option.turn_id, turnId)).run(); - - if (uniquePositions.length === 0) { - return; - } - - // Mark the chosen options for this turn response. 
- db.update(schema.option) - .set({ is_selected: true }) - .where(and(eq(schema.option.turn_id, turnId), inArray(schema.option.position, uniquePositions))) - .run(); -} - -export function advanceHead(db: DB, specificationId: number, turnId: number): void { - const chatId = getInterviewChatIdForSpecification(db, specificationId); - db.transaction((tx) => { - tx.update(schema.specification) - .set({ active_turn_id: turnId, updated_at: sql`datetime('now')` }) - .where(eq(schema.specification.id, specificationId)) - .run(); - const updatedChat = tx - .update(schema.chat) - .set({ active_turn_id: turnId }) - .where(eq(schema.chat.id, chatId)) - .returning({ id: schema.chat.id }) - .get(); - if (!updatedChat) { - throw new Error(`Interview chat ${chatId} for spec ${specificationId} not found; head update aborted`); - } - }); - reconcilePhaseOutcomesForSpecification(db, specificationId); -} - -export function updateSpecificationMode(db: DB, specificationId: number, mode: SpecificationMode): void { - db.update(schema.specification) - .set({ mode, updated_at: sql`datetime('now')` }) - .where(eq(schema.specification.id, specificationId)) - .run(); -} diff --git a/src/server/db/specification-store.ts b/src/server/db/specification-store.ts new file mode 100644 index 00000000..155a9e5e --- /dev/null +++ b/src/server/db/specification-store.ts @@ -0,0 +1,268 @@ +import { and, desc, eq, inArray, sql, type InferSelectModel } from 'drizzle-orm'; + +import type { SpecificationMode, TurnKind } from '@/shared/api-types.js'; + +import type { DB } from '../db.js'; +import * as schema from '../schema.js'; +import { reconcilePhaseOutcomesForSpecification } from './workflow-store.js'; + +export type Specification = InferSelectModel; +type PersistedTurn = InferSelectModel; +export type Turn = Omit & { + specification_id: number; +}; +export type Option = InferSelectModel; +export type Phase = Turn['phase']; +export type Impact = NonNullable; + +export interface CreateTurnInput { + 
parent_turn_id?: number | null; + phase: Phase; + turn_kind?: TurnKind; + question: string; + why?: string | null; + impact?: Impact | null; + answer?: string | null; + is_resolution?: boolean; + user_parts?: string | null; + assistant_parts?: string | null; +} + +export interface CreateOptionInput { + position: number; + content: string; + is_recommended?: boolean; + is_selected?: boolean; +} + +export function getOrCreateSpecification(db: DB, name = 'default'): Specification { + const existing = db + .select() + .from(schema.specification) + .orderBy(desc(schema.specification.created_at)) + .limit(1) + .get(); + if (existing) return existing as Specification; + return insertSpecificationWithInterviewChat(db, { name }); +} + +export function listSpecifications(db: DB): Specification[] { + return db + .select() + .from(schema.specification) + .orderBy(desc(schema.specification.updated_at)) + .all() as Specification[]; +} + +export interface CreateSpecificationOptions { + mode?: SpecificationMode; +} + +export function createSpecification( + db: DB, + name: string, + options?: CreateSpecificationOptions, +): Specification { + return insertSpecificationWithInterviewChat(db, { + name, + ...(options?.mode ? 
{ mode: options.mode } : {}), + }); +} + +function insertSpecificationWithInterviewChat( + db: DB, + values: { name: string; mode?: SpecificationMode }, +): Specification { + return db.transaction((tx) => { + const inserted = tx.insert(schema.specification).values(values).returning().get() as Specification; + const chatRow = tx + .insert(schema.chat) + .values({ specification_id: inserted.id, kind: 'interview' }) + .returning({ id: schema.chat.id }) + .get(); + const updated = tx + .update(schema.specification) + .set({ primary_chat_id: chatRow.id }) + .where(eq(schema.specification.id, inserted.id)) + .returning() + .get(); + return updated as Specification; + }); +} + +function getInterviewChatIdForSpecification(db: DB, specificationId: number): number { + const spec = db + .select({ primary_chat_id: schema.specification.primary_chat_id }) + .from(schema.specification) + .where(eq(schema.specification.id, specificationId)) + .get(); + if (!spec?.primary_chat_id) { + throw new Error(`Specification ${specificationId} has no primary_chat_id; substrate invariant violated`); + } + return spec.primary_chat_id; +} + +export function getSpecification(db: DB, id: number): Specification | undefined { + return db.select().from(schema.specification).where(eq(schema.specification.id, id)).get() as + | Specification + | undefined; +} + +export function getTurn(db: DB, turnId: number): Turn | undefined { + return db.select().from(schema.turn).where(eq(schema.turn.id, turnId)).get() as Turn | undefined; +} + +export function createTurn(db: DB, specificationId: number, input: CreateTurnInput): Turn { + const chatId = getInterviewChatIdForSpecification(db, specificationId); + + if (input.parent_turn_id != null) { + const parent = db + .select({ chat_id: schema.turn.chat_id }) + .from(schema.turn) + .where(eq(schema.turn.id, input.parent_turn_id)) + .get(); + if (!parent) { + throw new Error(`Parent turn ${input.parent_turn_id} not found`); + } + if (parent.chat_id !== chatId) { + 
throw new Error( + `Parent turn ${input.parent_turn_id} lives in chat ${parent.chat_id}, ` + + `not chat ${chatId} — parent_turn_id must share chat_id with the new turn`, + ); + } + } + + const result = db + .insert(schema.turn) + .values({ + specification_id: specificationId, + chat_id: chatId, + parent_turn_id: input.parent_turn_id ?? null, + phase: input.phase, + turn_kind: input.turn_kind ?? 'question', + question: input.question, + why: input.why ?? null, + impact: input.impact ?? null, + answer: input.answer ?? null, + is_resolution: input.is_resolution ?? false, + user_parts: input.user_parts ?? null, + assistant_parts: input.assistant_parts ?? null, + }) + .returning() + .get(); + return result as Turn; +} + +export interface UpdateTurnInput { + question?: string; + answer?: string; + why?: string | null; + impact?: Impact | null; + user_parts?: string | null; + assistant_parts?: string | null; +} + +export function updateTurn(db: DB, turnId: number, updates: UpdateTurnInput): void { + if ( + updates.question === undefined && + updates.answer === undefined && + updates.why === undefined && + updates.impact === undefined && + updates.user_parts === undefined && + updates.assistant_parts === undefined + ) + return; + const values: Record = {}; + if (updates.question !== undefined) values.question = updates.question; + if (updates.answer !== undefined) values.answer = updates.answer; + if (updates.why !== undefined) values.why = updates.why; + if (updates.impact !== undefined) values.impact = updates.impact; + if (updates.user_parts !== undefined) values.user_parts = updates.user_parts; + if (updates.assistant_parts !== undefined) values.assistant_parts = updates.assistant_parts; + db.update(schema.turn).set(values).where(eq(schema.turn.id, turnId)).run(); +} + +export function createOption(db: DB, turnId: number, input: CreateOptionInput): Option { + const result = db + .insert(schema.option) + .values({ + turn_id: turnId, + position: input.position, + 
content: input.content, + is_recommended: input.is_recommended ?? false, + is_selected: input.is_selected ?? false, + }) + .returning() + .get(); + return result as Option; +} + +export function getActivePath(db: DB, specificationId: number): Turn[] { + const project = db + .select({ active_turn_id: schema.specification.active_turn_id }) + .from(schema.specification) + .where(eq(schema.specification.id, specificationId)) + .get(); + if (!project?.active_turn_id) return []; + + const rows = db.all(sql` + WITH RECURSIVE path AS ( + SELECT * FROM turn WHERE id = ${project.active_turn_id} + UNION ALL + SELECT t.* FROM turn t JOIN path p ON t.id = p.parent_turn_id + ) + SELECT * FROM path ORDER BY id ASC + `); + return rows as Turn[]; +} + +export function getOptionsForTurn(db: DB, turnId: number): Option[] { + return db + .select() + .from(schema.option) + .where(eq(schema.option.turn_id, turnId)) + .orderBy(schema.option.position) + .all() as Option[]; +} + +export function applyTurnResponseSelections(db: DB, turnId: number, selectedPositions: number[]): void { + const uniquePositions = [...new Set(selectedPositions)]; + + db.update(schema.option).set({ is_selected: false }).where(eq(schema.option.turn_id, turnId)).run(); + + if (uniquePositions.length === 0) { + return; + } + + db.update(schema.option) + .set({ is_selected: true }) + .where(and(eq(schema.option.turn_id, turnId), inArray(schema.option.position, uniquePositions))) + .run(); +} + +export function advanceHead(db: DB, specificationId: number, turnId: number): void { + const chatId = getInterviewChatIdForSpecification(db, specificationId); + db.transaction((tx) => { + tx.update(schema.specification) + .set({ active_turn_id: turnId, updated_at: sql`datetime('now')` }) + .where(eq(schema.specification.id, specificationId)) + .run(); + const updatedChat = tx + .update(schema.chat) + .set({ active_turn_id: turnId }) + .where(eq(schema.chat.id, chatId)) + .returning({ id: schema.chat.id }) + .get(); + if 
(!updatedChat) { + throw new Error(`Interview chat ${chatId} for spec ${specificationId} not found; head update aborted`); + } + }); + reconcilePhaseOutcomesForSpecification(db, specificationId); +} + +export function updateSpecificationMode(db: DB, specificationId: number, mode: SpecificationMode): void { + db.update(schema.specification) + .set({ mode, updated_at: sql`datetime('now')` }) + .where(eq(schema.specification.id, specificationId)) + .run(); +} From c532f852f5e8cc0270f5e2985bece563dafc47da Mon Sep 17 00:00:00 2001 From: Lu Nelson Date: Wed, 13 May 2026 17:24:24 +0200 Subject: [PATCH 14/16] Document substrate strangler coordination --- docs/design/README.md | 1 + .../SUBSTRATE_STRANGLER_COORDINATION.md | 132 ++++++++++++++++++ memory/PLAN.md | 24 ++-- 3 files changed, 147 insertions(+), 10 deletions(-) create mode 100644 docs/design/SUBSTRATE_STRANGLER_COORDINATION.md diff --git a/docs/design/README.md b/docs/design/README.md index ea29b883..7420dac7 100644 --- a/docs/design/README.md +++ b/docs/design/README.md @@ -31,6 +31,7 @@ Use design documents for deeper argumentation, raw synthesis, alternatives, and | `BEHAVIORAL_KERNELS.md` | Behavioral-kernel typology, kernel cards, signal-phrase routing, and contrastive-question workflow. Canonical design reference for kernel probes. | | `SPEC_EVOLUTION_STRATEGIES.md` | FE-705-era synthesis for chat-local strategies, scenario options, graph review, proposal turns, relation directionality, and candidate bundles. Graduated into `memory/SPEC.md` / `memory/PLAN.md`; keep as rationale. | | `AGENT_MUTATION_SURFACE.md` | Audit of agent-originated/adjoining mutation paths and the capability/changeset boundary needed before agents write durable truth. | +| `SUBSTRATE_STRANGLER_COORDINATION.md` | Working coordination note for moving routes, capabilities, and changesets toward shared handlers while keeping frontend work stable and parallelizable. 
| ### Conversational workspace runtime cluster diff --git a/docs/design/SUBSTRATE_STRANGLER_COORDINATION.md b/docs/design/SUBSTRATE_STRANGLER_COORDINATION.md new file mode 100644 index 00000000..f319b5f2 --- /dev/null +++ b/docs/design/SUBSTRATE_STRANGLER_COORDINATION.md @@ -0,0 +1,132 @@ +# Substrate Strangler Coordination + +> Status: **working design proposal / coordination note**, 2026-05-13. +> +> Purpose: keep FE-705 / FE-700 / FE-701 substrate work and parallel frontend/product-surface work moving without forcing an early frontend cutover. Canonical sequencing remains in `memory/PLAN.md`; this document records lane boundaries, collision zones, and the migration rule of thumb. + +## Coordination principle + +Treat the capability / changeset substrate as a **strangler migration**, not a frontend rewrite. + +```text +Frontend today + → existing REST / SSE routes + → shared application handlers + → db stores / schema + +Agent / future capability clients + → capability dispatcher / JSONL adapter + → same shared application handlers + → db stores / schema +``` + +The frontend should not have to switch substrates until the backend has already made the old route substrate an adapter over the new authority. Existing UI routes stay stable while their internals migrate toward shared command/query handlers and changeset-backed semantic writes. + +## Non-goals for the coordination window + +- Do not require current frontend routes to call the central capability adapter. +- Do not expose new changeset fields in user-facing DTOs until a product slice needs them. +- Do not let external agents or probe harnesses write durable graph truth through ORM helpers. +- Do not widen FE-701 into the full ontology expansion; FE-701 needs enough relation-policy directionality to make mutation history safe. + +## Lane split + +### Substrate lane + +Best owned by the agent working on FE-705 / FE-701 backend authority. 
+ +Owns: + +- shared application-service / command-handler seam under existing routes and new capability adapters +- capability parity tests for route path vs capability path +- minimal relation-policy directionality needed by cascade and changesets +- `changeset` / `change` schema and stores +- `specification.latest_changeset_id` +- proposal-turn opened/base changeset identity +- `reconciliation_need.caused_by_changeset_id` replacing the historical `caused_by_patch_id` placeholder +- hidden changeset creation under existing semantic mutations before frontend DTO cutover + +Acceptance posture: + +- existing UI behavior and API shapes remain stable unless a scoped product slice explicitly changes them +- semantic writes pass through a shared handler that can be called by both route adapters and capability adapters +- old DB helper access remains internal; capability ids name product operations, not persistence primitives + +### Frontend / product-artifact lane + +Best owned by the colleague working on future-facing UI and low-collision product surfaces. 
+ +Owns: + +- continuous workspace / phase-addressable host work against current read models +- fixture-backed candidate bundle cards and graph-review finding cards +- review status badges and proposal-artifact presentation states +- read-only graph/workspace improvements +- mocked or artifact-only scenario-options UI probes + +Acceptance posture: + +- no canonical graph mutation from candidate/proposal UI until FE-701 changesets exist +- frontend work consumes stable current read models or fixtures, not transitional internal stores +- UI prototypes may model future statuses, but acceptance/apply flows stay disabled, mocked, or explicitly proposal-only + +## High-conflict files and seams + +Coordinate before touching these: + +- `src/server/schema.ts` +- `drizzle/*` +- `src/server/db/*` +- `src/server/knowledge-relationship-policy.ts` +- semantic mutation handlers and edit/reconciliation routes +- turn completion / chat transition logic +- shared API types when changing existing frontend DTOs +- prompt/context pack contracts that become canonical mutation inputs + +Lower-conflict frontend work usually lives in: + +- `src/client/components/*` +- Ladle stories and fixtures +- read-only graph/workspace route presentation +- candidate/proposal/graph-review renderers backed by static artifacts + +## Backend migration sequence + +1. Keep current route contracts stable and add regression/parity tests around important UI-facing reads/writes. +2. Extract or name shared application handlers underneath existing Express routes. +3. Point capability/JSONL operations at those same handlers instead of ORM helpers. +4. Add minimal relation-policy directionality needed for direct-edit cascade and reconciliation cause semantics. +5. Add FE-701 changeset/change ledger as hidden substrate. +6. Route existing semantic writes through changeset creation while preserving existing response DTOs. +7. 
Expose changeset/proposal/staleness fields only through probe/debug/capability surfaces first. +8. Cut over frontend flows one at a time after parity is proven. + +## Frontend-safe work before cutover + +The colleague can work independently on: + +- layout shells, navigation, scroll/focus, and phase section rendering +- read-only graph visibility and status affordances +- candidate bundle and graph-review cards using static fixtures +- `reviewed_clean` / `reviewed_with_issues` / `blocked` visual states as non-mutating artifacts +- storybook/Ladle coverage for future proposal surfaces + +Avoid implementing real `accept`, `accept with issues`, `apply`, or `resolve` UI flows against ad hoc route writes. Those should wait for FE-701 handlers or remain mocked. + +## Cutover rule + +A frontend flow may switch to the new substrate only when all are true: + +1. existing route behavior has a parity test or compatibility assertion; +2. the new handler is the authority behind both route and capability entry points; +3. semantic mutations, if any, produce changeset/change rows atomically; +4. proposal or candidate acceptance has a clear user/HITL authority boundary; +5. rollback/failure behavior leaves graph truth and process debt coherent. + +## Relationship to existing docs + +- `AGENT_MUTATION_SURFACE.md` owns operation naming and agent authority classes. +- `MULTI_CHAT.md` owns shipped chat/reconciliation schema rationale. +- `PATCH_LEDGER.md` owns changeset/change algorithmic pressure under historical patch vocabulary. +- `CONVERSATIONAL_WORKSPACE_RUNTIME.md` owns the umbrella runtime synthesis. +- `memory/PLAN.md` owns actual frontier ordering. 
diff --git a/memory/PLAN.md b/memory/PLAN.md index b64f4d11..b860f0cc 100644 --- a/memory/PLAN.md +++ b/memory/PLAN.md @@ -15,9 +15,9 @@ The interaction model is mature: four-phase interview, interviewer-autonomous question format, phase-agnostic preface cards with workspace exploration, structured review with per-item commenting, observer knowledge extraction, workflow ownership extraction, distribution hardening, graph view's structured-list peer route, the first relation-first observer capture seam, the multi-chat substrate, side-chat V3.0 hard-impact cascade, and side-chat V3.1 agent-grouped reconciliation resolution all ship as working product. -The next product arc is a **continuous conversational workspace** plus a stronger semantic/generative substrate. Continuous workspace is active in a parallel lane and gives the chat runtime a stable phase-addressable host. The FE-705 branch contributes an integration substrate — a local agent capability CLI and external LLM-as-user probe harness — that should be reconciled into main before graph-review and scenario-options work depends on generated completed-spec fixtures. After that, the highest-coordination work is intent-graph semantics and the semantic changeset ledger; lower-coordination provider, gitignore, and web-research work can proceed in parallel. +The next product arc is a **continuous conversational workspace** plus a stronger semantic/generative substrate. Continuous workspace is active in a parallel lane and gives the chat runtime a stable phase-addressable host. The FE-705 branch contributes an integration substrate — a local agent capability CLI and external LLM-as-user probe harness — that should be reconciled into main before graph-review and scenario-options work depends on generated completed-spec fixtures. 
After that, the highest-coordination work is intent-graph semantics and the semantic changeset ledger; FE-701 should follow soon after the FE-705 reconciliation because the current schema already carries transitional multi-chat / reconciliation placeholders that only become coherent once `changeset` / `change` owns semantic mutation history. Lower-coordination provider, gitignore, and web-research work can proceed in parallel. -The May 2026 intent-spec, multi-chat, changeset-ledger, prompt/context, and agent-mutation design notes are reconciled into one direction. `docs/design/MULTI_CHAT.md` is the substrate document. `docs/design/SIDE_CHAT.md` describes side-chat V1 / V2 / V3.0 / V3.1 / V4 phasing on top of that substrate. `docs/design/PATCH_LEDGER.md` remains historical deeper design pressure for semantic mutation history, but canonical future-facing vocabulary is `changeset` / `change`. The product-layer ontology trajectory is split out as `docs/design/INTENT_GRAPH_SEMANTICS.md` and `docs/design/BEHAVIORAL_KERNELS.md`; broader synthesis lives in `docs/archive/design/INTENT_SPEC_EVOLUTION.md`. FE-705's branch-local strategy/proposal notes add scenario options, graph-review oracle, chat-local strategies, and concern/dependency mapping; those notes should become a canonical design doc when the branch is integrated. The dev-layer self-tooling trajectory lives in `docs/design/ln-skills/EVOLUTION.md`. +The May 2026 intent-spec, multi-chat, changeset-ledger, prompt/context, and agent-mutation design notes are reconciled into one direction. `docs/design/MULTI_CHAT.md` is the substrate document. `docs/design/SIDE_CHAT.md` describes side-chat V1 / V2 / V3.0 / V3.1 / V4 phasing on top of that substrate. `docs/design/PATCH_LEDGER.md` remains historical deeper design pressure for semantic mutation history, but canonical future-facing vocabulary is `changeset` / `change`. 
The product-layer ontology trajectory is split out as `docs/design/INTENT_GRAPH_SEMANTICS.md` and `docs/design/BEHAVIORAL_KERNELS.md`; broader synthesis lives in `docs/archive/design/INTENT_SPEC_EVOLUTION.md`. FE-705's branch-local strategy/proposal notes add scenario options, graph-review oracle, chat-local strategies, and concern/dependency mapping; those notes should become a canonical design doc when the branch is integrated. Coordination uses a substrate-strangler posture: keep existing frontend REST/SSE contracts stable while route adapters and capability adapters converge on shared server-owned handlers, then cut over UI flows only after parity and changeset-backed authority exist. The dev-layer self-tooling trajectory lives in `docs/design/ln-skills/EVOLUTION.md`. ## Sequencing @@ -29,7 +29,7 @@ The May 2026 intent-spec, multi-chat, changeset-ledger, prompt/context, and agen ### Next 1. `intent-graph-semantics` — highest-coordination semantic substrate after FE-705 reconciliation. -2. `changeset-ledger` — semantic history spine needed before canonical proposal acceptance and productized scenario options. +2. `changeset-ledger` — schedule soon after FE-705 reconciliation; semantic history spine needed before canonical proposal acceptance, direct-edit atomicity, and productized scenario options. 3. `graph-review-scenario-options` — artifact-only critique/probe lane; can advance in parallel with FE-700 if it does not commit canonical graph truth. 4. `productized-scenario-options` — user-facing acceleration surface after FE-700 semantics, FE-701 changesets, and graph-review probes. 
@@ -81,7 +81,7 @@ The May 2026 intent-spec, multi-chat, changeset-ledger, prompt/context, and agen - **Acceptance:** Server-owned capability contracts and JSONL protocol/session code are integrated; the probe runner uses only the JSONL client/process boundary; fixture-candidate artifacts preserve scenario briefs, model policy, generated transcripts, and workspace-state inspection without becoming Brunch authority. - **Verification:** Contract/dispatcher tests, JSONL protocol/session tests, import-boundary tests, fake process tests, opt-in real-provider smoke, and fixture-candidate structure/readiness checks. - **Traceability:** Requirement 43; A89; D143, D147; I115. Also protects Requirements 40, 41, 42 by making prompt/context and mutation-surface probes executable through a real adapter. -- **Design docs:** `docs/design/AGENT_MUTATION_SURFACE.md`; `docs/archive/design/INTENT_SPEC_EVOLUTION.md`; FE-705 branch artifacts until rebased. +- **Design docs:** `docs/design/AGENT_MUTATION_SURFACE.md`; `docs/design/SUBSTRATE_STRANGLER_COORDINATION.md`; `docs/archive/design/INTENT_SPEC_EVOLUTION.md`; FE-705 branch artifacts until rebased. ### intent-graph-semantics @@ -103,11 +103,13 @@ The May 2026 intent-spec, multi-chat, changeset-ledger, prompt/context, and agen - **Kind:** structural - **Status:** not-started - **Objective:** Introduce the semantic history spine that separates graph mutation history from conversational turn ancestry. -- **Why now / unlocks:** Scenario bundle acceptance, direct-edit atomicity, accepted-with-issues flows, stale proposal detection, graph-review repairs, side-chat V4b item versioning, and future architect/reconciliation agents all need a durable semantic mutation boundary. Without it, productized scenario-options can stay probe-only but cannot safely commit candidate bundles. 
-- **Acceptance:** Schema and operation vocabulary use `changeset` / `change`; specifications track latest semantic changeset; proposal turns carry base/opened changeset identity; `reconciliation_need.caused_by_changeset_id` is connected; non-accept proposal actions cannot mutate graph truth; a changeset is the smallest atomic unit preserving semantic coherence. -- **Verification:** DB atomicity tests for changeset + changes + reconciliation_need writes, staleness tests for open proposal turns across multi-chat changes, and capability/transition tests proving non-accept actions cannot mutate graph truth. +- **Why now / unlocks:** Scenario bundle acceptance, direct-edit atomicity, accepted-with-issues flows, stale proposal detection, graph-review repairs, side-chat V4b item versioning, and future architect/reconciliation agents all need a durable semantic mutation boundary. Without it, productized scenario-options can stay probe-only but cannot safely commit candidate bundles. The current DB substrate is already halfway there: `chat` and `reconciliation_need` exist, `specification.active_turn_id` / `chat.active_turn_id` are deliberately duplicated during the multi-chat transition, and `reconciliation_need.caused_by_patch_id` is a historical placeholder that should become changeset-backed provenance rather than be deleted as ordinary cruft. +- **Current schema observations:** Legacy dedicated knowledge tables (`decision`, `assumption`, `requirement`, `criterion`, and old join/parent tables) are retired in migration `0010`; current semantic truth is `knowledge_item` + `knowledge_edge` + `turn_knowledge_item`. `annotation` and `reconciliation_need` are active process/read-model tables even when empty in local DBs. `turn.turn_kind` / `turn.is_resolution` remain transitional structural-artifact markers until continuous workspace and multi-chat proposal semantics replace that projection. 
`docs/schema.dbml` is stale relative to `src/server/schema.ts` and should be regenerated or deleted when FE-701 touches schema docs. +- **Migration watch:** Live local `.brunch/brunch.db` was observed with only 18 applied migrations, stopping at `0017_reconciliation_need`; it lacked `0018` source snapshot columns and `0019` reconciliation-agent columns even though `src/server/schema.ts` defines them. There is no explicit `npm run migrate`; app/server `createDb()` runs Drizzle migrations automatically. Before FE-701 schema work, verify the target DB by inspecting `__drizzle_migrations` and `PRAGMA table_info(reconciliation_need)` so drift is not misread as product intent. +- **Acceptance:** Schema and operation vocabulary use `changeset` / `change`; specifications track latest semantic changeset; proposal turns carry base/opened changeset identity; `reconciliation_need.caused_by_changeset_id` replaces/connects the historical patch placeholder; non-accept proposal actions cannot mutate graph truth; a changeset is the smallest atomic unit preserving semantic coherence. +- **Verification:** DB atomicity tests for changeset + changes + reconciliation_need writes, staleness tests for open proposal turns across multi-chat changes, migration/drift checks against an actual SQLite DB, and capability/transition tests proving non-accept actions cannot mutate graph truth. - **Traceability:** Requirements 39, 42, 44; A71, A79; D135, D138, D143. -- **Design docs:** `docs/design/PATCH_LEDGER.md` (historical filename; future vocabulary is changeset/change); FE-705 strategy/proposal notes for semantic history and proposal turns. +- **Design docs:** `docs/design/PATCH_LEDGER.md` (historical filename; future vocabulary is changeset/change); `docs/design/SUBSTRATE_STRANGLER_COORDINATION.md`; FE-705 strategy/proposal notes for semantic history and proposal turns. 
### graph-review-scenario-options @@ -359,9 +361,10 @@ continuous-workspace ├──→ stable host for side-chat-persistence-v4a └──→ workspace-aware graph / structured-list peer routes -TRACK B — Agent fixture substrate +TRACK B — Agent fixture substrate / strangler handler seam prompt/context scenario substrate foundation (completed) └──→ agent-fixture-substrate + ├──→ shared route/capability handler seam without frontend DTO cutover ├──→ generated completed-spec fixture candidates ├──→ graph-review-scenario-options └──→ Pi harness comparison (future, FE-635) @@ -378,9 +381,10 @@ multi-chat-substrate + reconciliation-needs (completed) ├──→ stale open proposal detection └──→ architect-generator-loop / verifier/import mutation provenance -TRACK D — Strategy probes and product acceleration +TRACK D — Strategy probes, frontend artifacts, and product acceleration agent-fixture-substrate + intent-graph-semantics └──→ graph-review-scenario-options + ├──→ fixture-backed candidate / graph-review UI artifacts can proceed without canonical mutation └──→ productized-scenario-options ├──→ absorbs / reshapes two-axis interview framing └──→ absorbs / reshapes progressive detail / recursive deflation From 926718d2bfdc9fa3d7e01c35f304b9ba567f300d Mon Sep 17 00:00:00 2001 From: Lu Nelson Date: Wed, 13 May 2026 17:31:19 +0200 Subject: [PATCH 15/16] Fix completed tool activity rendering --- .../$id/_view/-interview-controller.ts | 20 +++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/src/client/routes/specification/$id/_view/-interview-controller.ts b/src/client/routes/specification/$id/_view/-interview-controller.ts index 2edc134a..d7989639 100644 --- a/src/client/routes/specification/$id/_view/-interview-controller.ts +++ b/src/client/routes/specification/$id/_view/-interview-controller.ts @@ -200,12 +200,11 @@ function extractToolDetail(input: unknown): string | null { return null; } -function getLiveToolItems(messages: readonly BrunchUIMessage[], status: 
ChatStatus) { - const liveAssistantMessage = getLatestLiveAssistantMessage(messages, status); - if (!liveAssistantMessage?.parts) { - return undefined; - } +function getLiveToolParts(messages: readonly BrunchUIMessage[], status: ChatStatus) { + return getLatestLiveAssistantMessage(messages, status)?.parts ?? []; +} +function getLiveToolItems(messages: readonly BrunchUIMessage[], status: ChatStatus) { const toolItems = new Map< string, { @@ -215,7 +214,7 @@ function getLiveToolItems(messages: readonly BrunchUIMessage[], status: ChatStat } >(); - for (const part of liveAssistantMessage.parts) { + for (const part of getLiveToolParts(messages, status)) { const label = part ? getActivityToolLabel(part) : null; if (!part || !label || !('input' in part) || !('state' in part) || !('toolCallId' in part)) { continue; @@ -234,6 +233,12 @@ function getLiveToolItems(messages: readonly BrunchUIMessage[], status: ChatStat return toolItems.size > 0 ? [...toolItems.values()] : undefined; } +function hasRunningLiveTool(messages: readonly BrunchUIMessage[], status: ChatStatus): boolean { + return getLiveToolParts(messages, status).some( + (part) => part && 'state' in part && part.state !== 'output-available', + ); +} + function getLatestAssistantActivity( messages: readonly BrunchUIMessage[], status: ChatStatus, @@ -368,8 +373,7 @@ export function useInterviewController(phase: WorkflowPhase): InterviewControlle [phaseMessages, status], ); const liveToolItems = useMemo(() => getLiveToolItems(phaseMessages, status), [phaseMessages, status]); - const liveToolsRunning = - (liveToolItems?.length ?? 
0) > 0 && (status === 'streaming' || status === 'submitted'); + const liveToolsRunning = useMemo(() => hasRunningLiveTool(phaseMessages, status), [phaseMessages, status]); const submitText = useCallback( (text: string) => { From 8011cf6c05d733a9067e4d7dd1da7c9f4ae21327 Mon Sep 17 00:00:00 2001 From: Lu Nelson Date: Fri, 15 May 2026 09:52:53 +0200 Subject: [PATCH 16/16] FE-705: Address persistence facade review comments --- src/server/db.test.ts | 23 +++++++++++++++++++++++ src/server/db.ts | 2 -- src/server/db/workflow-store.ts | 12 +++++------- 3 files changed, 28 insertions(+), 9 deletions(-) diff --git a/src/server/db.test.ts b/src/server/db.test.ts index 456de04d..dffd3b54 100644 --- a/src/server/db.test.ts +++ b/src/server/db.test.ts @@ -251,6 +251,29 @@ describe('phase outcome lifecycle', () => { }); }); + it('requires phase outcome writers to name the owning specification explicitly', async () => { + const project = getOrCreateSpecification(db); + const turn = createTurn(db, project.id, { phase: 'grounding', question: 'Goal?', answer: 'Spec tool' }); + const { createPhaseOutcome, createConfirmedPhaseOutcome } = await import('./db.js'); + + expect(() => + createPhaseOutcome(db, { + phase: 'grounding', + proposal_turn_id: turn.id, + summary: 'Grounding closed.', + } as Parameters[1]), + ).toThrow('createPhaseOutcome requires specificationId'); + expect(() => + createConfirmedPhaseOutcome(db, { + phase: 'grounding', + proposal_turn_id: turn.id, + confirmation_turn_id: turn.id, + summary: 'Grounding closed.', + } as Parameters[1]), + ).toThrow('createConfirmedPhaseOutcome requires specificationId'); + expect(listPhaseOutcomesForSpecification(db, project.id)).toHaveLength(0); + }); + it('persists explicit grounding outcomes and supersedes them when the active path changes upstream', async () => { const project = getOrCreateSpecification(db); const root = createTurn(db, project.id, { phase: 'grounding', question: 'Goal?', answer: 'Spec tool' }); diff --git 
a/src/server/db.ts b/src/server/db.ts index 3ef6d1ec..59384218 100644 --- a/src/server/db.ts +++ b/src/server/db.ts @@ -8,8 +8,6 @@ import { migrate } from 'drizzle-orm/better-sqlite3/migrator'; const __dirname = dirname(fileURLToPath(import.meta.url)); const MIGRATIONS_FOLDER = join(__dirname, '..', '..', 'drizzle'); -import {} from '@/shared/knowledge.js'; - import * as schema from './schema.js'; export { diff --git a/src/server/db/workflow-store.ts b/src/server/db/workflow-store.ts index 320ea811..681e13fd 100644 --- a/src/server/db/workflow-store.ts +++ b/src/server/db/workflow-store.ts @@ -31,7 +31,7 @@ export type WorkflowState = SharedWorkflowState; export type ClosureBasis = PhaseClosureBasis | null; export interface CreatePhaseOutcomeInput { - specificationId?: number; + specificationId: number; phase: Phase; proposal_turn_id: number; summary: string; @@ -98,10 +98,9 @@ export function reconcilePhaseOutcomesForSpecification(db: DB, specificationId: } export function createPhaseOutcome(db: DB, input: CreatePhaseOutcomeInput): PhaseOutcome { - const turn = getTurn(db, input.proposal_turn_id); - const specificationId = input.specificationId ?? turn?.specification_id; + const { specificationId } = input; if (!specificationId) { - throw new Error('Cannot create phase outcome without a specification id'); + throw new Error('createPhaseOutcome requires specificationId'); } return db @@ -150,10 +149,9 @@ export function createConfirmedPhaseOutcome( db: DB, input: CreatePhaseOutcomeInput & { confirmation_turn_id: number }, ): PhaseOutcome { - const turn = getTurn(db, input.proposal_turn_id); - const specificationId = input.specificationId ?? turn?.specification_id; + const { specificationId } = input; if (!specificationId) { - throw new Error('Cannot create phase outcome without a specification id'); + throw new Error('createConfirmedPhaseOutcome requires specificationId'); } return db