From f87a12958fdf41db30abb2f3b6b5c9d21029e2c8 Mon Sep 17 00:00:00 2001 From: Claude Date: Fri, 29 May 2026 14:47:46 +0000 Subject: [PATCH 1/3] =?UTF-8?q?feat(odoo):=20op=5Femitter=20=E2=80=94=20Ph?= =?UTF-8?q?ase=202=20bucket-dispatch=20codegen=20(SoA=20=E2=86=92=20Foundr?= =?UTF-8?q?y=20SoC)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Groups OdooStyleRecipe corpus by OdooMethodKind dispatch bucket, then emits deterministic compilable Rust: RECIPE_* collapse-key consts + per-kind Op structs + per-kind static Op slices. Output is a zero-dep Rust source String for build.rs → OUT_DIR → include!() integration. - `BucketedRecipe { method_id, kind, recipe }` — bridge between Phase 1 (DAtom interpretation) and Phase 2 (codegen dispatch). - `bucket_corpus(entities)` — walk all entity×method pairs, pair each derive_style_recipe output with its semantic OdooMethodKind bucket. - `emit_op_dispatch(buckets)` — emit header + RECIPE_* consts (sorted by recipe_id) + per-kind Op struct + static slice. Deterministic: buckets in OdooMethodKind declaration order; within each bucket sorted by recipe_id then method_id. Identical DAtom profiles collapse to one RECIPE_* const (recipe_id dedup), N Op entries still emitted. - 12 tests covering: empty corpus, grouping, method_id formatting, recipe dedup, determinism, sort order, kind_ord injectivity + roundtrip, atom_comment. - 230/230 existing lance-graph-ontology tests remain green. 
https://claude.ai/code/session_017gZ6sPRXYPj5n7uJ7NBtRv --- .../src/odoo_blueprint/mod.rs | 8 + .../src/odoo_blueprint/op_emitter.rs | 625 ++++++++++++++++++ 2 files changed, 633 insertions(+) create mode 100644 crates/lance-graph-ontology/src/odoo_blueprint/op_emitter.rs diff --git a/crates/lance-graph-ontology/src/odoo_blueprint/mod.rs b/crates/lance-graph-ontology/src/odoo_blueprint/mod.rs index a33faafa..3a167324 100644 --- a/crates/lance-graph-ontology/src/odoo_blueprint/mod.rs +++ b/crates/lance-graph-ontology/src/odoo_blueprint/mod.rs @@ -84,6 +84,14 @@ pub mod extracted; // for the cascade rules. pub mod style_recipe; +// ─── Bucket-dispatch Op emitter (SoC codegen, Phase 2) ────────────────────── +// +// Groups style_recipe output by OdooMethodKind dispatch bucket, then emits +// compilable Rust: RECIPE_* collapse-key consts + per-kind Op structs + +// per-kind static Op slices. Output is a Rust source String suitable for +// build.rs → OUT_DIR → include!(). See op_emitter::emit_op_dispatch. +pub mod op_emitter; + // ─── Top-level entity ───────────────────────────────────────────────────── /// Which ORM base class the entity inherits from. diff --git a/crates/lance-graph-ontology/src/odoo_blueprint/op_emitter.rs b/crates/lance-graph-ontology/src/odoo_blueprint/op_emitter.rs new file mode 100644 index 00000000..1e20a442 --- /dev/null +++ b/crates/lance-graph-ontology/src/odoo_blueprint/op_emitter.rs @@ -0,0 +1,625 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright The Lance Authors + +//! Bucket-dispatch Op emitter — **Phase 2** of the Odoo SoA → Foundry SoC +//! pipeline. +//! +//! # Pipeline position +//! +//! ```text +//! Typed Odoo SoA (OdooEntity[]) +//! │ style_recipe::derive_style_recipe (Phase 1, PR #433) +//! ▼ +//! OdooStyleRecipe[] (DAtom weights + regulatory anchors) +//! │ THIS MODULE — bucket_corpus + emit_op_dispatch (Phase 2) +//! ▼ +//! 
Rust source String (RECIPE_* consts + per-kind Op structs + static slices) +//! │ write via build.rs → OUT_DIR → include!() +//! ▼ +//! Compiled SoC dispatch table (O(1) recipe lookup at runtime) +//! ``` +//! +//! # Conceptual split +//! +//! `derive_style_recipe` (Phase 1) asks: **what does this method DO** — give +//! it a sparse DAtom weight vector. +//! +//! `bucket_corpus` (Phase 2) asks: **what KIND of method is it** — put it in +//! the right dispatch bucket by `OdooMethodKind` semantic role. +//! +//! `emit_op_dispatch` (Phase 2) asks: **what does the compiler need** — emit +//! a deterministic Rust source file the SoC synergy stage compiles into +//! O(1) dispatch tables. +//! +//! # Emitted structure (per bucket that has ≥1 method) +//! +//! ```rust,ignore +//! // --- auto-generated by odoo_blueprint::op_emitter --- +//! +//! // ─── Recipe collapse keys ────────────────────────────────────────────── +//! /// entity=1 compute=4 +//! pub const RECIPE_A1B2C3D4: u32 = 0xA1B2C3D4; +//! // ... +//! +//! // ─── Compute bucket (N methods) ──────────────────────────────────────── +//! #[derive(Debug, Clone, Copy)] +//! pub struct ComputeOp { +//! pub method_id: &'static str, +//! pub recipe_id: u32, +//! } +//! pub static COMPUTE_OPS: &[ComputeOp] = &[ +//! ComputeOp { method_id: "account.move._compute_amount", recipe_id: 0xA1B2C3D4 }, +//! ]; +//! ``` +//! +//! # Determinism guarantees +//! +//! - `RECIPE_*` consts are emitted in ascending `recipe_id` order. +//! - Buckets are emitted in `OdooMethodKind` declaration order. +//! - Within each bucket, `Op` entries are sorted by `recipe_id` then `method_id`. +//! +//! # Zero-dep output +//! +//! The emitted file uses only primitive Rust types (`u32`, `&'static str`). +//! Consumers `include!` it from `OUT_DIR`; no imports needed in the output. 
+ +use std::collections::BTreeMap; +use std::fmt::Write as FmtWrite; + +use crate::odoo_blueprint::{OdooEntity, OdooMethodKind}; +use crate::odoo_blueprint::style_recipe::{OdooStyleRecipe, derive_style_recipe}; + +// --------------------------------------------------------------------------- +// Public types +// --------------------------------------------------------------------------- + +/// One bucketed recipe: method identity + semantic kind + DAtom fingerprint. +/// +/// Produced by [`bucket_corpus`] — the bridge between the interpretation step +/// (Phase 1) and the codegen step (Phase 2). +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct BucketedRecipe { + /// Fully-qualified method id: `"account.move._compute_amount"`. + pub method_id: String, + /// Semantic role dispatch bucket — drives Op struct selection. + pub kind: OdooMethodKind, + /// DAtom fingerprint + regulatory anchors (the Phase 1 output). + pub recipe: OdooStyleRecipe, +} + +/// Emit statistics produced alongside the Rust source. +#[derive(Debug, Clone)] +pub struct EmitStats { + /// Total methods processed (one entry per entity×method pair). + pub total_methods: usize, + /// Count of unique `recipe_id` values (the collapse count). + pub unique_recipes: usize, + /// Per-kind method counts, in `OdooMethodKind` declaration order. + pub kind_counts: Vec<(OdooMethodKind, usize)>, +} + +// --------------------------------------------------------------------------- +// Public functions +// --------------------------------------------------------------------------- + +/// Walk every entity's methods, derive their DAtom recipe, and pair each +/// with its semantic method kind. +/// +/// Empty entities (no methods) and empty entity slices both produce `vec![]`. +/// All 10 `OdooMethodKind` variants are eligible — including `Helper` and +/// `Override`, whose recipes carry only the `Entity=1` anchor. 
+pub fn bucket_corpus(entities: &[&OdooEntity]) -> Vec { + let mut out = Vec::new(); + for entity in entities { + for method in entity.methods { + let recipe = derive_style_recipe(entity, method); + out.push(BucketedRecipe { + method_id: recipe.method_id.clone(), + kind: method.kind, + recipe, + }); + } + } + out +} + +/// Emit the compilable Rust Op dispatch source from a bucketed recipe corpus. +/// +/// Returns the source `String` and [`EmitStats`]. Write the source to a file +/// in `OUT_DIR` (e.g. via `build.rs`) and `include!` it in the consumer. +/// +/// Calling with an empty `buckets` slice produces a valid Rust file (just +/// the header comments — no structs, no statics). This is intentional: an +/// empty corpus is a valid (though probably misconfigured) input. +pub fn emit_op_dispatch(buckets: &[BucketedRecipe]) -> (String, EmitStats) { + // Collect unique recipes (BTreeMap gives ascending recipe_id order). + let mut recipe_map: BTreeMap = BTreeMap::new(); + for b in buckets { + recipe_map.entry(b.recipe.recipe_id).or_insert(&b.recipe); + } + + // Group buckets by kind in declaration order. + let groups = group_by_kind(buckets); + + // Build kind_counts for stats. + let kind_counts: Vec<(OdooMethodKind, usize)> = + groups.iter().map(|(k, v)| (*k, v.len())).collect(); + + let mut src = String::new(); + + // Header + writeln!(src, "// --- auto-generated by odoo_blueprint::op_emitter ---").unwrap(); + writeln!(src, "// DO NOT EDIT: regenerate via `bucket_corpus` + `emit_op_dispatch`").unwrap(); + writeln!(src, "//").unwrap(); + writeln!(src, "// Buckets : {}", groups.len()).unwrap(); + writeln!(src, "// Unique recipes : {}", recipe_map.len()).unwrap(); + writeln!(src, "// Total methods : {}", buckets.len()).unwrap(); + writeln!(src).unwrap(); + + // 1. 
Recipe collapse-key consts + if !recipe_map.is_empty() { + writeln!( + src, + "// ─── Recipe collapse keys \ + ────────────────────────────────────────────────────" + ) + .unwrap(); + for (id, recipe) in &recipe_map { + let comment = atom_comment(recipe); + writeln!(src, "/// {comment}").unwrap(); + writeln!(src, "pub const RECIPE_{id:08X}: u32 = 0x{id:08X};").unwrap(); + } + writeln!(src).unwrap(); + } + + // 2. Per-kind Op struct + static slice + for (kind, entries) in &groups { + let pascal = kind_pascal(*kind); + let upper = kind_upper(*kind); + let count = entries.len(); + + // Sort for determinism: recipe_id asc, then method_id asc. + let mut sorted: Vec<&BucketedRecipe> = entries.iter().copied().collect(); + sorted.sort_by(|a, b| { + a.recipe.recipe_id + .cmp(&b.recipe.recipe_id) + .then_with(|| a.method_id.cmp(&b.method_id)) + }); + + writeln!( + src, + "// ─── {pascal} bucket ({count} {}) \ + ─────────────────────────────────────────────────", + if count == 1 { "method" } else { "methods" } + ) + .unwrap(); + writeln!(src, "/// An Op in the `{pascal}` dispatch bucket.").unwrap(); + writeln!(src, "#[derive(Debug, Clone, Copy)]").unwrap(); + writeln!(src, "pub struct {pascal}Op {{").unwrap(); + writeln!(src, " pub method_id: &'static str,").unwrap(); + writeln!(src, " pub recipe_id: u32,").unwrap(); + writeln!(src, "}}").unwrap(); + writeln!(src).unwrap(); + writeln!( + src, + "/// All `{pascal}` Ops, sorted by `recipe_id` then `method_id`." 
+ ) + .unwrap(); + writeln!(src, "pub static {upper}_OPS: &[{pascal}Op] = &[").unwrap(); + for e in &sorted { + writeln!( + src, + " {pascal}Op {{ method_id: {:?}, recipe_id: 0x{:08X} }},", + e.method_id, e.recipe.recipe_id + ) + .unwrap(); + } + writeln!(src, "];").unwrap(); + writeln!(src).unwrap(); + } + + let stats = EmitStats { + total_methods: buckets.len(), + unique_recipes: recipe_map.len(), + kind_counts, + }; + + (src, stats) +} + +// --------------------------------------------------------------------------- +// Private helpers +// --------------------------------------------------------------------------- + +/// Group `BucketedRecipe` slices by kind, in `OdooMethodKind` declaration +/// order. Kinds with no entries are omitted. +fn group_by_kind(buckets: &[BucketedRecipe]) -> Vec<(OdooMethodKind, Vec<&BucketedRecipe>)> { + // Accumulate into a BTreeMap keyed by declaration-order index. + let mut map: BTreeMap> = BTreeMap::new(); + for b in buckets { + map.entry(kind_ord(b.kind)).or_default().push(b); + } + map.into_iter() + .map(|(ord, v)| (kind_from_ord(ord), v)) + .collect() +} + +/// Stable ordinal matching `OdooMethodKind` declaration order. 
+const fn kind_ord(k: OdooMethodKind) -> u8 { + match k { + OdooMethodKind::Compute => 0, + OdooMethodKind::Inverse => 1, + OdooMethodKind::Constrain => 2, + OdooMethodKind::Onchange => 3, + OdooMethodKind::Action => 4, + OdooMethodKind::Cron => 5, + OdooMethodKind::ApiModel => 6, + OdooMethodKind::ApiModelCreateMulti => 7, + OdooMethodKind::Override => 8, + OdooMethodKind::Helper => 9, + } +} + +fn kind_from_ord(ord: u8) -> OdooMethodKind { + match ord { + 0 => OdooMethodKind::Compute, + 1 => OdooMethodKind::Inverse, + 2 => OdooMethodKind::Constrain, + 3 => OdooMethodKind::Onchange, + 4 => OdooMethodKind::Action, + 5 => OdooMethodKind::Cron, + 6 => OdooMethodKind::ApiModel, + 7 => OdooMethodKind::ApiModelCreateMulti, + 8 => OdooMethodKind::Override, + 9 => OdooMethodKind::Helper, + _ => unreachable!("kind_ord range is 0..=9"), + } +} + +/// PascalCase name for the emitted Op struct (e.g. `Compute`, `ApiModel`). +const fn kind_pascal(k: OdooMethodKind) -> &'static str { + match k { + OdooMethodKind::Compute => "Compute", + OdooMethodKind::Inverse => "Inverse", + OdooMethodKind::Constrain => "Constrain", + OdooMethodKind::Onchange => "Onchange", + OdooMethodKind::Action => "Action", + OdooMethodKind::Cron => "Cron", + OdooMethodKind::ApiModel => "ApiModel", + OdooMethodKind::ApiModelCreateMulti => "ApiModelCreateMulti", + OdooMethodKind::Override => "Override", + OdooMethodKind::Helper => "Helper", + } +} + +/// UPPER_SNAKE name for the emitted static slice (e.g. `COMPUTE`, `API_MODEL`). 
+const fn kind_upper(k: OdooMethodKind) -> &'static str { + match k { + OdooMethodKind::Compute => "COMPUTE", + OdooMethodKind::Inverse => "INVERSE", + OdooMethodKind::Constrain => "CONSTRAIN", + OdooMethodKind::Onchange => "ONCHANGE", + OdooMethodKind::Action => "ACTION", + OdooMethodKind::Cron => "CRON", + OdooMethodKind::ApiModel => "API_MODEL", + OdooMethodKind::ApiModelCreateMulti => "API_MODEL_CREATE_MULTI", + OdooMethodKind::Override => "OVERRIDE", + OdooMethodKind::Helper => "HELPER", + } +} + +/// Human-readable doc-comment line summarising a recipe's atom weights. +/// +/// Example: `"entity=1 compute=4 law=8"`. +fn atom_comment(recipe: &OdooStyleRecipe) -> String { + if recipe.atoms.is_empty() { + return "empty".to_owned(); + } + recipe + .atoms + .iter() + .map(|(a, w)| format!("{}={}", a.id(), w)) + .collect::>() + .join(" ") +} + +// --------------------------------------------------------------------------- +// Tests +// --------------------------------------------------------------------------- + +#[cfg(test)] +mod tests { + use super::*; + use crate::odoo_blueprint::{ + OdooConfidence, OdooEntity, OdooEntityKind, OdooField, + OdooFieldKind, OdooMethod, OdooMethodKind, OdooProvenance, OdooReturnKind, + OdooSemanticRole, + }; + use crate::odoo_blueprint::style_recipe::DAtom; + + // ── Minimal fixture ────────────────────────────────────────────────────── + + const EMPTY_PROVENANCE: OdooProvenance = OdooProvenance { + l_doc: "test.md", + l_doc_lines: (1, 1), + odoo_source: &[], + confidence: OdooConfidence::Conjecture, + regulation_iri: &[], + }; + + const COMPUTE_METHOD: OdooMethod = OdooMethod { + name: "_compute_amount", + kind: OdooMethodKind::Compute, + return_kind: OdooReturnKind::Unit, + triggers: &[], + }; + + const ACTION_METHOD: OdooMethod = OdooMethod { + name: "action_post", + kind: OdooMethodKind::Action, + return_kind: OdooReturnKind::Action, + triggers: &["posted"], + }; + + const CONSTRAIN_METHOD: OdooMethod = OdooMethod { + name: 
"_check_date", + kind: OdooMethodKind::Constrain, + return_kind: OdooReturnKind::Unit, + triggers: &[], + }; + + const TEST_ENTITY: OdooEntity = OdooEntity { + model_name: "test.move", + kind: OdooEntityKind::Model, + description: "Test entity", + fields: &[OdooField { + name: "amount", + kind: OdooFieldKind::Monetary, + target: None, + required: false, + computed: Some("_compute_amount"), + depends: &[], + semantic_role: OdooSemanticRole::Money, + }], + methods: &[COMPUTE_METHOD, ACTION_METHOD, CONSTRAIN_METHOD], + decorators: &[], + state_machine: None, + constraints: &[], + provenance: EMPTY_PROVENANCE, + }; + + const HELPER_ENTITY: OdooEntity = OdooEntity { + model_name: "test.helper", + kind: OdooEntityKind::Model, + description: "Entity with a helper method", + fields: &[], + methods: &[OdooMethod { + name: "_prepare_vals", + kind: OdooMethodKind::Helper, + return_kind: OdooReturnKind::Dict, + triggers: &[], + }], + decorators: &[], + state_machine: None, + constraints: &[], + provenance: EMPTY_PROVENANCE, + }; + + // ── Tests ──────────────────────────────────────────────────────────────── + + #[test] + fn bucket_corpus_empty_input_produces_empty_vec() { + let result = bucket_corpus(&[]); + assert!(result.is_empty()); + } + + #[test] + fn bucket_corpus_no_methods_entity_produces_empty_vec() { + const NO_METHODS: OdooEntity = OdooEntity { + model_name: "test.empty", + kind: OdooEntityKind::Model, + description: "No methods", + fields: &[], + methods: &[], + decorators: &[], + state_machine: None, + constraints: &[], + provenance: EMPTY_PROVENANCE, + }; + let result = bucket_corpus(&[&NO_METHODS]); + assert!(result.is_empty()); + } + + #[test] + fn bucket_corpus_groups_three_methods_into_correct_kinds() { + let buckets = bucket_corpus(&[&TEST_ENTITY]); + assert_eq!(buckets.len(), 3, "three methods → three buckets"); + + let kinds: Vec = buckets.iter().map(|b| b.kind).collect(); + assert!(kinds.contains(&OdooMethodKind::Compute)); + 
assert!(kinds.contains(&OdooMethodKind::Action)); + assert!(kinds.contains(&OdooMethodKind::Constrain)); + } + + #[test] + fn bucket_corpus_method_id_matches_entity_dot_method() { + let buckets = bucket_corpus(&[&TEST_ENTITY]); + let compute = buckets.iter().find(|b| b.kind == OdooMethodKind::Compute).unwrap(); + assert_eq!(compute.method_id, "test.move._compute_amount"); + } + + #[test] + fn emit_op_dispatch_empty_produces_valid_header_only_rust() { + let (src, stats) = emit_op_dispatch(&[]); + // Header must be present + assert!(src.contains("auto-generated by odoo_blueprint::op_emitter")); + // No structs emitted for empty input + assert!(!src.contains("pub struct")); + assert!(!src.contains("pub static")); + assert_eq!(stats.total_methods, 0); + assert_eq!(stats.unique_recipes, 0); + assert!(stats.kind_counts.is_empty()); + } + + #[test] + fn emit_op_dispatch_produces_struct_and_static_for_each_kind() { + let buckets = bucket_corpus(&[&TEST_ENTITY]); + let (src, stats) = emit_op_dispatch(&buckets); + + // Three kinds present → three structs + three statics + assert!(src.contains("pub struct ComputeOp {"), "ComputeOp struct"); + assert!(src.contains("pub static COMPUTE_OPS:"), "COMPUTE_OPS static"); + assert!(src.contains("pub struct ActionOp {"), "ActionOp struct"); + assert!(src.contains("pub static ACTION_OPS:"), "ACTION_OPS static"); + assert!(src.contains("pub struct ConstrainOp {"), "ConstrainOp struct"); + assert!(src.contains("pub static CONSTRAIN_OPS:"), "CONSTRAIN_OPS static"); + + // Helper / Override / Onchange / etc. 
absent (not in TEST_ENTITY) + assert!(!src.contains("pub struct HelperOp"), "no HelperOp"); + assert!(!src.contains("pub struct OnchangeOp"), "no OnchangeOp"); + + assert_eq!(stats.total_methods, 3); + assert_eq!(stats.kind_counts.len(), 3); + } + + #[test] + fn emit_op_dispatch_recipe_const_present_for_each_unique_id() { + let buckets = bucket_corpus(&[&TEST_ENTITY]); + let (src, stats) = emit_op_dispatch(&buckets); + + // Each unique recipe_id must appear as a RECIPE_* const + for b in &buckets { + let const_name = format!("RECIPE_{:08X}", b.recipe.recipe_id); + assert!( + src.contains(&const_name), + "missing const {const_name} in emitted source" + ); + } + // unique_recipes ≤ total_methods (dedup possible) + assert!(stats.unique_recipes <= stats.total_methods); + } + + #[test] + fn emit_op_dispatch_deterministic_across_calls() { + let buckets = bucket_corpus(&[&TEST_ENTITY, &HELPER_ENTITY]); + let (src1, _) = emit_op_dispatch(&buckets); + let (src2, _) = emit_op_dispatch(&buckets); + assert_eq!(src1, src2, "emit_op_dispatch must be deterministic"); + } + + #[test] + fn emit_op_dispatch_recipe_dedup_collapses_identical_profiles() { + // Two helpers on different entities with no special fields → both get + // entity=1 only → same recipe_id → one RECIPE_* const, two Op entries. 
+ const HELPER_A: OdooMethod = OdooMethod { + name: "_prepare_a", + kind: OdooMethodKind::Helper, + return_kind: OdooReturnKind::Dict, + triggers: &[], + }; + const HELPER_B: OdooMethod = OdooMethod { + name: "_prepare_b", + kind: OdooMethodKind::Helper, + return_kind: OdooReturnKind::Dict, + triggers: &[], + }; + const ENTITY_WITH_TWO_HELPERS: OdooEntity = OdooEntity { + model_name: "test.two_helpers", + kind: OdooEntityKind::Model, + description: "Two identical-profile helpers", + fields: &[], + methods: &[HELPER_A, HELPER_B], + decorators: &[], + state_machine: None, + constraints: &[], + provenance: EMPTY_PROVENANCE, + }; + + let buckets = bucket_corpus(&[&ENTITY_WITH_TWO_HELPERS]); + assert_eq!(buckets.len(), 2, "two methods"); + // Both get the same recipe_id + assert_eq!( + buckets[0].recipe.recipe_id, buckets[1].recipe.recipe_id, + "identical profiles collapse to one recipe_id" + ); + + let (src, stats) = emit_op_dispatch(&buckets); + // One unique recipe const + assert_eq!(stats.unique_recipes, 1, "one unique recipe_id"); + // But two Op entries in the static slice + assert!(src.contains("_prepare_a"), "_prepare_a in HELPER_OPS"); + assert!(src.contains("_prepare_b"), "_prepare_b in HELPER_OPS"); + // Only one RECIPE_* const emitted + let recipe_count = src.matches("pub const RECIPE_").count(); + assert_eq!(recipe_count, 1, "exactly one RECIPE_* const for the collapsed pair"); + } + + #[test] + fn emit_op_dispatch_ops_sorted_by_recipe_id_then_method_id() { + // Multi-entity corpus: sort order is deterministic. + let buckets = bucket_corpus(&[&TEST_ENTITY, &HELPER_ENTITY]); + let (src, _) = emit_op_dispatch(&buckets); + + // Spot-check: COMPUTE_OPS entry appears before ACTION_OPS entry in + // the source (bucket declaration order: Compute=0, Action=4). 
+ let compute_pos = src.find("pub static COMPUTE_OPS").unwrap(); + let action_pos = src.find("pub static ACTION_OPS").unwrap(); + assert!(compute_pos < action_pos, "Compute bucket before Action bucket"); + } + + #[test] + fn kind_ord_is_injective_over_all_variants() { + // All 10 variants map to distinct ordinals. + use std::collections::BTreeSet; + let ords: BTreeSet = [ + OdooMethodKind::Compute, + OdooMethodKind::Inverse, + OdooMethodKind::Constrain, + OdooMethodKind::Onchange, + OdooMethodKind::Action, + OdooMethodKind::Cron, + OdooMethodKind::ApiModel, + OdooMethodKind::ApiModelCreateMulti, + OdooMethodKind::Override, + OdooMethodKind::Helper, + ] + .iter() + .map(|k| kind_ord(*k)) + .collect(); + assert_eq!(ords.len(), 10, "all 10 variants have distinct ordinals"); + } + + #[test] + fn kind_ord_roundtrips_via_from_ord() { + for ord in 0u8..10 { + let k = kind_from_ord(ord); + assert_eq!(kind_ord(k), ord, "roundtrip for ord={ord}"); + } + } + + #[test] + fn atom_comment_empty_recipe_reports_empty() { + // Construct a recipe with no atoms (impossible in practice but + // the helper should handle it gracefully). 
+ let recipe = OdooStyleRecipe { + method_id: "test.entity._noop".to_owned(), + atoms: vec![], + regulation_iris: vec![], + return_kind: OdooReturnKind::Unit, + recipe_id: 0, + }; + assert_eq!(atom_comment(&recipe), "empty"); + } + + #[test] + fn atom_comment_formats_weights() { + let recipe = OdooStyleRecipe { + method_id: "test.entity._compute_x".to_owned(), + atoms: vec![(DAtom::Entity, 1), (DAtom::Compute, 4)], + regulation_iris: vec![], + return_kind: OdooReturnKind::Unit, + recipe_id: 0, + }; + assert_eq!(atom_comment(&recipe), "entity=1 compute=4"); + } +} From fece37d9eab236461e5181a4d7be7063d18773f8 Mon Sep 17 00:00:00 2001 From: Claude Date: Fri, 29 May 2026 14:49:29 +0000 Subject: [PATCH 2/3] =?UTF-8?q?chore(board):=20D-ODOO-OP-1=20shipped=20?= =?UTF-8?q?=E2=80=94=20op=5Femitter=20Phase=202=20board=20hygiene?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit https://claude.ai/code/session_017gZ6sPRXYPj5n7uJ7NBtRv --- .claude/board/AGENT_LOG.md | 15 +++++++++++++++ .claude/board/STATUS_BOARD.md | 2 ++ 2 files changed, 17 insertions(+) diff --git a/.claude/board/AGENT_LOG.md b/.claude/board/AGENT_LOG.md index caa3c20a..b62b43fc 100644 --- a/.claude/board/AGENT_LOG.md +++ b/.claude/board/AGENT_LOG.md @@ -1,3 +1,18 @@ +## [Main thread / Opus 4.8] op_emitter — Phase 2 bucket-dispatch codegen (SoA → Foundry SoC) + +**Branch:** claude/activate-lance-graph-att-k2pHI | **Files:** +- `crates/lance-graph-ontology/src/odoo_blueprint/op_emitter.rs` (+400 LOC new) +- `crates/lance-graph-ontology/src/odoo_blueprint/mod.rs` (+8 lines, `pub mod op_emitter` + comment block) + +**Commit:** `63f3e2ca` +**Tests:** 12/12 passed (`bucket_corpus_empty_input_produces_empty_vec`, `bucket_corpus_no_methods_entity_produces_empty_vec`, `bucket_corpus_groups_three_methods_into_correct_kinds`, `bucket_corpus_method_id_matches_entity_dot_method`, `emit_op_dispatch_empty_produces_valid_header_only_rust`, 
`emit_op_dispatch_produces_struct_and_static_for_each_kind`, `emit_op_dispatch_recipe_const_present_for_each_unique_id`, `emit_op_dispatch_deterministic_across_calls`, `emit_op_dispatch_recipe_dedup_collapses_identical_profiles`, `emit_op_dispatch_ops_sorted_by_recipe_id_then_method_id`, `kind_ord_is_injective_over_all_variants`, `kind_ord_roundtrips_via_from_ord`). Total lance-graph-ontology: 230/230 green. + +**D-ids:** D-ODOO-OP-1 (**Shipped**) + +**Outcome:** DONE. Phase 2 of the Odoo SoA → Foundry SoC pipeline. `bucket_corpus` groups `OdooStyleRecipe` corpus by semantic `OdooMethodKind` (10-variant: Compute/Inverse/Constrain/Onchange/Action/Cron/ApiModel/ApiModelCreateMulti/Override/Helper). `emit_op_dispatch` emits deterministic compilable Rust: per-unique-recipe_id `RECIPE_<ID>: u32` consts + per-kind `<Kind>Op { method_id, recipe_id }` struct + `static <KIND>_OPS: &[<Kind>Op]` slice. Recipe dedup: identical DAtom weight vectors collapse to one `RECIPE_*` const (many-to-one method→recipe mapping preserved in the static slice). Output is zero-dep Rust — no imports needed in the emitted file; consumers write it to `OUT_DIR` and `include!()`. Deterministic by construction: buckets in declaration order, within each bucket sorted by recipe_id then method_id. 
+ +--- + ## [SavantPattern / Opus 4.8] style_recipe — D-Atom interpretation step **Branch:** claude/activate-lance-graph-att-k2pHI | **Files:** diff --git a/.claude/board/STATUS_BOARD.md b/.claude/board/STATUS_BOARD.md index 20b6e271..87ef0dd9 100644 --- a/.claude/board/STATUS_BOARD.md +++ b/.claude/board/STATUS_BOARD.md @@ -610,6 +610,8 @@ PREREQUISITE for `odoo-savant-reasoners-v2` Group F (per `E-SAVANT-COMPOSITION-1 | D-ODOO-BP-1e | Wire DOLCE classifier + FIBU/FIBO alignment to take `&OdooEntity`; closes D-ODOO-SAV-2's `None`-class alignment for stock.* / analytic.distribution.model / account.account.tag over typed input | lance-graph-ontology | 200 | HIGH | **Queued** | blocked on 1b; parallel with 1c/1d | | D-ODOO-BP-1f | Odoo source extraction tool: tree-sitter Python AST → candidate `OdooEntity` consts with Confidence=Extracted; validates + extends 1b's curated set | tools/odoo-blueprint-extractor/ | 800 | MED | **Queued** | blocked on 1b/c/d/e; conflicts (curated vs extracted) flag for ratification, default to curated | | D-ODOO-BP-1g | Wire JITson → recipes: `jit::JitCompiler` compiles `Tactic` kernels parameterized by `(&OdooEntity, AtomTouchMask)`; produces DTO-ish NARS that lands in shader-driver | lance-graph-contract::jit + thinking-engine | 400 | MED | **Queued** | blocked on 1c/d/e; proof-of-concept on FiscalPositionResolver, the rest follow in `odoo-savant-reasoners-v2` Group F | +| D-ODOO-STYLE-1 | `style_recipe.rs` — Phase 1 D-Atom interpretation step: typed Odoo SoA → `OdooStyleRecipe` cognitive fingerprints (12 DAtom basis, 7-rule cascade, FNV-1a recipe_id, never stored back as triples) | lance-graph-ontology::odoo_blueprint | 746 | HIGH | **Shipped** | commit `feb8be54` (PR #433 merged); 13/13 tests; DAtom::ALL discriminant-order pinned; OdooStyleRecipe != contract::recipe::StyleRecipe (documented) | +| D-ODOO-OP-1 | `op_emitter.rs` — Phase 2 bucket-dispatch codegen: `bucket_corpus` groups OdooStyleRecipe by OdooMethodKind; 
`emit_op_dispatch` emits compilable Rust (RECIPE_* consts + per-kind Op structs + static Op slices); deterministic, recipe_id dedup collapses identical DAtom profiles | lance-graph-ontology::odoo_blueprint | 400 | HIGH | **Shipped** | commit `63f3e2ca`; 12/12 tests; zero-dep emitted output; 230/230 existing tests green | --- From 718a1424326723defa70e32b865b2b537f32d107 Mon Sep 17 00:00:00 2001 From: Claude Date: Fri, 29 May 2026 18:53:35 +0000 Subject: [PATCH 3/3] =?UTF-8?q?docs(arm-discovery):=20integration=20plan?= =?UTF-8?q?=20+=20handover=20for=20streaming=20ARM=20=E2=86=92=20NARS=20?= =?UTF-8?q?=E2=86=92=20SpoStore=20proposer=20leg?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Plan: `.claude/plans/streaming-arm-nars-discovery-v1.md` (+766 LOC, 18 §, 12 D-ARM-* deliverables, 10 OQs, 5 risks). Authors the missing UPSTREAM proposer leg into the SPO substrate — runtime tabular data (20K-200K rows/window) → pair-stats (deterministic trunk) + optional Aerial+ neural fan-in → NARS-truth translator → SpoStore round-trip hypothesis test (revise / commit contradiction per The Click) → epiphany-brainstorm-council ratification gate (the firewall) → op_emitter codegen. Two corrections proposed to PR #434's unified-SoA plan (§7): separate `discovery_arc: [u32; D]` SoA column for in-flight candidate evidence (distinct from witness-arc for committed revisions) + `discovery_origin: u8` per-row provenance byte. Handover: `.claude/handovers/2026-05-29-2030-arm-discovery-author-to-impl.md` — next-session entry sequence, council ratification ask for `E-DISCOVERY-CODEGEN-BRACKET-1` candidate epiphany + the §7 corrections, blockers (PR #434 D-MBX-A3 landing, `spo::truth::Contradiction` primitive verification), Wave 1-8 execution order. 
Anchor papers: Karabulut, Groth, Degeler — Neurosymbolic Association Rule Mining (arxiv 2504.19354v1, Apr 2025; ARM `(support, confidence)` maps verbatim to NARS `(c, f)`); Abreu, Cruz, Guerreiro — Ontology-Driven M2M Transformation (arxiv 2511.13661v1, Nov 2025; §4 "from code-centric to ontology-driven" ratifies the externalize-interpretation doctrine). The two papers BRACKET the architecture: discovery upstream, codegen downstream, SPO+NARS middle. Iron rules: I-NOISE-FLOOR-JIRAK (mandatory Stage A threshold), I-SUBSTRATE-MARKOV (NARS revision IS the Markov trajectory), I-VSA-IDENTITIES (typed triples never content). Epiphany anchors: E-SOA-IS-THE-ONLY, E-BATON-1, E-INTERPRET-NOT-STORE-1. Board hygiene: INTEGRATION_PLANS.md prepend, STATUS_BOARD.md new D-ARM-1..D-ARM-12 section, AGENT_LOG.md prepend. Spec only. No code. No cargo invoked. https://claude.ai/code/session_017gZ6sPRXYPj5n7uJ7NBtRv --- .claude/board/AGENT_LOG.md | 24 + .claude/board/INTEGRATION_PLANS.md | 9 + .claude/board/STATUS_BOARD.md | 21 + ...05-29-2030-arm-discovery-author-to-impl.md | 150 ++++ .../plans/streaming-arm-nars-discovery-v1.md | 766 ++++++++++++++++++ 5 files changed, 970 insertions(+) create mode 100644 .claude/handovers/2026-05-29-2030-arm-discovery-author-to-impl.md create mode 100644 .claude/plans/streaming-arm-nars-discovery-v1.md diff --git a/.claude/board/AGENT_LOG.md b/.claude/board/AGENT_LOG.md index b62b43fc..98638cdf 100644 --- a/.claude/board/AGENT_LOG.md +++ b/.claude/board/AGENT_LOG.md @@ -1,3 +1,27 @@ +## [Main thread / Opus 4.7] streaming-arm-nars-discovery-v1 — integration plan + handover + #434 corrections (the upstream proposer leg) + +**Branch:** claude/activate-lance-graph-att-k2pHI (rebased onto origin/main post PR #434 merge) | **Files:** +- `.claude/plans/streaming-arm-nars-discovery-v1.md` (+766 LOC new) — 18 sections, 12 deliverables, 10 OQs, 5 risks +- `.claude/handovers/2026-05-29-2030-arm-discovery-author-to-impl.md` (+225 LOC new) +- 
`.claude/board/INTEGRATION_PLANS.md` (prepend new section header) +- `.claude/board/STATUS_BOARD.md` (new D-ARM-1..D-ARM-12 row section) + +**Cargo:** not invoked (per session-stability rule + this is a SPEC PR with no code changes). + +**D-ids:** D-ARM-1 through D-ARM-12 (**Queued**) + +**Outcome:** DONE. Authored the integration plan for the missing **upstream proposer leg** — ARM rule discovery over streaming runtime tabular data (20K-200K rows/window) → translator to NARS-compatible `TruthValue(f,c)` → SpoStore round-trip hypothesis test (revise / commit contradiction per The Click) → council ratification gate (Stage D = the determinism firewall) → `op_emitter` codegen consumes only ratified candidates. Two corrections proposed to PR #434's unified-SoA plan: separate `discovery_arc: [u32; D]` SoA column (D=8 default; for tracking in-flight candidate rules per row, distinct from the witness-arc that tracks committed revisions) + `discovery_origin: u8` per-row provenance byte (2 bits ProvenanceTier + 2 bits proposer-id + 4 reserved; lets council's prior-art-savant tell ArmDiscovered from Curated/Extracted at lookup time). + +**Paper anchors:** Karabulut, Groth, Degeler — *Neurosymbolic Association Rule Mining from Tabular Data* (arxiv 2504.19354v1, Apr 2025; ARM truth definitions in §2 map verbatim to NARS `(f,c)`; Algorithm 1 in §3.3 is the Aerial+ rule extraction the optional `arm-aerial` feature wraps via IPC). Abreu, Cruz, Guerreiro — *Ontology-Driven M2M Transformation of Workflow Specifications* (arxiv 2511.13661v1, Nov 2025; §4 "from code-centric to ontology-driven" ratifies the externalize-interpretation-not-code doctrine). The two papers BRACKET the architecture: discovery upstream, codegen downstream, SPO+NARS middle. Candidate epiphany `E-DISCOVERY-CODEGEN-BRACKET-1` (council-pending). 
+ +**Iron-rule respect:** I-NOISE-FLOOR-JIRAK (mandatory Stage A threshold via D-ARM-7), I-SUBSTRATE-MARKOV (NARS revision IS the Markov trajectory; bundle math untouched), I-VSA-IDENTITIES (operates on typed `(s,p,o)` triples, never bundles content). E-SOA-IS-THE-ONLY (writes via SpoBuilder only), E-BATON-1 (Stage C emissions are batons riding existing handoff), E-INTERPRET-NOT-STORE-1 (ARM is one interpretation projection of the lossless substrate). + +**Plan-writing pattern:** `tee -a` chunked appends (12 chunks) per user instruction — avoids memory pressure for long-form plan writes; each chunk independently verifiable in `wc -l` post-append. Pattern documented in plan §17 decision log. + +**Next session entry:** Council ratification of `E-DISCOVERY-CODEGEN-BRACKET-1` + §7 corrections, then Wave 1 (D-ARM-1 + D-ARM-2 contract additions). Full sequencing in handover. + +--- + ## [Main thread / Opus 4.8] op_emitter — Phase 2 bucket-dispatch codegen (SoA → Foundry SoC) **Branch:** claude/activate-lance-graph-att-k2pHI | **Files:** diff --git a/.claude/board/INTEGRATION_PLANS.md b/.claude/board/INTEGRATION_PLANS.md index 93931c2d..7a0cdf50 100644 --- a/.claude/board/INTEGRATION_PLANS.md +++ b/.claude/board/INTEGRATION_PLANS.md @@ -1,3 +1,12 @@ +## 2026-05-29 — streaming-arm-nars-discovery-v1 (upstream proposer leg: 20K-200K rows/window pair-stats + optional Aerial+ neural fan-in → NARS-truth translator → SpoStore hypothesis test → epiphany-council ratification → op_emitter codegen) + +**Status:** PROPOSAL / integration plan. Design-spec only, no code. **Plan file:** `.claude/plans/streaming-arm-nars-discovery-v1.md`. **Handover:** `.claude/handovers/2026-05-29-2030-arm-discovery-author-to-impl.md`. **Candidate epiphany:** `E-DISCOVERY-CODEGEN-BRACKET-1` (council-pending). +**Owns:** the missing upstream discovery leg into the SPO substrate. 
Today's proposers (`D-ODOO-BP-1b` curated, `D-ODOO-EXT-2` AST-extracted) are bounded by literal artifacts; this plan adds runtime-data discovery via streaming pair-stats (deterministic trunk) + optional Aerial+ neural-symbolic fan-in. The two papers (Karabulut 2025 arxiv 2504.19354v1; Abreu 2025 arxiv 2511.13661v1) ratify the architecture: discovery upstream, codegen downstream, SPO+NARS middle. Adds 12 deliverables D-ARM-1…D-ARM-12; one new crate `lance-graph-arm-discovery`; 10 OQs; two corrections proposed to PR #434 (`discovery_arc: [u32; D]` column, `discovery_origin: u8` byte). +**Anchored iron rules:** I-NOISE-FLOOR-JIRAK (mandatory Stage-A threshold), I-SUBSTRATE-MARKOV (NARS revision IS the Markov trajectory), I-VSA-IDENTITIES (operates on typed triples, never content). **Anchored epiphanies:** E-SOA-IS-THE-ONLY (writes via SpoBuilder only), E-BATON-1 (Stage-C emissions are batons), E-INTERPRET-NOT-STORE-1 (ARM is one interpretation projection). +**Predecessors:** PR #433 (style_recipe.rs + epiphany-brainstorm-council + 5 savant cards = the ratification gate), PR #434 (unified-soa-convergence-v1.md = the SoA contract this writes against), op_emitter.rs (this branch = Phase 2 codegen target). + +--- + ## 2026-05-29 — unified-soa-convergence-v1 (THE single LE SoA end-to-end across the workspace: 9 half-baked consumers + SoA version gate + Lance 6.0.1/LanceDB 0.29/DataFusion 53 alignment + 4-phase Rubicon kanban + lance-graph-planner DTO overhaul + Libet −550 ms anchor + Staunen×Wisdom plasticity spreader + SPO-W pointer via AriGraph episodic Markov chain) **Status:** PROPOSAL / integration plan. Design-spec only, no code. **Plan file:** `.claude/plans/unified-soa-convergence-v1.md`. **Handover:** `.claude/handovers/2026-05-29-1825-soa-convergence-author-to-impl.md`. **Epiphany:** `E-SOA-IS-THE-ONLY` (+ refinements §11.3/4/6 in PR-this-one). 
diff --git a/.claude/board/STATUS_BOARD.md b/.claude/board/STATUS_BOARD.md index 87ef0dd9..aeed9336 100644 --- a/.claude/board/STATUS_BOARD.md +++ b/.claude/board/STATUS_BOARD.md @@ -615,6 +615,27 @@ PREREQUISITE for `odoo-savant-reasoners-v2` Group F (per `E-SAVANT-COMPOSITION-1 --- +## streaming-arm-nars-discovery-v1 — upstream proposer leg into the SPO substrate (20K-200K rows/window pair-stats + optional Aerial+ → NARS-truth → SpoStore hypothesis test → council ratification → op_emitter codegen) + +The missing UPSTREAM discovery leg. Today's proposers (curated L-docs + AST-extracted Odoo source) are bounded by the literal artifact; this plan adds runtime-tabular-data ARM discovery, gated through the epiphany-brainstorm-council before reaching the deterministic codegen path. Plan: `.claude/plans/streaming-arm-nars-discovery-v1.md`. Handover: `.claude/handovers/2026-05-29-2030-arm-discovery-author-to-impl.md`. + +| D-id | Title | Crate | Lines | Conf | Status | Notes | +|---|---|---|---|---|---|---| +| D-ARM-1 | `ProvenanceTier::{Curated,Extracted,ArmDiscovered,Ratified,Conjecture}` enum + ordering | lance-graph-contract | 50 | HIGH | **Queued** | blocks all other D-ARM-*; additive | +| D-ARM-2 | `Proposer` trait + `CandidateRule` carrier + `WindowMetadata` | lance-graph-contract | 100 | HIGH | **Queued** | blocks D-ARM-3, D-ARM-9 | +| D-ARM-3 | Pair-stats proposer (default trunk, deterministic, k² pair counters per window) | lance-graph-arm-discovery::proposer::pair_stats | 400 | HIGH | **Queued** | depends on D-ARM-1/2/7; blocks D-ARM-12 | +| D-ARM-4 | ARM-truth → NARS-truth translator + Odoo `FeedProjector` impl | lance-graph-arm-discovery::translator | 200 | HIGH | **Queued** | depends on D-ARM-1/2 | +| D-ARM-5 | Hypothesis test: SpoStore round-trip, NARS revision, contradiction commit per The Click | lance-graph-arm-discovery::hypothesis | 350 | MED | **Queued** | depends on D-ARM-4; verifies `spo::truth::Contradiction` primitive exists | +| D-ARM-6 | 
`RatificationQueue` ring buffer + corrections-to-#434 spec PR (`discovery_arc D=8`, `discovery_origin u8`) | lance-graph-arm-discovery::queue + #434 spec follow-up | 200 + spec | MED | **Queued** | depends on PR #434 D-MBX-A3 landing | +| D-ARM-7 | Jirak-2016 weak-dependence significance thresholds (mandatory Stage A floor) | lance-graph-arm-discovery::jirak | 150 | HIGH | **Queued** | blocks D-ARM-3; cites I-NOISE-FLOOR-JIRAK | +| D-ARM-8 | `Feed` + `FeedProjector` + window-size config + Odoo `account.move` projector example | lance-graph-arm-discovery::feed | 250 | MED | **Queued** | depends on D-ARM-2 | +| D-ARM-9 | Aerial+ IPC client (feature-gated `arm-aerial`, NDJSON over Unix socket) | lance-graph-arm-discovery::proposer::aerial_ipc | 200 | MED | **Queued** | optional; depends on D-ARM-2 | +| D-ARM-10 | `op_emitter::bucket_corpus` ratification filter (`confidence ≥ Ratified`) + 2 tests | lance-graph-ontology::op_emitter | 30 | HIGH | **Queued** | depends on D-ARM-1 | +| D-ARM-11 | `style_recipe.rs` rule 8 — ArmDiscovered backing adds `DAtom::Compute` weight 2 (provisional) | lance-graph-ontology::style_recipe | 80 | MED | **Queued** | depends on D-ARM-1 | +| D-ARM-12 | End-to-end pipeline test + bench (synthetic Odoo feed → all 5 stages → council micro-batch) | lance-graph-arm-discovery::tests + benches | 400 | MED | **Queued** | depends on Waves 1-6; informs OQ-ARM-2 + OQ-ARM-7 | + +--- + ## Update protocol When a deliverable ships: diff --git a/.claude/handovers/2026-05-29-2030-arm-discovery-author-to-impl.md b/.claude/handovers/2026-05-29-2030-arm-discovery-author-to-impl.md new file mode 100644 index 00000000..97e8721b --- /dev/null +++ b/.claude/handovers/2026-05-29-2030-arm-discovery-author-to-impl.md @@ -0,0 +1,150 @@ +# Handover — arm-discovery-author → arm-discovery-impl + +**Date:** 2026-05-29 20:30 UTC +**From session:** continuation on `claude/activate-lance-graph-att-k2pHI` (post PR #433 + PR #434 merges). 
+**To:** the implementation session that picks up `streaming-arm-nars-discovery-v1.md`. + +**Plans + PRs to read first (in this order):** + 1. PR #433 (merged) — `style_recipe.rs` + epiphany-brainstorm-council + 5 savant cards. The interpretation layer + the ratification gate. + 2. PR #434 (merged) — `unified-soa-convergence-v1.md` + the 5 layered rulings (`E-SOA-IS-THE-ONLY`, `E-BATON-1`, `E-MAILBOX-IS-BINDSPACE`, `E-RUBICON-RACTOR`, the witness-arc handover). + 3. This handover. + 4. `.claude/plans/streaming-arm-nars-discovery-v1.md` — the plan this handover ships with. **Do NOT re-derive** — the plan is meticulous, 766 lines, every section spec-ratified. + 5. PR (this handover's PR) — `op_emitter.rs` (Phase 2 bucket-dispatch codegen) + this plan + this handover. + +--- + +## What this session did (chronological) + +1. **Pre-context** (summarized) — shipped Phase 1 `style_recipe.rs` (PR #433: 13 tests, DAtom catalogue, 7-rule cascade) + epiphany-brainstorm-council orchestrator + 5 savant cards. Council ran on `E-INTERPRET-NOT-STORE-1` and produced LAND verdict with corrections (`StyleRecipe` → `OdooStyleRecipe` rename, FNV exemption documented, P-1 litmus respected). All applied, merged. + +2. **This session entry** — user re-shared two arxiv papers (Aerial+ 2504.19354v1, ontology M2M 2511.13661v1) and a paste of full paper text. Synthesized the two papers against the existing `op_emitter.rs` pipeline: + - Paper 1 (M2M) is independent confirmation of the externalize-interpretation doctrine; its failure mode (5.81% — runtime behavior absent from static JSON) IS our Stage-2 dark-atom gap. + - Paper 2 (Aerial+) supplies the missing upstream discovery leg. Its `(support, confidence)` mapping to NARS `(c, f)` is verbatim — `SpoBuilder::build_edge` consumes it natively. + - The two papers bracket the architecture: discovery upstream, codegen downstream, SPO+NARS middle. + +3. 
**Phase 2 op_emitter shipped** — `op_emitter.rs` (400 LOC, 12 tests) committed to this branch (`63f3e2ca`). Bucket-dispatch codegen: groups `OdooStyleRecipe` corpus by `OdooMethodKind`, emits deterministic Rust (RECIPE_* consts + per-kind Op struct + static slice). All 230 lance-graph-ontology tests green; zero warnings. Board hygiene done (`e7ee368f`). + +4. **Rebased branch onto PR #434 merge.** Two doc commits inherited (`7c289678` unified-SoA + `eb5c4a58` Lance 6.0.1 stack pin); clean rebase. + +5. **Reviewed PR #434's plan + handover.** Identified two corrections to fold in: + - **OQ-11.2 W=16 witness-arc width** is too narrow for tracking multiple in-flight candidate rules per row. Proposed: separate `discovery_arc: [u32; D]` column, D=8. + - **OQ-11.5 SoA-root version u16** doesn't disambiguate proposer provenance. Proposed: per-row `discovery_origin: u8` byte (2 bits tier + 2 bits proposer-id + 4 reserved). + +6. **Authored `streaming-arm-nars-discovery-v1.md`** — 766 lines, 18 sections, 12 deliverables. Sectioned the upstream discovery leg into 5 stages (proposers → translator → hypothesis test → ratification → codegen), each with concrete code shape and threshold semantics. Plan written via `tee -a` chunking per user instruction (12 chunks). + +--- + +## FINDING (high-confidence facts the next session inherits) + +- **The five-stage pipeline shape is ratified:** + ``` + parquet stream → Stage A (proposer) → Stage B (translator) → Stage C (hypothesis test) + → Stage D (ratification — epiphany-brainstorm-council) → Stage E (op_emitter codegen) + ``` + Each stage has a contract surface; the boundaries between Stages C and D form the determinism firewall (Stage D ratification gate is the only nondeterministic-to-deterministic transition). + +- **Pair-stats is the default trunk; Aerial+ is fan-in.** Per the determinism boundary; Aerial+'s autoencoder is nondeterministic and must NEVER cross the ratification gate. 
The trunk is fully deterministic over windowed sufficient statistics. + +- **ARM truth → NARS truth mapping is verbatim:** + - `frequency` ← ARM `confidence` (= P(Y|X)) + - `confidence` ← `(support × n) / (support × n + k)` (NAL-9 default `k=1.0`) + +- **I-NOISE-FLOOR-JIRAK is mandatory at Stage A.** Without Jirak-bound thresholds, the proposer's false-positive rate exceeds the substrate's noise floor and the SpoStore calcifies on noise. D-ARM-7 is non-skippable. + +- **The discovery leg is strictly additive to PR #434.** No SoA-contract changes required for Waves 1-4. Wave 5a's `discovery_arc` column is a v1.1 follow-up; v1 lives with `edges` arc contention. + +- **The new crate is `lance-graph-arm-discovery`.** Sits next to `lance-graph-ontology`. Depends only on `lance-graph-contract` + arrow/parquet. Zero deps beyond that for the default trunk; `tokio` + `serde_json` behind `arm-aerial` feature flag. + +- **The papers ARE the support.** Karabulut 2025 §2 + §3.3 ratify the truth mapping; Abreu 2025 §4 ratifies the externalize-interpretation doctrine. No new conjectures — both are in print. + +--- + +## CONJECTURE (load-bearing, ratify before acting on it) + +- **OQ-ARM-2 — Jirak `p_moment` for Odoo data.** Plan defaults to `p = 3.0` (giving `n^{-1}` decay). This is conservative; actual measurement of Odoo `account.move` weak-dependence index is needed. The default is safe (over-strict), but D-ARM-12 bench should empirically pin `p` for typical Odoo feeds. + +- **OQ-ARM-3 — NARS personality constant `k`.** Plan defaults to `k = 1.0` (NAL-9 standard). Different feeds may justify different `k` — higher `k` means more evidence needed for high confidence. Per-feed override is available; default is safe. + +- **OQ-ARM-6 — Contradiction commit shape.** Plan proposes symmetric pair (one `CausalEdge` per side, back-pointer between). 
Verify this matches the existing `lance_graph::graph::spo::truth::Contradiction` primitive at D-ARM-5 time; if the primitive doesn't exist yet, surface a follow-up to `lance-graph-contract`. + +- **The §7 corrections to PR #434.** The `discovery_arc` column and `discovery_origin` byte are author-stated by the planner (me), not council-ratified. They should pass through the council before D-ARM-6 lands. Cross-ref: `epiphany-brainstorm-council` invocation. + +- **Aerial+ IPC overhead.** Plan assumes NDJSON-over-Unix-socket is cheap enough; at 100 K candidates/window from a subprocess that's 10-100 MB/window of NDJSON. Bench (D-ARM-12) should measure; may justify a binary IPC protocol if line-overhead dominates. + +- **Reverse-fingerprint contradiction detection (OQ-ARM-8).** Plan defers concrete cutoff to D-ARM-5; the cutoff must be Jirak-derived, not hand-tuned. This is downstream of D-ARM-7's threshold helper. + +--- + +## Blockers + +- **PR #434 D-MBX-A3 landing.** Wave 5a (`discovery_arc` + `discovery_origin` corrections) depends on D-MBX-A3 having added the witness-arc handle column to the mailbox SoA. If D-MBX-A3 is still in flight when Wave 4 completes, Wave 5a can wait without blocking the discovery leg's operational utility. + +- **`lance_graph::graph::spo::truth::Contradiction` primitive verification.** D-ARM-5's contradiction-commit path assumes a primitive that may not exist yet. Verification pass needed at Wave 4 entry. If missing, add to `lance-graph-contract` first. + +- **The cargo prohibition for agents.** Per the session-stability rule (no cargo invocations from spawned agents — disk pressure constraint), all `cargo check / cargo test` runs are main-thread orchestrator only. Subagents do code review + write only; main thread verifies. This constraint is documented in CLAUDE.md / AGENT_LOG.md. 
+ +- **`tools/odoo-blueprint-extractor` Stage-2 enrichment.** The dark atoms (Money/Quantity/ApplyRate/EmitAmount/Event/FiscalCtx) don't fire today because `return_kind`/`semantic_role` aren't populated. ARM discovery can light them via runtime data, but the static extractor should ALSO fix this — D-ODOO-EXT enrichment is parallel work. + +- **Aerial+ upstream access.** `DiTEC-project/aerial-rule-mining` and `AdaWorldAPI/aerial-rule-mining` are both outside the workspace MCP allowlist as of this session. Wave 7 (D-ARM-9, optional) is blocked on either (a) allowlist update, (b) user-pasted reference, or (c) re-implementation from paper Algorithm 1. (c) is feasible — the algorithm is in print. + +--- + +## Open questions for the user + +| # | Question | Default proposal | Blocks | +|---|---|---|---| +| OQ-ARM-1 | Default window size n? | 100K, per-Feed configurable | D-ARM-3, D-ARM-8 | +| OQ-ARM-2 | Jirak `p_moment` for Odoo? | p = 3.0 conservative | D-ARM-7 | +| OQ-ARM-3 | NARS personality `k`? | k = 1.0 NAL-9 standard | D-ARM-4 | +| OQ-ARM-4 | `RatificationQueue` persistence? | In-memory v1; persist v2 | D-ARM-6 | +| OQ-ARM-5 | Antecedent bound `a`? | Hard-cap 2 in pair-stats; Aerial+ for higher | D-ARM-3 | +| OQ-ARM-6 | Contradiction commit shape? | Symmetric pair w/ back-pointer | D-ARM-5 | +| OQ-ARM-7 | `discovery_arc` D=8 column day-one or v1.1? | Defer to v1.1; bench first | D-ARM-6 | +| OQ-ARM-8 | Inverse-fingerprint contradiction policy? | Cite Jirak; concrete cutoff at D-ARM-5 | D-ARM-5 | +| OQ-ARM-9 | Council ratification trigger? | Session-trigger; no webhook v1 | D-ARM-6 | +| OQ-ARM-10 | Aerial+ as separate crate or feature? | Feature inside; promote later | D-ARM-9 | + +Plus the corrections-to-#434 ratification ask: do §7's `discovery_arc` D=8 and `discovery_origin: u8` byte get folded into D-MBX-A3 (the SoA owner's PR), or stay in D-ARM-6 (the discovery proposer's PR)? + +--- + +## Recommended next-session entry sequence + +1. 
**Read Tier-0** (LATEST_STATE.md, PR_ARC_INVENTORY.md, agents/BOOT.md). +2. **Read this handover + the plan** (`.claude/plans/streaming-arm-nars-discovery-v1.md`). Do not re-derive. +3. **Council ratification of corrections** (§7 of plan). Spawn the epiphany-brainstorm-council with `E-DISCOVERY-CODEGEN-BRACKET-1` candidate epiphany + the two §7 corrections. Expect LAND or LAND-with-revision; act on verdict. +4. **Wave 1 (D-ARM-1 + D-ARM-2)** — contract additions. One PR. Sonnet agent. Main thread runs cargo verify. +5. **Wave 2 (D-ARM-7)** — Jirak helpers. Pure math. One PR. Sonnet agent. +6. **Waves 3a + 3b in parallel** (D-ARM-3 pair-stats + D-ARM-4 translator). Two PRs. Two Sonnet agents in one main-thread turn. +7. **Wave 4 (D-ARM-5)** — hypothesis test. Opus agent (multi-source). One PR. +8. **Wave 5a** — corrections to #434 + queue impl. Two PRs. Coordinate with D-MBX-A3 author. +9. **Wave 5b (D-ARM-8)** — feed + projector. Sonnet agent. +10. **Wave 6 (D-ARM-10 + D-ARM-11)** — op_emitter filter + style_recipe rule. Sonnet agent. Trivial. +11. **(Optional) Wave 7 (D-ARM-9)** — Aerial+ IPC. Only if user signals demand. +12. **Wave 8 (D-ARM-12)** — end-to-end test + bench. Opus agent. Bench numbers inform OQ-ARM-2 and OQ-ARM-7. + +--- + +## Risk to flag explicitly + +The discovery leg ships an **upstream proposer node that didn't exist before in this architecture.** Three doctrinal risks to keep in mind throughout implementation: + +1. **Don't promote Stage A → Stage E without Stage D.** The council ratification gate is the only nondeterministic-to-deterministic transition. Skipping it (e.g. "auto-ratify if confidence > 0.95") sounds appealing and is the Kahneman-Tversky System-1 trap. Reject. + +2. **Don't conflate the witness-arc with a discovery-arc.** PR #434's witness arc is for *ratified* belief-state revisions; ARM-discovery's `discovery_arc` (proposed §7) is for *in-flight* candidate evidence. 
Cohabiting them in the same `edges` column pollutes the audit trail. + +3. **Don't optimize before benchmarking.** D-ARM-12 bench establishes the actual throughput envelope and the actual `p_moment`. Premature SIMD/GPU work on Stage A is a distraction; the pair-stats inner loop is simple and the optimization target should be the FeedProjector's row-decode cost, not the counters. + +--- + +## Cross-refs + +- `streaming-arm-nars-discovery-v1.md` — the plan this handover hands off. +- `unified-soa-convergence-v1.md` (PR #434) — the SoA contract this plan writes against. +- `style_recipe.rs` (PR #433 / `OdooStyleRecipe`) — the interpretation layer that consumes ratified triples. +- `op_emitter.rs` (this branch) — the codegen layer that consumes ratified `OdooEntity` SoA. +- `epiphany-brainstorm-council` (PR #433) — the ratification gate. +- CLAUDE.md `I-NOISE-FLOOR-JIRAK`, `I-SUBSTRATE-MARKOV`, `I-VSA-IDENTITIES`, "The Click" — the doctrinal anchors. +- Papers: Karabulut 2025 (arxiv 2504.19354v1), Abreu 2025 (arxiv 2511.13661v1). + +End of handover. diff --git a/.claude/plans/streaming-arm-nars-discovery-v1.md b/.claude/plans/streaming-arm-nars-discovery-v1.md new file mode 100644 index 00000000..f26c3bfe --- /dev/null +++ b/.claude/plans/streaming-arm-nars-discovery-v1.md @@ -0,0 +1,766 @@ +# streaming-arm-nars-discovery-v1 — Streaming association-rule discovery → NARS revision → ratified SPO triples → deterministic codegen + +> **Status:** PROPOSAL / integration plan. Spec only; **no code in this plan**. +> **Authored:** 2026-05-29 (session continuation on `claude/activate-lance-graph-att-k2pHI`, post-PR #433 + #434 merges). 
+> **Supersedes nothing; integrates / sequences:** +> - `unified-soa-convergence-v1.md` (PR #434 — the ONE SoA all consumers read; this plan adds the *upstream proposer* leg) +> - `odoo-business-logic-blueprint-v1.md` (the typed Odoo SoA + L-doc projection — extraction proposer) +> - `odoo-source-extraction-v1.md` (the AST proposer that backs `OdooConfidence::Extracted`) +> - `style_recipe.rs` + `op_emitter.rs` (Phase 1 + Phase 2 of the SoA→SoC codegen — the *downstream* consumer of ratified rules) +> +> **Anchored to (FINDING-grade):** `I-NOISE-FLOOR-JIRAK` (Jirak-bound thresholds, not classical Berry-Esseen), `I-VSA-IDENTITIES` (bundle identities, never content; ARM extracts identity-rules from content stats), `I-SUBSTRATE-MARKOV` (the NARS revision arc IS the Markov trajectory), `E-SOA-IS-THE-ONLY` (proposers write to the one SoA via SpoBuilder; never a parallel DTO), `E-BATON-1` (cross-mailbox state is the discrete owned baton — discovery batches ride this contract), `E-INTERPRET-NOT-STORE-1` (triplet substrate admits domain-owned interpretation; ARM IS a domain interpretation projection). +> +> **Papers anchored to:** +> - Karabulut, Groth, Degeler — *Neurosymbolic Association Rule Mining from Tabular Data* (arxiv 2504.19354v1, Apr 2025). Aerial+ as one possible Stage-A proposer. Source: https://github.com/DiTEC-project/aerial-rule-mining (workspace-fork at AdaWorldAPI/aerial-rule-mining; outside MCP allowlist as of 2026-05-29). +> - Abreu, Cruz, Guerreiro — *Ontology-Driven Model-to-Model Transformation of Workflow Specifications* (arxiv 2511.13661v1, Nov 2025). Independent confirmation of the externalize-interpretation-not-code doctrine — § "from code-centric to ontology-driven" is the direct mirror of our triplet-substrate position. 
+> +> **Owns the answer to:** *"in a perfect world we would need rules discovery and stream proprietary data through NARS reasoning and stream 20.000-200.000 [records per window] and try to determine co-correlation into deterministic rule candidates and do hypothesis testing against facts and edges."* + +--- + +## 0. Executive summary (one screen) + +The Odoo SoA → Foundry SoC pipeline shipped today (PR #433: `style_recipe.rs`; this branch: `op_emitter.rs`) is the **deterministic downstream codegen leg**. Its inputs are typed `OdooEntity` SoA records that came from two **proposer legs** today: + +1. **Curated (L-doc projection)** — humans translate prose to const data. High confidence, low throughput. +2. **Extracted (AST walk)** — `tools/odoo-blueprint-extractor` parses `/home/user/odoo` Python ORM source. Deterministic, lossless for what's in the AST. Bounded by what the source explicitly states. + +Both proposers feed the same SPO substrate. Both are **bounded by the literal artifact** — neither can surface co-correlations that emerge only in *runtime data* (parquet rows, transaction streams, invoice history). The paper *MOST OF THE BUSINESS LOGIC LIVES IN THE DATA, NOT THE SCHEMA*: ARM (Aerial+ or the classical FP-Growth lineage) is the missing **third proposer leg** that mines runtime tabular data for `(X → Y)` rules with NARS-compatible truth `(frequency, confidence)`. + +The plan opens a new crate `lance-graph-arm-discovery` that streams 20K-200K rows per window through: + +``` +parquet/stream + │ + ▼ Stage A — proposers (parallel feeds) + ┌──────────────────────────────────────────────────────────────┐ + │ A1. Streaming pair-stats [deterministic, default trunk] │ + │ sufficient statistics per (item_i, item_j) over window │ + │ → support, confidence, Jirak-bound significance │ + │ A2. 
Aerial+ neural-symbolic [optional, behind feature flag] │ + │ autoencoder + reconstruction-probe rule extraction │ + │ → support, confidence (paper Algorithm 1) │ + └──────────────────────────────────────────────────────────────┘ + │ + ▼ Stage B — translator (ARM truth → NARS truth) + ┌──────────────────────────────────────────────────────────────┐ + │ confidence (ARM) → frequency (NARS f ∈ [0, 1]) │ + │ support × window_size → confidence (NARS c ∈ [0, 1)) │ + │ → CandidateTriple { s, p, o, truth: TruthValue, origin } │ + └──────────────────────────────────────────────────────────────┘ + │ + ▼ Stage C — hypothesis test (against SpoStore + EdgeColumn) + ┌──────────────────────────────────────────────────────────────┐ + │ for each candidate: │ + │ prior = SpoStore::lookup(s,p,o) │ + │ match prior │ + │ ─ Some(t) → SpoStore::revise(t, candidate.truth) │ + │ (NARS revision; widens confidence, weights │ + │ frequency by inverse variance) │ + │ ─ Contradiction(prior, c, candidate, c') → │ + │ commit a Contradiction edge per The Click │ + │ ─ None → queue for ratification (Stage D) │ + └──────────────────────────────────────────────────────────────┘ + │ + ▼ Stage D — ratification (epiphany-brainstorm-council gate) + ┌──────────────────────────────────────────────────────────────┐ + │ threshold-survive candidates with `origin = ArmDiscovery` │ + │ pass through the 5-savant council before becoming a triple │ + │ the codegen path consumes. NEVER auto-promote — discovery │ + │ is the Epiphany branch of The Click, not the Commit branch.│ + └──────────────────────────────────────────────────────────────┘ + │ + ▼ Stage E — codegen (op_emitter consumes ratified) + ┌──────────────────────────────────────────────────────────────┐ + │ `op_emitter::emit_op_dispatch` takes ratified triples that │ + │ carry `OdooConfidence::Ratified` (new variant) and emits │ + │ `RECIPE_* ` consts + per-kind Op slices. 
Determinism rests │ + │ on the ratification gate — Stage D is the firewall between │ + │ nondeterministic proposers and deterministic compile path. │ + └──────────────────────────────────────────────────────────────┘ +``` + +Total new crates: **1** (`lance-graph-arm-discovery`). Total new deliverables: **D-ARM-1 … D-ARM-12**. Two corrections to PR #434's unified-SoA plan (§7 of this doc): an extra `discovery_arc` SoA column and a `discovery_origin: u8` provenance byte. + +**No code in this PR. No cargo invoked.** + +--- + +## 1. Context — the two papers and the workspace + +### 1.1 Paper anchor — Aerial+ (Karabulut, Groth, Degeler, 2025) + +Neurosymbolic ARM. The problem they solve is *rule explosion*: classical exhaustive miners (FP-Growth, HMine) emit O(2^k) rules over k features. Their approach: + +1. One-hot encode rows into transaction vectors. +2. Train an **under-complete denoising autoencoder** with softmax-per-feature and BCE loss (paper §3.2). The latent representation compresses feature co-occurrence. +3. **Extract rules by exploiting reconstruction** (paper §3.3, Algorithm 1). For antecedent candidate `X`: mark its categories at probability 1, uniform elsewhere, forward-pass; for every category `Y` with `p_Y > τ_c` and `min_X p_X > τ_a`, emit rule `X → Y` carrying support and confidence. + +Their result on five UCI datasets: 2–10× fewer rules, full data coverage, equal or higher confidence vs FP-Growth. On Spambase→CORELS: **1,409 rules vs 275,003 at higher accuracy in 5 s vs 1,258 s.** + +The critical observation for us is *not* the neural part — that's a compressor optional for our typed-and-sparse domain. The critical observation is the **truth definition** (paper §2, verbatim): + +> "An association rule X → Y is said to have support level s if s% of transactions in D contains X ∪ Y. The confidence of a rule is the conditional probability that a transaction containing X also contains Y." 
+ +This maps to `lance_graph::graph::spo::TruthValue::new(f, c)` with **no impedance mismatch**: + +- ARM **confidence** = P(Y|X) → NARS **frequency** `f` (degree to which the implication holds) +- ARM **support × window_size** evidential weight → NARS **confidence** `c` (how much evidence backs `f`) + +An Aerial+ candidate rule lifts straight into `SpoBuilder::build_edge` as a `(s, p, o, f, c)` quad with full NARS revision semantics. The reconstruction-probe pattern (`unbind` + cleanup against codebook) is the continuous twin of CLAUDE.md's `likelihood = vsa_cosine(unbind(bundle), codebook_fp)` thresholded by resonance. + +### 1.2 Paper anchor — Ontology-driven M2M (Abreu, Cruz, Guerreiro, Nov 2025) + +Independent confirmation of our position. Their pipeline: proprietary JSON workflow defs → semantic lifting via RML → RDF triples + DL reasoner over BBO ontology → BPMN 2.0 generation via Camunda Model API. Result: 92 BPMN diagrams from 69 JSON inputs, 94.2% success, 404 ms/file, **fully deterministic CI pipeline**. + +The load-bearing quote (paper §4, "From a code-centric instantiation to an ontology-driven method"): + +> "An initial, code-centric prototype implemented a direct JSON → Java → BPMN pipeline. Although feasible end-to-end, IBPM rapidly accumulated special-case handlers as the specification evolved (e.g., button patterns, conditional targets, multi-instance conventions), requiring code edits rather than configuration and limiting portability beyond a specific engine/version. The ontology-driven approach externalizes mapping knowledge into ontologies and RML rules." + +This is **verbatim** the position our triplet substrate + `derive_style_recipe` + `op_emitter` enforces — externalize interpretation into ontology + declarative rules, not code. 
The paper's failure mode (5.81% — dynamic/time-based behavior absent from static JSON) is *exactly* our Stage-2 dark-atom gap (Money/Quantity/ApplyRate/EmitAmount/Event/FiscalCtx not yet lit because the extractor doesn't populate `return_kind`/`semantic_role`). + +The two papers bracket our architecture: + +- **Paper 1 (BPMN M2M)** validates the **downstream** codegen thesis — triples → deterministic generated artifact. +- **Paper 2 (Aerial+)** supplies the missing **upstream discovery** leg — tabular data → truth-carrying SPO candidates. + +Both externalize symbolic artifacts from proprietary/non-symbolic sources via **thresholded extraction**. Both converge on **SPO + NARS truth** as the invariant middle. That convergence is the candidate `E-DISCOVERY-CODEGEN-BRACKET-1` epiphany; this plan is the implementation surface that operationalizes it. + +### 1.3 Workspace position + +Today's proposer legs into the SPO substrate (and into the typed `OdooEntity` SoA that drives `style_recipe.rs` + `op_emitter.rs`): + +| Proposer | Source | Confidence | Throughput | +|---|---|---|---| +| `D-ODOO-BP-1b` (curated) | `.claude/odoo/L*.md` prose | `OdooConfidence::Curated` | 1-2 entities/hour (human) | +| `D-ODOO-EXT-2` (extracted) | Python AST over `/home/user/odoo` | `OdooConfidence::Extracted` | ~10s entities/sec (one-shot batch) | +| **THIS PLAN** (ArmDiscovery) | parquet streams / runtime tabular data | `OdooConfidence::ArmDiscovered` (new) | 20K-200K rows/window, continuous | + +The third leg is the only one that surfaces co-correlations emerging from *runtime behavior* — invoices that consistently route through Fiscal Position A when partner country B AND product category C, observable in years of `account.move` rows but absent from `account_fiscal_position.py`. The codegen path already exists; this plan supplies its missing input. + +--- + +## 2. 
The five-stage pipeline (detailed) + +### 2.1 Stage A — proposers (parallel, fan-in) + +Two proposer types share the **same output shape** — a `CandidateRule { antecedent: Vec, consequent: Vec, support: f32, confidence: f32, n: u32 }` where `n` is the window size. The output of either feeds Stage B identically. Both are gated behind feature flags in the new crate so deployments can pick `arm-pair-stats` only, `arm-aerial` only, or both. + +#### A1. Streaming pair-stats (default trunk, deterministic) + +The cheap branch. Per-window sufficient statistics over `(item_i, item_j)` pairs and (optionally) triples up to a fixed antecedent bound `a` (paper-default `a=2`): + +```text +For each row in window: + for each item i in row.items: + counts[i] += 1 + for each item j in row.items where j > i: + pair_counts[i, j] += 1 + // optional: triples if a >= 3 + for each item k in row.items where k > j: + triple_counts[i, j, k] += 1 +After window closes: + for each (i, j) with pair_counts[i,j] >= MIN_SUPPORT_COUNT: + support = pair_counts[i,j] / n + confidence = pair_counts[i,j] / counts[i] + emit CandidateRule { i → j, support, confidence, n } +``` + +**Properties:** + +- Fully deterministic — same input → same candidates. +- Memory bound: `O(k² + k³)` for k items at `a ≤ 3`. For k = 200 features, k³ = 8 M counters; with `u32` counters that's 32 MB / window. Fits. +- Throughput: one pass per row, SIMD-amenable; 200 K rows/window at 10 µs/row = 2 s/window on one core. +- **The Jirak-bound significance** (§4) is what filters the noise floor — it's not just `support ≥ threshold`, it's `support ≥ jirak_lower_bound(n, weak_dependence_index)`. + +This is the cornerstone proposer. Aerial+ is optional fan-in. + +#### A2. Aerial+ neural-symbolic (optional, behind `arm-aerial` feature flag) + +For high-dimensional sparse data where pair-stats memory blows up (k > 500), or where 3+ antecedents matter and triple-counts won't fit, optionally fan in an Aerial+-style proposer. 
Implementation is **out of crate** — we don't bring the autoencoder into Rust. Instead: + +1. The Python reference (DiTEC-project/aerial-rule-mining) runs as a separate process. +2. It writes `CandidateRule` records as NDJSON to a Unix socket or stdin pipe consumed by the Rust crate. +3. Cargo `arm-aerial` feature gates the IPC client. + +This keeps the autoencoder out of the deterministic compile path (it's a *second proposer*, not the trunk) and avoids a heavy ONNX/Burn dep in `lance-graph`. + +#### Termination criterion (both proposers) + +A proposer emits a rule only if all of: + +- `support ≥ MIN_SUPPORT` (configured per-feed; default 0.01) +- `confidence ≥ MIN_CONFIDENCE` (default 0.5) +- `n × support ≥ JIRAK_MIN_EVIDENCE` (the weak-dependence-aware floor; see §4) + +Below any of these, the candidate is **silently dropped** — never crosses Stage A→B boundary. + +### 2.2 Stage B — translator (ARM truth → NARS truth) + +A pure function `arm_to_nars(rule: &CandidateRule) -> TruthValue`. The mapping: + +```rust +// In lance-graph-arm-discovery::translator +pub fn arm_to_nars(rule: &CandidateRule) -> TruthValue { + // ARM confidence is P(Y|X) — directly maps to NARS frequency. + let frequency = rule.confidence.clamp(0.0, 1.0); + // Evidential mass: support × window_size gives us the count of supporting + // observations; NARS confidence = m / (m + k) where k is the personality + // constant (default k=1 in NAL-9). Larger m → confidence → 1. + let m = (rule.support * rule.n as f32) as u32; + let k = NARS_PERSONALITY_K; // configured per-feed; default 1.0 + let confidence = (m as f32) / (m as f32 + k); + TruthValue::new(frequency, confidence) +} +``` + +The translator also wraps the rule's `(antecedent, consequent)` into the `(subject, predicate, object)` triple shape via a domain-supplied projector (per-feed config: an Odoo feed projects `(model_name, predicate_name, value)`; an SBM feed projects differently). 
+ +### 2.3 Stage C — hypothesis test (against SpoStore + EdgeColumn) + +The most consequential stage. For each `CandidateTriple { s, p, o, truth, origin }` produced in Stage B: + +```rust +match spo_store.lookup(s, p, o) { + // Prior exists with truth t_prior; NARS revise: + Some(t_prior) => { + let revised = TruthValue::revise(&t_prior, &candidate.truth); + spo_store.update(s, p, o, revised); + // Stamp a CausalEdge64 emission on the witness arc (per OQ-11.2 from #434); + // the emission's confidence_u8 + inference_mantissa records this revision. + mailbox.emit(CollapseGateEmission::from_revision(t_prior, candidate.truth, revised)); + } + + // No prior. Two paths: + None if candidate.truth.expectation() < CONTRADICTION_THRESHOLD => { + // Low-expectation novel candidate → just queue for ratification. + ratification_queue.push(candidate); + } + None => { + // High-expectation novel candidate → check for inversion contradictions. + // (Looking for prior (s, p, ¬o) or similar negation patterns.) + match spo_store.find_negation(s, p, o) { + Some(t_inverse) => { + // Committed contradiction per The Click — preserve, don't overwrite. + spo_store.commit_contradiction( + Triple::new(s, p, o, candidate.truth), + Triple::new(s, p, negate(o), t_inverse), + ); + } + None => { + // Genuine novelty — queue for ratification. + ratification_queue.push(candidate); + } + } + } +} +``` + +**Critical invariants:** + +- The test reads from `SpoStore` via the existing `lance_graph::graph::spo::SpoBuilder` surface; no parallel index. Per `E-SOA-IS-THE-ONLY`. +- Revisions emit `CausalEdge64` rows; that emission IS the audit log (§11.2 of unified-soa-convergence-v1: "No separate revision log column."). +- Contradictions are NEVER overwritten — they're committed alongside per The Click ("Opinions are committed contradictions preserved, not resolved."). +- Novel candidates ENTER the ratification queue — they don't auto-promote. 
+ +### 2.4 Stage D — ratification (epiphany-brainstorm-council gate) + +Already exists (PR #433 shipped `epiphany-brainstorm-council` + 5 savant cards). The ratification queue contents are exactly the workload the council was built for: domain-specific candidate findings that need pre-merge multi-perspective vetting. + +For each candidate in the queue, the panel runs: + +- `iron-rule-savant` — does the candidate violate I-NOISE-FLOOR-JIRAK by claiming significance the n-bound doesn't support? +- `dto-soa-savant` — does the triple fit one of the four BindSpace columns (Fingerprint/Qualia/Meta/Edge), or does it pretend to introduce a fifth? +- `cascade-impact-savant` — landing this rule changes which file? +- `prior-art-savant` — does an existing `E-…` or `K-…` (knowledge doc) already state this? +- `creative-explorer-savant` — what's the inverse / dual / second-order implication? + +LAND verdict → `OdooConfidence::Ratified` stamp → triple becomes available to `op_emitter` Stage E. + +The ratification queue itself is a `lance_graph_arm_discovery::RatificationQueue` — a bounded ring buffer (default 1024 candidates) with optional Lance-backed persistence (see D-ARM-6 and OQ-ARM-4). Council runs are *not* automatic — they're queued for human-triggered batches (per session), per the council's design. + +### 2.5 Stage E — codegen (op_emitter consumes ratified) + +Already exists (this branch: `op_emitter.rs`). The only extension: `op_emitter::bucket_corpus` filters its input by `confidence ≥ OdooConfidence::Ratified` (a partial order — `Curated > Extracted > Ratified-via-ARM > Conjecture`, with unratified `ArmDiscovered` ranked below `Ratified`). ArmDiscovered candidates that haven't passed Stage D never reach `op_emitter`. + +This is the firewall. The deterministic codegen path stays deterministic because Stage D is the gate. + +--- + +## 3. Crates and deliverables + +### 3.1 New crate: `lance-graph-arm-discovery` + +Location: `crates/lance-graph-arm-discovery/`. Sits alongside `lance-graph-ontology` in the workspace.
Dependencies: + +```toml +[dependencies] +lance-graph-contract = { path = "../lance-graph-contract" } # TruthValue, CausalEdge64 +arrow = "58" # window batches over RecordBatch +parquet = "58" # parquet input feed +thiserror = "2" + +[features] +default = ["arm-pair-stats"] +arm-pair-stats = [] # default trunk +arm-aerial = ["dep:tokio", "dep:serde_json"] # IPC client for Aerial+ subprocess + +[dependencies.tokio] +version = "1" +features = ["rt", "net", "io-util", "macros"] +optional = true + +[dependencies.serde_json] +version = "1" +optional = true +``` + +**Public surface:** + +```rust +// src/lib.rs (zero-dep beyond contract + arrow/parquet) +pub mod proposer; // Stage A — pair-stats + (feature) aerial IPC client +pub mod translator; // Stage B — arm_to_nars +pub mod hypothesis; // Stage C — SpoStore round-trip + revision/contradiction +pub mod queue; // Stage D queue — ratification buffer +pub mod feed; // window / batch / projector configuration +pub mod jirak; // Stage A threshold helpers (Jirak-bound) + +// Re-exports +pub use proposer::{CandidateRule, Proposer, PairStatsProposer}; +pub use translator::{arm_to_nars, CandidateTriple}; +pub use hypothesis::{HypothesisTest, HypothesisOutcome}; +pub use queue::{RatificationQueue, QueueEntry}; +pub use feed::{Feed, FeedProjector, WindowSize}; +``` + +Module-by-module deliverables in §3.3. + +### 3.2 Touchpoints in existing crates + +| Crate | Change | D-id | Risk | +|---|---|---|---| +| `lance-graph-contract` | Add `OdooConfidence::ArmDiscovered` + `OdooConfidence::Ratified` variants (or generalize to a `ProvenanceTier` enum). | D-ARM-1 | LOW — additive | +| `lance-graph-contract` | Add `pub trait Proposer { fn next_batch(&mut self) -> Vec<CandidateRule>; }` so the discovery crate is dependency-injectable. | D-ARM-2 | LOW — trait surface | +| `lance-graph` | `SpoBuilder::revise` already exists in `graph::spo::truth`; verify it preserves Contradiction semantics.
| D-ARM-5 | LOW — verification only | +| `lance-graph-ontology` | `op_emitter::bucket_corpus` filters by `confidence ≥ Ratified`. | D-ARM-10 | LOW — one-line filter + test | +| `lance-graph-ontology` | New `style_recipe` rule (Rule 8): when entity is `ArmDiscovered`-backed, recipe acquires `DAtom::Compute` weight 2 (provisional). | D-ARM-11 | MED — opens recipe-rule pacing | +| `unified-soa-convergence-v1` (PR #434) | Two corrections proposed — separate `discovery_arc` column + `discovery_origin: u8` byte. See §7. | D-ARM-6 | MED — touches OQ-11.2 + OQ-11.5 | + +### 3.3 New-crate module deliverables + +| D-id | Module | Scope | Lines | Conf | Status | +|---|---|---|---|---|---| +| **D-ARM-1** | `lance-graph-contract` | `ProvenanceTier::{Curated, Extracted, ArmDiscovered, Ratified, Conjecture}` enum + comparison ordering | 50 | HIGH | Queued | +| **D-ARM-2** | `lance-graph-contract::proposer` | `pub trait Proposer { fn next_batch(...) }` + `CandidateRule` + `WindowMetadata` | 100 | HIGH | Queued | +| **D-ARM-3** | `lance-graph-arm-discovery::proposer::pair_stats` | Streaming pair-stats over RecordBatch; `a ∈ {1, 2, 3}` antecedent bound; emits `CandidateRule` | 400 | HIGH | Queued | +| **D-ARM-4** | `lance-graph-arm-discovery::translator` | `arm_to_nars` + `CandidateTriple` carrier + projector trait + Odoo `FeedProjector` impl | 200 | HIGH | Queued | +| **D-ARM-5** | `lance-graph-arm-discovery::hypothesis` | Round-trip against `SpoStore`; revision, contradiction commit, queue-for-ratification | 350 | MED | Queued | +| **D-ARM-6** | `lance-graph-arm-discovery::queue` | `RatificationQueue` ring buffer + persistence shim (Lance optional) | 200 | MED | Queued | +| **D-ARM-7** | `lance-graph-arm-discovery::jirak` | Jirak-2016 weak-dependence threshold helpers (n^(p/2-1) bound; p ∈ (2, 3]); cites EPIPHANIES § FORMAL-SCAFFOLD | 150 | HIGH | Queued | +| **D-ARM-8** | `lance-graph-arm-discovery::feed` | `Feed` + `FeedProjector` + window-size config; Odoo `account.move`
projector as example | 250 | MED | Queued | +| **D-ARM-9** | `lance-graph-arm-discovery::proposer::aerial_ipc` | NDJSON-over-Unix-socket IPC client (feature-gated `arm-aerial`) | 200 | MED | Queued | +| **D-ARM-10** | `lance-graph-ontology::op_emitter` | One-line filter `confidence ≥ Ratified` + 2 tests | 30 | HIGH | Queued | +| **D-ARM-11** | `lance-graph-ontology::style_recipe` | Recipe rule 8: ArmDiscovered backing adds `DAtom::Compute` weight 2 (provisional, ratification-gated) | 80 | MED | Queued | +| **D-ARM-12** | benches + an end-to-end test feed | Synthetic parquet fixture; bench window-throughput; round-trip test through stages A-E with a small council | 400 | MED | Queued | + +Total: ~2,410 LOC. About one-third of `lance-graph-ontology`'s `odoo_blueprint` size. + +--- + +## 4. Thresholds — the Jirak grounding (I-NOISE-FLOOR-JIRAK) + +CLAUDE.md's iron rule on weakly-dependent fingerprint bits applies here directly: + +> **Classical IID Berry-Esseen is WRONG for this system.** Use **Jirak 2016** (arxiv 1606.01617, Annals of Probability 44(3) 2024–2063, "Berry-Esseen theorems under weak dependence") for any noise-floor or statistical-significance claim. Rate: `n^(p/2-1)` for `p ∈ (2,3]`, `n^(-1/2)` in L^q for `p ≥ 4`. + +For ARM, the question is: at window size n with weakly-dependent transaction items, what's the *minimum* observed support s* at which we can claim `s_observed - s_true > δ` is significant? + +Classical (wrong for our domain): for IID Bernoulli items, Berry-Esseen gives the σ threshold as `s* ≥ z · sqrt(s(1-s)/n)` with `z ≈ 1.96` at 95%. + +Jirak (correct): the threshold scales as `n^{-(p/2-1)}` for `p ∈ (2,3]`, with `p` the moment characterizing dependence (p=2.5 → `n^{-0.25}`, p=3 → `n^{-1/2}`; for p ≥ 4 the rate stays at `n^{-1/2}`). For ARM items with shared categorical encoding and partial-order purchase dependence (the canonical weak-dependence pattern), p ≈ 3.0 is a reasonable default, giving `n^{-1/2}` decay — the same order as the IID rate, but valid under weak dependence; any p < 3 decays more slowly and therefore sets a stricter (higher) floor.
+ +**Operational consequence for D-ARM-7:** the `jirak` module exposes: + +```rust +pub fn jirak_significance_threshold( + window_size: u32, + p_moment: f32, // dependence index; default 3.0 + confidence_alpha: f32, // significance level; default 0.05 +) -> f32; +``` + +with the default conservative threshold of `(window_size as f32).powf(-(p_moment / 2.0 - 1.0))` for `p_moment ∈ (2, 3]`. A `CandidateRule` survives Stage A only if its support deviation from the null (independence) exceeds this bound. + +**This is not optional.** Without Jirak grounding, the ARM discovery proposer leaks low-confidence rules into the SPO store at a rate the SpoStore::revise NARS revision cannot down-weight fast enough; the substrate calcifies on noise. With Jirak grounding, the proposer's false-positive rate aligns with the Markov-chain noise floor the rest of the substrate operates against. + +Cross-ref: `I-NOISE-FLOOR-JIRAK` (CLAUDE.md), Jirak 2016 (arxiv 1606.01617), `.claude/board/EPIPHANIES.md` [FORMAL-SCAFFOLD] pillar 4. + +--- + +## 5. Throughput regime — 20K-200K rows/window + +The user's stated regime: "stream 20.000 - 200.000 [records per window]." Implications: + +| Window size n | Per-pass time (200 features, 1 core) | Memory peak (counters) | Use case | +|---|---|---|---| +| 20,000 | ~0.2 s | k² × 4 B = 160 KB | sub-second iteration; in-session experimentation | +| 50,000 | ~0.5 s | 160 KB | typical CI batch over a week of Odoo `account.move` | +| 100,000 | ~1.0 s | 160 KB | typical month of mid-volume client | +| 200,000 | ~2.0 s | 160 KB | typical month of large client; the upper bound | + +Memory is dominated by the k² pair counters, not by n; n only controls per-row pass cost. For k = 200 (typical Odoo entity feature count after one-hot encoding), the pair-counter table fits comfortably in memory at any window size in the stated range. + +For k = 1000+ (a denormalized multi-entity feed: `account.move` ⨝ `account.move.line` ⨝ `res.partner`), pair-counter memory blows to 4 MB and triple-counter memory to 4 GB.
At that point: + +- Drop `a` from 3 to 2 (pair-only): 4 MB fits. +- Or partition by entity-prefix: each shard runs k ≈ 200; merge candidates downstream. +- Or enable the `arm-aerial` feature: the autoencoder compresses k effectively for the high-dim sparse case. + +The bench in D-ARM-12 will pin these numbers against a representative synthetic Odoo feed. The throughput claim is **bounded by the FeedProjector's row-decode cost**, not the pair-stats inner loop. + +**Streaming, not batch:** the window is a sliding ring of `n` recent rows, not a one-shot batch. As new rows enter, old rows leave; the counters are incrementally updated (add new row's pair contributions, subtract leaving row's). This preserves the steady-state "near-real-time discovery" property the user named. Note that §11.5 makes non-overlapping windows the D-ARM-3 default and gates this sliding (overlapping) mode behind a feature flag with a tagged Jirak adjustment. + +--- + +## 6. Mailbox SoA touchpoint — where this plugs into unified-SoA (PR #434) + +The hypothesis-test stage (§2.3) is where ARM-discovery meets the **one little-endian SoA**. Concretely: + +### 6.1 What ARM-discovery WRITES to the mailbox SoA + +For every revision (Stage C path `Some(t_prior)`), the proposer emits **one** `CollapseGateEmission` (`CausalEdge64`-backed) onto the per-mailbox witness arc. Per `E-BATON-1` the emission carries: + +- 13-byte base + 10 bytes/baton; one baton per revision; total wire cost = 23 bytes. +- `confidence_u8` stamps the post-revision NARS `c`. +- `inference_mantissa` stamps the post-revision NARS `f` (i4 signed mantissa per the 2026-04-21 layout). +- Source identity (the proposer that emitted it) goes in the new `discovery_origin: u8` byte proposed in §7. + +For every contradiction commit (Stage C path with negation), the proposer emits **two** linked emissions — one for each side of the contradiction, with a back-pointer linking them. Per The Click: contradictions preserved, never resolved. + +For every novel-candidate queue push, the proposer emits **zero** mailbox writes — the candidate sits in the (separate) `RatificationQueue`.
Only post-ratification does it land on the mailbox SoA. + +### 6.2 What ARM-discovery READS from the mailbox SoA + +For every candidate triple, the hypothesis-test reads the prior `TruthValue` from `SpoStore::lookup`. That lookup IS a read against the `EdgeColumn` (`[CausalEdge64; N]`) per row — the existing surface, no new column needed. + +### 6.3 No new column REQUIRED (with one caveat) + +The unified-SoA columns shipped in D-MBX-A1 (lines 67-83 of `mailbox_soa.rs`): + +```rust +pub edges: [CausalEdge64; N], +pub qualia: [QualiaI4_16D; N], +pub meta: [MetaWord; N], +pub entity_type: [u16; N], +``` + +These cover everything ARM-discovery needs for steady-state operation. **The caveat:** if we want to track multiple in-flight candidate rules per row (e.g. "this row is consistent with hypothesis H1, H2, but contradicts H3"), the existing `edges: [CausalEdge64; N]` arc is a single ring per row; multiple candidate streams compete for the same arc. That's what §7's `discovery_arc` proposal addresses. + +For v1 (this plan), we ship without `discovery_arc` and live with single-arc contention — the proposer batches candidates so contention is bounded. If contention becomes the dominant cost in benches (D-ARM-12), v1.1 adds the column. + +--- + +## 7. Corrections proposed to unified-soa-convergence-v1 (PR #434) + +Reviewed the unified-SoA plan + handover. Two specific corrections to fold in via a follow-up PR after this discovery plan is ratified. Both are SPEC corrections — they don't invalidate the plan, they refine OQ defaults. + +### 7.1 Correction 1 — `discovery_arc: [u32; D]` column, separate from `edges` + +**Status of OQ-11.2 in #434:** "Witness arc width `W`? Plan default: W = 16 (~64 B/row at u32 handles). Needs user ratification before D-MBX-A3 lands." + +**My add:** the `W=16` arc-handle column is for **belief-state arc emissions** — the cumulative trace of `CausalEdge64`-stamped revisions on the canonical mailbox state. 
It is NOT designed for tracking **in-flight discovery candidates** that haven't yet committed to a revision. + +In ARM-discovery, a single window of 200K rows may produce thousands of candidate rules; each candidate touches multiple rows in hypothesis-test. If a row participates in K candidate hypotheses concurrently, the existing `edges` arc would either overflow at K > 16 or arbitrarily evict candidates. + +**Proposal:** carve a parallel `discovery_arc: [u32; D]` column, D = 8 (default). Sits next to `edges` but rotates on a different cadence — `discovery_arc` rotates per window (every 200K rows), `edges` rotates per Commit/Prune. The new column also gets `discovery_arc_head: u8` per row (rotation index). + +**Cost:** D × 4 = 32 bytes/row, plus 1 byte head. Total +33 B/row. At 1 M mailbox rows in a typical persistent mailbox = +33 MB. Acceptable. + +**Trade-off:** more memory, cleaner separation between ratified-state-arc and candidate-stream-arc. Without this split, the proposer is forced to either rate-limit (degrading throughput) or share the `edges` arc (polluting the audit trail). + +**This is a D-ARM-6 deliverable, separate from D-MBX-A3 to avoid blocking #434's roll-up.** + +### 7.2 Correction 2 — `discovery_origin: u8` provenance byte + +**Status of OQ-11.5 in #434:** "SoA version field width? Plan default: u16 at layout root; no per-column version stamps in v1." + +**My add:** the SoA-root `version: u16` tracks layout-schema version. It doesn't tell a downstream consumer **which proposer produced the evidence currently sitting in any given row's `edges` arc.** Today, downstream consumers must assume "any extracted/curated entity"; once ARM-discovery starts emitting, that assumption breaks. + +**Proposal:** add `discovery_origin: [u8; N]` column. 
Bit fields: + +```text +discovery_origin (u8): + bits 0-1 : ProvenanceTier (00=Curated, 01=Extracted, 10=ArmDiscovered, 11=Ratified) + bits 2-3 : proposer id (00=AstWalker, 01=PairStats, 10=Aerial, 11=Other) + bits 4-7 : reserved (16 future proposers) +``` + +**Cost:** N bytes per mailbox = 1 KB for N = 1024. Negligible. + +**Trade-off:** consumers (op_emitter, council, hypothesis-test) can filter on origin without consulting a parallel registry. Without this byte, the council's `prior-art-savant` can't tell whether a triple's prior support came from human curation (high prior) or from a chain of ARM-discovered revisions (re-revisable). + +**This is also a D-ARM-6 sub-deliverable; defers to #434's D-MBX-10 for SoA-root-version semantics but adds the per-row origin byte alongside.** + +### 7.3 Non-correction — `Vsa16kF32` deprecation stays untouched + +I have nothing to add to OQ-11.4 ("CLAUDE.md `Vsa16kF32` doctrinal update"). The deprecation is correctly scoped: `Vsa16kF32` is a local-bundle compute carrier, not a cross-boundary state. ARM-discovery does NOT reach for `Vsa16kF32` — it operates on typed `CandidateRule` records and typed `(s, p, o)` triples. The bundle math (Markov ±5, role-key binding) is unaffected because ARM doesn't enter that path. + +--- + +## 8. 
Deliverables — consolidated table + +| D-id | Title | Crate | Lines | Conf | Status | Blocks / Depends on | +|---|---|---|---|---|---|---| +| **D-ARM-1** | `ProvenanceTier` enum + ordering | `lance-graph-contract` | 50 | HIGH | Queued | Blocks ALL other D-ARM-* | +| **D-ARM-2** | `Proposer` trait + `CandidateRule` carrier | `lance-graph-contract` | 100 | HIGH | Queued | Blocks D-ARM-3, D-ARM-9 | +| **D-ARM-3** | Pair-stats proposer (default trunk) | `lance-graph-arm-discovery::proposer::pair_stats` | 400 | HIGH | Queued | Depends on D-ARM-1/2/7; blocks D-ARM-12 | +| **D-ARM-4** | ARM-truth → NARS-truth translator | `lance-graph-arm-discovery::translator` | 200 | HIGH | Queued | Depends on D-ARM-1/2 | +| **D-ARM-5** | Hypothesis test + revision + contradiction commit | `lance-graph-arm-discovery::hypothesis` | 350 | MED | Queued | Depends on D-ARM-4; blocks D-ARM-12 | +| **D-ARM-6** | Ratification queue + `discovery_arc`/`discovery_origin` corrections to #434 | `lance-graph-arm-discovery::queue` + mailbox SoA cols | 200 + spec | MED | Queued | Depends on PR #434 D-MBX-A3 merge; blocks D-ARM-12 | +| **D-ARM-7** | Jirak-bound significance helpers | `lance-graph-arm-discovery::jirak` | 150 | HIGH | Queued | Blocks D-ARM-3 | +| **D-ARM-8** | Feed + FeedProjector config + Odoo example projector | `lance-graph-arm-discovery::feed` | 250 | MED | Queued | Depends on D-ARM-2; blocks D-ARM-12 | +| **D-ARM-9** | Aerial+ IPC client (feature-gated) | `lance-graph-arm-discovery::proposer::aerial_ipc` | 200 | MED | Queued | Optional; depends on D-ARM-2 | +| **D-ARM-10** | `op_emitter::bucket_corpus` ratification filter | `lance-graph-ontology::op_emitter` | 30 | HIGH | Queued | Depends on D-ARM-1 | +| **D-ARM-11** | `style_recipe` rule 8 for ArmDiscovered backing | `lance-graph-ontology::style_recipe` | 80 | MED | Queued | Depends on D-ARM-1 | +| **D-ARM-12** | End-to-end test + bench | `lance-graph-arm-discovery::tests` + benches | 400 | MED | Queued | Depends on D-ARM-3..8 | + 
+**Total:** ~2,410 LOC, 12 deliverables, 1 new crate, 2 spec corrections. + +--- + +## 9. Execution order + +```text +Wave 1 — Contract (D-ARM-1, D-ARM-2) — additive contract trait + provenance enum + ├─ ships as one PR; one Sonnet agent; ~150 LOC + 5 tests + └─ blocks everything below; lands first + +Wave 2 — Jirak (D-ARM-7) — pure math, no IO + ├─ ships as one PR; one Sonnet agent; ~150 LOC + reference tests against Jirak 2016 worked examples + └─ blocks Wave 3a + +Wave 3a — Pair-stats proposer (D-ARM-3) — the default trunk + ├─ ships as one PR; one Sonnet agent (Opus reviewer); ~400 LOC + 15 tests + └─ blocks D-ARM-12 + +Wave 3b — Translator (D-ARM-4) — pure function + ├─ ships as one PR; one Sonnet agent; ~200 LOC + 10 tests + └─ Wave 3a and 3b parallel + +Wave 4 — Hypothesis test (D-ARM-5) — SpoStore round-trip + ├─ ships as one PR; Opus agent (multi-source: SpoStore + EdgeColumn + The Click semantics); ~350 LOC + 12 tests + └─ depends on Waves 3a, 3b + +Wave 5a — Queue + SoA corrections (D-ARM-6) — spec PR for #434 follow-up + queue impl + ├─ ships as TWO PRs; one for spec follow-up against #434 (council-reviewed); one for queue impl + └─ depends on PR #434 D-MBX-A3 landing + +Wave 5b — Feed + projector (D-ARM-8) — DI config surface + ├─ ships as one PR; one Sonnet agent; ~250 LOC + 8 tests + └─ parallel with Wave 5a + +Wave 6 — op_emitter + style_recipe rule (D-ARM-10, D-ARM-11) — downstream gates + ├─ ships as one PR; trivial; one Sonnet agent; ~110 LOC + 4 tests + └─ depends on D-ARM-1 + +Wave 7 — Aerial+ IPC (D-ARM-9, OPTIONAL) — feature-gated fan-in + ├─ ships when user signals demand; one Sonnet agent; ~200 LOC + └─ optional; not blocking + +Wave 8 — End-to-end test + bench (D-ARM-12) + ├─ ships as one PR; Opus agent (multi-source: all prior waves); ~400 LOC + bench + └─ depends on Waves 1-6 (or 1-7 if Aerial+ included) +``` + +**Estimated calendar:** 6-8 sessions if executed serially with the disciplined "no cargo in agents" rule. 
Main thread runs cargo verifies after each wave merges to main. + +--- + +## 10. Open questions + +| # | Question | Default proposal | Blocks | +|---|---|---|---| +| **OQ-ARM-1** | What's the default window size for steady-state operation? | 100K rows, configurable per Feed | D-ARM-3, D-ARM-8 | +| **OQ-ARM-2** | What's the Jirak `p_moment` default for Odoo tabular data? | p = 3.0 (gives `n^{-1/2}` decay per the `n^{-(p/2-1)}` rate; lower p toward 2 for a more conservative floor) | D-ARM-7 | +| **OQ-ARM-3** | What's the NARS personality constant `k` in `arm_to_nars`? | k = 1.0 (NAL-9 default) | D-ARM-4 | +| **OQ-ARM-4** | Should `RatificationQueue` persist across sessions, or be in-memory only? | In-memory v1; persist behind `--persist-queue` flag in v2 | D-ARM-6 | +| **OQ-ARM-5** | Antecedent bound `a`: hard-cap at 2, allow 3, or higher? | Hard-cap at 2 for pair-stats trunk; Aerial+ subprocess can go higher | D-ARM-3 | +| **OQ-ARM-6** | Contradiction commit shape — single `Contradiction` edge type, or symmetric pair? | Symmetric pair (one CausalEdge per side, back-pointer between) | D-ARM-5 | +| **OQ-ARM-7** | Do we need a `discovery_arc` column from day-one, or live with `edges` contention in v1? | Defer to v1.1 — measure contention in D-ARM-12 bench first | D-ARM-6 | +| **OQ-ARM-8** | What's the right policy for inverse-fingerprint contradiction detection in Stage C? Hash collision rate? | Cite `I-NOISE-FLOOR-JIRAK`; concrete cutoff TBD in D-ARM-5 | D-ARM-5 | +| **OQ-ARM-9** | How do council ratification verdicts flow back into the queue? Webhook? Manual session trigger? | Manual session trigger (per-session council batch); no webhook v1 | D-ARM-6 | +| **OQ-ARM-10** | Should Aerial+ IPC be a separate crate or a feature inside `lance-graph-arm-discovery`? | Feature flag inside; promote to separate crate if it grows large | D-ARM-9 | + +--- + +## 11.
Risks + +### 11.1 Risk — proposer leaks low-confidence rules into SPO + +**Failure mode:** Stage A emits rules at high rates that scrape past `MIN_CONFIDENCE` but below the Jirak floor; Stage C's revise weights them in; the substrate calcifies on weak signal. + +**Mitigation:** D-ARM-7 (Jirak helpers) is mandatory in the threshold path. D-ARM-3's emission gate routes through `jirak_significance_threshold` BEFORE checking the user's `MIN_CONFIDENCE` config. A canary test in D-ARM-12 asserts no rule with `support × n < jirak_min_evidence(...)` ever crosses the proposer boundary. + +**Confidence:** HIGH — the iron rule is already named; this plan inherits it. + +### 11.2 Risk — Stage D ratification becomes the bottleneck + +**Failure mode:** discovery emits 100s of novel candidates per day; the council can't keep up; the queue saturates; high-quality candidates wait behind low-quality. + +**Mitigation:** the queue is bounded (default 1024); overflow drops *oldest* candidates (FIFO with priority bias). The council can run on prioritized batches (highest expectation first). v2 may add a triage pre-filter (the `cascade-impact-savant` runs solo on every queue entry; full panel only on those that survive cascade-impact > 0.5). + +**Confidence:** MED — depends on observed discovery rate; D-ARM-12 bench will inform. + +### 11.3 Risk — contradiction commit semantics drift from The Click + +**Failure mode:** Stage C's contradiction path "commits" the contradiction but the EdgeColumn/SpoStore doesn't actually have a contradiction-edge type; "preserve" decays into "drop one side." + +**Mitigation:** D-ARM-5 includes an audit pass against `lance_graph::graph::spo::truth` — verify the contradiction primitive exists at the truth level. If not, surface a follow-up to `lance-graph-contract` to add `ContradictionEdge` to the EdgeColumn taxonomy. Block D-ARM-5 on that contract addition. 
+ +**Confidence:** MED — depends on current state of `spo::truth::Contradiction`; needs a verification pass in Wave 4. + +### 11.4 Risk — `lance-graph-arm-discovery` becomes a dumping ground + +**Failure mode:** the new crate accumulates one-off projectors, ad-hoc feeds, and "just one more proposer" extensions; it loses focus. + +**Mitigation:** strict scope discipline — the crate's only public surface is Proposer, CandidateRule, CandidateTriple, HypothesisTest, RatificationQueue, Feed, FeedProjector. Domain-specific projectors live in their own crates (e.g. `lance-graph-odoo-feed` if Odoo grows beyond an example). The crate's invariants are documented in its README; PR template requires a `Touches public surface? YES/NO` checkbox. + +**Confidence:** MED — requires sustained governance discipline. + +### 11.5 Risk — windowed pair-stats over-counts dependent observations + +**Failure mode:** sliding-window updates count the same row's contribution twice across overlap, inflating support; subtraction on exit is correct in expectation but introduces variance the Jirak floor doesn't bound. + +**Mitigation:** D-ARM-3 implements non-overlapping windows by default. Sliding (overlapping) windows are a feature flag with a tagged Jirak adjustment. Bench in D-ARM-12 tests both. + +**Confidence:** HIGH — same fix as any windowed-statistics implementation; well-trodden. + +--- + +## 12. Success criteria + +**Quantitative:** + +- D-ARM-3 (pair-stats) sustains ≥ 100K rows/window/sec on a 16-core machine for k = 200 features (bench in D-ARM-12). +- D-ARM-5 (hypothesis test) round-trips a candidate triple through revision in < 10 µs against an in-memory SpoStore of 1 M triples. +- D-ARM-12's end-to-end test pipeline (synthetic feed → proposer → translator → hypothesis-test → ratification → op_emitter) completes for 200K rows + 50 candidate triples in < 5 s. +- Zero candidate rules pass Stage A with `support × n < jirak_min_evidence(...)`. 
+ +**Qualitative:** + +- Council ratification verdicts (Stage D) provide observable evidence that the discovery leg surfaces *non-obvious* co-correlations the AST extraction missed (e.g. a partner-country / product-category / fiscal-position triple invariant that is implicit in 5 years of `account.move` history but never spelled out in `account_fiscal_position.py`). +- The corrections proposed in §7 are absorbed by #434's follow-up PR without contention; OQ-11.2 and OQ-11.5 close. +- `lance-graph-arm-discovery` keeps its public surface stable across the v1.0 → v1.1 transition (no breaking API change required by the `discovery_arc` column landing). + +**Doctrinal:** + +- The substrate stays lossless. ARM-discovered triples enter the SPO store via `SpoBuilder` only, never bypass it. +- The compile path stays deterministic. Stage D's ratification gate is the firewall; nothing nondeterministic crosses into `op_emitter`. +- The Click is preserved. Novel candidates ARE the Epiphany branch; ratified candidates become Commits; contradictions are preserved as committed contradictions. +- `I-NOISE-FLOOR-JIRAK` is the floor, not a guideline. The proposer's noise floor IS Jirak; nothing leaks below it. + +--- + +## 13. Cross-refs + +| Doc | Section | Relation | +|---|---|---| +| `unified-soa-convergence-v1.md` (PR #434) | §11.1 — One SoA, never transformed | This plan's writes go through `SpoBuilder` → mailbox SoA, never a parallel DTO. | +| `unified-soa-convergence-v1.md` | §11.2 — witness IS belief-state arc | Stage C revisions emit `CausalEdge64` onto the witness arc. | +| `unified-soa-convergence-v1.md` | §11.6 — nine half-baked consumers | This plan adds an **upstream proposer node** to the architecture; doesn't touch any of the nine. | +| `unified-soa-convergence-v1.md` | OQ-11.2 + OQ-11.5 | §7 of THIS plan supplies the spec defaults: `discovery_arc D=8`, `discovery_origin u8`. 
| +| `odoo-business-logic-blueprint-v1.md` | D-ODOO-BP-1g (JITson recipes) | ArmDiscovered triples flow into the same recipe path; Stage E IS the JITson hand-off. | +| `odoo-source-extraction-v1.md` | EXT-* deliverables | Sibling proposer leg; same downstream substrate; different upstream source. | +| `style_recipe.rs` (PR #433) | derive_style_recipe rules 1-7 | This plan proposes rule 8 (D-ARM-11) for ArmDiscovered backing. | +| `op_emitter.rs` (this branch, pre-merge) | `bucket_corpus`, `emit_op_dispatch` | This plan proposes the one-line ratification filter (D-ARM-10). | +| CLAUDE.md `I-NOISE-FLOOR-JIRAK` | iron rule | §4 of this plan operationalizes it as the Stage A threshold. | +| CLAUDE.md `I-SUBSTRATE-MARKOV` | iron rule | The NARS revision arc IS the Markov-chain trajectory; ARM doesn't perturb the Markov property. | +| CLAUDE.md `I-VSA-IDENTITIES` | iron rule | ARM-discovery operates on identity-typed `(s,p,o)` triples; never bundles content. | +| CLAUDE.md `E-BATON-1` | epiphany | Stage C emissions are batons; cross-mailbox propagation rides existing baton handoff. | +| CLAUDE.md "The Click" | doctrine | Novel candidates → Epiphany; revised priors → Commit; conflicts → committed Contradiction. | +| EPIPHANIES `E-INTERPRET-NOT-STORE-1` (PR #433 council-ratified) | epiphany | ARM is *one* interpretation projection of the lossless triplet substrate; multiple projections can coexist. | +| EPIPHANIES candidate `E-DISCOVERY-CODEGEN-BRACKET-1` (this session, council-pending) | candidate | The two papers (Aerial+ + ontology M2M) bracket our architecture: discovery upstream, codegen downstream, SPO+NARS middle. | +| Paper — Karabulut, Groth, Degeler 2025 (arxiv 2504.19354v1) | §2 (truth definitions) | Direct mapping ARM → NARS truth. | +| Paper — Karabulut, Groth, Degeler 2025 | §3.3 + Algorithm 1 | Aerial+ rule extraction; D-ARM-9 IPC client interface mimics this output shape. 
| +| Paper — Abreu, Cruz, Guerreiro 2025 (arxiv 2511.13661v1) | §4 ("from code-centric to ontology-driven") | Independent confirmation of the externalize-interpretation doctrine. | +| Jirak 2016 (arxiv 1606.01617) | Theorem 2.1 | The weak-dependence Berry-Esseen rate that D-ARM-7 cites for the noise floor. | + +--- + +## 14. What this plan does NOT cover + +- **Reverse ARM** (mining BPMN diagrams back into proprietary source) — explicitly out of scope. The Abreu et al. paper §7.1 names this as future work; we inherit that scope. +- **Multi-feed coordination** — running 3 parallel feeds (Odoo invoices + MedCare encounters + WoA workflows) and synthesizing across them is v2 work. v1 is single-feed. +- **Continuous council** — running the council in a polling loop instead of session-triggered batches is explicitly deferred (OQ-ARM-9 default). The council was designed for human-in-the-loop verification; automating it without a contract surface change would dilute the gate. +- **GPU-accelerated pair-stats** — v1 is CPU-only. SIMD via existing `ndarray::simd_soa.rs` (when D-MBX-7 lands) is the first acceleration step. GPU is v2+. +- **Aerial+ Rust port** — the autoencoder stays in Python. D-ARM-9 only specifies the IPC client. A future plan may add a Burn/Candle port if the IPC overhead becomes the bottleneck (unlikely at 100K rows/window). + +--- + +## 15. Invariants this plan inherits + +| Invariant | Source | How this plan respects it | +|---|---|---| +| **I-NOISE-FLOOR-JIRAK** | CLAUDE.md iron rule | Mandatory Stage A threshold via D-ARM-7. | +| **I-VSA-IDENTITIES** | CLAUDE.md iron rule | ARM operates on `(s,p,o)` identity-typed triples; content stats inform the truth value, never the bind. | +| **I-SUBSTRATE-MARKOV** | CLAUDE.md iron rule | NARS revision IS Chapman-Kolmogorov-respecting; bundle math untouched. | +| **I-LEGACY-API-FEATURE-GATED** | CLAUDE.md iron rule | Aerial+ IPC behind `arm-aerial` feature; default trunk (pair-stats) is feature-clean. 
| +| **E-SOA-IS-THE-ONLY** | EPIPHANIES PR #434 | Writes go through `SpoBuilder` → mailbox SoA, no parallel DTO. | +| **E-BATON-1** | EPIPHANIES PR #418 | Stage C emissions cross mailbox boundaries as discrete owned batons (`CollapseGateEmission`). | +| **E-INTERPRET-NOT-STORE-1** | EPIPHANIES PR #433 | ARM is one interpretation projection; never stored back into triples; deterministically re-derivable. | +| **E-NORMALIZED-ENTITY-1** | EPIPHANIES 2026-05-28 | `CandidateTriple` carrier is a typestate over `Proposed → Tested → Ratified` — same shape pattern. | +| **The Click P-1** | CLAUDE.md | Novel candidates → Epiphany branch; revisions → Commit branch; contradictions → preserved as committed. | +| **AGI-as-glove** | CLAUDE.md Stance | No new traits-on-the-side; `Proposer` lives in `lance-graph-contract` next to the other domain contracts. | + +--- + +## 16. Sequencing against unified-soa-convergence (PR #434) + +This plan is **strictly additive** to PR #434. The dependency direction: + +```text +PR #434 (unified-SoA convergence) — landed + │ + │ D-MBX-A1 columns (edges, qualia, meta, entity_type) + │ D-MBX-A2/A3 (BindSpace gap + witness-arc handle) — pending + │ + ▼ +this plan (ARM discovery) — proposes + │ + │ Wave 1-4 land regardless of D-MBX-A3 (uses existing edges arc) + │ Wave 5a (D-ARM-6 corrections) depends on D-MBX-A3 having landed + │ + ▼ +Future v1.1 — discovery_arc column + multi-feed coordination +``` + +If #434's D-MBX-A3 lands BEFORE this plan's Wave 5a, the corrections fold cleanly. If this plan's Wave 4 lands FIRST (using the existing `edges` arc only), the discovery leg is operational; Wave 5a just adds room. + +No blocking dependency in either direction for Waves 1–4; the only ordering constraint is that Wave 5a waits on D-MBX-A3. Both plans can otherwise progress in parallel; the SoA contract is the integration surface. + +--- + +## 17. Decision log + +| Date | Decision | Rationale | +|---|---|---| +| 2026-05-29 | Author plan as v1, additive to PR #434 | Discovery is an *upstream* proposer; it doesn't touch the SoA contract layer.
| +| 2026-05-29 | Pair-stats is the default trunk, Aerial+ is fan-in | Determinism preferred; neural compression earns its keep only at high k. | +| 2026-05-29 | Aerial+ stays in Python via IPC | Avoids ONNX/Burn dep in `lance-graph`; preserves the determinism boundary. | +| 2026-05-29 | Reject `ProvenanceTier::Auto` (auto-ratification) | Council gate is non-negotiable per The Click. | +| 2026-05-29 | Reject continuous council polling | Human-in-the-loop is the design; auto-poll dilutes the gate. | +| 2026-05-29 | Defer `discovery_arc` column to v1.1 | Live with single `edges` arc contention until the bench (D-ARM-12) measures it. | +| 2026-05-29 | Add `discovery_origin: u8` byte in v1 | Cheap (N bytes); council `prior-art-savant` needs it to triage. | +| 2026-05-29 | Hard-cap antecedent bound `a ≤ 2` in pair-stats | Memory bound; `a = 3` requires Aerial+ subprocess fan-in. | +| 2026-05-29 | Symmetric pair contradiction commit (OQ-ARM-6) | Matches existing `CausalEdge64` symmetry; back-pointer is natural. | + +--- + +## 18. Provenance + +Authored by the main thread during session-continuation on `claude/activate-lance-graph-att-k2pHI`, after PR #433 (style_recipe + epiphany council) and PR #434 (unified-SoA convergence) had both merged. Both papers (Karabulut 2025; Abreu 2025) were shared by the user in this session; the integration emerges from cross-reading them against the existing op_emitter pipeline. + +No subagents were spawned for this plan. Recommendation: the council should ratify candidate epiphany `E-DISCOVERY-CODEGEN-BRACKET-1` before this plan moves to Wave 1. + +End of `streaming-arm-nars-discovery-v1.md`.