From e6e2b3e1c4ffecb9632504fb96a43aa2c25695ba Mon Sep 17 00:00:00 2001 From: Claude Date: Thu, 18 Jun 2026 20:12:38 +0000 Subject: [PATCH 01/14] feat(graph): Backend::MailboxSoa + the classid-route node-match half (Inc 0 first slice) cypher-kanban-ast-unification-v1 Inc 0, the verified-safe half: the substrate IS the graph (E-GUID-IS-THE-GRAPH), so MATCH (n:Label) is a classid prefix-route over the zero-dep MailboxSoaView contract, resolved off the class column with zero value decode. - graph_router::Backend gains the MailboxSoa variant (the named router gap). - graph/mailbox_scan.rs: match_nodes_by_class (classid route; reads only the class column) + match_node_by_local_key (local_key->row via row_for_local_key, None-fallback to positional address). - Gates: parity (matched set == reference classid filter); F2 zero-value-decode proven structurally by a GuardedSoa whose energy()/meta_raw() panic on access; key-index point lookup. 4/4 green, no new clippy warnings. Edge-traversal ((a)-[r]->(b)) deliberately deferred, grounded not faked: CausalEdge64 (the edges_raw column) is an SPO triple, NOT a row->row adjacency pointer, and the View exposes no EdgeBlock adjacency accessor. That is the edge-representation boundary the 5+3 council said to pin first (verdict 4b); it lands as the next slice once the classid-resolved edge rep + adjacency accessor are added. 
Co-Authored-By: Claude Opus 4.8 Claude-Session: https://claude.ai/code/session_01CcpLeEC3XK8Eye53GKBVvi --- crates/lance-graph/src/graph/graph_router.rs | 9 + crates/lance-graph/src/graph/mailbox_scan.rs | 201 +++++++++++++++++++ crates/lance-graph/src/graph/mod.rs | 1 + 3 files changed, 211 insertions(+) create mode 100644 crates/lance-graph/src/graph/mailbox_scan.rs diff --git a/crates/lance-graph/src/graph/graph_router.rs b/crates/lance-graph/src/graph/graph_router.rs index f9ac0d60d..1cc1e379c 100644 --- a/crates/lance-graph/src/graph/graph_router.rs +++ b/crates/lance-graph/src/graph/graph_router.rs @@ -50,6 +50,15 @@ pub enum Backend { Blasgraph, /// Palette-accelerated traversal (Entry 2: bgz17 hot path). Palette, + /// MailboxSoA traversal (Entry 3, `cypher-kanban-ast-unification-v1` Inc 0): + /// a Cypher `MATCH` routed over the canonical GUID-keyed substrate via the + /// zero-dep `MailboxSoaView` contract — classid prefix-route for node match, + /// `local_key`→row for point lookup. Edge-slot traversal is deferred until the + /// edge-representation boundary is pinned (classid-resolved `EdgeBlock` + /// adjacency vs `CausalEdge64` SPO — they are NOT interchangeable; see + /// `graph::mailbox_scan` and the plan's verdict §4b). See + /// [`crate::graph::mailbox_scan`]. + MailboxSoa, } /// Classification of a query for routing purposes. diff --git a/crates/lance-graph/src/graph/mailbox_scan.rs b/crates/lance-graph/src/graph/mailbox_scan.rs new file mode 100644 index 000000000..1887b4b1f --- /dev/null +++ b/crates/lance-graph/src/graph/mailbox_scan.rs @@ -0,0 +1,201 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright The Lance Authors + +//! MailboxSoA scan — `Backend::MailboxSoa` (`cypher-kanban-ast-unification-v1` Inc 0). +//! +//! A Cypher `MATCH` routed over the canonical GUID-keyed substrate via the +//! zero-dep [`MailboxSoaView`] contract, instead of the index-built +//! 
[`TypedGraph`](crate::graph::blasgraph::typed_graph::TypedGraph) the other +//! backends use. The thesis (`E-GUID-IS-THE-GRAPH`): the substrate **is** the +//! graph — a node is its GUID key, and `MATCH (n:Label)` is a **classid +//! prefix-route**, resolved off the key/class column with **zero value decode** +//! (it never touches the 480 B value slab: `energy` / `meta` / fingerprints). +//! +//! ## Scope of this increment (the verified-safe half) +//! +//! This lands the **node-match** half — `MATCH (n:Label)` → the set of rows whose +//! class discriminator equals the queried class. That is the half that is correct +//! *without* the boundary the 5+3 council said to pin first. +//! +//! The **edge-traversal** half (`(a)-[r]->(b)`) is deliberately **deferred**, for +//! two grounded reasons (verdict §4b): +//! +//! 1. **Edge-representation is not yet pinned.** `EdgeBlock` (12+4 one-byte +//! *adjacency* slots → neighbor `local_key`) and `CausalEdge64` (an **SPO +//! triple** of s/p/o palette indices, the `edges_raw` column) are NOT +//! interchangeable. A relationship-type must bind to one via the class's +//! `EdgeCodecFlavor` — the router must not guess by availability. +//! 2. **The View exposes only `edges_raw` (`CausalEdge64`/SPO), not `EdgeBlock` +//! adjacency.** `CausalEdge64` carries s/p/o palette indices, not a row→row +//! pointer, so it cannot be dereferenced to a neighbor row without the +//! adjacency accessor (a follow-on contract addition). +//! +//! So this module does the classid prefix-route and the `local_key`→row point +//! lookup (via [`MailboxSoaView::row_for_local_key`]); edge dispatch lands once +//! the representation boundary is resolved. + +use lance_graph_contract::soa_view::MailboxSoaView; + +use crate::graph::graph_router::Backend; + +/// A node matched by a MailboxSoA scan: the row index plus the backend tag. 
+/// +/// Distinct from `GraphHit` (an *edge* with source/target) — a node match has no +/// target until the edge-traversal half lands. Kept minimal and honest. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub struct NodeMatch { + /// The matched row index within the mailbox. + pub row: usize, + /// Always [`Backend::MailboxSoa`] — names the route that produced this match. + pub backend: Backend, +} + +/// `MATCH (n:Label)` → the rows whose class discriminator equals `class_id`. +/// +/// The classid prefix-route over the `MailboxSoaView`. **Zero value decode by +/// construction:** the only column read is `class_id()` (which aliases the +/// `entity_type` u16 slot — the Cognitive-RISC N1 class hook); the 480 B value +/// slab (`energy` / `meta` / fingerprints) is never touched. This is the +/// substrate-is-the-graph node-selection half of `Backend::MailboxSoa`. +pub fn match_nodes_by_class(view: &V, class_id: u16) -> Vec { + let classes = view.class_id(); + classes + .iter() + .enumerate() + .filter_map(|(row, &c)| { + (c == class_id).then_some(NodeMatch { + row, + backend: Backend::MailboxSoa, + }) + }) + .collect() +} + +/// Point lookup: resolve a canonical [`NodeGuid::local_key`] to a single row, +/// the GUID-keyed address half of `Backend::MailboxSoa`. +/// +/// [`NodeGuid::local_key`]: lance_graph_contract::canonical_node::NodeGuid::local_key +/// +/// Returns `None` when the view has not materialized a key index (the +/// deferred-binding default of [`MailboxSoaView::row_for_local_key`]) — the +/// caller then falls back to the positional `(mailbox_id, row)` address, never a +/// wrong row. +pub fn match_node_by_local_key(view: &V, local_key: u64) -> Option { + view.row_for_local_key(local_key).map(|row| NodeMatch { + row, + backend: Backend::MailboxSoa, + }) +} + +#[cfg(test)] +mod tests { + use super::*; + use lance_graph_contract::kanban::KanbanColumn; + + /// A minimal view over fixed columns. 
The value-side columns + /// (`energy`/`meta`/fingerprints) PANIC on access so the zero-value-decode + /// gate (F2) is proven structurally: if the scan ever touches them, the test + /// fails loudly. + struct GuardedSoa { + class_ids: Vec, + edges: Vec, + keyed_rows: Vec<(u64, usize)>, + } + + impl MailboxSoaView for GuardedSoa { + fn mailbox_id(&self) -> u32 { + 0 + } + fn n_rows(&self) -> usize { + self.class_ids.len() + } + fn w_slot(&self) -> u8 { + 0 + } + fn current_cycle(&self) -> u32 { + 0 + } + fn phase(&self) -> KanbanColumn { + KanbanColumn::Planning + } + // ── value slab — must NEVER be touched by a classid route (F2 guard) ── + fn energy(&self) -> &[f32] { + panic!("F2 violated: classid node-match touched the energy value column"); + } + fn edges_raw(&self) -> &[u64] { + // edges are key/causal side, not value slab — allowed, but the + // node-match half does not use them; returned for trait completeness. + &self.edges + } + fn meta_raw(&self) -> &[u32] { + // meta is value-slab adjacent; the node-match must not read it. + panic!("F2 violated: classid node-match touched the meta value column"); + } + fn entity_type(&self) -> &[u16] { + // class_id() aliases entity_type — this IS the class hook, allowed. + &self.class_ids + } + fn row_for_local_key(&self, local_key: u64) -> Option { + self.keyed_rows + .iter() + .find(|(k, _)| *k == local_key) + .map(|(_, r)| *r) + } + } + + fn sample() -> GuardedSoa { + GuardedSoa { + // rows: 0=A(7) 1=B(9) 2=C(7) 3=D(9) 4=E(7) + class_ids: vec![7, 9, 7, 9, 7], + edges: vec![0; 5], + keyed_rows: vec![(0xABCD, 3), (0x1234, 0)], + } + } + + #[test] + fn match_nodes_by_class_routes_on_classid_only() { + let soa = sample(); + let hits = match_nodes_by_class(&soa, 7); + let rows: Vec = hits.iter().map(|h| h.row).collect(); + assert_eq!(rows, vec![0, 2, 4], "all class-7 rows, in order"); + assert!(hits.iter().all(|h| h.backend == Backend::MailboxSoa)); + // parity: the matched set equals the reference classid filter. 
+ let reference: Vec = soa + .class_ids + .iter() + .enumerate() + .filter(|(_, &c)| c == 7) + .map(|(i, _)| i) + .collect(); + assert_eq!(rows, reference); + } + + #[test] + fn match_nodes_by_class_empty_when_no_match() { + let soa = sample(); + assert!(match_nodes_by_class(&soa, 42).is_empty()); + } + + #[test] + fn match_node_by_local_key_resolves_via_key_index() { + let soa = sample(); + assert_eq!( + match_node_by_local_key(&soa, 0xABCD), + Some(NodeMatch { + row: 3, + backend: Backend::MailboxSoa + }) + ); + // unknown key → None (caller falls back to positional address). + assert_eq!(match_node_by_local_key(&soa, 0xDEAD), None); + } + + #[test] + fn f2_zero_value_decode_the_scan_never_panics_on_value_columns() { + // The GuardedSoa panics if energy()/meta_raw() are read. If this test + // completes, the classid node-match touched ONLY the class column. + let soa = sample(); + let _ = match_nodes_by_class(&soa, 7); + let _ = match_node_by_local_key(&soa, 0x1234); + } +} diff --git a/crates/lance-graph/src/graph/mod.rs b/crates/lance-graph/src/graph/mod.rs index 923258dd3..eded4abb0 100644 --- a/crates/lance-graph/src/graph/mod.rs +++ b/crates/lance-graph/src/graph/mod.rs @@ -13,6 +13,7 @@ pub mod blasgraph; pub mod fingerprint; pub mod graph_router; pub mod hydrate; +pub mod mailbox_scan; pub mod metadata; pub mod neighborhood; pub mod neuron; From 4549acf50524d544e633e6fc83088e061c7fc567 Mon Sep 17 00:00:00 2001 From: Claude Date: Thu, 18 Jun 2026 20:24:08 +0000 Subject: [PATCH 02/14] =?UTF-8?q?docs(epiphany):=20E-ADJACENCY-IS-KEY-AND-?= =?UTF-8?q?EDGECODEC=20=E2=80=94=20resolves=20the=20edge-rep=20boundary=20?= =?UTF-8?q?(=C2=A74b)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Operator correction: adjacency lives in two places, classid/key-resolved, never query-guessed: 1. 
HHTL cascade in the GUID key = CLAM hierarchical neighborhood (NiblePath is_ancestor_of/prefix; graph/neighborhood/clam.rs) — free, zero value decode. 2. 16-byte EdgeBlock = explicit typed edges per EdgeCodecFlavor: CoarseOnly (16x8) = 12 in-family + 4 external; Pq32x4 (32x4) = turbovec residue edges. 3. edges_raw = CausalEdge64 = SPO causal arcs (separate facet). The class picks the rep (classid -> ClassView -> EdgeCodecFlavor). Unblocks the deferred edge half of #544: next slice exposes the HHTL/key + EdgeBlock per row on MailboxSoaView, then CLAM prefix-route + EdgeBlock slot-deref, both zero-value-decode. Co-Authored-By: Claude Opus 4.8 Claude-Session: https://claude.ai/code/session_01CcpLeEC3XK8Eye53GKBVvi --- .claude/board/EPIPHANIES.md | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/.claude/board/EPIPHANIES.md b/.claude/board/EPIPHANIES.md index acc340e2a..556e0575a 100644 --- a/.claude/board/EPIPHANIES.md +++ b/.claude/board/EPIPHANIES.md @@ -1,3 +1,24 @@ +## 2026-06-18 — E-ADJACENCY-IS-KEY-AND-EDGECODEC — adjacency lives in two places, classid/key-resolved: HHTL cascade in the GUID = CLAM neighborhood; the 16-byte EdgeBlock = 12-family/4-external OR 32×4 turbovec per EdgeCodecFlavor; CausalEdge64 = SPO + +**Status:** FINDING (operator-stated, grounded in shipped `hhtl.rs` `NiblePath` + `canonical_node.rs` `EdgeCodecFlavor` + `graph/neighborhood/clam.rs`). Resolves `cypher-kanban-ast-unification-v1` boundary §4b ("which edge rep") — there is no single rep to pick; there are distinct adjacency facets, each selected by the key/classid, never guessed by the query. + +**The three facets:** + +1. **HHTL cascade *in the GUID key* = hierarchical/neighborhood adjacency = the CLAM tree.** `NiblePath::from_guid_prefix(guid)` over `classid·HEEL·HIP·TWIG`; `is_ancestor_of` / `prefix(d)` = centroid-tree containment. Longer shared prefix ⇒ nearer ⇒ same CLAM cluster. 
The "connecting tissue" structural traversal — **free, in the key, zero value decode**; `graph/neighborhood/clam.rs` is the search engine. Adjacency was never an `EdgeBlock` requirement for the neighborhood case — it is the cascade prefix. + +2. **The 16-byte `EdgeBlock` *in the node* = explicit typed edges, read per the classid's `EdgeCodecFlavor`:** + - **`CoarseOnly` (16×8) → 12 in-family + 4 external** adjacency edges (one byte = neighbor basin-local index → `local_key`). The 12/4 split is **family-internal vs cross-family interface** (canon). + - **`Pq32x4` (32×4) → turbovec residue edges** (same 16 bytes as 32 four-bit PQ residue codes). + - `CoarseResidue` (1+⌈D/2⌉) → coarse index + per-dim residue. + +3. **`edges_raw` column → `CausalEdge64` = SPO causal/belief arcs** (s/p/o palette indices — NOT a row→row adjacency pointer; a separate facet). + +**Traversal dispatch (the resolved boundary):** neighborhood/proximity → HHTL/CLAM (key); explicit typed edge → `EdgeBlock` via `classid → ClassView → EdgeCodecFlavor` (12-family/4-external **or** 32×4 turbovec); causal/belief → `CausalEdge64`. The class picks the rep; the query never guesses. + +**Wiring implication (next Inc-0 slice):** the edge/neighborhood traversal half needs `MailboxSoaView` to expose, per row, **(a)** the HHTL `NiblePath` (or the full `NodeGuid` key — the canon `NodeRow` already carries `key(16)`, so this exposes what's there) for the CLAM cascade, and **(b)** the `EdgeBlock` bytes + the class's `EdgeCodecFlavor` for the explicit-edge deref. Then: CLAM neighborhood = prefix routing on the key; `(a)-[r]->(b)` = `EdgeBlock` slot deref under the resolved flavor. Both zero-value-decode. The node-match half (`#544`) already lands the classid route; this adds the two key/edge accessors + the CLAM + EdgeBlock deref. 
Cross-refs: `E-GUID-IS-THE-GRAPH`, `E-CYPHER-IS-THE-KANBAN-AST`, `hhtl::NiblePath`, `canonical_node::{EdgeBlock, EdgeCodecFlavor}`, `graph/neighborhood/clam.rs`, `graph/mailbox_scan.rs`, `cypher-kanban-ast-unification-v1` §4b. + +--- + ## 2026-06-18 — E-OGAR-IS-FOUNDRY — being Palantir Foundry / Gotham reduces to "write the OGAR class schema + inheritance"; everything else (traversal, query, pipelines, actions, low-code apps) is generic machinery over it via the shared AST **Status:** FINDING (capstone; operator-stated). The platform-level reading of the whole arc: Foundry/Gotham is not a platform to rebuild — it is an **OGAR class-schema-inheritance exercise**, because every other Foundry/Gotham layer is already generic machinery the workspace ships, parameterized only by `classid`. From 7a674531a0f993bc47f4f5d4e00683704f9a1c7e Mon Sep 17 00:00:00 2001 From: Claude Date: Thu, 18 Jun 2026 20:35:36 +0000 Subject: [PATCH 03/14] =?UTF-8?q?docs(epiphany):=20E-HELIX-IS-EXACT-LOCATI?= =?UTF-8?q?ON=20=E2=80=94=20adjacency=20!=3D=20location;=20helix=20Signed3?= =?UTF-8?q?60=20is=20the=20exact=20orthogonal=20point,=20"where"=20is=20a?= =?UTF-8?q?=20decode=20ladder?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Operator correction: the helix is not more adjacency, it is the EXACT orthogonal LOCATION. Adjacency (HHTL/CLAM near, EdgeBlock connected) is relational; helix Signed360 (ValueTenant::HelixResidue, signed full-sphere golden-spiral, 6B) is the absolute exact coordinate. "Where" is a decode-cost ladder: 1. HHTL/CLAM containment - key prefix, zero value decode (which cluster). 2. Helix PLACE - deterministic from the address, zero value decode. 3. Helix RESIDUE - Signed360 6B in the value slab, one value-tenant decode (exact). Router consequence: proximity query = key (free); exact-position query = read the HelixResidue tenant (a value decode, costed as such). 
Grounded in canonical_node ValueTenant::HelixResidue + ValueSchema::Compressed. Co-Authored-By: Claude Opus 4.8 Claude-Session: https://claude.ai/code/session_01CcpLeEC3XK8Eye53GKBVvi --- .claude/board/EPIPHANIES.md | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/.claude/board/EPIPHANIES.md b/.claude/board/EPIPHANIES.md index 556e0575a..e0d51c2b1 100644 --- a/.claude/board/EPIPHANIES.md +++ b/.claude/board/EPIPHANIES.md @@ -1,3 +1,22 @@ +## 2026-06-18 — E-HELIX-IS-EXACT-LOCATION — adjacency (relational: near/edge) is NOT location (absolute: exact orthogonal coordinate); the helix `Signed360` gives the exact point, and "where" is a decode-cost ladder containment → place → residue + +**Status:** FINDING (operator correction, grounded in `canonical_node.rs` `ValueTenant::HelixResidue` = `Signed360`, signed full-sphere golden-spiral Place/Residue, 48-bit/6 B, in the `Compressed` value schema). Refines `E-ADJACENCY-IS-KEY-AND-EDGECODEC`: that epiphany answered *"who is related"* (adjacency). This answers *"where exactly is it"* (location) — a different question, and the helix is a **coordinate, not an edge**. + +**The split the operator drew:** +- **Adjacency = relational.** "Near" (HHTL/CLAM containment) or "connected" (`EdgeBlock` typed edge). Tells you the *neighborhood / the link*. +- **Location = absolute.** The helix `Signed360` gives the **exact orthogonal point on the signed full-sphere** (golden-spiral, equal-area ⇒ the axes are orthogonal ⇒ the coordinate decodes exactly, lossless-for-synthesis). Tells you *precisely here*, not *near what*. + +**"Where" is a decode-cost ladder (each rung more precise, more decode):** +1. **HHTL / CLAM containment** — key prefix (`NiblePath` `is_ancestor_of`/`prefix`). "Which cluster." **Zero value decode** (coarsest). +2. **Helix PLACE** — the golden-spiral position *deterministic from the address* (generated, not stored; the `place` half of place/residue). The exact point the key *implies*. 
**Zero value decode.** +3. **Helix RESIDUE** — `Signed360` (`HelixResidue` tenant, 6 B in the value slab). The exact orthogonal location to full precision = PLACE ⊕ the stored 6-byte remainder. **One value-tenant decode** (paid only when exactness beyond the cluster is needed). + +So the substrate answers location at three precisions off one node, and you pay decode only for the precision you ask for — consistent with `E-GUID-IS-THE-GRAPH` (the key prerenders coarse position free) and the OGAR place/residue doctrine (PLACE deterministic, RESIDUE stored). CLAM tells you the neighborhood; the helix `Signed360` tells you the exact orthogonal point. + +**Consequence for the Cypher/router work:** a proximity / `MATCH … NEAR` query resolves on the key (HHTL/CLAM, zero decode); an *exact-position / sort-by-true-distance / orthogonal-coordinate* query reads the `HelixResidue` tenant (a value decode — it is NOT a zero-decode operation, and must be costed as such, unlike the adjacency facets). Do not conflate "find the cluster" (free) with "find the exact point" (6-byte decode). Cross-refs: `E-ADJACENCY-IS-KEY-AND-EDGECODEC`, `E-GUID-IS-THE-GRAPH`, `canonical_node::{ValueTenant::HelixResidue, ValueSchema::Compressed}`, the `helix` crate's `Signed360`. + +--- + ## 2026-06-18 — E-ADJACENCY-IS-KEY-AND-EDGECODEC — adjacency lives in two places, classid/key-resolved: HHTL cascade in the GUID = CLAM neighborhood; the 16-byte EdgeBlock = 12-family/4-external OR 32×4 turbovec per EdgeCodecFlavor; CausalEdge64 = SPO **Status:** FINDING (operator-stated, grounded in shipped `hhtl.rs` `NiblePath` + `canonical_node.rs` `EdgeCodecFlavor` + `graph/neighborhood/clam.rs`). Resolves `cypher-kanban-ast-unification-v1` boundary §4b ("which edge rep") — there is no single rep to pick; there are distinct adjacency facets, each selected by the key/classid, never guessed by the query. 
From 04576d96f4efb0bd3784079d06b7cfd45a288c45 Mon Sep 17 00:00:00 2001 From: Claude Date: Thu, 18 Jun 2026 20:41:14 +0000 Subject: [PATCH 04/14] =?UTF-8?q?docs(epiphany):=20E-CLAM-IS-THE-MANIFOLD-?= =?UTF-8?q?ENGINE=20=E2=80=94=20CLAM=20is=20CAKES+CHAODA+LFD,=20not=20a=20?= =?UTF-8?q?containment=20check;=20the=20full=20geometry-of-a-node=20surfac?= =?UTF-8?q?e?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Operator: ndarray also has chaoda. Grounded in ndarray/src/hpc/clam.rs (CAKES Alg1/4/6, panCAKES Alg2, CHAODA Phase 4 anomaly_scores from LFD) + perturbation- sim chaoda (CHAODA-lite, names ndarray ClamTree as production). The CLAM facet is the manifold engine: containment + CAKES ranked-NN (attraction) + CHAODA anomaly (repulsion) + panCAKES compression, one tree, LFD the shared measure. Synthesized geometry-of-a-node: off one GUID the substrate answers which-cluster (CLAM, free), nearest-similar (CAKES), how-anomalous (CHAODA), exact-location (helix Signed360, value decode), connected-to (EdgeBlock), caused-by (CausalEdge64) - a complete geometric+relational surface, each at its own decode cost; the router dispatches a query to the cheapest facet that answers it. 
Co-Authored-By: Claude Opus 4.8 Claude-Session: https://claude.ai/code/session_01CcpLeEC3XK8Eye53GKBVvi --- .claude/board/EPIPHANIES.md | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/.claude/board/EPIPHANIES.md b/.claude/board/EPIPHANIES.md index e0d51c2b1..e8cb561b8 100644 --- a/.claude/board/EPIPHANIES.md +++ b/.claude/board/EPIPHANIES.md @@ -1,3 +1,30 @@ +## 2026-06-18 — E-CLAM-IS-THE-MANIFOLD-ENGINE — the CLAM facet is not a containment check; it is the CAKES+CHAODA+LFD ensemble (ndarray `clam.rs`): containment + ranked-NN + anomaly + compression, one tree, one geometric measure + +**Status:** FINDING (operator-stated, grounded in `ndarray/src/hpc/clam.rs` — CAKES arXiv:2309.05491 Partition Alg 1 / ρ-NN Alg 4 / DFS-sieve Alg 6, panCAKES Alg 2, **CHAODA Phase 4 `anomaly_scores` from the LFD distribution**; + `lance-graph/crates/perturbation-sim/src/chaoda.rs` CHAODA-lite which names ndarray's `ClamTree` as the production path). Enriches `E-ADJACENCY-IS-KEY-AND-EDGECODEC`: the "HHTL/CLAM neighborhood" facet is a whole geometry engine, not `is_ancestor_of`. + +**The CLAM tree (built off the GUID/scent vectors) answers four geometric questions, all over ONE tree with LFD as the shared measure:** +- **Containment** — `is_ancestor_of`/`prefix`: which cluster (the HHTL cascade view). +- **CAKES (attraction)** — entropy-scaling **exact k-NN search** (ρ-NN + DFS-sieve): the ranked similar neighbors. "Pull in the similar." +- **CHAODA (repulsion)** — per-cluster **anomaly score from the LFD distribution** (high Local Fractal Dimension = complex local geometry = outlier): how typical/anomalous a node is. "Push out the unusual." +- **panCAKES** — compression *via the same tree* (the cluster structure IS the codec). + +`CAKES pulls in the similar + CHAODA pushes out the unusual = meaningful structure` (perturbation-sim's framing). LFD is the one measure both ride. 
+ +**The synthesized geometry-of-a-node (the full surface off one GUID, tiered by decode cost):** + +| Question | Facet | Cost | +|---|---|---| +| which cluster? | CLAM containment (key prefix) | zero value decode | +| nearest similar? | **CAKES** ρ-NN/DFS over the CLAM tree | tree walk (scent vectors) | +| how anomalous? | **CHAODA** LFD anomaly score | tree walk (LFD) | +| exact location? | helix `Signed360` (`HelixResidue` tenant) | one value decode (`E-HELIX-IS-EXACT-LOCATION`) | +| connected to? | `EdgeBlock` typed edge (12-family/4-external or 32×4 turbovec) | edge-block read (`E-ADJACENCY…`) | +| caused by? | `CausalEdge64` SPO | `edges_raw` read | + +So a single node exposes containment + ranked-NN + anomaly + exact-location + typed-edges + causal — a complete geometric *and* relational surface, each answered by the right ndarray/contract primitive at its own cost. The router/Cypher layer dispatches a query to the cheapest facet that answers it (proximity→containment, similarity→CAKES, novelty→CHAODA, position→helix). Cross-refs: `ndarray::hpc::clam` (CAKES/panCAKES/CHAODA), `perturbation-sim::chaoda`, `graph/neighborhood/clam.rs`, `E-ADJACENCY-IS-KEY-AND-EDGECODEC`, `E-HELIX-IS-EXACT-LOCATION`, `E-GUID-IS-THE-GRAPH`. + +--- + ## 2026-06-18 — E-HELIX-IS-EXACT-LOCATION — adjacency (relational: near/edge) is NOT location (absolute: exact orthogonal coordinate); the helix `Signed360` gives the exact point, and "where" is a decode-cost ladder containment → place → residue **Status:** FINDING (operator correction, grounded in `canonical_node.rs` `ValueTenant::HelixResidue` = `Signed360`, signed full-sphere golden-spiral Place/Residue, 48-bit/6 B, in the `Compressed` value schema). Refines `E-ADJACENCY-IS-KEY-AND-EDGECODEC`: that epiphany answered *"who is related"* (adjacency). This answers *"where exactly is it"* (location) — a different question, and the helix is a **coordinate, not an edge**. 
From 2f1ad2ecdcc6bc48e4c538c1535b1946707ed20f Mon Sep 17 00:00:00 2001 From: Claude Date: Thu, 18 Jun 2026 20:52:37 +0000 Subject: [PATCH 05/14] feat(graph+contract): CLAM containment + CAKES nearest over HHTL prefixes (Inc 0, the manifold facet) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit panCAKES == radix trie == HHTL (E-PANCAKES-IS-RADIX-IS-HHTL): the CLAM cluster tree IS the radix trie of the classid·HEEL·HIP·TWIG nibble paths already in the keys, so the structural neighborhood is pure prefix arithmetic, zero value decode. - NiblePath::common_prefix_depth (contract) — the radix-trie nearest-neighbor measure; longest-common-prefix = CAKES attraction. +1 unit test. - MailboxSoaView::hhtl_path_at (contract) — per-row HHTL NiblePath, deferred- binding default None (canon NodeRow already carries key(16); the override exposes what's there). - graph::mailbox_scan::clam_contained (is_ancestor_of = the radix subtree = CLAM cluster) + cakes_nearest (common_prefix_depth ranking, k-NN). Both key-only, zero value decode. - Tests: containment = radix subtree (rows 0,1,2 under 1·2; leaf narrows to 0); CAKES ranks by shared depth [(0,3),(1,2),(2,2)]; deferred-None yields nothing (coarser-facet fallback); F2 zero-value-decode extended to CLAM/CAKES (the GuardedSoa value columns still panic-guarded). 7/7 mailbox_scan + 21/21 hhtl green, clippy clean. This is the first dispatch-table facet beyond the classid node-match: proximity/ neighborhood resolves on the key (CLAM/CAKES), free, per E-CLAM-IS-THE-MANIFOLD- ENGINE. Edge-deref (EdgeBlock) + helix exact-location (value decode) are the next tiers. 
Co-Authored-By: Claude Opus 4.8 Claude-Session: https://claude.ai/code/session_01CcpLeEC3XK8Eye53GKBVvi --- .claude/board/EPIPHANIES.md | 20 ++++ crates/lance-graph-contract/src/hhtl.rs | 47 ++++++++ crates/lance-graph-contract/src/soa_view.rs | 18 +++ crates/lance-graph/src/graph/mailbox_scan.rs | 111 ++++++++++++++++++- 4 files changed, 195 insertions(+), 1 deletion(-) diff --git a/.claude/board/EPIPHANIES.md b/.claude/board/EPIPHANIES.md index e8cb561b8..e23f961c6 100644 --- a/.claude/board/EPIPHANIES.md +++ b/.claude/board/EPIPHANIES.md @@ -1,3 +1,23 @@ +## 2026-06-18 — E-PANCAKES-IS-RADIX-IS-HHTL — panCAKES ≡ radix trie ≡ HHTL: the CLAM cluster tree is NOT a separate structure, it IS the radix trie of the HHTL prefixes already in the keys; so CLAM/CAKES = prefix arithmetic on the GUID, zero value decode + +**Status:** FINDING (operator-stated identity; wired this commit). The unification that makes the manifold-geometry facet (`E-CLAM-IS-THE-MANIFOLD-ENGINE`) *free*: there is no CLAM tree to build and store — the tree IS the radix trie of the `classid·HEEL·HIP·TWIG` nibble paths that already live in every GUID key. + +**The three are one structure seen three ways:** +- **HHTL** = the cascade tiers in the key (`NiblePath` over `classid·HEEL·HIP·TWIG`). +- **radix trie** = prefix tree; routing = bit-shift on the nibble path, not hash. +- **panCAKES** = the compressed CLAM tree — and a CLAM cluster IS a radix-trie subtree (shared prefix = same cluster); the cluster structure IS the codec. + +**Operational consequence (the wiring):** the CLAM/CAKES operations reduce to pure prefix arithmetic on the key, **zero value decode** — +- **CLAM containment** (which cluster / subtree) = `NiblePath::is_ancestor_of` — the radix subtree under the query prefix. 
+- **CAKES nearest** (ranked similar) = `NiblePath::common_prefix_depth` (added this commit) — longest-common-prefix ranking IS the entropy-scaling NN over the cluster tree; deeper shared prefix ⇒ same deeper cluster ⇒ nearer. +- **panCAKES compression** = the trie itself (shared prefixes are the dedup). + +No separate index, no scent-vector tree materialization for the *structural* neighborhood — the keys are the tree. (CAKES over *content scent vectors* in `ndarray::hpc::clam` remains the metric-space path for non-prefix similarity; this identity covers the HHTL-prefix structural neighborhood, the free tier.) + +**Wired:** `NiblePath::common_prefix_depth`; `MailboxSoaView::hhtl_path_at` (deferred-binding, default `None`); `graph::mailbox_scan::{clam_contained, cakes_nearest}` over the View — all key-only, F2 zero-value-decode-guarded (#544). Cross-refs: `E-CLAM-IS-THE-MANIFOLD-ENGINE`, `E-ADJACENCY-IS-KEY-AND-EDGECODEC`, `E-GUID-IS-THE-GRAPH`, `hhtl::NiblePath`, `ndarray::hpc::clam` (CAKES/panCAKES/CHAODA). + +--- + ## 2026-06-18 — E-CLAM-IS-THE-MANIFOLD-ENGINE — the CLAM facet is not a containment check; it is the CAKES+CHAODA+LFD ensemble (ndarray `clam.rs`): containment + ranked-NN + anomaly + compression, one tree, one geometric measure **Status:** FINDING (operator-stated, grounded in `ndarray/src/hpc/clam.rs` — CAKES arXiv:2309.05491 Partition Alg 1 / ρ-NN Alg 4 / DFS-sieve Alg 6, panCAKES Alg 2, **CHAODA Phase 4 `anomaly_scores` from the LFD distribution**; + `lance-graph/crates/perturbation-sim/src/chaoda.rs` CHAODA-lite which names ndarray's `ClamTree` as the production path). Enriches `E-ADJACENCY-IS-KEY-AND-EDGECODEC`: the "HHTL/CLAM neighborhood" facet is a whole geometry engine, not `is_ancestor_of`. 
diff --git a/crates/lance-graph-contract/src/hhtl.rs b/crates/lance-graph-contract/src/hhtl.rs index db353df61..76cb98d00 100644 --- a/crates/lance-graph-contract/src/hhtl.rs +++ b/crates/lance-graph-contract/src/hhtl.rs @@ -238,6 +238,35 @@ impl NiblePath { }) } + /// The depth of the **longest common prefix** with `other` — the radix-trie + /// nearest-neighbor measure. Larger ⇒ the two paths share more cascade tiers + /// ⇒ they sit in the same deeper CLAM cluster ⇒ they are nearer. + /// + /// This is the operational form of `panCAKES ≡ radix trie ≡ HHTL` + /// (`E-PANCAKES-IS-RADIX-IS-HHTL`): CAKES nearest-neighbor over the cluster + /// tree is *longest-common-prefix ranking* over the HHTL nibble paths — no + /// separate tree to build, the keys ARE the tree. Pure prefix arithmetic on + /// the key; never touches the value slab. + #[must_use] + pub const fn common_prefix_depth(self, other: Self) -> u8 { + let max = if self.depth < other.depth { + self.depth + } else { + other.depth + }; + let mut d = 0u8; + // Walk depth-by-depth while the aligned prefixes agree. `prefix(d)` is + // `Some` for every d ≤ depth, so the `_ => break` arm fires only on a mismatch. + while d < max { + let next = d + 1; + match (self.prefix(next), other.prefix(next)) { + (Some(a), Some(b)) if a.path == b.path && a.depth == b.depth => d = next, + _ => break, + } + } + d + } + /// Lower a [`NodeGuid`](crate::canonical_node::NodeGuid) prefix to a 16-nibble /// `NiblePath`, the routing-path counterpart of the GUID's /// `classid · HEEL · HIP · TWIG` cascade (identity-architecture v1 §3). 
@@ -456,6 +485,24 @@ mod tests { ); } + #[test] + fn common_prefix_depth_is_the_radix_nn_measure() { + let a = NiblePath::root(1).child(2).child(3); + let b = NiblePath::root(1).child(2).child(4); + let c = NiblePath::root(1).child(2); + let d = NiblePath::root(9); + assert_eq!(a.common_prefix_depth(a), 3, "self ⇒ full depth"); + assert_eq!(a.common_prefix_depth(b), 2, "1·2 shared, leaf differs"); + assert_eq!(a.common_prefix_depth(c), 2, "ancestor ⇒ min depth"); + assert_eq!(a.common_prefix_depth(d), 0, "different basin ⇒ 0"); + assert_eq!( + a.common_prefix_depth(b), + b.common_prefix_depth(a), + "symmetric" + ); + assert_eq!(NiblePath::EMPTY.common_prefix_depth(a), 0); + } + #[test] fn is_ancestor_of_is_cheap_prefix_reachability() { let mammal = NiblePath::root(0x0).child(0x3); // Endurant → …mammal diff --git a/crates/lance-graph-contract/src/soa_view.rs b/crates/lance-graph-contract/src/soa_view.rs index 6a8e6dc81..59a2596c6 100644 --- a/crates/lance-graph-contract/src/soa_view.rs +++ b/crates/lance-graph-contract/src/soa_view.rs @@ -88,6 +88,24 @@ pub trait MailboxSoaView { None } + /// The HHTL routing path ([`NiblePath`](crate::hhtl::NiblePath)) of `row`'s + /// GUID key — the `classid·HEEL·HIP·TWIG` cascade lowered to a nibble path. + /// This is the **radix-trie / CLAM cluster address** of the node + /// (`panCAKES ≡ radix trie ≡ HHTL`): containment = `is_ancestor_of`, + /// CAKES nearest = `common_prefix_depth`, both pure key arithmetic, **zero + /// value decode**. + /// + /// **Default = `None` (zero-fallback, deferred binding)** — same discipline as + /// [`row_for_local_key`](MailboxSoaView::row_for_local_key): a view that has + /// not materialized a per-row key/HHTL column returns `None`, and a CLAM/CAKES + /// scan over it yields nothing (the consumer falls back to a coarser facet). + /// An owner that carries the GUID key per row overrides this (the canon + /// `NodeRow` already holds `key(16)`, so the override exposes what is there). 
+ #[inline] + fn hhtl_path_at(&self, _row: usize) -> Option { + None + } + // NOTE (follow-up): the qualia column (`QualiaI4_16D`) accessor is intentionally omitted — // add `fn qualia(&self) -> &[crate::qualia::QualiaI4_16D]` when the first consumer // (planner strategy selection) needs it; keep the read surface minimal until then. diff --git a/crates/lance-graph/src/graph/mailbox_scan.rs b/crates/lance-graph/src/graph/mailbox_scan.rs index 1887b4b1f..4b5a86e85 100644 --- a/crates/lance-graph/src/graph/mailbox_scan.rs +++ b/crates/lance-graph/src/graph/mailbox_scan.rs @@ -34,6 +34,7 @@ //! lookup (via [`MailboxSoaView::row_for_local_key`]); edge dispatch lands once //! the representation boundary is resolved. +use lance_graph_contract::hhtl::NiblePath; use lance_graph_contract::soa_view::MailboxSoaView; use crate::graph::graph_router::Backend; @@ -87,6 +88,55 @@ pub fn match_node_by_local_key(view: &V, local_key: u64) -> O }) } +/// **CLAM containment** — the rows in `query`'s subtree: every row whose HHTL +/// path is a descendant-or-equal of `query` (`query.is_ancestor_of(path)`). +/// +/// This is the `panCAKES ≡ radix trie ≡ HHTL` neighborhood (`E-CLAM-IS-THE-MANIFOLD-ENGINE` +/// / `E-PANCAKES-IS-RADIX-IS-HHTL`): the CLAM cluster is the radix-trie subtree +/// under the query prefix. Pure key arithmetic — **zero value decode**. Rows with +/// no materialized HHTL path (`hhtl_path_at == None`) are skipped. +pub fn clam_contained(view: &V, query: NiblePath) -> Vec { + (0..view.n_rows()) + .filter(|&row| view.hhtl_path_at(row).is_some_and(|p| query.is_ancestor_of(p))) + .map(|row| NodeMatch { + row, + backend: Backend::MailboxSoa, + }) + .collect() +} + +/// **CAKES nearest** — the `k` rows nearest `query` by longest-common-prefix +/// depth (descending), the radix-trie nearest-neighbor over the HHTL paths. +/// +/// Returns `(NodeMatch, shared_depth)`; deeper shared prefix ⇒ nearer (same deeper +/// CLAM cluster). Ties keep ascending row order (stable). 
Pure key arithmetic — +/// **zero value decode**; rows without a materialized HHTL path are skipped. This +/// is CAKES "attraction" expressed as `NiblePath::common_prefix_depth` +/// (`E-CLAM-IS-THE-MANIFOLD-ENGINE`). +pub fn cakes_nearest( + view: &V, + query: NiblePath, + k: usize, +) -> Vec<(NodeMatch, u8)> { + let mut scored: Vec<(NodeMatch, u8)> = (0..view.n_rows()) + .filter_map(|row| { + view.hhtl_path_at(row).map(|p| { + ( + NodeMatch { + row, + backend: Backend::MailboxSoa, + }, + query.common_prefix_depth(p), + ) + }) + }) + .collect(); + // Descending by shared depth; stable sort preserves ascending row order on ties. + scored.sort_by(|a, b| b.1.cmp(&a.1)); + scored.truncate(k); + scored +} + #[cfg(test)] mod tests { use super::*; @@ -100,6 +150,7 @@ mod tests { class_ids: Vec, edges: Vec, keyed_rows: Vec<(u64, usize)>, + paths: Vec>, } impl MailboxSoaView for GuardedSoa { @@ -141,6 +192,9 @@ mod tests { .find(|(k, _)| *k == local_key) .map(|(_, r)| *r) } + fn hhtl_path_at(&self, row: usize) -> Option { + self.paths.get(row).copied().flatten() + } } fn sample() -> GuardedSoa { @@ -149,6 +203,15 @@ mod tests { class_ids: vec![7, 9, 7, 9, 7], edges: vec![0; 5], keyed_rows: vec![(0xABCD, 3), (0x1234, 0)], + // HHTL radix-trie paths (root basin 1): + // row0: 1·2·3 row1: 1·2·4 row2: 1·2 row3: 1·5 row4: 9 (other basin) + paths: vec![ + Some(NiblePath::root(1).child(2).child(3)), + Some(NiblePath::root(1).child(2).child(4)), + Some(NiblePath::root(1).child(2)), + Some(NiblePath::root(1).child(5)), + Some(NiblePath::root(9)), + ], } } @@ -193,9 +256,55 @@ mod tests { #[test] fn f2_zero_value_decode_the_scan_never_panics_on_value_columns() { // The GuardedSoa panics if energy()/meta_raw() are read. If this test - // completes, the classid node-match touched ONLY the class column. + // completes, the classid node-match + CLAM/CAKES touched ONLY the + // class/HHTL key columns, never the value slab. 
let soa = sample(); let _ = match_nodes_by_class(&soa, 7); let _ = match_node_by_local_key(&soa, 0x1234); + let _ = clam_contained(&soa, NiblePath::root(1).child(2)); + let _ = cakes_nearest(&soa, NiblePath::root(1).child(2).child(3), 3); + } + + #[test] + fn clam_contained_is_the_radix_subtree() { + // query = 1·2 ⇒ its CLAM cluster = the radix subtree under 1·2: + // rows 0 (1·2·3), 1 (1·2·4), 2 (1·2 itself). NOT 3 (1·5) or 4 (other basin 9). + let soa = sample(); + let rows: Vec = clam_contained(&soa, NiblePath::root(1).child(2)) + .iter() + .map(|m| m.row) + .collect(); + assert_eq!(rows, vec![0, 1, 2]); + // a deeper query narrows the subtree to the exact leaf. + let leaf: Vec = clam_contained(&soa, NiblePath::root(1).child(2).child(3)) + .iter() + .map(|m| m.row) + .collect(); + assert_eq!(leaf, vec![0]); + } + + #[test] + fn cakes_nearest_ranks_by_longest_common_prefix() { + // query = 1·2·3 (row 0). Shared-prefix depths: + // row0 1·2·3 →3, row1 1·2·4 →2, row2 1·2 →2, row3 1·5 →1, row4 9 →0. + let soa = sample(); + let near = cakes_nearest(&soa, NiblePath::root(1).child(2).child(3), 3); + let ranked: Vec<(usize, u8)> = near.iter().map(|(m, d)| (m.row, *d)).collect(); + assert_eq!(ranked, vec![(0, 3), (1, 2), (2, 2)], "nearest-3 by shared depth"); + assert!(near.iter().all(|(m, _)| m.backend == Backend::MailboxSoa)); + // k larger than n returns all rows, still depth-sorted descending. + let all = cakes_nearest(&soa, NiblePath::root(1).child(2).child(3), 99); + let depths: Vec = all.iter().map(|(_, d)| *d).collect(); + assert_eq!(depths, vec![3, 2, 2, 1, 0]); + } + + #[test] + fn clam_cakes_skip_rows_with_no_materialized_path() { + // A view with all-None hhtl paths (the deferred-binding default) yields + // nothing — the consumer falls back to a coarser facet, never a wrong row. 
+ let mut soa = sample(); + soa.paths = vec![None; 5]; + assert!(clam_contained(&soa, NiblePath::root(1)).is_empty()); + assert!(cakes_nearest(&soa, NiblePath::root(1), 5).is_empty()); } } From fbeef185be392abcd3c25f555dbefd07ba6b5ad1 Mon Sep 17 00:00:00 2001 From: Claude Date: Thu, 18 Jun 2026 20:58:23 +0000 Subject: [PATCH 06/14] =?UTF-8?q?feat(graph+contract):=20EdgeBlock=20typed?= =?UTF-8?q?-edge=20facet=20=E2=80=94=20CoarseOnly=2012-family/4-external?= =?UTF-8?q?=20slot=20decode=20(Inc=200)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The third dispatch-table facet: explicit typed edges (a)-[r]->... under the classid-resolved EdgeCodecFlavor (E-ADJACENCY-IS-KEY-AND-EDGECODEC). EdgeBlock is bytes 16..32 (the edge region), NOT the value slab, so still zero value decode. - MailboxSoaView::edge_block_at(row) -> Option (contract, deferred default None; the canon NodeRow carries edges(16), the override exposes it). - graph::mailbox_scan::{EdgeNeighbors, edge_slots_coarse}: under CoarseOnly, decode the 12 in-family + 4 external slots to their populated (non-zero) refs, family vs external. Pq32x4 (turbovec residue) / CoarseResidue are refused - they are NOT adjacency, never coerced to slots (boundary 4b: classid-resolved, not query-guessed). - Slot-byte -> neighbor-row resolution is deliberately deferred (the basin-local- index convention + zero-collision is the next encoding decision, analogous to local_key->row); this facet lands the structure (which slots are edges, family vs external, under which flavor), never fakes the row resolution. - Tests: populated decode ([2,5] family + [1] external), all-zero = no edges, no-block = None, non-Coarse flavors refused. F2 zero-value-decode extended. 9/9 mailbox_scan, clippy clean (sort_by_key + Reverse). 
Co-Authored-By: Claude Opus 4.8 Claude-Session: https://claude.ai/code/session_01CcpLeEC3XK8Eye53GKBVvi --- crates/lance-graph-contract/src/soa_view.rs | 19 +++++ crates/lance-graph/src/graph/mailbox_scan.rs | 87 +++++++++++++++++++- 2 files changed, 105 insertions(+), 1 deletion(-) diff --git a/crates/lance-graph-contract/src/soa_view.rs b/crates/lance-graph-contract/src/soa_view.rs index 59a2596c6..c6d6fff30 100644 --- a/crates/lance-graph-contract/src/soa_view.rs +++ b/crates/lance-graph-contract/src/soa_view.rs @@ -106,6 +106,25 @@ pub trait MailboxSoaView { None } + /// The 16-byte [`EdgeBlock`](crate::canonical_node::EdgeBlock) of `row` — the + /// node's **explicit typed edges** (12 in-family + 4 out-of-family one-byte + /// slots), bytes 16..32 of the canonical `NodeRow`. This is the edge region, + /// **NOT the value slab** (32..512), so reading it is **zero value decode**. + /// + /// How the 16 bytes are *interpreted* is the class's + /// [`EdgeCodecFlavor`](crate::canonical_node::EdgeCodecFlavor) + /// (`CoarseOnly` = 12-family/4-external adjacency, `Pq32x4` = 32×4 turbovec + /// residue) — resolved `classid → ClassView`, never guessed by the query + /// (`E-ADJACENCY-IS-KEY-AND-EDGECODEC`). + /// + /// **Default = `None` (zero-fallback, deferred binding)** — a view that has not + /// materialized the edge region returns `None`; an owner that carries the + /// canonical `NodeRow` (which holds `edges(16)`) overrides this. + #[inline] + fn edge_block_at(&self, _row: usize) -> Option { + None + } + // NOTE (follow-up): the qualia column (`QualiaI4_16D`) accessor is intentionally omitted — // add `fn qualia(&self) -> &[crate::qualia::QualiaI4_16D]` when the first consumer // (planner strategy selection) needs it; keep the read surface minimal until then. 
diff --git a/crates/lance-graph/src/graph/mailbox_scan.rs b/crates/lance-graph/src/graph/mailbox_scan.rs index 4b5a86e85..910c7feca 100644 --- a/crates/lance-graph/src/graph/mailbox_scan.rs +++ b/crates/lance-graph/src/graph/mailbox_scan.rs @@ -34,6 +34,7 @@ //! lookup (via [`MailboxSoaView::row_for_local_key`]); edge dispatch lands once //! the representation boundary is resolved. +use lance_graph_contract::canonical_node::EdgeCodecFlavor; use lance_graph_contract::hhtl::NiblePath; use lance_graph_contract::soa_view::MailboxSoaView; @@ -132,14 +133,60 @@ pub fn cakes_nearest( }) .collect(); // Descending by shared depth; stable sort preserves ascending row order on ties. - scored.sort_by(|a, b| b.1.cmp(&a.1)); + scored.sort_by_key(|&(_, depth)| core::cmp::Reverse(depth)); scored.truncate(k); scored } +/// The explicit typed edges of a node under the `CoarseOnly` flavor: the +/// **populated** (non-zero) slot references, split family-internal vs external. +/// +/// Canon: an `EdgeBlock` slot is "zeroed when unused", so a zero byte is an empty +/// slot and a non-zero byte is a basin-local edge reference. The 12 `in_family` +/// slots are intra-basin edges; the 4 `external` slots are cross-family interface +/// references (`E-ADJACENCY-IS-KEY-AND-EDGECODEC`). The refs are returned raw — +/// resolving a ref → neighbor row needs the basin-local-index→row convention +/// (the next encoding decision, analogous to `row_for_local_key`); this facet +/// lands the *structure* (which slots are edges, family vs external), not the +/// row-resolution, and never fakes it. +#[derive(Debug, Clone, PartialEq, Eq, Default)] +pub struct EdgeNeighbors { + /// Populated in-family (intra-basin) edge slot references (non-zero bytes). + pub in_family: Vec, + /// Populated out-of-family (cross-basin interface) edge slot references. 
+ pub external: Vec, +} + +/// Decode a node's explicit typed edges under the **`CoarseOnly`** flavor — +/// `(a)-[r]->…` as the populated 12-family/4-external slot references. +/// +/// Returns `None` when the view has no edge block for `row` +/// ([`MailboxSoaView::edge_block_at`] default), or when the class's `flavor` is +/// not `CoarseOnly` (the adjacency reading) — `Pq32x4` is **turbovec residue**, +/// not adjacency, and `CoarseResidue` is coarse+residue; both are a different +/// read handled elsewhere, never coerced into slot adjacency +/// (`E-ADJACENCY-IS-KEY-AND-EDGECODEC` boundary §4b: classid-resolved, not +/// query-guessed). **Zero value decode** — the `EdgeBlock` is bytes 16..32, the +/// edge region, never the 480 B value slab. +pub fn edge_slots_coarse( + view: &V, + row: usize, + flavor: EdgeCodecFlavor, +) -> Option { + if flavor != EdgeCodecFlavor::CoarseOnly { + return None; + } + let block = view.edge_block_at(row)?; + Some(EdgeNeighbors { + in_family: block.in_family.iter().copied().filter(|&b| b != 0).collect(), + external: block.out_family.iter().copied().filter(|&b| b != 0).collect(), + }) +} + #[cfg(test)] mod tests { use super::*; + use lance_graph_contract::canonical_node::EdgeBlock; use lance_graph_contract::kanban::KanbanColumn; /// A minimal view over fixed columns. The value-side columns @@ -151,6 +198,7 @@ mod tests { edges: Vec, keyed_rows: Vec<(u64, usize)>, paths: Vec>, + blocks: Vec>, } impl MailboxSoaView for GuardedSoa { @@ -195,6 +243,9 @@ mod tests { fn hhtl_path_at(&self, row: usize) -> Option { self.paths.get(row).copied().flatten() } + fn edge_block_at(&self, row: usize) -> Option { + self.blocks.get(row).copied().flatten() + } } fn sample() -> GuardedSoa { @@ -212,6 +263,17 @@ mod tests { Some(NiblePath::root(1).child(5)), Some(NiblePath::root(9)), ], + // row0 has in-family edges to refs 2,5 and one external ref 1; rest empty. 
+ blocks: vec![ + Some(EdgeBlock { + in_family: [2, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + out_family: [1, 0, 0, 0], + }), + Some(EdgeBlock::default()), + None, + None, + None, + ], } } @@ -263,6 +325,29 @@ mod tests { let _ = match_node_by_local_key(&soa, 0x1234); let _ = clam_contained(&soa, NiblePath::root(1).child(2)); let _ = cakes_nearest(&soa, NiblePath::root(1).child(2).child(3), 3); + let _ = edge_slots_coarse(&soa, 0, EdgeCodecFlavor::CoarseOnly); + } + + #[test] + fn edge_slots_coarse_decodes_populated_family_and_external() { + let soa = sample(); + let n = edge_slots_coarse(&soa, 0, EdgeCodecFlavor::CoarseOnly).unwrap(); + assert_eq!(n.in_family, vec![2, 5], "non-zero in-family slots only"); + assert_eq!(n.external, vec![1], "non-zero external slot"); + // an all-zero block ⇒ no edges (zeroed = unused). + let empty = edge_slots_coarse(&soa, 1, EdgeCodecFlavor::CoarseOnly).unwrap(); + assert!(empty.in_family.is_empty() && empty.external.is_empty()); + // no edge block materialized ⇒ None (deferred-binding fallback). + assert!(edge_slots_coarse(&soa, 2, EdgeCodecFlavor::CoarseOnly).is_none()); + } + + #[test] + fn edge_slots_coarse_refuses_non_coarse_flavors() { + // Pq32x4 = turbovec residue, NOT adjacency; CoarseResidue likewise. + // The classid-resolved flavor gates the read — never coerced to slots. + let soa = sample(); + assert!(edge_slots_coarse(&soa, 0, EdgeCodecFlavor::Pq32x4).is_none()); + assert!(edge_slots_coarse(&soa, 0, EdgeCodecFlavor::CoarseResidue).is_none()); } #[test] From 9f7047239fce29b69f7e98f85cde1b32c989efa8 Mon Sep 17 00:00:00 2001 From: Claude Date: Thu, 18 Jun 2026 21:26:46 +0000 Subject: [PATCH 07/14] style(mailbox_scan): cargo fmt + refresh module header to the three landed facets CI fmt --check failed on three blocks (let-chain filter, EdgeNeighbors collect, cakes assert). Applied cargo fmt. 
Also refreshed the stale module doc header (it still described only the node-match half and called CLAM/EdgeBlock "deferred") to document the three landed key-resident facets + the genuinely deferred tiers (slot->row convention, helix/CHAODA/SPO costed tier). No logic change; 9/9 mailbox_scan green, fmt clean. Co-Authored-By: Claude Opus 4.8 Claude-Session: https://claude.ai/code/session_01CcpLeEC3XK8Eye53GKBVvi --- crates/lance-graph/src/graph/mailbox_scan.rs | 70 ++++++++++++++------ 1 file changed, 48 insertions(+), 22 deletions(-) diff --git a/crates/lance-graph/src/graph/mailbox_scan.rs b/crates/lance-graph/src/graph/mailbox_scan.rs index 910c7feca..c051fa1da 100644 --- a/crates/lance-graph/src/graph/mailbox_scan.rs +++ b/crates/lance-graph/src/graph/mailbox_scan.rs @@ -11,28 +11,37 @@ //! prefix-route**, resolved off the key/class column with **zero value decode** //! (it never touches the 480 B value slab: `energy` / `meta` / fingerprints). //! -//! ## Scope of this increment (the verified-safe half) +//! ## Facets landed (the dispatch table — all key-resident, zero value decode) //! -//! This lands the **node-match** half — `MATCH (n:Label)` → the set of rows whose -//! class discriminator equals the queried class. That is the half that is correct -//! *without* the boundary the 5+3 council said to pin first. +//! The substrate IS the graph, so a query routes to the cheapest facet that +//! answers it, off the GUID key, never touching the 480 B value slab +//! (`energy` / `meta` / fingerprints — the F2 invariant): //! -//! The **edge-traversal** half (`(a)-[r]->(b)`) is deliberately **deferred**, for -//! two grounded reasons (verdict §4b): +//! - **classid node-match** ([`match_nodes_by_class`]) — `MATCH (n:Label)` as a +//! classid prefix-route; [`match_node_by_local_key`] for the `local_key`→row +//! point lookup. +//! - **CLAM/CAKES neighborhood** ([`clam_contained`] / [`cakes_nearest`]) — +//! 
`panCAKES ≡ radix trie ≡ HHTL`: the CLAM cluster tree IS the radix trie of +//! the HHTL nibble paths in the keys, so containment = `is_ancestor_of` and +//! nearest = `NiblePath::common_prefix_depth` (`E-PANCAKES-IS-RADIX-IS-HHTL`). +//! - **EdgeBlock typed edges** ([`edge_slots_coarse`]) — `(a)-[r]->…` under the +//! classid-resolved [`EdgeCodecFlavor`]; `EdgeBlock` is bytes 16..32 (the edge +//! region, not the value slab). `CoarseOnly` = 12-family/4-external slot +//! structure; `Pq32x4` (turbovec residue) / `CoarseResidue` are refused, not +//! coerced to adjacency (`E-ADJACENCY-IS-KEY-AND-EDGECODEC` §4b). //! -//! 1. **Edge-representation is not yet pinned.** `EdgeBlock` (12+4 one-byte -//! *adjacency* slots → neighbor `local_key`) and `CausalEdge64` (an **SPO -//! triple** of s/p/o palette indices, the `edges_raw` column) are NOT -//! interchangeable. A relationship-type must bind to one via the class's -//! `EdgeCodecFlavor` — the router must not guess by availability. -//! 2. **The View exposes only `edges_raw` (`CausalEdge64`/SPO), not `EdgeBlock` -//! adjacency.** `CausalEdge64` carries s/p/o palette indices, not a row→row -//! pointer, so it cannot be dereferenced to a neighbor row without the -//! adjacency accessor (a follow-on contract addition). +//! ## Deliberately deferred (different cost tier / open encoding decision) //! -//! So this module does the classid prefix-route and the `local_key`→row point -//! lookup (via [`MailboxSoaView::row_for_local_key`]); edge dispatch lands once -//! the representation boundary is resolved. +//! - **EdgeBlock slot-byte → neighbor-row resolution** — needs the basin-local- +//! index convention (zero = unused; 1-based vs basin-table), the next encoding +//! decision, analogous to `row_for_local_key`. This module lands the edge +//! *structure*, never fakes the row resolution. +//! - **Helix `Signed360` exact-location, CHAODA anomaly, CausalEdge64 SPO** — +//! 
the costed tier: a value-slab decode (helix), the metric-space `ClamTree` +//! (CHAODA), or cross-crate `causal-edge` accessors (SPO). They break the +//! zero-value-decode invariant or need other-crate work, so they land +//! separately with their own cost gates (`E-HELIX-IS-EXACT-LOCATION`, +//! `E-CLAM-IS-THE-MANIFOLD-ENGINE`). use lance_graph_contract::canonical_node::EdgeCodecFlavor; use lance_graph_contract::hhtl::NiblePath; @@ -98,7 +107,10 @@ pub fn match_node_by_local_key(view: &V, local_key: u64) -> O /// no materialized HHTL path (`hhtl_path_at == None`) are skipped. pub fn clam_contained(view: &V, query: NiblePath) -> Vec { (0..view.n_rows()) - .filter(|&row| view.hhtl_path_at(row).is_some_and(|p| query.is_ancestor_of(p))) + .filter(|&row| { + view.hhtl_path_at(row) + .is_some_and(|p| query.is_ancestor_of(p)) + }) .map(|row| NodeMatch { row, backend: Backend::MailboxSoa, @@ -178,8 +190,18 @@ pub fn edge_slots_coarse( } let block = view.edge_block_at(row)?; Some(EdgeNeighbors { - in_family: block.in_family.iter().copied().filter(|&b| b != 0).collect(), - external: block.out_family.iter().copied().filter(|&b| b != 0).collect(), + in_family: block + .in_family + .iter() + .copied() + .filter(|&b| b != 0) + .collect(), + external: block + .out_family + .iter() + .copied() + .filter(|&b| b != 0) + .collect(), }) } @@ -375,7 +397,11 @@ mod tests { let soa = sample(); let near = cakes_nearest(&soa, NiblePath::root(1).child(2).child(3), 3); let ranked: Vec<(usize, u8)> = near.iter().map(|(m, d)| (m.row, *d)).collect(); - assert_eq!(ranked, vec![(0, 3), (1, 2), (2, 2)], "nearest-3 by shared depth"); + assert_eq!( + ranked, + vec![(0, 3), (1, 2), (2, 2)], + "nearest-3 by shared depth" + ); assert!(near.iter().all(|(m, _)| m.backend == Backend::MailboxSoa)); // k larger than n returns all rows, still depth-sorted descending. 
let all = cakes_nearest(&soa, NiblePath::root(1).child(2).child(3), 99); From 237ae65b11cca696816d6e11db6bea6e8c9835c7 Mon Sep 17 00:00:00 2001 From: Claude Date: Thu, 18 Jun 2026 21:37:46 +0000 Subject: [PATCH 08/14] =?UTF-8?q?docs(epiphany):=20E-TENANT-ANGLE-SWEEP-IS?= =?UTF-8?q?-PRUNE-THEN-RANK=20=E2=80=94=20the=20tenant-switch=2016K-from-a?= =?UTF-8?q?n-angle=20compare=20is=20the=20costed=20value=20sweep,=20compos?= =?UTF-8?q?ing=20with=20the=20free=20key=20facets=20as=20a=20two-stage=20c?= =?UTF-8?q?ascade?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit "Switch tenant + compare across the 16K mailbox from an angle" decomposes as: batch Hamming sweep (hamming_top_k over a contiguous identity plane) = the right use of popcount on the homogeneous 16K fingerprint; "angle" = which plane (content/topic/angle) + query; "tenant switch" = column selector. Load-bearing: it composes with #544's free key facets as a two-stage HHTL cascade - CLAM/CAKES prefix prune (free, zero decode) then angle-Hamming rank over the pruned set (costed). Key prune + content rank = two halves of one query. Cost-class boundary: this facet deliberately decodes the value plane, NOT in the F2 zero-decode class, lands on its own branch with its own cost gate. Grounded in MailboxSoA content/topic/angle_row + ndarray hamming_top_k + cycle snapshot. 
Co-Authored-By: Claude Opus 4.8 Claude-Session: https://claude.ai/code/session_01CcpLeEC3XK8Eye53GKBVvi --- .claude/board/EPIPHANIES.md | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/.claude/board/EPIPHANIES.md b/.claude/board/EPIPHANIES.md index e23f961c6..2c559b9a7 100644 --- a/.claude/board/EPIPHANIES.md +++ b/.claude/board/EPIPHANIES.md @@ -1,3 +1,27 @@ +## 2026-06-18 — E-TENANT-ANGLE-SWEEP-IS-PRUNE-THEN-RANK — "switch tenant + compare across the 16K mailbox from an angle" is the costed value-side sweep; it composes with the free key facets as a two-stage HHTL cascade (key prune → angle Hamming rank) + +**Status:** FINDING (forward design; grounded in `MailboxSoA::{content_row,topic_row,angle_row}` W1b planes + `ndarray::simd_avx2::{hamming_batch,hamming_top_k}` + the cycle-aware write contract). The value-side mirror of the free key facets (`E-CLAM-IS-THE-MANIFOLD-ENGINE` / #544), and how they compose. + +**The operation decomposed:** +- **"compare across the 16K views"** = a batch Hamming sweep — `hamming_top_k(query, plane_database, n_rows=16384, row_bytes=2048)` over a contiguous SoA identity plane. This is the *right* use of popcount (`E-… popcount`): homogeneous 16K-bit fingerprint bits, the AVX-512 streaming path (~611M lookups/sec), NOT the heterogeneous GUID key. +- **"from a certain angle"** = which identity plane (`content`/`topic`/`angle`) + the query vector — the perspective axis (AGI-as-glove: angle = the angle/QualiaColumn read). +- **"tenant switch"** = a **column selector** (which value tenant the sweep reads: angle plane / `Energy` / `HelixResidue`). A which-column choice, not a key op. + +**The load-bearing structure — two-stage HHTL cascade (free prune → costed rank):** + +| Stage | Facet | Cost | Touches | +|---|---|---|---| +| 1. prune | CLAM/CAKES prefix on the GUID key (#544 `cakes_nearest`) | **free** | key only, zero value decode | +| 2. 
rank | angle-Hamming sweep over the *pruned* rows | **costed** | the angle value plane | + +A tenant-switch comparison is NOT a naive full-16K sweep: the free key prefix gives the candidate cluster zero-decode, then the angle-Hamming ranks ONLY the pruned set ("95% of pairs skipped", the bgz-tensor HHTL-cascade doctrine). Full-16K sweep is the un-pruned fallback. **The key facets and this value sweep are the two halves of one query: address-prune (key) + content-rank (value plane).** + +**Cost-class boundary (must hold):** this facet **deliberately decodes the value plane** — it is explicitly NOT in the zero-value-decode class that the #544 F2 gate enforces. That's correct (it's the other side of the line), so it lands on its own branch with its own cost gate, never mixed into the free key facets. Cycle-consistency: the sweep reads a coherent `current_cycle` snapshot (the cycle-aware write contract), so a tenant-switch compare is a clean point-in-time view. + +**Wiring (when built):** `MailboxSoaView` plane accessor (`angle_row`/plane selector, deferred-binding default like `hhtl_path_at`); a `compare_from_angle(view, plane, query, k)` calling `hamming_top_k`; tenant switch = a `PlaneSelector` enum; compose `cakes_nearest` (prune) → `compare_from_angle` (rank). Cross-refs: `E-CLAM-IS-THE-MANIFOLD-ENGINE`, `E-HELIX-IS-EXACT-LOCATION`, `E-GUID-IS-THE-GRAPH`, `MailboxSoA::{content,topic,angle}_row`, `ndarray::simd_avx2::hamming_top_k`, `graph::mailbox_scan` (#544). + +--- + ## 2026-06-18 — E-PANCAKES-IS-RADIX-IS-HHTL — panCAKES ≡ radix trie ≡ HHTL: the CLAM cluster tree is NOT a separate structure, it IS the radix trie of the HHTL prefixes already in the keys; so CLAM/CAKES = prefix arithmetic on the GUID, zero value decode **Status:** FINDING (operator-stated identity; wired this commit). 
The unification that makes the manifold-geometry facet (`E-CLAM-IS-THE-MANIFOLD-ENGINE`) *free*: there is no CLAM tree to build and store — the tree IS the radix trie of the `classid·HEEL·HIP·TWIG` nibble paths that already live in every GUID key. From 8fa4238246a28374e1c4f47a2c67dc243d88df6b Mon Sep 17 00:00:00 2001 From: Claude Date: Thu, 18 Jun 2026 21:42:13 +0000 Subject: [PATCH 09/14] =?UTF-8?q?docs(epiphany):=20correct=20E-TENANT-ANGL?= =?UTF-8?q?E=20=E2=86=92=20CAM-PQ=20ADC,=20not=20Hamming=20sweep;=20IVF=20?= =?UTF-8?q?coarse=20quantizer=20IS=20the=20HHTL/CLAM=20prefix?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Operator: "sweep or just 90° fingerprint vector cam index" → CAM index. The value-side rank is a CAM-PQ ADC (distance-table lookups + IVF probe), never a linear Hamming sweep (that's only the no-index fallback). The load-bearing correction: CAM-PQ = IVF-PQ, and its IVF coarse quantizer IS the HHTL/CLAM prefix (turbovec: palette256 = coarse quantizer) while the PQ residual IS the turbovec Pq32x4 / value-slab codes. So #544's cakes_nearest prefix prune is literally the IVF coarse-quantization stage, not a prefilter on a scan. 90° = content/topic/angle orthogonal axes each get their own distance table; tenant switch = which orthogonal table to ADC against. Retitled the entry (was "…-SWEEP-IS-PRUNE-THEN-RANK"); fixed an accidental duplicate E-PANCAKES header. 
Co-Authored-By: Claude Opus 4.8 Claude-Session: https://claude.ai/code/session_01CcpLeEC3XK8Eye53GKBVvi --- .claude/board/EPIPHANIES.md | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/.claude/board/EPIPHANIES.md b/.claude/board/EPIPHANIES.md index 2c559b9a7..5db245025 100644 --- a/.claude/board/EPIPHANIES.md +++ b/.claude/board/EPIPHANIES.md @@ -1,24 +1,24 @@ -## 2026-06-18 — E-TENANT-ANGLE-SWEEP-IS-PRUNE-THEN-RANK — "switch tenant + compare across the 16K mailbox from an angle" is the costed value-side sweep; it composes with the free key facets as a two-stage HHTL cascade (key prune → angle Hamming rank) +## 2026-06-18 — E-TENANT-ANGLE-RANK-IS-CAM-PQ-ADC — "switch tenant + compare across the 16K from an angle" is a CAM-PQ ADC (table lookups), NOT a Hamming sweep; and its IVF coarse quantizer IS the HHTL/CLAM prefix — the prune+rank cascade is literally IVF-PQ -**Status:** FINDING (forward design; grounded in `MailboxSoA::{content_row,topic_row,angle_row}` W1b planes + `ndarray::simd_avx2::{hamming_batch,hamming_top_k}` + the cycle-aware write contract). The value-side mirror of the free key facets (`E-CLAM-IS-THE-MANIFOLD-ENGINE` / #544), and how they compose. +**Status:** FINDING (operator correction "sweep or just 90° fingerprint vector cam index" → CAM index; grounded in `ndarray/src/hpc/{cam_pq,cam_index}.rs`, contract `cam::{DistanceTableProvider, CamCodecContract, IvfContract::probe}`, turbovec KNOWLEDGE "palette256 = coarse quantizer"). **Supersedes this entry's first framing** (posted 2026-06-18 as "…-SWEEP-IS-PRUNE-THEN-RANK"): the Hamming sweep is only the naive fallback — the real path is CAM-PQ ADC, and the two stages are IVF-PQ, not a prefilter+scan. -**The operation decomposed:** -- **"compare across the 16K views"** = a batch Hamming sweep — `hamming_top_k(query, plane_database, n_rows=16384, row_bytes=2048)` over a contiguous SoA identity plane. 
This is the *right* use of popcount (`E-… popcount`): homogeneous 16K-bit fingerprint bits, the AVX-512 streaming path (~611M lookups/sec), NOT the heterogeneous GUID key. -- **"from a certain angle"** = which identity plane (`content`/`topic`/`angle`) + the query vector — the perspective axis (AGI-as-glove: angle = the angle/QualiaColumn read). -- **"tenant switch"** = a **column selector** (which value tenant the sweep reads: angle plane / `Energy` / `HelixResidue`). A which-column choice, not a key op. +**The operation = CAM-PQ ADC, all table lookups (no linear scan):** +- angle (90°/orthogonal) fingerprint → CAM-PQ `encode` → PQ codes; +- query → precompute the **distance table** (`DistanceTableProvider`), then **ADC** = per-subspace table lookups summed (O(1)/candidate, no decompression, no per-row popcount); +- `IvfContract::probe(query, num_probes)` → the few coarse cells to touch; ADC only inside them → sub-linear. This IS the "attention as table lookup" / 611M-lookups path — it was never a sweep. -**The load-bearing structure — two-stage HHTL cascade (free prune → costed rank):** +**The unification (the load-bearing correction): the IVF coarse quantizer IS the HHTL/CLAM prefix.** CAM-PQ = IVF-PQ = coarse quantizer → residual ADC, and: -| Stage | Facet | Cost | Touches | -|---|---|---|---| -| 1. prune | CLAM/CAKES prefix on the GUID key (#544 `cakes_nearest`) | **free** | key only, zero value decode | -| 2. 
rank | angle-Hamming sweep over the *pruned* rows | **costed** | the angle value plane | +| CAM-PQ stage | ≡ | shipped facet | +|---|---|---| +| IVF coarse cell (which centroid) | ≡ | **HHTL prefix / CLAM containment** (`cakes_nearest`, #544) — the key IS the coarse quantizer (turbovec: "palette256 = coarse quantizer") | +| PQ residual codes (fine ADC) | ≡ | **turbovec residue** (`EdgeCodecFlavor::Pq32x4`, 32×4) / value-slab CAM-PQ tenant | -A tenant-switch comparison is NOT a naive full-16K sweep: the free key prefix gives the candidate cluster zero-decode, then the angle-Hamming ranks ONLY the pruned set ("95% of pairs skipped", the bgz-tensor HHTL-cascade doctrine). Full-16K sweep is the un-pruned fallback. **The key facets and this value sweep are the two halves of one query: address-prune (key) + content-rank (value plane).** +So the prune→rank cascade is **literally IVF-PQ**: the HHTL prefix is the coarse-quantization step, the PQ residual is the fine ADC. Not a prefilter bolted onto a sweep — one CAM index whose coarse stage is free (key) and whose fine stage is the residual ADC (costed, but table-lookups not scan). -**Cost-class boundary (must hold):** this facet **deliberately decodes the value plane** — it is explicitly NOT in the zero-value-decode class that the #544 F2 gate enforces. That's correct (it's the other side of the line), so it lands on its own branch with its own cost gate, never mixed into the free key facets. Cycle-consistency: the sweep reads a coherent `current_cycle` snapshot (the cycle-aware write contract), so a tenant-switch compare is a clean point-in-time view. +**90°/orthogonal:** `content`/`topic`/`angle` are orthogonal axes → each gets its own PQ codebook + distance table; "switch tenant / from an angle" = switch which orthogonal distance table you ADC against, and orthogonality makes the per-axis ADCs separable. The Hamming **sweep** (`hamming_top_k`) remains only as the no-CAM-index fallback for tiny/un-indexed sets. 
-**Wiring (when built):** `MailboxSoaView` plane accessor (`angle_row`/plane selector, deferred-binding default like `hhtl_path_at`); a `compare_from_angle(view, plane, query, k)` calling `hamming_top_k`; tenant switch = a `PlaneSelector` enum; compose `cakes_nearest` (prune) → `compare_from_angle` (rank). Cross-refs: `E-CLAM-IS-THE-MANIFOLD-ENGINE`, `E-HELIX-IS-EXACT-LOCATION`, `E-GUID-IS-THE-GRAPH`, `MailboxSoA::{content,topic,angle}_row`, `ndarray::simd_avx2::hamming_top_k`, `graph::mailbox_scan` (#544). +**Cost-class boundary (unchanged):** the ADC reads the residual codes (value-side), NOT zero-value-decode; lands on its own branch with its own cost gate, never mixed into #544's free key facets. Cycle-consistency unchanged (coherent `current_cycle` snapshot). Wiring (when built): reuse the existing CAM-PQ (`ndarray::hpc::cam_pq`/`cam_index`, contract `IvfContract`) with the HHTL prefix as the IVF coarse quantizer; per-axis (content/topic/angle) distance tables; `cakes_nearest` (= IVF probe) → ADC. Cross-refs: `E-CLAM-IS-THE-MANIFOLD-ENGINE`, `E-PANCAKES-IS-RADIX-IS-HHTL`, `E-ADJACENCY-IS-KEY-AND-EDGECODEC` (Pq32x4 = the residual), `cam::IvfContract`, `ndarray::hpc::cam_pq`. --- From e2c518e8ee1fffdb73dc68cda59c9579a5d73629 Mon Sep 17 00:00:00 2001 From: Claude Date: Fri, 19 Jun 2026 04:42:25 +0000 Subject: [PATCH 10/14] =?UTF-8?q?docs(epiphany):=20E-ORTHOGONAL-BUNDLE-IS-?= =?UTF-8?q?WHT-READOUT=20=E2=80=94=2090=C2=B0=20bundle=20=3D=20Parseval=20?= =?UTF-8?q?(any=20basis);=20Walsh-Hadamard=20projection=20iff=20bipolar=20?= =?UTF-8?q?=C2=B11=20basis=20(our=20case);=20the=20third=20ranking=20path?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Operator: "bundle a 90° sweep → a hadamard projection or something" — made precise + graded. 
[G] orthogonal bundle is Parseval-recoverable in any orthonormal basis; [G] it's a Walsh-Hadamard projection iff the basis is ±1 WH (H^T H = N I); [G, this substrate] fingerprints are bipolar ±1 so it literally is a WHT (canon bipolar-phase pyramid + witness.rs particle==wave). Caveats: two Hadamards (transform = basis change, NOT the product = vsa_bind); "Walsh = eigenbasis" only on hypercube-structured data (sketch.rs:20). Payoff: the third ranking path — FWHT the field once, read many angle queries by Parseval (transform-once/read-many); honest bound: no measured single-query speedup (witness.rs). Grounded in ndarray::simd::wht_f32 + perturbation-sim sketch/witness. Co-Authored-By: Claude Opus 4.8 Claude-Session: https://claude.ai/code/session_01CcpLeEC3XK8Eye53GKBVvi --- .claude/board/EPIPHANIES.md | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/.claude/board/EPIPHANIES.md b/.claude/board/EPIPHANIES.md index 5db245025..3d13b54c6 100644 --- a/.claude/board/EPIPHANIES.md +++ b/.claude/board/EPIPHANIES.md @@ -1,3 +1,28 @@ +## 2026-06-18 — E-ORTHOGONAL-BUNDLE-IS-WHT-READOUT — bundling a 90° (orthogonal) sweep is Parseval-recoverable in any basis; it is specifically a Walsh-Hadamard projection in the bipolar ±1 basis (this substrate's case) — the third ranking path: transform-once, read-many + +**Status:** FINDING (graded; operator intuition "bundle a 90° sweep → a Hadamard projection or something" — made precise). Grounded in `ndarray::simd::wht_f32`, `perturbation-sim::sketch::{fwht, walsh_pyramid_energy}`, `perturbation-sim::witness.rs` (Parseval-over-FWHT, "particle == wave"), OGAR canon "Bipolar-phase pyramid — Walsh-Hadamard on VSA". Theorem-checker applied; the "also Hadamard" is conditional, not universal. + +**Graded statement:** +- **[G]** Bundle (`vsa_bundle` = Σ add) of mutually **orthogonal** (90°) vectors → each component recoverable by orthogonal projection; Parseval `‖Σ‖²=Σ‖parts‖²`. 
True in **any** orthonormal basis — orthogonal decomposition, not yet Hadamard. +- **[G]** It is specifically a **Walsh-Hadamard** projection **iff** the basis is the ±1 WH basis (H symmetric, `HᵀH=N·I`, self-inverse up to N): bundle = inverse WHT of coefficients, readout = forward WHT. +- **[G, this substrate]** Fingerprints are **bipolar ±1**, so the basis IS (block-)Hadamard → a 90° bundle here literally IS a WHT projection. Canon already formalizes it (bipolar-phase pyramid = WHT; sign=`vsa_bind`/XOR, magnitude=`vsa_bundle`/add; `witness.rs` particle==wave). + +**Caveats (the "or something"):** +- **Two Hadamards — don't conflate:** the **transform** (WHT, orthogonal basis change — what's meant) vs the **product** (element-wise multiply = `vsa_bind`). The WHT structure comes from the orthogonal ±1 **basis**, not the bundling op (plain add). +- **[H, conditional]** "Walsh = eigenbasis" holds **only on hypercube-structured data** (`sketch.rs:20` states this). The GUID/HHTL nibble cube is approximately that, so WHT is the natural spectral basis here — but NOT the eigenbasis of an arbitrary graph. So: orthogonal ⇒ Parseval (general); Hadamard ⇒ ±1 basis only; eigenbasis ⇒ hypercube only. + +**The payoff — the third ranking path (completes the trio with `E-TENANT-ANGLE-RANK-IS-CAM-PQ-ADC`):** + +| Path | How | When | +|---|---|---| +| Hamming **sweep** | per-row popcount | naive fallback | +| **CAM-PQ ADC** | IVF probe + distance tables (coarse = HHTL prefix) | indexed, per-candidate | +| **WHT spectral readout** | FWHT the bundled field **once**, read each angle query by Parseval inner product (`witness.rs` particle==wave) | one field, **many** angle queries — amortized; exact *because* of 90° orthogonality | + +**[H] Honest bound (from `witness.rs` itself):** the WHT "wave" path wins only when one field is reused across many queries (Parseval reuse); it makes **NO measured single-query speedup claim**. 
So it's a real transform-once/read-many path, not a universal speedup. Cross-refs: `E-TENANT-ANGLE-RANK-IS-CAM-PQ-ADC`, OGAR "Bipolar-phase pyramid = Walsh-Hadamard", `ndarray::simd::wht_f32`, `perturbation-sim::{sketch::fwht, witness}`, `I-SUBSTRATE-MARKOV` (bundle = add, the algebra this rides). + +--- + ## 2026-06-18 — E-TENANT-ANGLE-RANK-IS-CAM-PQ-ADC — "switch tenant + compare across the 16K from an angle" is a CAM-PQ ADC (table lookups), NOT a Hamming sweep; and its IVF coarse quantizer IS the HHTL/CLAM prefix — the prune+rank cascade is literally IVF-PQ **Status:** FINDING (operator correction "sweep or just 90° fingerprint vector cam index" → CAM index; grounded in `ndarray/src/hpc/{cam_pq,cam_index}.rs`, contract `cam::{DistanceTableProvider, CamCodecContract, IvfContract::probe}`, turbovec KNOWLEDGE "palette256 = coarse quantizer"). **Supersedes this entry's first framing** (posted 2026-06-18 as "…-SWEEP-IS-PRUNE-THEN-RANK"): the Hamming sweep is only the naive fallback — the real path is CAM-PQ ADC, and the two stages are IVF-PQ, not a prefilter+scan. From 5b30bd2f5ff22e310fb677a2bde07f110c25353d Mon Sep 17 00:00:00 2001 From: Claude Date: Fri, 19 Jun 2026 04:56:36 +0000 Subject: [PATCH 11/14] =?UTF-8?q?docs(epiphany):=20E-WHT-META-AWARENESS-AN?= =?UTF-8?q?D-KRONECKER-LOOKUP=20=E2=80=94=20per-tenant=20WHT=20spectrum=20?= =?UTF-8?q?=3D=20cheap=20meta-awareness;=20WHT=20tensor-factorizes=20along?= =?UTF-8?q?=20HHTL=20=E2=86=92=20exponential=20prefix-table=20lookup=20(ca?= =?UTF-8?q?pstone)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two graded claims (operator synthesis). Claim 1 [G]: one FWHT per tenant = few-coefficient global summary (walsh_pyramid_energy: per-dyadic-level energy + coarse fraction); coarse-dominant=predictable, fine-spread=surprising → a free-energy/awareness proxy, transform-once, cheap, for all tenants a small meta-fingerprint panel. 
Claim 2 [G mechanism / H "any factor"]: H_{16^n} = H_16^⊗n (Kronecker; FWHT butterfly = the HHTL nibble tree), so a Walsh op over a 16^k prefix-subtree = k stages of 16-point table ops = O(k·16) vs O(16^k) for a SEPARABLE (low-sequency) factor — "exponential lookup over prefixed tables" (bgz-tensor attention-as-lookup + cascade as Kronecker). Caveat: only for separable factors; full FWHT is O(N log N) not sub-linear. Capstone: five threads (PANCAKES index / ORTHOGONAL-BUNDLE transform / TENANT-ANGLE tables / spectrum summary / prefix-lookup composition) are ONE Walsh-Kronecker structure along the nibble hierarchy. Co-Authored-By: Claude Opus 4.8 Claude-Session: https://claude.ai/code/session_01CcpLeEC3XK8Eye53GKBVvi --- .claude/board/EPIPHANIES.md | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/.claude/board/EPIPHANIES.md b/.claude/board/EPIPHANIES.md index 3d13b54c6..b31e52248 100644 --- a/.claude/board/EPIPHANIES.md +++ b/.claude/board/EPIPHANIES.md @@ -1,3 +1,28 @@ +## 2026-06-18 — E-WHT-META-AWARENESS-AND-KRONECKER-LOOKUP — per-tenant WHT spectrum = cheap global meta-awareness (a few coefficients); and the Walsh transform tensor-factorizes along HHTL (H_16^⊗n) → "exponential lookup over prefixed tables" for separable factors + +**Status:** TWO claims, graded separately (operator synthesis: "operationalize WHT picking any tenant over the standing wave sorted by any factor → meta-awareness; …use it for HHTL 16ⁿ exponential lookup over prefixed tables"). Theorem-checker applied. Grounded in `perturbation-sim::sketch::{fwht, walsh_pyramid_energy}`, `ndarray::simd::wht_f32`, bgz-tensor attention-as-table-lookup + HHTL cascade, OGAR "Bipolar-phase pyramid = Walsh-Hadamard". Capstone of the geometry-of-a-node arc. + +**Claim 1 — per-tenant WHT spectrum = global meta-awareness. [G as descriptor; framing as "awareness"].** `walsh_pyramid_energy` already gives Walsh energy per dyadic level + coarse fraction. 
So one FWHT of a tenant field → a **few-coefficient summary of all 16K rows**: coarse-dominant = smooth/clustered/predictable (low surprise), fine-spread = scattered (high surprise). For all tenants (hhtl/helix/energy/content/topic/angle) it's a small fixed "meta-fingerprint" panel, transform-once, reusable, **cheap** — and it is a **free-energy proxy** (spectral concentration = field predictability = the workspace's self-monitoring/awareness signal, per the active-inference loop). Sound and valuable. + +**Claim 2 — WHT for HHTL 16ⁿ "exponential lookup over prefixed tables". [G for the mechanism; H for "any factor"].** +- **[G]** The Walsh-Hadamard matrix **tensor-factorizes along the nibble hierarchy**: `H_{16ⁿ} = H₁₆ ⊗ H₁₆ ⊗ … (n)` (Kronecker; the FWHT butterfly, radix-16). The HHTL nibble tree IS that tensor structure (`E-PANCAKES-IS-RADIX-IS-HHTL`). +- **[G]** Therefore a Walsh-domain operation over a 16ᵏ prefix-subtree decomposes into **k stages of 16-point table ops** — the per-level 16-entry tables are the "prefixed tables," composed by the tensor product → **O(k·16) vs O(16ᵏ)** for a **separable** factor (the bgz-tensor "attention as table lookup" + HHTL cascade, seen as the Kronecker factorization). "Top gaussian preserved level-to-level" (canon Parseval) is the condition making the common factor separable. +- **[H] Caveat (do NOT overclaim):** the exponential saving holds only for factors **low-sequency / separable** in the Walsh basis; an arbitrary high-sequency factor still has to touch the leaves. And a full FWHT of the whole field is O(N log N), not sub-linear — the exponential win is the **per-level table composition over a prefix subtree**, not the full transform. 
+ +**The capstone — five threads are ONE structure (Walsh-Kronecker factorization along the HHTL nibble hierarchy):** + +| Thread | facet of the Kronecker/HHTL structure | +|---|---| +| `E-PANCAKES` (radix≡HHTL≡panCAKES) | the tensor **index** (n nibble levels) | +| `E-ORTHOGONAL-BUNDLE` (90°→WHT) | the tensor **transform** (H₁₆^⊗ⁿ) | +| `E-TENANT-ANGLE` CAM-PQ ADC | the tensor **tables** (prefixed 16-point distance tables) | +| Claim 1 (per-tenant spectrum) | the tensor **summary** (pyramid energy = meta-awareness) | +| Claim 2 (16ⁿ prefix lookup) | the tensor **composition** (k stages, not 16ᵏ) | + +Cross-refs: `E-ORTHOGONAL-BUNDLE-IS-WHT-READOUT`, `E-TENANT-ANGLE-RANK-IS-CAM-PQ-ADC`, `E-PANCAKES-IS-RADIX-IS-HHTL`, `E-CLAM-IS-THE-MANIFOLD-ENGINE`, `ndarray::simd::wht_f32`, `perturbation-sim::sketch`, bgz-tensor attention-as-lookup. + +--- + ## 2026-06-18 — E-ORTHOGONAL-BUNDLE-IS-WHT-READOUT — bundling a 90° (orthogonal) sweep is Parseval-recoverable in any basis; it is specifically a Walsh-Hadamard projection in the bipolar ±1 basis (this substrate's case) — the third ranking path: transform-once, read-many **Status:** FINDING (graded; operator intuition "bundle a 90° sweep → a Hadamard projection or something" — made precise). Grounded in `ndarray::simd::wht_f32`, `perturbation-sim::sketch::{fwht, walsh_pyramid_energy}`, `perturbation-sim::witness.rs` (Parseval-over-FWHT, "particle == wave"), OGAR canon "Bipolar-phase pyramid — Walsh-Hadamard on VSA". Theorem-checker applied; the "also Hadamard" is conditional, not universal. 
From f3e1f1d5724cfddf80335e3540a718282624d37f Mon Sep 17 00:00:00 2001 From: Claude Date: Fri, 19 Jun 2026 05:08:24 +0000 Subject: [PATCH 12/14] =?UTF-8?q?docs(epiphany):=20E-COARSE-QUANTIZER-IS-S?= =?UTF-8?q?CALE-FREE-ROUTER=20=E2=80=94=201024=20HHTL=20coarse=20fingerpri?= =?UTF-8?q?nts=20route=20in-RAM=20(IVF)=20AND=20cross-server=20(shard)=20w?= =?UTF-8?q?ith=20one=20lookup;=20corrected=20capacity=20(8MiB/8GiB=20at=20?= =?UTF-8?q?512B=20node)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Deployment-tier synthesis. Corrected arithmetic: canon 512B node → mailbox = 16384×512 = 8 MiB (not 2 MB; 2 MiB only at a 128B reduced node), 1024 prefixes = 8 GiB, coarse table = 512 KiB negligible. The scale-free insight: the same 1024 coarse fingerprints route at two scales with one lookup — in-RAM IVF probe (which cluster) AND cross-server shard route (which server). HHTL prefix = cluster key = IVF cell = shard key. "Delegate awareness to other servers" = replicate the 512KiB coarse table + route by prefix; the IVF coarse stage IS the shard router. Split: GUID key + routing uncompressed/transparent, 480B value slab compresses in Lance, Raft per region. Fences: fork-policy P0 (path is SurrealDB-on-TiKV not raw TiKV → STOP-and-ask); capacity-only, no throughput bench. 
Co-Authored-By: Claude Opus 4.8 Claude-Session: https://claude.ai/code/session_01CcpLeEC3XK8Eye53GKBVvi --- .claude/board/EPIPHANIES.md | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/.claude/board/EPIPHANIES.md b/.claude/board/EPIPHANIES.md index b31e52248..af007a31c 100644 --- a/.claude/board/EPIPHANIES.md +++ b/.claude/board/EPIPHANIES.md @@ -1,3 +1,23 @@ +## 2026-06-18 — E-COARSE-QUANTIZER-IS-SCALE-FREE-ROUTER — the 1024 HHTL coarse fingerprints route a query in-RAM (IVF probe) AND cross-server (shard route) with one lookup; the GUID-key substrate shards on the prefix, value-slab compresses in Lance, durability via SurrealDB-on-TiKV/Raft + +**Status:** FINDING (deployment-tier; operator capacity+distribution synthesis). Grounded in `E-TENANT-ANGLE-RANK-IS-CAM-PQ-ADC` (IVF coarse ≡ HHTL prefix), `E-GUID-IS-THE-GRAPH` (key = address), canon node = 512 B. Theorem-checker on the arithmetic. + +**Corrected capacity (canon node = 512 B = 4096 bit):** +- mailbox = 16384 × 512 B = **8 MiB** (operator's "2 MB" is 4× low at 512 B; it equals 2 MiB only at a **128 B reduced node** — pin which before sizing). +- 1024 prefixes × 8 MiB = **8 GiB** per 1024-prefix shard (matches the session's earlier 8 GB figure); + 1024 coarse fingerprints = **512 KiB** (negligible). +- ⇒ a 2 TB server holds ~256 prefix-shards at 8 GiB (256 × 1024 ≈ 256K mailboxes; ~1M only at the 128 B reduced node), the coarse table fits trivially in RAM on every node. + +**The scale-free insight:** the **same 1024-fingerprint coarse quantizer** routes at two scales with one lookup — +- **in-RAM:** IVF probe → which CLAM cluster (the local prune, `cakes_nearest`); +- **cross-server:** the prefix → which region/server holds that shard (the distributed route). +The HHTL prefix is simultaneously the **cluster key, the IVF cell, and the shard key**. So "delegate lookup-table awareness to other servers" = **replicate the 512 KiB coarse table** (it's tiny, gossip-cheap) and route by prefix. 
No new structure — the IVF coarse stage IS the shard router. + +**The deployment split:** GUID key + coarse routing stay **uncompressed/transparent** (the GUID-is-key invariant: addressability never costs a value decode); the **480 B value slab compresses in Lance** (columnar); **durability/consensus via Raft per region**. + +**Fences:** (1) **Fork policy P0** — TiKV has no AdaWorldAPI fork; the blessed path is **SurrealDB-on-TiKV** (SurrealDB ships a TiKV storage engine; ties to `lite-unified`/`ExecTarget::SurrealQl`), NOT a raw new TiKV dep → STOP-and-ask before introducing. (2) "In theory" — capacity arithmetic only, no throughput bench; the Lance-vs-surreal-kv value split needs measurement. Cross-refs: `E-TENANT-ANGLE-RANK-IS-CAM-PQ-ADC`, `E-WHT-META-AWARENESS-AND-KRONECKER-LOOKUP`, `E-GUID-IS-THE-GRAPH`, `lite-unified-surrealql-lance-v1`, canon node 512 B. + +--- + ## 2026-06-18 — E-WHT-META-AWARENESS-AND-KRONECKER-LOOKUP — per-tenant WHT spectrum = cheap global meta-awareness (a few coefficients); and the Walsh transform tensor-factorizes along HHTL (H_16^⊗n) → "exponential lookup over prefixed tables" for separable factors **Status:** TWO claims, graded separately (operator synthesis: "operationalize WHT picking any tenant over the standing wave sorted by any factor → meta-awareness; …use it for HHTL 16ⁿ exponential lookup over prefixed tables"). Theorem-checker applied. Grounded in `perturbation-sim::sketch::{fwht, walsh_pyramid_energy}`, `ndarray::simd::wht_f32`, bgz-tensor attention-as-table-lookup + HHTL cascade, OGAR "Bipolar-phase pyramid = Walsh-Hadamard". Capstone of the geometry-of-a-node arc. 
From 472a2acf7bfea977fc99d4fa37e550cfaaeee491 Mon Sep 17 00:00:00 2001 From: Claude Date: Fri, 19 Jun 2026 05:23:09 +0000 Subject: [PATCH 13/14] =?UTF-8?q?feat(graph):=20node=5Fdistance=20+=20Dist?= =?UTF-8?q?anceMeans=20=E2=80=94=20"edge=20distance=20over=20different=20m?= =?UTF-8?q?eans,=20GUID=20=3D=20node"?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Consolidates the distance facets into one typed surface (the task at hand): a node IS its GUID, and node↔node separation is computed by a selectable means anchored on the key. DistanceMeans names the dispatch surface; node_distance is the dispatcher. - PrefixDepth (landed, key-only, zero value decode): the CLAM/HHTL radix TREE-HOP metric (depth_a − cpd) + (depth_b − cpd) — a genuine metric (d(x,x)=0, symmetric, tree triangle inequality), NOT the earlier MAX−cpd which failed d(x,x)=0. Caught by the self-distance test. - Value-decode means (Hamming-plane / HelixAngular / PqAdc) named in the enum doc as the costed tier — wired on their own branch with their own cost gate, return None here until then (never silently in the zero-decode path). Tests: tree-hop metric (self=0, siblings=2, ancestor=1, cross-basin=4, symmetric, monotone) + None on unmaterialized path. 11/11 mailbox_scan green, fmt + clippy clean. 
Co-Authored-By: Claude Opus 4.8 Claude-Session: https://claude.ai/code/session_01CcpLeEC3XK8Eye53GKBVvi --- crates/lance-graph/src/graph/mailbox_scan.rs | 76 ++++++++++++++++++++ 1 file changed, 76 insertions(+) diff --git a/crates/lance-graph/src/graph/mailbox_scan.rs b/crates/lance-graph/src/graph/mailbox_scan.rs index c051fa1da..5bda1ad45 100644 --- a/crates/lance-graph/src/graph/mailbox_scan.rs +++ b/crates/lance-graph/src/graph/mailbox_scan.rs @@ -205,6 +205,55 @@ pub fn edge_slots_coarse( }) } +/// The **means** by which an edge/node distance is measured over the GUID-keyed +/// substrate — "edge distance over different means, while the GUID is the node" +/// (`E-GUID-IS-THE-GRAPH`). One node IS its GUID; the separation between two +/// nodes is computed by a *selectable* metric, all anchored on the key. The +/// key-resident means are **zero value decode**; the value means (named, not yet +/// wired) read the 480 B value slab and are a separate cost tier. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum DistanceMeans { + /// **CLAM/HHTL radix tree-hop distance** — `(depth_a − cpd) + (depth_b − cpd)` + /// where `cpd = common_prefix_depth(a, b)`: the steps up to the shared + /// ancestor and back down. A genuine metric (`d(x,x)=0`, symmetric, tree + /// triangle inequality). Key-only, zero value decode + /// (`E-PANCAKES-IS-RADIX-IS-HHTL`). + PrefixDepth, + // ── value-decode means (the costed tier — named here as the dispatch + // surface; wired on their own branch with their own cost gate, never + // mixed into the zero-decode facets; `E-TENANT-ANGLE-RANK-IS-CAM-PQ-ADC`, + // `E-HELIX-IS-EXACT-LOCATION`): ── + // Hamming(plane) — fingerprint popcount over a content/topic/angle plane; + // HelixAngular — Signed360 exact-orthogonal-location distance; + // PqAdc — CAM-PQ asymmetric distance (IVF probe + tables). +} + +/// Distance between two nodes (rows, each a GUID) under the chosen `means`. 
+/// +/// `PrefixDepth`: the radix tree-hop distance `(depth_a − cpd) + (depth_b − cpd)` +/// — `0` for the same leaf, growing as the two GUIDs' `NiblePath`s diverge nearer +/// the root (different basin = farthest). `None` if either row has no +/// materialized HHTL path (deferred-binding fallback). Key-only, **zero value +/// decode**. +/// +/// The value-decode means (`DistanceMeans` doc) return `None` here until wired — +/// they land on the costed branch, never silently in the zero-decode path. +pub fn node_distance( + view: &V, + a: usize, + b: usize, + means: DistanceMeans, +) -> Option { + match means { + DistanceMeans::PrefixDepth => { + let pa = view.hhtl_path_at(a)?; + let pb = view.hhtl_path_at(b)?; + let cpd = pa.common_prefix_depth(pb); + Some(u32::from((pa.depth() - cpd) + (pb.depth() - cpd))) + } + } +} + #[cfg(test)] mod tests { use super::*; @@ -409,6 +458,33 @@ mod tests { assert_eq!(depths, vec![3, 2, 2, 1, 0]); } + #[test] + fn node_distance_prefix_depth_is_the_tree_hop_metric() { + // rows: 0=1·2·3 (d3) 1=1·2·4 (d3) 2=1·2 (d2) 3=1·5 (d2) 4=9 (d1). + let soa = sample(); + let d = |a, b| node_distance(&soa, a, b, DistanceMeans::PrefixDepth); + // metric: d(x,x) = 0. + assert_eq!(d(0, 0), Some(0)); + // 1·2·3 vs 1·2·4: cpd 2 ⇒ (3−2)+(3−2) = 2 (siblings). + assert_eq!(d(0, 1), Some(2)); + // 1·2·3 vs 1·2 (its ancestor): cpd 2 ⇒ (3−2)+(2−2) = 1. + assert_eq!(d(0, 2), Some(1)); + // 1·2·3 vs 1·5: cpd 1 ⇒ (3−1)+(2−1) = 3. + assert_eq!(d(0, 3), Some(3)); + // 1·2·3 vs 9 (different basin): cpd 0 ⇒ (3−0)+(1−0) = 4 (farthest). + assert_eq!(d(0, 4), Some(4)); + // symmetric + monotone. 
+ assert_eq!(d(0, 4), d(4, 0)); + assert!(d(0, 1).unwrap() < d(0, 4).unwrap()); + } + + #[test] + fn node_distance_none_without_materialized_path() { + let mut soa = sample(); + soa.paths[1] = None; + assert_eq!(node_distance(&soa, 0, 1, DistanceMeans::PrefixDepth), None); + } + #[test] fn clam_cakes_skip_rows_with_no_materialized_path() { // A view with all-None hhtl paths (the deferred-binding default) yields From 2e6d03908ec93c1314f7823c92b2b5c1a8ab4979 Mon Sep 17 00:00:00 2001 From: Claude Date: Fri, 19 Jun 2026 05:54:15 +0000 Subject: [PATCH 14/14] =?UTF-8?q?fix(graph):=20clamp=20match=5Fnodes=5Fby?= =?UTF-8?q?=5Fclass=20to=20n=5Frows()=20=E2=80=94=20no=20phantom=20padding?= =?UTF-8?q?=20rows=20(codex=20P1=20#544)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The real MailboxSoA reports n_rows() == populated while class_id()/ entity_type() borrow the full zero-padded backing capacity. Iterating the raw slice surfaced phantom padding rows (MATCH class 0 hitting the zeroed tail, or stale padding after a logical shrink). Added .take(view.n_rows()) so the scan stops at the logical row count. Regression test: a 2-populated / 3-zero-padding fake — class 7 returns only [0,1], class 0 returns empty. 12/12 green, fmt+clippy clean. Co-Authored-By: Claude Opus 4.8 Claude-Session: https://claude.ai/code/session_01CcpLeEC3XK8Eye53GKBVvi --- crates/lance-graph/src/graph/mailbox_scan.rs | 35 +++++++++++++++++++- 1 file changed, 34 insertions(+), 1 deletion(-) diff --git a/crates/lance-graph/src/graph/mailbox_scan.rs b/crates/lance-graph/src/graph/mailbox_scan.rs index 5bda1ad45..31b90859e 100644 --- a/crates/lance-graph/src/graph/mailbox_scan.rs +++ b/crates/lance-graph/src/graph/mailbox_scan.rs @@ -68,10 +68,17 @@ pub struct NodeMatch { /// `entity_type` u16 slot — the Cognitive-RISC N1 class hook); the 480 B value /// slab (`energy` / `meta` / fingerprints) is never touched. 
This is the /// substrate-is-the-graph node-selection half of `Backend::MailboxSoa`. +/// +/// **Clamped to `n_rows()`** — the real `MailboxSoA` reports `n_rows() == +/// populated` while `class_id()`/`entity_type()` borrow the full backing +/// capacity (zero-padded). Iterating the raw slice would surface phantom padding +/// rows (e.g. `MATCH` class 0 hitting the zeroed tail, or stale padding after a +/// logical shrink); the scan must stop at the logical row count. pub fn match_nodes_by_class(view: &V, class_id: u16) -> Vec { let classes = view.class_id(); classes .iter() + .take(view.n_rows()) .enumerate() .filter_map(|(row, &c)| { (c == class_id).then_some(NodeMatch { @@ -270,6 +277,10 @@ mod tests { keyed_rows: Vec<(u64, usize)>, paths: Vec>, blocks: Vec>, + /// Logical populated rows; `None` ⇒ the full `class_ids` length. Set + /// smaller to model the real `MailboxSoA` (zero-padded capacity with + /// `n_rows() == populated < entity_type().len()`). + logical_n: Option, } impl MailboxSoaView for GuardedSoa { @@ -277,7 +288,7 @@ mod tests { 0 } fn n_rows(&self) -> usize { - self.class_ids.len() + self.logical_n.unwrap_or(self.class_ids.len()) } fn w_slot(&self) -> u8 { 0 @@ -345,9 +356,31 @@ mod tests { None, None, ], + logical_n: None, } } + #[test] + fn match_nodes_by_class_clamps_to_n_rows_ignoring_padding() { + // Model the real MailboxSoA: class_id() borrows the full capacity + // (zero-padded), but n_rows() reports only the populated prefix. + let mut soa = sample(); + soa.class_ids = vec![7, 7, 0, 0, 0]; // 2 populated, 3 zero-padding + soa.logical_n = Some(2); + // class 7 ⇒ only the 2 populated rows, never the padding. + let sevens: Vec = match_nodes_by_class(&soa, 7) + .iter() + .map(|m| m.row) + .collect(); + assert_eq!(sevens, vec![0, 1]); + // class 0 ⇒ EMPTY — the zeroed padding tail must NOT surface as phantom + // matches (the codex P1 regression). 
+ assert!( + match_nodes_by_class(&soa, 0).is_empty(), + "padding rows beyond n_rows() must not match class 0" + ); + } + #[test] fn match_nodes_by_class_routes_on_classid_only() { let soa = sample();