From f25f21cb9fa94aef6c70036456741a715328f33f Mon Sep 17 00:00:00 2001
From: Oleh Martsokha <o.martsokha@gmail.com>
Date: Sun, 14 Jun 2026 13:29:19 +0200
Subject: [PATCH 01/14] refactor(context): extract nvisy-context crate + wire
 engine enhancer pass

Lifts crate::context out of nvisy-core into a sibling nvisy-context crate so
the SDK base stays primitives-only for third-party recognizer authors. Adds
NerRecognizer::context_registry (mirroring PatternRegistry::context_registry)
and wires ContextEnhancer into DetectionPhase: build_for_request now returns
DetectionResources { recognizers, enhancer }, the enhancer runs in block-local
coordinates between recognizer dispatch and modality lifting, and the
substring path runs by default (Tokens artifact wiring follows when an
NlpEngine is plumbed in).

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
---
 Cargo.lock                                    | 15 +++++
 Cargo.toml                                    |  2 +
 crates/nvisy-context/Cargo.toml               | 41 +++++++++++++
 crates/nvisy-context/README.md                | 44 ++++++++++++++
 .../src}/declaration.rs                       |  0
 .../context => nvisy-context/src}/enhancer.rs | 44 ++++++++------
 crates/nvisy-context/src/lib.rs               | 15 +++++
 .../context => nvisy-context/src}/matcher.rs  |  0
 .../context => nvisy-context/src}/registry.rs | 11 ++++
 .../context => nvisy-context/src}/tokens.rs   |  4 +-
 crates/nvisy-core/src/context/mod.rs          | 41 -------------
 crates/nvisy-core/src/lib.rs                  |  1 -
 crates/nvisy-engine/Cargo.toml                |  1 +
 crates/nvisy-engine/src/core/context.rs       | 17 ++++++
 .../nvisy-engine/src/detection/config/mod.rs  | 59 ++++++++++++++-----
 crates/nvisy-engine/src/detection/document.rs |  5 +-
 crates/nvisy-engine/src/detection/mod.rs      |  4 +-
 .../src/detection/phases/detection.rs         | 28 ++++++---
 crates/nvisy-engine/src/detection/pipeline.rs |  5 +-
 crates/nvisy-ner/Cargo.toml                   |  1 +
 crates/nvisy-ner/src/nlp/engine.rs            |  4 +-
 crates/nvisy-ner/src/nlp/mod.rs               |  4 +-
 crates/nvisy-ner/src/recognition/config.rs    |  2 +-
 .../nvisy-ner/src/recognition/recognizer.rs   | 21 ++++++-
 crates/nvisy-pattern/Cargo.toml               |  1 +
 .../src/recognition/dictionary.rs             |  2 +-
 crates/nvisy-pattern/src/recognition/mod.rs   |  4 +-
 .../src/recognition/recognizer.rs             |  2 +-
 .../src/recognition/regex_rule.rs             |  2 +-
 .../nvisy-pattern/src/recognition/registry.rs |  6 +-
 .../nvisy-pattern/tests/enhancer_roundtrip.rs |  5 +-
 31 files changed, 287 insertions(+), 104 deletions(-)
 create mode 100644 crates/nvisy-context/Cargo.toml
 create mode 100644 crates/nvisy-context/README.md
 rename crates/{nvisy-core/src/context => nvisy-context/src}/declaration.rs (100%)
 rename crates/{nvisy-core/src/context => nvisy-context/src}/enhancer.rs (92%)
 create mode 100644 crates/nvisy-context/src/lib.rs
 rename crates/{nvisy-core/src/context => nvisy-context/src}/matcher.rs (100%)
 rename crates/{nvisy-core/src/context => nvisy-context/src}/registry.rs (89%)
 rename crates/{nvisy-core/src/context => nvisy-context/src}/tokens.rs (98%)
 delete mode 100644 crates/nvisy-core/src/context/mod.rs

diff --git a/Cargo.lock b/Cargo.lock
index d5056aea..a9b28db5 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -2891,6 +2891,18 @@ dependencies = [
  "uuid",
 ]
 
+[[package]]
+name = "nvisy-context"
+version = "0.1.0"
+dependencies = [
+ "derive_builder",
+ "hipstr",
+ "nvisy-core",
+ "schemars",
+ "serde",
+ "thiserror",
+]
+
 [[package]]
 name = "nvisy-core"
 version = "0.1.0"
@@ -2928,6 +2940,7 @@ dependencies = [
  "humantime-serde",
  "jiff",
  "nvisy-codec",
+ "nvisy-context",
  "nvisy-core",
  "nvisy-engine",
  "nvisy-llm",
@@ -2999,6 +3012,7 @@ dependencies = [
  "bentoml",
  "derive_builder",
  "lingua",
+ "nvisy-context",
  "nvisy-core",
  "serde",
  "tokio",
@@ -3030,6 +3044,7 @@ dependencies = [
  "async-trait",
  "csv",
  "derive_builder",
+ "nvisy-context",
  "nvisy-core",
  "regex",
  "schemars",
diff --git a/Cargo.toml b/Cargo.toml
index 9a236f0f..0c74b1d1 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -5,6 +5,7 @@ resolver = "3"
 members = [
     "./crates/nvisy-cli",
     "./crates/nvisy-codec",
+    "./crates/nvisy-context",
     "./crates/nvisy-core",
     "./crates/nvisy-engine",
     "./crates/nvisy-fake",
@@ -37,6 +38,7 @@ documentation = "https://docs.rs/nvisy-runtime"
 
 # Internal crates
 nvisy-codec = { path = "./crates/nvisy-codec", version = "0.1.0", default-features = false }
+nvisy-context = { path = "./crates/nvisy-context", version = "0.1.0" }
 nvisy-core = { path = "./crates/nvisy-core", version = "0.1.0" }
 nvisy-engine = { path = "./crates/nvisy-engine", version = "0.1.0" }
 nvisy-fake = { path = "./crates/nvisy-fake", version = "0.1.0" }
diff --git a/crates/nvisy-context/Cargo.toml b/crates/nvisy-context/Cargo.toml
new file mode 100644
index 00000000..8c53f2d1
--- /dev/null
+++ b/crates/nvisy-context/Cargo.toml
@@ -0,0 +1,41 @@
+# https://doc.rust-lang.org/cargo/reference/manifest.html
+
+[package]
+name = "nvisy-context"
+description = "Post-recognition keyword-boost enhancer for Nvisy entities"
+keywords = ["nvisy", "context", "enhancer", "pii"]
+categories = ["text-processing"]
+readme = "README.md"
+
+version = { workspace = true }
+rust-version = { workspace = true }
+edition = { workspace = true }
+license = { workspace = true }
+publish = { workspace = true }
+
+authors = { workspace = true }
+repository = { workspace = true }
+homepage = { workspace = true }
+documentation = { workspace = true }
+
+[package.metadata.docs.rs]
+all-features = true
+rustdoc-args = ["--cfg", "docsrs"]
+
+[dependencies]
+# Internal crates
+nvisy-core = { workspace = true, features = [] }
+
+# Serialization
+serde = { workspace = true, features = [] }
+schemars = { workspace = true, features = [] }
+
+# Derive macros and error handling
+derive_builder = { workspace = true, features = [] }
+thiserror = { workspace = true, features = [] }
+
+# Primitive datatypes (cheap-clone surface form on `Token`)
+hipstr = { workspace = true, features = [] }
+
+[dev-dependencies]
+nvisy-core = { workspace = true, features = ["test-utils"] }
diff --git a/crates/nvisy-context/README.md b/crates/nvisy-context/README.md
new file mode 100644
index 00000000..e8653b7a
--- /dev/null
+++ b/crates/nvisy-context/README.md
@@ -0,0 +1,44 @@
+# nvisy-context
+
+[![Build](https://img.shields.io/github/actions/workflow/status/nvisycom/runtime/build.yml?branch=main&label=build%20%26%20test&style=flat-square)](https://github.com/nvisycom/runtime/actions/workflows/build.yml)
+
+Post-recognition keyword-boost enhancer for the Nvisy runtime.
+
+## Overview
+
+Mirrors Presidio's `ContextAwareEnhancer` pattern. Every recognizer
+that wants score boosting declares a `Context` (a list of keywords
+plus optional window / boost overrides), registered against the
+recognizer's name. After recognition, `ContextEnhancer` walks each
+detected `Entity<Text>`, looks the recognizer name up in the
+`ContextRegistry`, scans the surrounding window for any declared
+keyword via the configured `KeywordMatcher`, and bumps the entity's
+confidence on a hit.
+
+`Tokens` is the optional NLP artifact (surface + lemma per token)
+that a tokenizing NLP engine stashes on `RecognizerInput.artifacts`
+so `LemmaMatcher` can match morphological variants (`running` →
+`run`). The `SubstringMatcher` fallback runs whenever no `Tokens`
+artifact is present.
+
+Its only internal dependency is `nvisy-core` (for `Entity<Text>`,
+`TrailStep`, `Confidence`, and `Artifacts`); recognizer crates and
+the engine each depend on `nvisy-context` to participate.
+
+## Documentation
+
+See [`docs/`](../../docs/) for architecture, security, and API documentation.
+
+## Changelog
+
+See [CHANGELOG.md](../../CHANGELOG.md) for release notes and version history.
+
+## License
+
+Apache 2.0 License, see [LICENSE.txt](../../LICENSE.txt)
+
+## Support
+
+- **Documentation**: [docs.nvisy.com](https://docs.nvisy.com)
+- **Issues**: [GitHub Issues](https://github.com/nvisycom/runtime/issues)
+- **Email**: [support@nvisy.com](mailto:support@nvisy.com)
diff --git a/crates/nvisy-core/src/context/declaration.rs b/crates/nvisy-context/src/declaration.rs
similarity index 100%
rename from crates/nvisy-core/src/context/declaration.rs
rename to crates/nvisy-context/src/declaration.rs
diff --git a/crates/nvisy-core/src/context/enhancer.rs b/crates/nvisy-context/src/enhancer.rs
similarity index 92%
rename from crates/nvisy-core/src/context/enhancer.rs
rename to crates/nvisy-context/src/enhancer.rs
index 38ff3794..ab406d2d 100644
--- a/crates/nvisy-core/src/context/enhancer.rs
+++ b/crates/nvisy-context/src/enhancer.rs
@@ -2,14 +2,14 @@
 //! any [`Entity<Text>`] regardless of which recognizer produced it.
 
 use derive_builder::{Builder, UninitializedFieldError};
-use type_map::concurrent::TypeMap;
+use nvisy_core::entity::{Entity, TrailStep};
+use nvisy_core::extraction::Artifacts;
+use nvisy_core::modality::Text;
+use nvisy_core::primitive::Confidence;
 
 use super::Tokens;
 use super::matcher::{KeywordMatcher, SubstringMatcher};
 use super::registry::ContextRegistry;
-use crate::entity::{Entity, TrailStep};
-use crate::modality::Text;
-use crate::primitive::Confidence;
 
 /// Post-recognition enhancer that boosts entity confidence when
 /// keywords declared by the source recognizer appear near the match.
@@ -81,6 +81,14 @@ impl ContextEnhancer {
         ContextEnhancerBuilder::default()
     }
 
+    /// Borrow the underlying registry. Useful for diagnostics and
+    /// for engine code that wants to short-circuit when there are
+    /// no entries to boost against.
+    #[must_use]
+    pub fn registry(&self) -> &ContextRegistry {
+        &self.registry
+    }
+
     /// Apply context-keyword boosting to `entities` in place.
     ///
     /// For each entity, looks at its first recognition step's
@@ -96,14 +104,14 @@ impl ContextEnhancer {
     /// declared context has an empty keyword list) pass through
     /// unchanged.
     ///
-    /// [`Refinement`]: crate::entity::TrailStepKind::Refinement
-    pub fn enhance(&self, entities: &mut [Entity<Text>], text: &str, artifacts: &TypeMap) {
+    /// [`Refinement`]: nvisy_core::entity::TrailStepKind::Refinement
+    pub fn enhance(&self, entities: &mut [Entity<Text>], text: &str, artifacts: &Artifacts) {
         for entity in entities.iter_mut() {
             self.enhance_one(entity, text, artifacts);
         }
     }
 
-    fn enhance_one(&self, entity: &mut Entity<Text>, text: &str, artifacts: &TypeMap) {
+    fn enhance_one(&self, entity: &mut Entity<Text>, text: &str, artifacts: &Artifacts) {
         let Some(name) = entity
             .trail
             .first()
@@ -216,15 +224,15 @@ impl From<UninitializedFieldError> for ContextEnhancerBuilderError {
 
 #[cfg(test)]
 mod tests {
-    use type_map::concurrent::TypeMap;
-
-    use super::*;
-    use crate::context::Context;
-    use crate::entity::{
+    use nvisy_core::entity::{
         EntityLabelRef, ModelProvenance, PatternProvenance, TrailProvenance, TrailStepKind,
         builtins,
     };
-    use crate::modality::{Text, TextLocation};
+    use nvisy_core::extraction::Artifacts;
+    use nvisy_core::modality::{Text, TextLocation};
+
+    use super::*;
+    use crate::Context;
 
     fn pattern_entity(name: &str, span: std::ops::Range<usize>) -> Entity<Text> {
         let confidence = Confidence::new(0.6).unwrap();
@@ -284,7 +292,7 @@ mod tests {
         let text = "Your SSN: 123-45-6789";
         let mut entities = vec![pattern_entity("ssn", 10..21)];
         let before = entities[0].confidence.get();
-        enhancer.enhance(&mut entities, text, &TypeMap::new());
+        enhancer.enhance(&mut entities, text, &Artifacts::new());
         assert!(entities[0].confidence.get() > before);
         assert!(
             entities[0]
@@ -308,7 +316,7 @@ mod tests {
         let text = "Mr. Smith is named in the report.";
         let mut entities = vec![model_entity("gliner", 4..9)];
         let before = entities[0].confidence.get();
-        enhancer.enhance(&mut entities, text, &TypeMap::new());
+        enhancer.enhance(&mut entities, text, &Artifacts::new());
         assert!(entities[0].confidence.get() > before);
         let TrailProvenance::Model(prov) = &entities[0].trail[0].provenance else {
             panic!("expected model provenance");
@@ -323,7 +331,7 @@ mod tests {
         let text = "Your SSN: 123-45-6789";
         let mut entities = vec![pattern_entity("ssn", 10..21)];
         let before = entities[0].confidence.get();
-        enhancer.enhance(&mut entities, text, &TypeMap::new());
+        enhancer.enhance(&mut entities, text, &Artifacts::new());
         assert_eq!(entities[0].confidence.get(), before);
     }
 
@@ -335,7 +343,7 @@ mod tests {
         let text = "far_keyword                            XYZ here";
         let mut entities = vec![pattern_entity("far", 39..42)];
         let before = entities[0].confidence.get();
-        enhancer.enhance(&mut entities, text, &TypeMap::new());
+        enhancer.enhance(&mut entities, text, &Artifacts::new());
         assert_eq!(entities[0].confidence.get(), before);
     }
 
@@ -349,7 +357,7 @@ mod tests {
         // Push base confidence to 0.95
         entity.confidence = Confidence::new(0.95).unwrap();
         let mut entities = vec![entity];
-        enhancer.enhance(&mut entities, text, &TypeMap::new());
+        enhancer.enhance(&mut entities, text, &Artifacts::new());
         assert!((entities[0].confidence.get() - 1.0).abs() < f64::EPSILON);
     }
 }
diff --git a/crates/nvisy-context/src/lib.rs b/crates/nvisy-context/src/lib.rs
new file mode 100644
index 00000000..2004d7c6
--- /dev/null
+++ b/crates/nvisy-context/src/lib.rs
@@ -0,0 +1,15 @@
+#![forbid(unsafe_code)]
+#![cfg_attr(docsrs, feature(doc_cfg))]
+#![doc = include_str!("../README.md")]
+
+mod declaration;
+mod enhancer;
+mod matcher;
+mod registry;
+mod tokens;
+
+pub use self::declaration::Context;
+pub use self::enhancer::{ContextEnhancer, ContextEnhancerBuilder, ContextEnhancerBuilderError};
+pub use self::matcher::{KeywordMatcher, LemmaMatcher, SubstringMatcher};
+pub use self::registry::ContextRegistry;
+pub use self::tokens::{Token, Tokens};
diff --git a/crates/nvisy-core/src/context/matcher.rs b/crates/nvisy-context/src/matcher.rs
similarity index 100%
rename from crates/nvisy-core/src/context/matcher.rs
rename to crates/nvisy-context/src/matcher.rs
diff --git a/crates/nvisy-core/src/context/registry.rs b/crates/nvisy-context/src/registry.rs
similarity index 89%
rename from crates/nvisy-core/src/context/registry.rs
rename to crates/nvisy-context/src/registry.rs
index ef301338..d0043bc9 100644
--- a/crates/nvisy-core/src/context/registry.rs
+++ b/crates/nvisy-context/src/registry.rs
@@ -68,6 +68,17 @@ impl ContextRegistry {
         self
     }
 
+    /// Merge another registry into this one. On duplicate names the
+    /// entry from `other` replaces the existing one. Used to combine
+    /// per-source registries (e.g. pattern + NER) into one enhancer input.
+    #[must_use]
+    pub fn merge(mut self, other: ContextRegistry) -> Self {
+        for (name, context) in other.entries {
+            self.entries.insert(name, context);
+        }
+        self
+    }
+
     /// Look up the [`Context`] for `name`. Returns `None` when the
     /// name was never registered or when the registered context
     /// had an empty keyword list (which is treated as "not
diff --git a/crates/nvisy-core/src/context/tokens.rs b/crates/nvisy-context/src/tokens.rs
similarity index 98%
rename from crates/nvisy-core/src/context/tokens.rs
rename to crates/nvisy-context/src/tokens.rs
index 55cb21e0..24181797 100644
--- a/crates/nvisy-core/src/context/tokens.rs
+++ b/crates/nvisy-context/src/tokens.rs
@@ -15,7 +15,7 @@
 //!
 //! [`around`]: Tokens::around
 //! [`lemmas_in`]: Tokens::lemmas_in
-//! [`Entity::location`]: crate::entity::Entity::location
+//! [`Entity::location`]: nvisy_core::entity::Entity::location
 //!
 //! Tokens live next to the [`ContextEnhancer`] because that's the
 //! only consumer: the enhancer reads them off
@@ -102,7 +102,7 @@ impl Token {
 /// The owning token sequence carried by a
 /// [`RecognizerInput::artifacts`] bundle.
 ///
-/// [`RecognizerInput::artifacts`]: crate::recognition::RecognizerInput::artifacts
+/// [`RecognizerInput::artifacts`]: nvisy_core::recognition::RecognizerInput::artifacts
 ///
 /// Tokens are sorted by `offset.start` (producers should emit them
 /// in order; consumer-side code assumes this). The collection
diff --git a/crates/nvisy-core/src/context/mod.rs b/crates/nvisy-core/src/context/mod.rs
deleted file mode 100644
index bab43af9..00000000
--- a/crates/nvisy-core/src/context/mod.rs
+++ /dev/null
@@ -1,41 +0,0 @@
-//! Post-recognition keyword-boost enhancement, shared across every
-//! [`EntityRecognizer<Text>`].
-//!
-//! The enhancer takes a slice of detected entities plus the source
-//! text and the shared `RecognizerInput::artifacts` `TypeMap`, and for each
-//! entity:
-//!
-//! 1. Pulls the source recognizer's name from the entity's first
-//!    `TrailStep` provenance.
-//! 2. Looks the name up in a [`ContextRegistry`] to find the
-//!    declared keyword [`Context`].
-//! 3. Walks the surrounding window (token-based when `Tokens` is
-//!    present in the artifact map, substring-based otherwise) and
-//!    asks the configured [`KeywordMatcher`] whether any keyword
-//!    fired.
-//! 4. Applies the configured boost (or the per-entity override),
-//!    capped at `1.0`, and appends a `Refinement` step to the
-//!    trail.
-//!
-//! The registry shape — `name → Context` — is the same pattern
-//! Presidio uses: each recognizer (or each rule within a
-//! recognizer) registers a *source name* and a keyword list, and
-//! the enhancer dispatches on the name carried in the entity's
-//! provenance. Per-rule contexts for patterns (`Regex.context`,
-//! `Dictionary.context` in `nvisy-pattern`) and per-recognizer
-//! contexts for NER (`NerRecognizer.default_context` in
-//! `nvisy-ner`) plug into the same registry.
-//!
-//! [`EntityRecognizer<Text>`]: crate::recognition::EntityRecognizer
-
-mod declaration;
-mod enhancer;
-mod matcher;
-mod registry;
-mod tokens;
-
-pub use self::declaration::Context;
-pub use self::enhancer::{ContextEnhancer, ContextEnhancerBuilder, ContextEnhancerBuilderError};
-pub use self::matcher::{KeywordMatcher, LemmaMatcher, SubstringMatcher};
-pub use self::registry::ContextRegistry;
-pub use self::tokens::{Token, Tokens};
diff --git a/crates/nvisy-core/src/lib.rs b/crates/nvisy-core/src/lib.rs
index f32ebaf5..ea75f4cc 100644
--- a/crates/nvisy-core/src/lib.rs
+++ b/crates/nvisy-core/src/lib.rs
@@ -2,7 +2,6 @@
 #![cfg_attr(docsrs, feature(doc_cfg))]
 #![doc = include_str!("../README.md")]
 
-pub mod context;
 pub mod entity;
 pub mod extraction;
 pub mod health;
diff --git a/crates/nvisy-engine/Cargo.toml b/crates/nvisy-engine/Cargo.toml
index ff30c194..5f516ed7 100644
--- a/crates/nvisy-engine/Cargo.toml
+++ b/crates/nvisy-engine/Cargo.toml
@@ -39,6 +39,7 @@ rustdoc-args = ["--cfg", "docsrs"]
 
 [dependencies]
 # Internal crates
+nvisy-context = { workspace = true, features = [] }
 nvisy-core = { workspace = true, features = [] }
 nvisy-toolkit = { workspace = true, features = [] }
 nvisy-codec = { workspace = true, features = ["text"] }
diff --git a/crates/nvisy-engine/src/core/context.rs b/crates/nvisy-engine/src/core/context.rs
index 3dfdbccd..865e8102 100644
--- a/crates/nvisy-engine/src/core/context.rs
+++ b/crates/nvisy-engine/src/core/context.rs
@@ -17,6 +17,7 @@
 use std::num::NonZeroUsize;
 use std::sync::Arc;
 
+use nvisy_context::ContextEnhancer;
 use nvisy_toolkit::detection::RecognizerRegistry;
 use nvisy_toolkit::extraction::ExtractorRegistry;
 use tokio_util::sync::CancellationToken;
@@ -51,6 +52,11 @@ pub struct DetectionContext {
     /// engine-side detection-config template plus the request's
     /// label catalog.
     pub(crate) recognizer_registry: Arc<RecognizerRegistry>,
+    /// Post-recognition keyword-boost enhancer — built alongside
+    /// `recognizer_registry` from the same recognizer set. Shared
+    /// behind `Arc` so per-document phases borrow it without
+    /// cloning the embedded registry / matcher.
+    pub(crate) context_enhancer: Arc<ContextEnhancer>,
     pub(crate) concurrency: Option<NonZeroUsize>,
 }
 
@@ -61,6 +67,7 @@ pub struct DetectionContext {
 pub(crate) struct DetectionEngines {
     pub extraction_engine: ExtractorRegistry,
     pub recognizer_registry: Arc<RecognizerRegistry>,
+    pub context_enhancer: Arc<ContextEnhancer>,
 }
 
 impl DetectionContext {
@@ -75,12 +82,14 @@ impl DetectionContext {
         let DetectionEngines {
             extraction_engine,
             recognizer_registry,
+            context_enhancer,
         } = engines;
         Self {
             cancel,
             shared,
             extraction_engine,
             recognizer_registry,
+            context_enhancer,
             concurrency,
         }
     }
@@ -99,6 +108,14 @@ impl DetectionContext {
     pub(crate) fn recognizer_registry(&self) -> &Arc<RecognizerRegistry> {
         &self.recognizer_registry
     }
+
+    /// Per-request context-keyword enhancer borrowed by
+    /// [`DetectionPhase`].
+    ///
+    /// [`DetectionPhase`]: crate::detection::phases::detection::DetectionPhase
+    pub(crate) fn context_enhancer(&self) -> &Arc<ContextEnhancer> {
+        &self.context_enhancer
+    }
 }
 
 impl PhaseContext for DetectionContext {
diff --git a/crates/nvisy-engine/src/detection/config/mod.rs b/crates/nvisy-engine/src/detection/config/mod.rs
index a5288936..f9c77164 100644
--- a/crates/nvisy-engine/src/detection/config/mod.rs
+++ b/crates/nvisy-engine/src/detection/config/mod.rs
@@ -17,6 +17,7 @@ mod pattern;
 
 #[cfg(not(feature = "bento"))]
 use nvisy_core::Error;
+use nvisy_context::{ContextEnhancer, ContextRegistry};
 use nvisy_core::Result;
 use nvisy_core::entity::EntityLabelCatalog;
 use nvisy_core::modality::Text;
@@ -34,6 +35,26 @@ pub use self::pattern::PatternDetection;
 /// provenance on emitted entities).
 const NER_RECOGNIZER_NAME: &str = "ner";
 
+/// Engine-wide defaults for the post-recognition [`ContextEnhancer`],
+/// modelled on Presidio's (`context_similarity_factor = 0.35`;
+/// `context_prefix_count = 5` words, approximated here as 50 bytes).
+const ENHANCER_DEFAULT_WINDOW: usize = 50;
+const ENHANCER_DEFAULT_BOOST: f64 = 0.35;
+
+/// Bundle returned by [`DetectionConfig::build_for_request`]:
+/// the per-request recognizer registry plus the matching
+/// [`ContextEnhancer`] built from each recognizer's declared
+/// context keywords.
+pub struct DetectionResources {
+    /// Recognizers selected for this request.
+    pub recognizers: RecognizerRegistry,
+    /// Post-recognition keyword-boost enhancer for `Text`
+    /// entities. Always present; carries an empty registry when
+    /// no recognizer declared context keywords (cheap to skip
+    /// inside [`ContextEnhancer::enhance`]).
+    pub enhancer: ContextEnhancer,
+}
+
 /// Configuration for the [`RecognizerRegistry`].
 ///
 /// Each field maps to a `[detection.*]` section in `Nvisy.toml`.
@@ -71,13 +92,15 @@ impl DetectionConfig {
     /// Returns the first construction error encountered — pattern
     /// compile failure, NER backend init failure, or a
     /// config-selected backend whose feature wasn't compiled in.
-    pub fn build_for_request(&self, catalog: &EntityLabelCatalog) -> Result<RecognizerRegistry> {
+    pub fn build_for_request(&self, catalog: &EntityLabelCatalog) -> Result<DetectionResources> {
         let mut reg = RecognizerRegistry::new();
+        let mut context_registry = ContextRegistry::new();
 
         let pattern_cfg = self.pattern.clone().unwrap_or_default();
         if pattern_cfg.enabled {
             let pattern_registry = PatternRegistry::builtin().filter_by_catalog(catalog);
             if !pattern_registry.is_empty() {
+                context_registry = context_registry.merge(pattern_registry.context_registry());
                 let recognizer = PatternRecognizer::builder()
                     .with_registry(pattern_registry)
                     .build()?;
@@ -87,25 +110,21 @@ impl DetectionConfig {
 
         if let Some(ner_cfg) = self.ner.as_ref().filter(|c| c.enabled) {
             let supported_labels = catalog.iter().map(|l| l.label_ref()).collect::<Vec<_>>();
-            reg = match &ner_cfg.backend {
-                NerBackend::Noop => {
-                    let recognizer = NerRecognizer::builder()
-                        .with_name(NER_RECOGNIZER_NAME)
-                        .with_engine(NoopBackend)
-                        .with_supported_labels(supported_labels)
-                        .build()?;
-                    reg.with_recognizer::<Text>(recognizer)
-                }
+            let recognizer = match &ner_cfg.backend {
+                NerBackend::Noop => NerRecognizer::builder()
+                    .with_name(NER_RECOGNIZER_NAME)
+                    .with_engine(NoopBackend)
+                    .with_supported_labels(supported_labels)
+                    .build()?,
 
                 #[cfg(feature = "bento")]
                 NerBackend::Bento { base_url } => {
                     let backend = BentoBackend::new(BentoParams::new(base_url.clone()))?;
-                    let recognizer = NerRecognizer::builder()
+                    NerRecognizer::builder()
                         .with_name(NER_RECOGNIZER_NAME)
                         .with_engine(backend)
                         .with_supported_labels(supported_labels)
-                        .build()?;
-                    reg.with_recognizer::<Text>(recognizer)
+                        .build()?
                 }
 
                 #[cfg(not(feature = "bento"))]
@@ -116,8 +135,20 @@ impl DetectionConfig {
                     ));
                 }
             };
+            context_registry = context_registry.merge(recognizer.context_registry());
+            reg = reg.with_recognizer::<Text>(recognizer);
         }
 
-        Ok(reg)
+        let enhancer = ContextEnhancer::builder()
+            .with_registry(context_registry)
+            .with_default_window(ENHANCER_DEFAULT_WINDOW)
+            .with_default_boost(ENHANCER_DEFAULT_BOOST)
+            .build()
+            .expect("enhancer fields (window, boost, registry) all set");
+
+        Ok(DetectionResources {
+            recognizers: reg,
+            enhancer,
+        })
     }
 }
diff --git a/crates/nvisy-engine/src/detection/document.rs b/crates/nvisy-engine/src/detection/document.rs
index 379420e0..378906b8 100644
--- a/crates/nvisy-engine/src/detection/document.rs
+++ b/crates/nvisy-engine/src/detection/document.rs
@@ -27,7 +27,10 @@ impl DetectionDocumentPipeline {
     pub(super) fn from_context(ctx: &DetectionContext) -> Self {
         Self {
             extraction: ExtractionPhase::new(ctx.extraction_engine().clone()),
-            detection: DetectionPhase::new(ctx.recognizer_registry().clone()),
+            detection: DetectionPhase::new(
+                ctx.recognizer_registry().clone(),
+                ctx.context_enhancer().clone(),
+            ),
             deduplication: DeduplicationPhase::new(),
         }
     }
diff --git a/crates/nvisy-engine/src/detection/mod.rs b/crates/nvisy-engine/src/detection/mod.rs
index 31e50884..b0c2e6d0 100644
--- a/crates/nvisy-engine/src/detection/mod.rs
+++ b/crates/nvisy-engine/src/detection/mod.rs
@@ -28,7 +28,9 @@ mod result;
 mod state;
 mod status;
 
-pub use self::config::{DetectionConfig, NerBackend, NerDetection, PatternDetection};
+pub use self::config::{
+    DetectionConfig, DetectionResources, NerBackend, NerDetection, PatternDetection,
+};
 pub use self::engine::DetectionEngine;
 pub use self::extraction::ExtractionConfig;
 #[cfg(feature = "image")]
diff --git a/crates/nvisy-engine/src/detection/phases/detection.rs b/crates/nvisy-engine/src/detection/phases/detection.rs
index b86bdb8c..ef60bf51 100644
--- a/crates/nvisy-engine/src/detection/phases/detection.rs
+++ b/crates/nvisy-engine/src/detection/phases/detection.rs
@@ -11,8 +11,10 @@
 
 use std::sync::Arc;
 
+use nvisy_context::ContextEnhancer;
 use nvisy_core::Result;
 use nvisy_core::entity::Entity;
+use nvisy_core::extraction::Artifacts;
 use nvisy_core::modality::{
     Audio, AudioLocation, Image, ImageLocation, Overlap, Tabular, TabularLocation, Text, TextData,
     TextLocation,
@@ -34,18 +36,21 @@ const TARGET: &str = "nvisy_engine::detection";
 ///
 /// Holds an `Arc<RecognizerRegistry>` so the registry is shared
 /// cheaply across per-document phases without cloning the
-/// underlying recognizer lists.
+/// underlying recognizer lists, plus an `Arc<ContextEnhancer>` for
+/// the post-recognition keyword-boost pass.
 ///
 /// [`EntityRecord`]: crate::document::provenance::EntityRecord
 pub struct DetectionPhase {
     registry: Arc<RecognizerRegistry>,
+    enhancer: Arc<ContextEnhancer>,
 }
 
 impl DetectionPhase {
-    /// Build the phase from the shared recognizer registry. Called
-    /// once per pipeline by the pipeline orchestrator.
-    pub fn new(registry: Arc<RecognizerRegistry>) -> Self {
-        Self { registry }
+    /// Build the phase from the shared recognizer registry and
+    /// matching context enhancer. Called once per pipeline by the
+    /// pipeline orchestrator.
+    pub fn new(registry: Arc<RecognizerRegistry>, enhancer: Arc<ContextEnhancer>) -> Self {
+        Self { registry, enhancer }
     }
 
     pub(crate) async fn apply_text(
@@ -84,7 +89,7 @@ impl DetectionPhase {
         let span = tracing::info_span!(target: TARGET, "phase", name = "detection.image");
         let run_id = ctx.shared().run_id;
         async move {
-            detect_text_blocks(&self.registry, &mut tree.root, run_id).await?;
+            detect_text_blocks(&self.registry, &self.enhancer, &mut tree.root, run_id).await?;
             detect_image_chunks(
                 &self.registry,
                 &mut tree.root,
@@ -111,7 +116,7 @@ impl DetectionPhase {
         let span = tracing::info_span!(target: TARGET, "phase", name = "detection.text_only");
         let run_id = ctx.shared().run_id;
         async move {
-            detect_text_blocks(&self.registry, doc, run_id).await?;
+            detect_text_blocks(&self.registry, &self.enhancer, doc, run_id).await?;
             Ok(())
         }
         .instrument(span)
@@ -123,6 +128,7 @@ impl DetectionPhase {
 /// text via [`ModalityBlock::scan_text`] (today: every modality).
 async fn detect_text_blocks<M>(
     registry: &RecognizerRegistry,
+    enhancer: &ContextEnhancer,
     doc: &mut Document<M>,
     run_id: uuid::Uuid,
 ) -> Result<()>
@@ -149,7 +155,13 @@ where
         let mut input = RecognizerInput::new(TextData::new(text.to_owned()));
         input.correlation_id = Some(run_id);
 
-        let detected = registry.run::<Text>(input).await?;
+        let mut detected = registry.run::<Text>(input).await?;
+        // Apply context-keyword boosting in block-local coordinates,
+        // before lifting to modality-absolute locations. The shared
+        // NLP-pass producer hasn't been wired into the detection
+        // pipeline yet, so we pass an empty `Artifacts` — the
+        // enhancer's substring path runs without it.
+        enhancer.enhance(&mut detected, text, &Artifacts::new());
         for entity in detected {
             let Some(location) =
                 M::lift_from_block(&block.spans, entity.location.start, entity.location.end)
diff --git a/crates/nvisy-engine/src/detection/pipeline.rs b/crates/nvisy-engine/src/detection/pipeline.rs
index 0e6349b8..52b7fcff 100644
--- a/crates/nvisy-engine/src/detection/pipeline.rs
+++ b/crates/nvisy-engine/src/detection/pipeline.rs
@@ -134,12 +134,12 @@ impl DetectionPipeline {
     ) -> Result<(Vec<AnyAudit>, u64, DetectionStatus), Error> {
         let actor_id = prepared.actor_id;
 
-        let recognizer_registry = match self
+        let (recognizer_registry, context_enhancer) = match self
             .state
             .detection_config
             .build_for_request(&prepared.catalog)
         {
-            Ok(r) => Arc::new(r),
+            Ok(r) => (Arc::new(r.recognizers), Arc::new(r.enhancer)),
             Err(e) => {
                 self.detections.fail(self.detection_id, e.to_string()).await;
                 return Err(e);
@@ -163,6 +163,7 @@ impl DetectionPipeline {
         let engines = DetectionEngines {
             extraction_engine: (*self.state.extraction_engine).clone(),
             recognizer_registry,
+            context_enhancer,
         };
         let concurrency = self.base_config.effective_concurrency();
         let ctx = DetectionContext::new(cancel, Arc::new(shared_data), engines, concurrency);
diff --git a/crates/nvisy-ner/Cargo.toml b/crates/nvisy-ner/Cargo.toml
index c802df26..bad8fe13 100644
--- a/crates/nvisy-ner/Cargo.toml
+++ b/crates/nvisy-ner/Cargo.toml
@@ -32,6 +32,7 @@ rustdoc-args = ["--cfg", "docsrs"]
 
 [dependencies]
 # Internal crates
+nvisy-context = { workspace = true, features = [] }
 nvisy-core = { workspace = true, features = [] }
 
 # Async trait sugar
diff --git a/crates/nvisy-ner/src/nlp/engine.rs b/crates/nvisy-ner/src/nlp/engine.rs
index 3924c3cc..2ffa1779 100644
--- a/crates/nvisy-ner/src/nlp/engine.rs
+++ b/crates/nvisy-ner/src/nlp/engine.rs
@@ -9,7 +9,7 @@
 //! [`RecognizerInput::with_artifacts`].
 //!
 //! [`LanguageDetections`]: nvisy_core::primitive::LanguageDetections
-//! [`Tokens`]: nvisy_core::context::Tokens
+//! [`Tokens`]: nvisy_context::Tokens
 //! [`Artifacts`]: nvisy_core::extraction::Artifacts
 //! [`RecognizerInput`]: nvisy_core::recognition::RecognizerInput
 //! [`RecognizerInput::with_artifacts`]: nvisy_core::recognition::RecognizerInput::with_artifacts
@@ -20,7 +20,7 @@
 //! `process_batch`) once per scan; recognizers and the
 //! [`ContextEnhancer`] borrow the resulting map by reference.
 //!
-//! [`ContextEnhancer`]: nvisy_core::context::ContextEnhancer
+//! [`ContextEnhancer`]: nvisy_context::ContextEnhancer
 
 use nvisy_core::Result;
 use nvisy_core::primitive::LanguageTag;
diff --git a/crates/nvisy-ner/src/nlp/mod.rs b/crates/nvisy-ner/src/nlp/mod.rs
index 2f89fb8a..77fef86d 100644
--- a/crates/nvisy-ner/src/nlp/mod.rs
+++ b/crates/nvisy-ner/src/nlp/mod.rs
@@ -21,9 +21,9 @@
 //! The trait is async because realistic implementations are
 //! HTTP-bound or otherwise yield.
 //!
-//! [`Tokens`]: nvisy_core::context::Tokens
+//! [`Tokens`]: nvisy_context::Tokens
 //! [`LanguageDetections`]: nvisy_core::primitive::LanguageDetections
-//! [`ContextEnhancer`]: nvisy_core::context::ContextEnhancer
+//! [`ContextEnhancer`]: nvisy_context::ContextEnhancer
 //! [`lingua`]: https://crates.io/crates/lingua
 //! [`NerBackend`]: crate::backend::NerBackend
 //! [`NerRecognizer`]: crate::NerRecognizer
diff --git a/crates/nvisy-ner/src/recognition/config.rs b/crates/nvisy-ner/src/recognition/config.rs
index f96b5888..a50c7b6f 100644
--- a/crates/nvisy-ner/src/recognition/config.rs
+++ b/crates/nvisy-ner/src/recognition/config.rs
@@ -62,7 +62,7 @@ pub struct NerModel {
     /// recognizer's [`name`] is used
     /// as the registration key.
     ///
-    /// [`ContextEnhancer`]: nvisy_core::context::ContextEnhancer
+    /// [`ContextEnhancer`]: nvisy_context::ContextEnhancer
     /// [`name`]: super::NerRecognizer::name
     pub default_context: Vec<String>,
 }
diff --git a/crates/nvisy-ner/src/recognition/recognizer.rs b/crates/nvisy-ner/src/recognition/recognizer.rs
index 210b464b..bbdca67b 100644
--- a/crates/nvisy-ner/src/recognition/recognizer.rs
+++ b/crates/nvisy-ner/src/recognition/recognizer.rs
@@ -18,6 +18,7 @@
 use std::sync::Arc;
 
 use derive_builder::Builder;
+use nvisy_context::{Context, ContextRegistry};
 use nvisy_core::entity::{Entity, EntityLabelRef, ModelProvenance, TrailProvenance, TrailStep};
 use nvisy_core::modality::{Text, TextLocation};
 use nvisy_core::primitive::Confidence;
@@ -40,7 +41,7 @@ pub struct NerRecognizer {
     /// the key the [`ContextEnhancer`] looks up to find the
     /// recognizer's [`default_context`].
     ///
-    /// [`ContextEnhancer`]: nvisy_core::context::ContextEnhancer
+    /// [`ContextEnhancer`]: nvisy_context::ContextEnhancer
     /// [`default_context`]: NerModel::default_context
     name: String,
     /// Backend that turns `(text, kinds)` into raw spans. Required.
@@ -91,6 +92,24 @@ impl NerRecognizer {
         &self.model
     }
 
+    /// Build a [`ContextRegistry`] containing this recognizer's
+    /// [`default_context`] keyed on the recognizer's name. Returns
+    /// an empty registry when no keywords were declared.
+    ///
+    /// Mirrors `PatternRegistry::context_registry` so engine code
+    /// can merge per-recognizer contexts from every text-modality
+    /// recognizer into one enhancer input without duplicating the
+    /// keyword data.
+    ///
+    /// [`default_context`]: NerModel::default_context
+    #[must_use]
+    pub fn context_registry(&self) -> ContextRegistry {
+        ContextRegistry::new().with_entry(
+            self.name.clone(),
+            Context::new(self.model.default_context.iter().cloned()),
+        )
+    }
+
     fn build_entity(&self, span: &RawNerSpan, label: EntityLabelRef) -> Entity<Text> {
         let raw_confidence =
             Confidence::try_clamped(span.score).unwrap_or(self.model.default_score);
diff --git a/crates/nvisy-pattern/Cargo.toml b/crates/nvisy-pattern/Cargo.toml
index ffe5dc8d..574fadf8 100644
--- a/crates/nvisy-pattern/Cargo.toml
+++ b/crates/nvisy-pattern/Cargo.toml
@@ -24,6 +24,7 @@ rustdoc-args = ["--cfg", "docsrs"]
 
 [dependencies]
 # Internal crates
+nvisy-context = { workspace = true, features = [] }
 nvisy-core = { workspace = true, features = [] }
 
 # (De)serialization
diff --git a/crates/nvisy-pattern/src/recognition/dictionary.rs b/crates/nvisy-pattern/src/recognition/dictionary.rs
index 856b707f..7875c2a2 100644
--- a/crates/nvisy-pattern/src/recognition/dictionary.rs
+++ b/crates/nvisy-pattern/src/recognition/dictionary.rs
@@ -23,8 +23,8 @@
 //! [`with_terms`]: DictionaryBuilder::with_terms
 
 use derive_builder::Builder;
+use nvisy_context::Context;
 use nvisy_core::Error;
-use nvisy_core::context::Context;
 use nvisy_core::entity::EntityLabelRef;
 use nvisy_core::primitive::{Confidence, LanguageTag};
 use schemars::JsonSchema;
diff --git a/crates/nvisy-pattern/src/recognition/mod.rs b/crates/nvisy-pattern/src/recognition/mod.rs
index 52876335..0ce29c61 100644
--- a/crates/nvisy-pattern/src/recognition/mod.rs
+++ b/crates/nvisy-pattern/src/recognition/mod.rs
@@ -1,10 +1,10 @@
 //! Recognition primitives — the rule shapes ([`Regex`],
 //! [`Dictionary`]), their building blocks ([`Terms`] plus
-//! [`Context`] from `nvisy-core`),
+//! [`Context`] from `nvisy-context`),
 //! the [`PatternRegistry`] that bundles them, and the runtime
 //! [`PatternRecognizer`] that compiles them into pooled scanners.
 //!
-//! [`Context`]: nvisy_core::context::Context
+//! [`Context`]: nvisy_context::Context
 
 mod dictionary;
 mod recognizer;
diff --git a/crates/nvisy-pattern/src/recognition/recognizer.rs b/crates/nvisy-pattern/src/recognition/recognizer.rs
index 42fee424..ce987ee3 100644
--- a/crates/nvisy-pattern/src/recognition/recognizer.rs
+++ b/crates/nvisy-pattern/src/recognition/recognizer.rs
@@ -28,7 +28,7 @@ use crate::validators::{Validator, ValidatorRegistry};
 /// the recognizer never reads it; the [`ContextEnhancer`] looks it
 /// up directly on the [`PatternRegistry`] at boost time.
 ///
-/// [`ContextEnhancer`]: crate::ContextEnhancer
+/// [`ContextEnhancer`]: nvisy_context::ContextEnhancer
 struct CompiledPattern {
     name: String,
     label: EntityLabelRef,
diff --git a/crates/nvisy-pattern/src/recognition/regex_rule.rs b/crates/nvisy-pattern/src/recognition/regex_rule.rs
index 5cfec944..55f303ca 100644
--- a/crates/nvisy-pattern/src/recognition/regex_rule.rs
+++ b/crates/nvisy-pattern/src/recognition/regex_rule.rs
@@ -10,8 +10,8 @@
 //! [`Regex::from_toml`] when loading a definition file.
 
 use derive_builder::Builder;
+use nvisy_context::Context;
 use nvisy_core::Error;
-use nvisy_core::context::Context;
 use nvisy_core::entity::EntityLabelRef;
 use nvisy_core::primitive::{Confidence, LanguageTag};
 use schemars::JsonSchema;
diff --git a/crates/nvisy-pattern/src/recognition/registry.rs b/crates/nvisy-pattern/src/recognition/registry.rs
index 6b675707..c763661a 100644
--- a/crates/nvisy-pattern/src/recognition/registry.rs
+++ b/crates/nvisy-pattern/src/recognition/registry.rs
@@ -10,9 +10,9 @@
 //! [`Regex`] / [`Dictionary`] storage between the two consumers.
 //!
 //! [`PatternRecognizer`]: super::PatternRecognizer
-//! [`ContextEnhancer`]: nvisy_core::context::ContextEnhancer
+//! [`ContextEnhancer`]: nvisy_context::ContextEnhancer
 
-use nvisy_core::context::ContextRegistry;
+use nvisy_context::ContextRegistry;
 use nvisy_core::entity::EntityLabelCatalog;
 
 use super::dictionary::Dictionary;
@@ -134,7 +134,7 @@ impl PatternRegistry {
     /// from — no duplication of keyword data between rule
     /// registration and enhancer construction.
     ///
-    /// [`ContextEnhancer`]: nvisy_core::context::ContextEnhancer
+    /// [`ContextEnhancer`]: nvisy_context::ContextEnhancer
     #[must_use]
     pub fn context_registry(&self) -> ContextRegistry {
         let mut registry = ContextRegistry::new();
diff --git a/crates/nvisy-pattern/tests/enhancer_roundtrip.rs b/crates/nvisy-pattern/tests/enhancer_roundtrip.rs
index 6bb637f6..dbc09cbc 100644
--- a/crates/nvisy-pattern/tests/enhancer_roundtrip.rs
+++ b/crates/nvisy-pattern/tests/enhancer_roundtrip.rs
@@ -6,8 +6,9 @@
 //!
 //! [`Refinement`]: nvisy_core::entity::TrailStepKind::Refinement
 
-use nvisy_core::context::{Context, ContextEnhancer};
+use nvisy_context::{Context, ContextEnhancer};
 use nvisy_core::entity::{PatternProvenance, TrailProvenance, TrailStepKind, builtins};
+use nvisy_core::extraction::Artifacts;
 use nvisy_core::modality::TextData;
 use nvisy_core::primitive::Confidence;
 use nvisy_core::recognition::{EntityRecognizer, RecognizerInput};
@@ -56,7 +57,7 @@ async fn enhancer_boosts_matches_near_keyword_only() {
         .with_default_boost(0.3)
         .build()
         .expect("enhancer builds");
-    enhancer.enhance(&mut entities, text, &type_map::concurrent::TypeMap::new());
+    enhancer.enhance(&mut entities, text, &Artifacts::new());
 
     // First match has `SSN:` within the 20-byte window → boosted.
     let near = entities

From 534c1c7373384f95c6d17f50efad08dda2ad8aaf Mon Sep 17 00:00:00 2001
From: Oleh Martsokha <o.martsokha@gmail.com>
Date: Sun, 14 Jun 2026 14:04:10 +0200
Subject: [PATCH 02/14] chore(deps): clean up workspace deps + normalize
 per-crate manifests
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Drops 9 unused workspace deps (hmac, include_dir, quick-xml, reqwest,
serde_with, smallvec, stop-words, walkdir, zip) and reorders the root
[workspace.dependencies] foundation-first: primitives → runtime → domain
(text/document/image/audio) → integration (HTTP, AI, server, CLI) →
storage → utilities. Removes per-crate machete-flagged deps and aligns
every crate manifest with the new group names and order. Keeps calamine
and unicode-segmentation in workspace deps for upcoming xlsx + word-boundary
work. Marks humantime-serde as ignored by cargo-machete in nvisy-llm,
nvisy-engine, and nvisy-server, where it's used via serde `with =` strings.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
---
 Cargo.lock                      | 11 -----
 Cargo.toml                      | 83 ++++++++++++++-------------------
 crates/nvisy-cli/Cargo.toml     | 16 ++++---
 crates/nvisy-codec/Cargo.toml   | 52 +++++++++++----------
 crates/nvisy-core/Cargo.toml    | 21 ++++-----
 crates/nvisy-engine/Cargo.toml  | 61 ++++++++++--------------
 crates/nvisy-fake/Cargo.toml    | 10 ++--
 crates/nvisy-llm/Cargo.toml     | 42 +++++++++--------
 crates/nvisy-ner/Cargo.toml     | 26 +++++------
 crates/nvisy-ocr/Cargo.toml     | 19 ++++----
 crates/nvisy-pattern/Cargo.toml | 17 +++----
 crates/nvisy-server/Cargo.toml  | 26 +++++------
 crates/nvisy-stt/Cargo.toml     |  6 +--
 crates/nvisy-toolkit/Cargo.toml | 26 +++++------
 14 files changed, 185 insertions(+), 231 deletions(-)

diff --git a/Cargo.lock b/Cargo.lock
index a9b28db5..bfed20b0 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -2888,7 +2888,6 @@ dependencies = [
  "symphonia",
  "tokio",
  "tracing",
- "uuid",
 ]
 
 [[package]]
@@ -2918,7 +2917,6 @@ dependencies = [
  "serde_json",
  "strum 0.28.0",
  "thiserror",
- "tracing",
  "type-map",
  "uuid",
 ]
@@ -2960,8 +2958,6 @@ dependencies = [
  "tokio-util",
  "toml",
  "tracing",
- "type-map",
- "unicode-normalization",
  "uuid",
  "validator",
 ]
@@ -3001,7 +2997,6 @@ dependencies = [
  "toml",
  "tracing",
  "unicode-normalization",
- "uuid",
 ]
 
 [[package]]
@@ -3026,7 +3021,6 @@ name = "nvisy-ocr"
 version = "0.1.0"
 dependencies = [
  "async-trait",
- "base64",
  "bentoml",
  "bytes",
  "futures",
@@ -3051,7 +3045,6 @@ dependencies = [
  "serde",
  "tokio",
  "toml",
- "type-map",
 ]
 
 [[package]]
@@ -3060,7 +3053,6 @@ version = "0.1.0"
 dependencies = [
  "aide",
  "axum",
- "base64",
  "derive_more",
  "futures",
  "humantime-serde",
@@ -3068,7 +3060,6 @@ dependencies = [
  "nvisy-core",
  "nvisy-engine",
  "schemars",
- "semver",
  "serde",
  "tokio",
  "tower",
@@ -3095,8 +3086,6 @@ dependencies = [
  "aes-gcm",
  "async-trait",
  "base64",
- "bytes",
- "hipstr",
  "nvisy-codec",
  "nvisy-core",
  "nvisy-fake",
diff --git a/Cargo.toml b/Cargo.toml
index 0c74b1d1..0e9387ec 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -43,40 +43,17 @@ nvisy-core = { path = "./crates/nvisy-core", version = "0.1.0" }
 nvisy-engine = { path = "./crates/nvisy-engine", version = "0.1.0" }
 nvisy-fake = { path = "./crates/nvisy-fake", version = "0.1.0" }
 nvisy-llm = { path = "./crates/nvisy-llm", version = "0.1.0" }
-nvisy-stt = { path = "./crates/nvisy-stt", version = "0.1.0" }
-nvisy-toolkit = { path = "./crates/nvisy-toolkit", version = "0.1.0" }
 nvisy-ner = { path = "./crates/nvisy-ner", version = "0.1.0" }
 nvisy-ocr = { path = "./crates/nvisy-ocr", version = "0.1.0" }
 nvisy-pattern = { path = "./crates/nvisy-pattern", version = "0.1.0" }
 nvisy-server = { path = "./crates/nvisy-server", version = "0.1.0" }
+nvisy-stt = { path = "./crates/nvisy-stt", version = "0.1.0" }
+nvisy-toolkit = { path = "./crates/nvisy-toolkit", version = "0.1.0" }
 
-# Inference & AI frameworks
-bentoml = { version = "0.5", default-features = false, features = ["rustls-tls", "tracing"] }
-rig = { version = "0.38", features = [], default-features = false }
-
-# HTTP client and middleware
-reqwest = { version = "0.13", default-features = false, features = ["json", "rustls", "multipart"] }
-reqwest-middleware = { version = "0.5", features = ["json", "multipart"] }
-reqwest-retry = { version = "0.9", features = [] }
-reqwest-tracing = { version = "0.7", features = [] }
-
-# Async runtime and parallelism
-tokio = { version = "1.50", features = [] }
-tokio-util = { version = "0.7", features = [] }
-futures = { version = "0.3", features = [] }
-async-trait = { version = "0.1", features = [] }
-rayon = { version = "1.10", features = [] }
-
-# Observability
-tracing = { version = "0.1", features = ["attributes"] }
-tracing-subscriber = { version = "0.3", features = [] }
-
-# (De)serialization
+# Serialization
 serde = { version = "1.0", features = ["derive"] }
 serde_json = { version = "1.0", features = [] }
-serde_with = { version = "3.18", features = [] }
 schemars = { version = "1.0", features = ["uuid1", "bytes1"] }
-csv = { version = "1.0", features = [] }
 toml = { version = "1.1", features = [] }
 minijinja = { version = "2.5", features = [] }
 
@@ -100,32 +77,37 @@ type-map = { version = "0.5", features = [] }
 
 # Encoding and hashing
 base64 = { version = "0.22", features = [] }
+hex = { version = "0.4", features = [] }
 sha2 = { version = "0.11", features = [] }
 aes-gcm = { version = "0.10", features = [] }
-hmac = { version = "0.13", features = [] }
-hex = { version = "0.4", features = [] }
 
-# Pattern matching
+# Async runtime and parallelism
+tokio = { version = "1.50", features = [] }
+tokio-util = { version = "0.7", features = [] }
+futures = { version = "0.3", features = [] }
+async-trait = { version = "0.1", features = [] }
+rayon = { version = "1.10", features = [] }
+
+# Observability
+tracing = { version = "0.1", features = ["attributes"] }
+tracing-subscriber = { version = "0.3", features = [] }
+
+# Text processing (pattern matching, language detection, unicode)
 regex = { version = "1.0", features = [] }
 aho-corasick = { version = "1.0", features = [] }
-smallvec = { version = "1.13", features = [] }
-
-# Language detection and text segmentation
 lingua = { version = "1.8", default-features = false, features = ["english"] }
-stop-words = { version = "0.10", features = ["iso"] }
 unicode-segmentation = { version = "1.13", features = [] }
 unicode-normalization = { version = "0.1", features = [] }
 
-# PDF processing (parsing, text extraction, page-to-image rendering)
-lopdf = { version = "0.41", features = [] }
-pdfium-render = { version = "0.9", features = [] }
+# Tabular document parsing
+csv = { version = "1.0", features = [] }
+calamine = { version = "0.35", features = [] }
 
-# Document parsing
+# Rich-document parsing (HTML, PDF)
 scraper = { version = "0.27", features = [] }
 ego-tree = { version = "0.11", features = [] }
-calamine = { version = "0.35", features = [] }
-zip = { version = "8.4", features = [] }
-quick-xml = { version = "0.40", features = [] }
+lopdf = { version = "0.41", features = [] }
+pdfium-render = { version = "0.9", features = [] }
 
 # Image processing
 image = { version = "0.25", default-features = false, features = ["png", "jpeg", "tiff"] }
@@ -136,27 +118,30 @@ hound = { version = "3.5", features = [] }
 symphonia = { version = "0.6", default-features = false, features = ["wav", "pcm", "mp3"] }
 mp3lame-encoder = { version = "0.2", features = [] }
 
-# CLI
-clap = { version = "4.6", features = [] }
+# AI / LLM frameworks
+bentoml = { version = "0.5", default-features = false, features = ["rustls-tls", "tracing"] }
+rig = { version = "0.38", features = [], default-features = false }
+
+# HTTP client and middleware
+reqwest-middleware = { version = "0.5", features = ["json", "multipart"] }
+reqwest-retry = { version = "0.9", features = [] }
+reqwest-tracing = { version = "0.7", features = [] }
 
-# HTTP server
+# HTTP server and middleware
 axum = { version = "0.8", features = [] }
 aide = { version = "0.16.0-alpha.2", features = [] }
 tower = { version = "0.5", features = [] }
 tower-http = { version = "0.6", features = [] }
 
-# Filesystem traversal
-walkdir = { version = "2.5", features = [] }
+# CLI
+clap = { version = "4.6", features = [] }
 
-# Storage, file detection, and asset embedding
+# Storage and file-type detection
 fjall = { version = "3.1", features = [] }
-include_dir = { version = "0.7", features = [] }
 infer = { version = "0.19", features = [] }
 
 # Utilities
 validator = { version = "0.20", features = ["derive"] }
 rand = { version = "0.10", features = [] }
 tempfile = { version = "3.27", features = [] }
-
-# Fake data generation
 fake = { version = "5.1", features = [] }
diff --git a/crates/nvisy-cli/Cargo.toml b/crates/nvisy-cli/Cargo.toml
index 7bb728cb..c8fa9aec 100644
--- a/crates/nvisy-cli/Cargo.toml
+++ b/crates/nvisy-cli/Cargo.toml
@@ -58,20 +58,16 @@ path = "src/main.rs"
 nvisy-engine = { workspace = true, features = [] }
 nvisy-server = { workspace = true, features = [] }
 
-# CLI
-clap = { workspace = true, features = ["derive", "env"] }
-
-# (De)serialization
+# Serialization
 serde = { workspace = true, features = [] }
 toml = { workspace = true, features = [] }
-humantime = { workspace = true, features = [] }
 humantime-serde = { workspace = true, features = [] }
 
 # Derive macros and error handling
 anyhow = { workspace = true, features = [] }
 
-# HTTP server
-axum = { workspace = true, features = ["tokio"] }
+# Primitive datatypes
+humantime = { workspace = true, features = [] }
 
 # Async runtime and parallelism
 tokio = { workspace = true, features = ["rt-multi-thread", "macros", "signal"] }
@@ -80,5 +76,11 @@ tokio = { workspace = true, features = ["rt-multi-thread", "macros", "signal"] }
 tracing = { workspace = true, features = [] }
 tracing-subscriber = { workspace = true, features = ["env-filter", "json"] }
 
+# HTTP server and middleware
+axum = { workspace = true, features = ["tokio"] }
+
+# CLI
+clap = { workspace = true, features = ["derive", "env"] }
+
 [package.metadata.cargo-machete]
 ignored = ["humantime-serde"]
diff --git a/crates/nvisy-codec/Cargo.toml b/crates/nvisy-codec/Cargo.toml
index c94df43b..ff975296 100644
--- a/crates/nvisy-codec/Cargo.toml
+++ b/crates/nvisy-codec/Cargo.toml
@@ -100,27 +100,39 @@ rustdoc-args = ["--cfg", "docsrs"]
 # Internal crates
 nvisy-core = { workspace = true, features = [] }
 
-# Async runtime
-async-trait = { workspace = true, features = [] }
-tokio = { workspace = true, features = ["sync"] }
-
-# (De)serialization
+# Serialization
 serde = { workspace = true, features = [] }
 serde_json = { workspace = true, features = [] }
+schemars = { workspace = true, features = [] }
+
+# Derive macros and error handling
+derive_more = { workspace = true, features = ["as_ref", "deref", "from"] }
 
 # Primitive datatypes
 bytes = { workspace = true, features = [] }
-uuid = { workspace = true, features = [] }
-derive_more = { workspace = true, features = ["as_ref", "deref", "from"] }
+
+# Encoding and hashing
 hex = { workspace = true, features = [] }
-infer = { workspace = true, features = [] }
-schemars = { workspace = true, features = [] }
 sha2 = { workspace = true, features = [] }
 
-# Image processing — pulled in unconditionally because the image
-# handler structs reference `image::DynamicImage` directly. The
-# workspace dep already enables png/jpeg/tiff decoders. `imageproc`
-# powers the per-region gaussian blur in `image::redact`.
+# Async runtime and parallelism
+async-trait = { workspace = true, features = [] }
+tokio = { workspace = true, features = ["sync"] }
+rayon = { workspace = true, optional = true, features = [] }
+
+# Observability
+tracing = { workspace = true, features = [] }
+
+# Tabular document parsing (feature-gated)
+csv = { workspace = true, optional = true, features = [] }
+
+# Rich-document parsing (feature-gated: HTML + PDF)
+scraper = { workspace = true, optional = true, features = [] }
+ego-tree = { workspace = true, optional = true, features = [] }
+lopdf = { workspace = true, optional = true, features = [] }
+pdfium-render = { workspace = true, optional = true, features = [] }
+
+# Image processing
 image = { workspace = true, features = [] }
 imageproc = { workspace = true, features = [] }
 
@@ -129,18 +141,8 @@ hound = { workspace = true, optional = true, features = [] }
 symphonia = { workspace = true, optional = true, features = [] }
 mp3lame-encoder = { workspace = true, optional = true, features = [] }
 
-# PDF processing (feature-gated)
-lopdf = { workspace = true, optional = true, features = [] }
-pdfium-render = { workspace = true, optional = true, features = [] }
-rayon = { workspace = true, optional = true, features = [] }
-
-# Document parsing (feature-gated)
-csv = { workspace = true, optional = true, features = [] }
-scraper = { workspace = true, optional = true, features = [] }
-ego-tree = { workspace = true, optional = true, features = [] }
-
-# Observability
-tracing = { workspace = true, features = [] }
+# Storage and file-type detection
+infer = { workspace = true, features = [] }
 
 [dev-dependencies]
 tokio = { workspace = true, features = ["macros", "rt"] }
diff --git a/crates/nvisy-core/Cargo.toml b/crates/nvisy-core/Cargo.toml
index 3c1cee13..e815c4ca 100644
--- a/crates/nvisy-core/Cargo.toml
+++ b/crates/nvisy-core/Cargo.toml
@@ -29,26 +29,23 @@ default = []
 test-utils = []
 
 [dependencies]
-# (De)serialization
+# Serialization
 serde = { workspace = true, features = [] }
 serde_json = { workspace = true, features = [] }
 schemars = { workspace = true, features = [] }
 
-# Primitive datatypes
-bytes = { workspace = true, features = [] }
-hipstr = { workspace = true, features = [] }
-uuid = { workspace = true, features = [] }
-oxilangtag = { workspace = true, features = [] }
-type-map = { workspace = true, features = [] }
-
 # Derive macros and error handling
 thiserror = { workspace = true, features = [] }
 derive_builder = { workspace = true, features = [] }
 derive_more = { workspace = true, features = ["as_ref", "deref", "deref_mut", "display", "from", "from_str", "into", "into_iterator", "is_variant"] }
 strum = { workspace = true, features = [] }
 
-# Async trait sugar (object-safe async methods)
-async-trait = { workspace = true, features = [] }
+# Primitive datatypes
+uuid = { workspace = true, features = [] }
+bytes = { workspace = true, features = [] }
+hipstr = { workspace = true, features = [] }
+oxilangtag = { workspace = true, features = [] }
+type-map = { workspace = true, features = [] }
 
-# Observability
-tracing = { workspace = true, features = [] }
+# Async runtime and parallelism
+async-trait = { workspace = true, features = [] }
diff --git a/crates/nvisy-engine/Cargo.toml b/crates/nvisy-engine/Cargo.toml
index 5f516ed7..cc4dcd90 100644
--- a/crates/nvisy-engine/Cargo.toml
+++ b/crates/nvisy-engine/Cargo.toml
@@ -39,69 +39,56 @@ rustdoc-args = ["--cfg", "docsrs"]
 
 [dependencies]
 # Internal crates
+nvisy-codec = { workspace = true, features = ["text"] }
 nvisy-context = { workspace = true, features = [] }
 nvisy-core = { workspace = true, features = [] }
-nvisy-toolkit = { workspace = true, features = [] }
-nvisy-codec = { workspace = true, features = ["text"] }
-
-# Detection + extraction backends used by the phase wiring
+nvisy-llm = { workspace = true, features = [] }
 nvisy-ner = { workspace = true, features = [] }
 nvisy-ocr = { workspace = true, features = [] }
 nvisy-pattern = { workspace = true, features = [] }
-nvisy-llm = { workspace = true, features = [] }
 nvisy-stt = { workspace = true, features = [] }
+nvisy-toolkit = { workspace = true, features = [] }
 
-# Storage
-fjall = { workspace = true, features = [] }
-
-# Encoding
-base64 = { workspace = true, features = [] }
-
-# Cheap-clone strings (refcounted / inline / borrowed). Used on
-# policy / rule names and the audit's [`PolicyDecisionRef`] so
-# audit-heavy passes share refcounts rather than allocating
-# per-entity.
-hipstr = { workspace = true, features = [] }
-
-# (De)serialization
+# Serialization
 serde = { workspace = true, features = [] }
 serde_json = { workspace = true, features = [] }
 schemars = { workspace = true, features = [] }
 toml = { workspace = true, features = [] }
 humantime-serde = { workspace = true, features = [] }
 
-# Async runtime
-async-trait = { workspace = true, features = [] }
-futures = { workspace = true, features = [] }
-tokio = { workspace = true, features = ["rt", "sync", "time", "macros"] }
-tokio-util = { workspace = true, features = [] }
+# Derive macros and error handling
+anyhow = { workspace = true, features = [] }
+derive_builder = { workspace = true, features = [] }
+derive_more = { workspace = true, features = ["deref", "deref_mut", "display", "from", "from_str", "into", "into_iterator"] }
+strum = { workspace = true, features = [] }
 
 # Primitive datatypes
 uuid = { workspace = true, features = [] }
+bytes = { workspace = true, features = [] }
+hipstr = { workspace = true, features = [] }
 jiff = { workspace = true, features = [] }
 semver = { workspace = true, features = ["serde"] }
-bytes = { workspace = true, features = [] }
 
-# Heterogeneous container for PolicyStore
-type-map = { workspace = true, features = [] }
+# Encoding and hashing
+base64 = { workspace = true, features = [] }
+aes-gcm = { workspace = true, features = [] }
 
-# Derive macros and error handling
-anyhow = { workspace = true, features = [] }
-derive_builder = { workspace = true, features = [] }
-derive_more = { workspace = true, features = ["deref", "deref_mut", "display", "from", "from_str", "into", "into_iterator"] }
-strum = { workspace = true, features = [] }
+# Async runtime and parallelism
+async-trait = { workspace = true, features = [] }
+futures = { workspace = true, features = [] }
+tokio = { workspace = true, features = ["rt", "sync", "time", "macros"] }
+tokio-util = { workspace = true, features = [] }
 
-# Encryption
-aes-gcm = { workspace = true, features = [] }
+# Observability
+tracing = { workspace = true, features = [] }
+
+# Storage and file-type detection
+fjall = { workspace = true, features = [] }
 
 # Utilities
 validator = { workspace = true, features = [] }
 rand = { workspace = true, features = [] }
 tempfile = { workspace = true, features = [], optional = true }
-unicode-normalization = { workspace = true, features = [] }
-
-# Observability
-tracing = { workspace = true, features = [] }
 
 [dev-dependencies]
 nvisy-engine = { path = ".", features = ["test-utils"] }
diff --git a/crates/nvisy-fake/Cargo.toml b/crates/nvisy-fake/Cargo.toml
index 2e979262..d2bdba91 100644
--- a/crates/nvisy-fake/Cargo.toml
+++ b/crates/nvisy-fake/Cargo.toml
@@ -26,14 +26,14 @@ rustdoc-args = ["--cfg", "docsrs"]
 # Internal crates
 nvisy-core = { workspace = true, features = [] }
 
-# Fake data generation
-fake = { workspace = true, features = [] }
+# Primitive datatypes (UUIDv4 for fake `DeviceId`)
+uuid = { workspace = true, features = ["v4"] }
 
-# Async runtime
+# Async runtime and parallelism
 async-trait = { workspace = true, features = [] }
 
-# UUIDs for fake DeviceId
-uuid = { workspace = true, features = ["v4"] }
+# Utilities
+fake = { workspace = true, features = [] }
 
 [dev-dependencies]
 nvisy-core = { workspace = true, features = ["test-utils"] }
diff --git a/crates/nvisy-llm/Cargo.toml b/crates/nvisy-llm/Cargo.toml
index 6d1ea5c5..6e12f04f 100644
--- a/crates/nvisy-llm/Cargo.toml
+++ b/crates/nvisy-llm/Cargo.toml
@@ -36,39 +36,41 @@ rustdoc-args = ["--cfg", "docsrs"]
 # Internal crates
 nvisy-core = { workspace = true, features = [] }
 
-# LLM framework
-rig = { workspace = true, features = ["derive", "reqwest-middleware"] }
-
-# Async runtime
-async-trait = { workspace = true, features = [] }
-
-# HTTP client + middleware (shared retry/tracing layers).
-reqwest-middleware = { workspace = true, features = [] }
-reqwest-retry = { workspace = true, features = [] }
-reqwest-tracing = { workspace = true, features = [] }
-humantime-serde = { workspace = true, features = [] }
-
-# (De)serialization
+# Serialization
 serde = { workspace = true, features = [] }
 serde_json = { workspace = true, features = [] }
 schemars = { workspace = true, features = [] }
 toml = { workspace = true, features = ["parse"] }
 minijinja = { workspace = true, features = [] }
-
-# Primitive datatypes
-uuid = { workspace = true, features = [] }
-
-# Encoding and hashing
-base64 = { workspace = true, features = [] }
-unicode-normalization = { workspace = true, features = [] }
+humantime-serde = { workspace = true, features = [] }
 
 # Derive macros and error handling
 derive_builder = { workspace = true, features = [] }
 derive_more = { workspace = true, features = ["add_assign"] }
 thiserror = { workspace = true, features = [] }
 
+# Encoding and hashing
+base64 = { workspace = true, features = [] }
+
+# Async runtime and parallelism
+async-trait = { workspace = true, features = [] }
+
 # Observability
 tracing = { workspace = true, features = [] }
 
+# Text processing
+unicode-normalization = { workspace = true, features = [] }
+
+# AI / LLM frameworks
+rig = { workspace = true, features = ["derive", "reqwest-middleware"] }
+
+# HTTP client and middleware (shared retry/tracing layers)
+reqwest-middleware = { workspace = true, features = [] }
+reqwest-retry = { workspace = true, features = [] }
+reqwest-tracing = { workspace = true, features = [] }
+
 [dev-dependencies]
 tokio = { workspace = true, features = ["macros", "rt"] }
+
+[package.metadata.cargo-machete]
+ignored = ["humantime-serde"]
diff --git a/crates/nvisy-ner/Cargo.toml b/crates/nvisy-ner/Cargo.toml
index bad8fe13..6758bc16 100644
--- a/crates/nvisy-ner/Cargo.toml
+++ b/crates/nvisy-ner/Cargo.toml
@@ -35,29 +35,27 @@ rustdoc-args = ["--cfg", "docsrs"]
 nvisy-context = { workspace = true, features = [] }
 nvisy-core = { workspace = true, features = [] }
 
-# Async trait sugar
-async-trait = { workspace = true, features = [] }
-
-# Inference & AI frameworks
-bentoml = { workspace = true, optional = true }
-
-# Language detection
-lingua = { workspace = true, features = [] }
-
-# (De)serialization
+# Serialization
 serde = { workspace = true, features = ["derive"] }
 
-# Builder derive macro
+# Derive macros and error handling
 derive_builder = { workspace = true, features = [] }
 
-# Heterogeneous typed map for shared-NLP artifacts
+# Primitive datatypes
+uuid = { workspace = true, features = ["v7"] }
 type-map = { workspace = true, features = [] }
 
-# Identifiers
-uuid = { workspace = true, features = ["v7"] }
+# Async runtime and parallelism
+async-trait = { workspace = true, features = [] }
 
 # Observability
 tracing = { workspace = true, features = [] }
 
+# Text processing (language detection)
+lingua = { workspace = true, features = [] }
+
+# AI / LLM frameworks (feature-gated)
+bentoml = { workspace = true, optional = true }
+
 [dev-dependencies]
 tokio = { workspace = true, features = ["macros", "rt"] }
diff --git a/crates/nvisy-ocr/Cargo.toml b/crates/nvisy-ocr/Cargo.toml
index bc61457b..dc8999fd 100644
--- a/crates/nvisy-ocr/Cargo.toml
+++ b/crates/nvisy-ocr/Cargo.toml
@@ -36,24 +36,21 @@ rustdoc-args = ["--cfg", "docsrs"]
 # Internal crates
 nvisy-core = { workspace = true, features = [] }
 
-# Async runtime and parallelism
-async-trait = { workspace = true, features = [] }
-futures = { workspace = true, features = [] }
-tokio = { workspace = true, features = ["rt"] }
-
-# BentoML inference client (feature-gated)
-bentoml = { workspace = true, optional = true }
-
 # Primitive datatypes (UUIDv7 for per-call request IDs from the
 # Bento backend; behind the `bento` feature gate at use sites).
 uuid = { workspace = true, features = ["v4", "v7"] }
-
-# Encoding
-base64 = { workspace = true, features = [] }
 bytes = { workspace = true, features = [] }
 
+# Async runtime and parallelism
+async-trait = { workspace = true, features = [] }
+futures = { workspace = true, features = [] }
+tokio = { workspace = true, features = ["rt"] }
+
 # Observability
 tracing = { workspace = true, features = [] }
 
+# AI / LLM frameworks (feature-gated)
+bentoml = { workspace = true, optional = true }
+
 [dev-dependencies]
 tokio = { workspace = true, features = ["macros", "rt"] }
diff --git a/crates/nvisy-pattern/Cargo.toml b/crates/nvisy-pattern/Cargo.toml
index 574fadf8..1c17cc80 100644
--- a/crates/nvisy-pattern/Cargo.toml
+++ b/crates/nvisy-pattern/Cargo.toml
@@ -27,22 +27,23 @@ rustdoc-args = ["--cfg", "docsrs"]
 nvisy-context = { workspace = true, features = [] }
 nvisy-core = { workspace = true, features = [] }
 
-# (De)serialization
-csv = { workspace = true, features = [] }
+# Serialization
 serde = { workspace = true, features = [] }
-toml = { workspace = true, features = ["parse"] }
 schemars = { workspace = true, features = [] }
+toml = { workspace = true, features = ["parse"] }
 
-# Derive macros
+# Derive macros and error handling
 derive_builder = { workspace = true, features = [] }
 
-# Pattern matching
+# Async runtime and parallelism
+async-trait = { workspace = true, features = [] }
+
+# Text processing (regex + Aho-Corasick literal matching)
 regex = { workspace = true, features = [] }
 aho-corasick = { workspace = true, features = [] }
 
-# Async trait sugar
-async-trait = { workspace = true, features = [] }
+# Tabular document parsing (dictionary loading from CSV)
+csv = { workspace = true, features = [] }
 
 [dev-dependencies]
 tokio = { workspace = true, features = ["macros", "rt"] }
-type-map = { workspace = true, features = [] }
diff --git a/crates/nvisy-server/Cargo.toml b/crates/nvisy-server/Cargo.toml
index 79a785cb..eb2e7628 100644
--- a/crates/nvisy-server/Cargo.toml
+++ b/crates/nvisy-server/Cargo.toml
@@ -50,34 +50,30 @@ rustdoc-args = ["--cfg", "docsrs"]
 nvisy-core = { workspace = true, features = [] }
 nvisy-engine = { workspace = true, features = [] }
 
-# Async runtime
-futures = { workspace = true, features = [] }
-
-# HTTP server
-axum = { workspace = true, features = ["json", "multipart", "tokio"] }
-aide = { workspace = true, features = ["axum", "axum-json", "axum-query", "scalar", "macros"] }
-tower = { workspace = true, features = ["util", "timeout"] }
-tower-http = { workspace = true, features = ["trace", "cors", "timeout", "request-id", "limit", "compression-gzip", "compression-br", "compression-zstd", "sensitive-headers", "catch-panic", "util"] }
-
-# (De)serialization
+# Serialization
 serde = { workspace = true, features = [] }
 schemars = { workspace = true, features = [] }
 humantime-serde = { workspace = true, features = [] }
 
-# Encoding and hashing
-base64 = { workspace = true, features = [] }
-
-# Derive macros
+# Derive macros and error handling
 derive_more = { workspace = true, features = ["deref", "display"] }
 
 # Primitive datatypes
 uuid = { workspace = true, features = [] }
 jiff = { workspace = true, features = [] }
-semver = { workspace = true, features = [] }
+
+# Async runtime and parallelism
+futures = { workspace = true, features = [] }
 
 # Observability
 tracing = { workspace = true, features = [] }
 
+# HTTP server and middleware
+axum = { workspace = true, features = ["json", "multipart", "tokio"] }
+aide = { workspace = true, features = ["axum", "axum-json", "axum-query", "scalar", "macros"] }
+tower = { workspace = true, features = ["util", "timeout"] }
+tower-http = { workspace = true, features = ["trace", "cors", "timeout", "request-id", "limit", "compression-gzip", "compression-br", "compression-zstd", "sensitive-headers", "catch-panic", "util"] }
+
 [dev-dependencies]
 tokio = { workspace = true, features = ["macros", "rt"] }
 
diff --git a/crates/nvisy-stt/Cargo.toml b/crates/nvisy-stt/Cargo.toml
index 1381a229..6890e789 100644
--- a/crates/nvisy-stt/Cargo.toml
+++ b/crates/nvisy-stt/Cargo.toml
@@ -29,12 +29,12 @@ rustdoc-args = ["--cfg", "docsrs"]
 # Internal crates
 nvisy-core = { workspace = true, features = [] }
 
-# Async runtime
-async-trait = { workspace = true, features = [] }
-
 # Primitive datatypes
 uuid = { workspace = true, features = [] }
 
+# Async runtime and parallelism
+async-trait = { workspace = true, features = [] }
+
 # Observability
 tracing = { workspace = true, features = [] }
 
diff --git a/crates/nvisy-toolkit/Cargo.toml b/crates/nvisy-toolkit/Cargo.toml
index 86dd511d..88d17f47 100644
--- a/crates/nvisy-toolkit/Cargo.toml
+++ b/crates/nvisy-toolkit/Cargo.toml
@@ -46,39 +46,37 @@ rustdoc-args = ["--cfg", "docsrs"]
 
 [dependencies]
 # Internal crates
-nvisy-core = { workspace = true, features = [] }
 nvisy-codec = { workspace = true, features = [] }
+nvisy-core = { workspace = true, features = [] }
 nvisy-llm = { workspace = true, features = [] }
-nvisy-stt = { workspace = true, features = [] }
 nvisy-ner = { workspace = true, features = [] }
 nvisy-ocr = { workspace = true, features = [] }
 nvisy-pattern = { workspace = true, features = [] }
+nvisy-stt = { workspace = true, features = [] }
 
-# (De)serialization
+# Serialization
 serde = { workspace = true, features = [] }
 schemars = { workspace = true, features = [] }
 
-# Async runtime
-async-trait = { workspace = true, features = [] }
-tokio = { workspace = true, features = ["rt", "sync", "time", "macros"] }
-
 # Primitive datatypes
 uuid = { workspace = true, features = [] }
-bytes = { workspace = true, features = [] }
-hipstr = { workspace = true, features = [] }
 type-map = { workspace = true, features = [] }
 
-# Unicode-aware text folding for leak detection.
-unicode-normalization = { workspace = true, features = [] }
-
-# Cryptography for built-in redaction operators
+# Encoding and hashing
+base64 = { workspace = true, features = [] }
 sha2 = { workspace = true, features = [] }
 aes-gcm = { workspace = true, features = ["aes", "alloc"], optional = true }
-base64 = { workspace = true, features = [] }
+
+# Async runtime and parallelism
+async-trait = { workspace = true, features = [] }
+tokio = { workspace = true, features = ["rt", "sync", "time", "macros"] }
 
 # Observability
 tracing = { workspace = true, features = [] }
 
+# Text processing (unicode-aware folding for leak detection)
+unicode-normalization = { workspace = true, features = [] }
+
 [dev-dependencies]
 # Internal test utilities (Entity::test_builder, …).
 nvisy-core = { workspace = true, features = ["test-utils"] }

From 682913cb2daea37dfdcaa944bff88f43d85f7f9a Mon Sep 17 00:00:00 2001
From: Oleh Martsokha <o.martsokha@gmail.com>
Date: Sun, 14 Jun 2026 14:07:12 +0200
Subject: [PATCH 03/14] style: cargo fmt

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
---
 crates/nvisy-engine/src/core/config.rs             |  3 +--
 crates/nvisy-engine/src/core/context.rs            |  2 +-
 crates/nvisy-engine/src/core/ingestion/exporter.rs |  4 ++--
 crates/nvisy-engine/src/core/ingestion/importer.rs |  6 +++---
 crates/nvisy-engine/src/detection/config/mod.rs    |  2 +-
 crates/nvisy-engine/src/detection/document.rs      |  5 ++---
 crates/nvisy-engine/src/detection/engine.rs        |  2 +-
 crates/nvisy-engine/src/detection/orchestrator.rs  |  5 ++---
 .../src/detection/phases/deduplication.rs          | 12 ++++++++----
 .../nvisy-engine/src/detection/phases/detection.rs |  5 ++---
 .../src/detection/phases/extraction.rs             |  4 ++--
 crates/nvisy-engine/src/detection/pipeline.rs      |  8 +++-----
 crates/nvisy-engine/src/detection/plan.rs          | 12 ++++++++----
 crates/nvisy-engine/src/detection/result.rs        |  2 +-
 crates/nvisy-engine/src/detection/state.rs         | 14 +++++++++++---
 crates/nvisy-engine/src/policy/audit.rs            |  3 ++-
 crates/nvisy-engine/src/policy/redaction/mod.rs    |  2 +-
 crates/nvisy-engine/src/policy/suppress.rs         |  3 ++-
 crates/nvisy-engine/src/redaction/document.rs      |  5 ++---
 crates/nvisy-engine/src/redaction/engine.rs        | 14 ++++----------
 crates/nvisy-engine/src/redaction/orchestrator.rs  |  5 ++---
 crates/nvisy-engine/src/redaction/phases/phase.rs  |  3 +--
 .../src/redaction/phases/validation.rs             |  3 +--
 crates/nvisy-engine/src/redaction/pipeline.rs      |  9 ++++-----
 crates/nvisy-engine/tests/redaction_policy.rs      |  5 ++++-
 .../nvisy-server/src/handler/request/detections.rs |  2 +-
 26 files changed, 72 insertions(+), 68 deletions(-)

diff --git a/crates/nvisy-engine/src/core/config.rs b/crates/nvisy-engine/src/core/config.rs
index a5918188..fcc4cffc 100644
--- a/crates/nvisy-engine/src/core/config.rs
+++ b/crates/nvisy-engine/src/core/config.rs
@@ -26,8 +26,7 @@
 use std::num::NonZeroUsize;
 use std::time::Duration;
 
-use nvisy_core::Error;
-use nvisy_core::Result;
+use nvisy_core::{Error, Result};
 use nvisy_llm::backend::http::HttpConfig;
 use semver::Version;
 use serde::{Deserialize, Serialize};
diff --git a/crates/nvisy-engine/src/core/context.rs b/crates/nvisy-engine/src/core/context.rs
index 865e8102..84391b2c 100644
--- a/crates/nvisy-engine/src/core/context.rs
+++ b/crates/nvisy-engine/src/core/context.rs
@@ -23,8 +23,8 @@ use nvisy_toolkit::extraction::ExtractorRegistry;
 use tokio_util::sync::CancellationToken;
 
 use super::SharedData;
-use crate::redaction::phases::RedactionRegistries;
 use crate::redaction::RedactionConfig;
+use crate::redaction::phases::RedactionRegistries;
 
 /// Shared surface every phase reads from regardless of which side
 /// (detection or redaction) it runs on. Implemented by both
diff --git a/crates/nvisy-engine/src/core/ingestion/exporter.rs b/crates/nvisy-engine/src/core/ingestion/exporter.rs
index 6bede14b..95b83940 100644
--- a/crates/nvisy-engine/src/core/ingestion/exporter.rs
+++ b/crates/nvisy-engine/src/core/ingestion/exporter.rs
@@ -12,11 +12,11 @@ use nvisy_codec::content::{Content, ContentData, ContentSource};
 use nvisy_core::Result;
 use uuid::Uuid;
 
-use crate::core::{AnyTree, DocumentTree, SharedData};
-use crate::modality::DocumentModality;
 use crate::core::ingestion::compression::CompressionService;
 use crate::core::ingestion::encryption::CryptoService;
 use crate::core::ingestion::{CompressionAlgorithm, EncryptionConfig};
+use crate::core::{AnyTree, DocumentTree, SharedData};
+use crate::modality::DocumentModality;
 
 const TARGET: &str = "nvisy_engine::op::export_file";
 
diff --git a/crates/nvisy-engine/src/core/ingestion/importer.rs b/crates/nvisy-engine/src/core/ingestion/importer.rs
index 321c3a05..0c6da15b 100644
--- a/crates/nvisy-engine/src/core/ingestion/importer.rs
+++ b/crates/nvisy-engine/src/core/ingestion/importer.rs
@@ -31,15 +31,15 @@ use nvisy_core::entity::{Annotation, LabelAnnotation};
 use nvisy_core::modality::{Audio, Image, Tabular, Text};
 use nvisy_core::{Error, Result};
 
+use crate::core::ingestion::compression::CompressionService;
+use crate::core::ingestion::encryption::{CryptoService, EncryptedContent};
+use crate::core::ingestion::{CompressionAlgorithm, EncryptionAlgorithm, EncryptionConfig};
 use crate::core::{AnyTree, DocumentTree, SharedData};
 use crate::document::{AnyAnnotations, Document};
 use crate::modality::{
     AudioExtraction, AudioMetadata, DocumentModality, ImageExtraction, ImageMetadata,
     TabularExtraction, TabularMetadata, TextExtraction, TextMetadata,
 };
-use crate::core::ingestion::compression::CompressionService;
-use crate::core::ingestion::encryption::{CryptoService, EncryptedContent};
-use crate::core::ingestion::{CompressionAlgorithm, EncryptionAlgorithm, EncryptionConfig};
 
 const TARGET: &str = "nvisy_engine::op::import_file";
 
diff --git a/crates/nvisy-engine/src/detection/config/mod.rs b/crates/nvisy-engine/src/detection/config/mod.rs
index f9c77164..9dcd3a14 100644
--- a/crates/nvisy-engine/src/detection/config/mod.rs
+++ b/crates/nvisy-engine/src/detection/config/mod.rs
@@ -15,9 +15,9 @@
 mod ner;
 mod pattern;
 
+use nvisy_context::{ContextEnhancer, ContextRegistry};
 #[cfg(not(feature = "bento"))]
 use nvisy_core::Error;
-use nvisy_context::{ContextEnhancer, ContextRegistry};
 use nvisy_core::Result;
 use nvisy_core::entity::EntityLabelCatalog;
 use nvisy_core::modality::Text;
diff --git a/crates/nvisy-engine/src/detection/document.rs b/crates/nvisy-engine/src/detection/document.rs
index 378906b8..ab9f2207 100644
--- a/crates/nvisy-engine/src/detection/document.rs
+++ b/crates/nvisy-engine/src/detection/document.rs
@@ -5,12 +5,11 @@
 use nvisy_core::modality::{Audio, Image, Tabular, Text};
 use nvisy_core::{Error, Result};
 
-use crate::core::PhaseContext as _;
-use crate::core::{DetectionContext, DocumentTree};
+use crate::core::{DetectionContext, DocumentTree, PhaseContext as _};
+use crate::detection::DetectionPlan;
 use crate::detection::phases::deduplication::DeduplicationPhase;
 use crate::detection::phases::detection::DetectionPhase;
 use crate::detection::phases::extraction::ExtractionPhase;
-use crate::detection::DetectionPlan;
 
 const TARGET: &str = "nvisy_engine::pipeline::detection::document";
 
diff --git a/crates/nvisy-engine/src/detection/engine.rs b/crates/nvisy-engine/src/detection/engine.rs
index a4da3d61..ab644e50 100644
--- a/crates/nvisy-engine/src/detection/engine.rs
+++ b/crates/nvisy-engine/src/detection/engine.rs
@@ -23,8 +23,8 @@ use super::result::DetectionResult;
 use super::state::DetectionState;
 use super::status::DetectionStatus;
 use super::{DetectionEntry, DetectionFilter, DetectionInput, DetectionSnapshot};
-use crate::core::ingestion::encryption::SharedKeyProvider;
 use crate::core::RuntimeConfig;
+use crate::core::ingestion::encryption::SharedKeyProvider;
 use crate::detection::{DetectionConfig, ExtractionConfig};
 use crate::registry::Registry;
 
diff --git a/crates/nvisy-engine/src/detection/orchestrator.rs b/crates/nvisy-engine/src/detection/orchestrator.rs
index 3764c8ea..75f366ad 100644
--- a/crates/nvisy-engine/src/detection/orchestrator.rs
+++ b/crates/nvisy-engine/src/detection/orchestrator.rs
@@ -8,11 +8,10 @@ use tokio::sync::Semaphore;
 use tokio::task::JoinSet;
 
 use super::document::DetectionDocumentPipeline;
-use crate::core::PhaseContext as _;
-use crate::core::{AnyTree, DetectionContext};
-use crate::document::provenance::AnyAudit;
 use crate::core::ingestion::{ImportFile, Importer};
+use crate::core::{AnyTree, DetectionContext, PhaseContext as _};
 use crate::detection::DetectionPlan;
+use crate::document::provenance::AnyAudit;
 
 const TARGET: &str = "nvisy_engine::pipeline::detection::orchestrator";
 
diff --git a/crates/nvisy-engine/src/detection/phases/deduplication.rs b/crates/nvisy-engine/src/detection/phases/deduplication.rs
index fb2f3d6c..8c3dd742 100644
--- a/crates/nvisy-engine/src/detection/phases/deduplication.rs
+++ b/crates/nvisy-engine/src/detection/phases/deduplication.rs
@@ -17,11 +17,10 @@ use nvisy_toolkit::deduplication::{LayerContext, LayerPipeline, SpanSize};
 use tracing::Instrument;
 use uuid::Uuid;
 
-use crate::core::PhaseContext as _;
-use crate::core::{DetectionContext, DocumentTree};
+use crate::core::{DetectionContext, DocumentTree, PhaseContext as _};
+use crate::detection::{DeduplicationParams, DetectionPlan};
 use crate::document::provenance::EntityRecord;
 use crate::modality::DocumentModality;
-use crate::detection::{DeduplicationParams, DetectionPlan};
 
 const TARGET: &str = "nvisy_engine::deduplication";
 
@@ -73,7 +72,12 @@ impl DeduplicationPhase {
         self.run(ctx, plan, tree).await
     }
 
-    async fn run<M>(&self, ctx: &DetectionContext, plan: &DetectionPlan, tree: &mut DocumentTree<M>) -> Result<()>
+    async fn run<M>(
+        &self,
+        ctx: &DetectionContext,
+        plan: &DetectionPlan,
+        tree: &mut DocumentTree<M>,
+    ) -> Result<()>
     where
         M: DocumentModality,
         M::Location: Overlap + SpanSize,
diff --git a/crates/nvisy-engine/src/detection/phases/detection.rs b/crates/nvisy-engine/src/detection/phases/detection.rs
index ef60bf51..9857308b 100644
--- a/crates/nvisy-engine/src/detection/phases/detection.rs
+++ b/crates/nvisy-engine/src/detection/phases/detection.rs
@@ -23,11 +23,10 @@ use nvisy_core::recognition::RecognizerInput;
 use nvisy_toolkit::detection::RecognizerRegistry;
 use tracing::Instrument;
 
-use crate::core::PhaseContext as _;
-use crate::core::{DetectionContext, DocumentTree};
+use crate::core::{DetectionContext, DocumentTree, PhaseContext as _};
+use crate::detection::DetectionPlan;
 use crate::document::{Document, Span};
 use crate::modality::{DocumentModality, ModalityBlock};
-use crate::detection::DetectionPlan;
 
 const TARGET: &str = "nvisy_engine::detection";
 
diff --git a/crates/nvisy-engine/src/detection/phases/extraction.rs b/crates/nvisy-engine/src/detection/phases/extraction.rs
index e75243e7..244cb2e8 100644
--- a/crates/nvisy-engine/src/detection/phases/extraction.rs
+++ b/crates/nvisy-engine/src/detection/phases/extraction.rs
@@ -27,10 +27,10 @@ use nvisy_ocr::types::RawOcrBlock;
 use nvisy_toolkit::extraction::{Extractor, ExtractorRegistry, ImageExtractorOutput};
 use tracing::Instrument;
 
-use crate::core::{DocumentTree, DetectionContext};
+use crate::core::{DetectionContext, DocumentTree};
+use crate::detection::{DetectionPlan, Extraction};
 use crate::document::{Block, Document, Span};
 use crate::modality::{ImageBlock, TabularBlock, TextBlock, TextContent};
-use crate::detection::{DetectionPlan, Extraction};
 
 const TARGET: &str = "nvisy_engine::extraction";
 
diff --git a/crates/nvisy-engine/src/detection/pipeline.rs b/crates/nvisy-engine/src/detection/pipeline.rs
index 52b7fcff..be2c0118 100644
--- a/crates/nvisy-engine/src/detection/pipeline.rs
+++ b/crates/nvisy-engine/src/detection/pipeline.rs
@@ -20,12 +20,11 @@ use super::orchestrator::DetectionOrchestrator;
 use super::result::DetectionResult;
 use super::state::{DetectionRecord, DetectionState};
 use super::status::DetectionStatus;
-use crate::core::{DetectionContext, DetectionEngines, PolicyStore, SharedData};
-use crate::document::provenance::AnyAudit;
 use crate::core::ingestion::ImportFile;
 use crate::core::ingestion::encryption::SharedKeyProvider;
-use crate::core::RuntimeConfig;
+use crate::core::{DetectionContext, DetectionEngines, PolicyStore, RuntimeConfig, SharedData};
 use crate::detection::DetectionConfig;
+use crate::document::provenance::AnyAudit;
 use crate::policy::{Policy, PolicyDigest};
 use crate::registry::Registry;
 
@@ -93,8 +92,7 @@ impl DetectionPipeline {
         input.validate_actions()?;
 
         let actor_id = input.actor_id;
-        let policy_digests: Vec<PolicyDigest> =
-            input.policies.iter().map(Policy::digest).collect();
+        let policy_digests: Vec<PolicyDigest> = input.policies.iter().map(Policy::digest).collect();
         let policies = Arc::new(PolicyStore::from_policies(input.policies));
 
         self.detections
diff --git a/crates/nvisy-engine/src/detection/plan.rs b/crates/nvisy-engine/src/detection/plan.rs
index 6dea187f..77777a81 100644
--- a/crates/nvisy-engine/src/detection/plan.rs
+++ b/crates/nvisy-engine/src/detection/plan.rs
@@ -58,20 +58,24 @@ pub struct Extraction {
 
 /// Text-modality plan knobs. No tunables today; reserved for future
 /// per-call settings (e.g. whitespace normalization).
-#[derive(Debug, Clone, Default, PartialEq, Eq, Serialize, Deserialize, JsonSchema)]
+#[derive(Debug, Clone, Default, PartialEq, Eq)]
+#[derive(Serialize, Deserialize, JsonSchema)]
 pub struct TextPlan {}
 
 /// Tabular-modality plan knobs. No tunables today.
-#[derive(Debug, Clone, Default, PartialEq, Eq, Serialize, Deserialize, JsonSchema)]
+#[derive(Debug, Clone, Default, PartialEq, Eq)]
+#[derive(Serialize, Deserialize, JsonSchema)]
 pub struct TabularPlan {}
 
 /// Image-modality plan knobs. No tunables today; reserved for
 /// future OCR tuning (e.g. language hint, page subset).
-#[derive(Debug, Clone, Default, PartialEq, Eq, Serialize, Deserialize, JsonSchema)]
+#[derive(Debug, Clone, Default, PartialEq, Eq)]
+#[derive(Serialize, Deserialize, JsonSchema)]
 pub struct ImagePlan {}
 
 /// Audio-modality plan knobs (speech-to-text).
-#[derive(Debug, Clone, Default, PartialEq, Eq, Serialize, Deserialize, JsonSchema)]
+#[derive(Debug, Clone, Default, PartialEq, Eq)]
+#[derive(Serialize, Deserialize, JsonSchema)]
 pub struct AudioPlan {
     /// Segment the audio by speaker identity.
     ///
diff --git a/crates/nvisy-engine/src/detection/result.rs b/crates/nvisy-engine/src/detection/result.rs
index 2b5e80c7..a2d832e7 100644
--- a/crates/nvisy-engine/src/detection/result.rs
+++ b/crates/nvisy-engine/src/detection/result.rs
@@ -7,8 +7,8 @@ use serde::{Deserialize, Serialize};
 use uuid::Uuid;
 
 use super::status::DetectionStatus;
-use crate::document::provenance::AnyAudit;
 use crate::core::ingestion::ImportFile;
+use crate::document::provenance::AnyAudit;
 use crate::policy::PolicyDigest;
 
 /// Immutable artifact produced by one detection pass.
diff --git a/crates/nvisy-engine/src/detection/state.rs b/crates/nvisy-engine/src/detection/state.rs
index 8f8169b4..4109a26c 100644
--- a/crates/nvisy-engine/src/detection/state.rs
+++ b/crates/nvisy-engine/src/detection/state.rs
@@ -17,8 +17,8 @@ use uuid::Uuid;
 use super::result::{DetectionEntry, DetectionFilter, DetectionResult, DetectionSnapshot};
 use super::status::DetectionStatus;
 use crate::core::PolicyStore;
-use crate::document::provenance::AnyAudit;
 use crate::core::ingestion::ImportFile;
+use crate::document::provenance::AnyAudit;
 use crate::policy::PolicyDigest;
 
 const TARGET: &str = "nvisy_engine::detection::state";
@@ -131,7 +131,11 @@ impl DetectionState {
     /// callers).
     ///
     /// [`ErrorKind::NotFound`]: nvisy_core::ErrorKind::NotFound
-    pub(crate) async fn snapshot(&self, actor_id: Uuid, id: Uuid) -> Result<DetectionSnapshot, Error> {
+    pub(crate) async fn snapshot(
+        &self,
+        actor_id: Uuid,
+        id: Uuid,
+    ) -> Result<DetectionSnapshot, Error> {
         let guard = self.inner.read().await;
         let Some(record) = guard.get(&id) else {
             return Err(Error::not_found(
@@ -206,7 +210,11 @@ impl DetectionState {
         })
     }
 
-    pub(crate) async fn list(&self, actor_id: Uuid, filter: DetectionFilter) -> Vec<DetectionEntry> {
+    pub(crate) async fn list(
+        &self,
+        actor_id: Uuid,
+        filter: DetectionFilter,
+    ) -> Vec<DetectionEntry> {
         let guard = self.inner.read().await;
         let mut out: Vec<DetectionEntry> = guard
             .iter()
diff --git a/crates/nvisy-engine/src/policy/audit.rs b/crates/nvisy-engine/src/policy/audit.rs
index 46d63096..85264fa0 100644
--- a/crates/nvisy-engine/src/policy/audit.rs
+++ b/crates/nvisy-engine/src/policy/audit.rs
@@ -15,7 +15,8 @@ use schemars::JsonSchema;
 use serde::{Deserialize, Serialize};
 
 /// Payload for the `audit` action.
-#[derive(Debug, Clone, Default, PartialEq, Eq, Serialize, Deserialize, JsonSchema)]
+#[derive(Debug, Clone, Default, PartialEq, Eq)]
+#[derive(Serialize, Deserialize, JsonSchema)]
 #[serde(rename_all = "camelCase")]
 pub struct AuditAction {
     /// Severity hint propagated into the audit entry — e.g.
diff --git a/crates/nvisy-engine/src/policy/redaction/mod.rs b/crates/nvisy-engine/src/policy/redaction/mod.rs
index 46ac8040..254e59f6 100644
--- a/crates/nvisy-engine/src/policy/redaction/mod.rs
+++ b/crates/nvisy-engine/src/policy/redaction/mod.rs
@@ -24,8 +24,8 @@ mod image;
 mod tabular;
 mod text;
 
-pub use nvisy_toolkit::redaction::anonymizer::HashAlgorithm;
 use nvisy_core::modality::{Audio, Image, Tabular, Text};
+pub use nvisy_toolkit::redaction::anonymizer::HashAlgorithm;
 use schemars::JsonSchema;
 use serde::{Deserialize, Serialize};
 
diff --git a/crates/nvisy-engine/src/policy/suppress.rs b/crates/nvisy-engine/src/policy/suppress.rs
index a6543e0a..99fa6209 100644
--- a/crates/nvisy-engine/src/policy/suppress.rs
+++ b/crates/nvisy-engine/src/policy/suppress.rs
@@ -11,7 +11,8 @@ use schemars::JsonSchema;
 use serde::{Deserialize, Serialize};
 
 /// Payload for the `suppress` action.
-#[derive(Debug, Clone, Default, PartialEq, Eq, Serialize, Deserialize, JsonSchema)]
+#[derive(Debug, Clone, Default, PartialEq, Eq)]
+#[derive(Serialize, Deserialize, JsonSchema)]
 #[serde(rename_all = "camelCase")]
 pub struct SuppressAction {
     /// Human-readable reason the entity is being suppressed. Surfaced
diff --git a/crates/nvisy-engine/src/redaction/document.rs b/crates/nvisy-engine/src/redaction/document.rs
index ef8acda7..f5c802bb 100644
--- a/crates/nvisy-engine/src/redaction/document.rs
+++ b/crates/nvisy-engine/src/redaction/document.rs
@@ -4,11 +4,10 @@
 use nvisy_core::modality::{Audio, Image, Tabular, Text};
 use nvisy_core::{Error, Result};
 
-use crate::core::PhaseContext as _;
-use crate::core::{DocumentTree, RedactionContext};
+use crate::core::{DocumentTree, PhaseContext as _, RedactionContext};
+use crate::redaction::RedactionPlan;
 use crate::redaction::phases::RedactionPhase;
 use crate::redaction::phases::validation::ValidationPhase;
-use crate::redaction::RedactionPlan;
 
 const TARGET: &str = "nvisy_engine::pipeline::redaction::document";
 
diff --git a/crates/nvisy-engine/src/redaction/engine.rs b/crates/nvisy-engine/src/redaction/engine.rs
index 836e580b..75ba09ba 100644
--- a/crates/nvisy-engine/src/redaction/engine.rs
+++ b/crates/nvisy-engine/src/redaction/engine.rs
@@ -26,11 +26,11 @@ use super::result::RedactionResult;
 use super::state::RedactionState;
 use super::status::RedactionStatus;
 use super::{RedactionEntry, RedactionFilter, RedactionInput, RedactionSnapshot};
-use crate::core::ingestion::encryption::SharedKeyProvider;
-use crate::redaction::phases::RedactionRegistries;
 use crate::core::RuntimeConfig;
-use crate::redaction::RedactionConfig;
+use crate::core::ingestion::encryption::SharedKeyProvider;
 use crate::detection::{DetectionEngine, DetectionState};
+use crate::redaction::RedactionConfig;
+use crate::redaction::phases::RedactionRegistries;
 use crate::registry::Registry;
 
 /// Shared inner state for a [`RedactionEngine`], held behind an
@@ -91,13 +91,7 @@ impl RedactionEngine {
     /// [`DetectionState`]: crate::detection::DetectionState
     /// [`shutdown`]: Self::shutdown
     pub fn from_detection(detection: &DetectionEngine) -> Self {
-        let redaction_config = Arc::new(
-            detection
-                .config()
-                .redaction
-                .clone()
-                .unwrap_or_default(),
-        );
+        let redaction_config = Arc::new(detection.config().redaction.clone().unwrap_or_default());
         Self {
             inner: Arc::new(RedactionInner {
                 runtime_config: detection.config().clone(),
diff --git a/crates/nvisy-engine/src/redaction/orchestrator.rs b/crates/nvisy-engine/src/redaction/orchestrator.rs
index 2a8a49dd..ceb52111 100644
--- a/crates/nvisy-engine/src/redaction/orchestrator.rs
+++ b/crates/nvisy-engine/src/redaction/orchestrator.rs
@@ -19,10 +19,9 @@ use tokio::sync::Semaphore;
 use tokio::task::JoinSet;
 
 use super::document::RedactionDocumentPipeline;
-use crate::core::PhaseContext as _;
-use crate::core::{AnyTree, RedactionContext};
-use crate::document::provenance::AnyAudit;
 use crate::core::ingestion::{ExportFile, Exporter, ImportFile, Importer};
+use crate::core::{AnyTree, PhaseContext as _, RedactionContext};
+use crate::document::provenance::AnyAudit;
 use crate::redaction::RedactionPlan;
 
 const TARGET: &str = "nvisy_engine::pipeline::redaction::orchestrator";
diff --git a/crates/nvisy-engine/src/redaction/phases/phase.rs b/crates/nvisy-engine/src/redaction/phases/phase.rs
index a52c1ceb..87fa286f 100644
--- a/crates/nvisy-engine/src/redaction/phases/phase.rs
+++ b/crates/nvisy-engine/src/redaction/phases/phase.rs
@@ -14,8 +14,7 @@
 use nvisy_core::Result;
 use nvisy_core::modality::{Audio, Image, Tabular, Text};
 
-use crate::core::PhaseContext as _;
-use crate::core::{DocumentTree, RedactionContext};
+use crate::core::{DocumentTree, PhaseContext as _, RedactionContext};
 use crate::redaction::phases::registries::RedactionRegistries;
 use crate::redaction::phases::run_redaction;
 use crate::redaction::{RedactionConfig, RedactionPlan};
diff --git a/crates/nvisy-engine/src/redaction/phases/validation.rs b/crates/nvisy-engine/src/redaction/phases/validation.rs
index ccccc9ae..4baa3457 100644
--- a/crates/nvisy-engine/src/redaction/phases/validation.rs
+++ b/crates/nvisy-engine/src/redaction/phases/validation.rs
@@ -20,8 +20,7 @@ use nvisy_toolkit::validation::{
 };
 use tracing::Instrument;
 
-use crate::core::PhaseContext as _;
-use crate::core::{DocumentTree, RedactionContext};
+use crate::core::{DocumentTree, PhaseContext as _, RedactionContext};
 use crate::document::Document;
 use crate::modality::DocumentModality;
 use crate::redaction::RedactionPlan;
diff --git a/crates/nvisy-engine/src/redaction/pipeline.rs b/crates/nvisy-engine/src/redaction/pipeline.rs
index df1cb389..15c0ebd6 100644
--- a/crates/nvisy-engine/src/redaction/pipeline.rs
+++ b/crates/nvisy-engine/src/redaction/pipeline.rs
@@ -14,13 +14,12 @@ use super::orchestrator::RedactionOrchestrator;
 use super::result::RedactionResult;
 use super::state::{RedactionRecord, RedactionState};
 use super::status::RedactionStatus;
-use crate::core::{RedactionContext, RedactionEngines, SharedData};
-use crate::document::provenance::AnyAudit;
 use crate::core::ingestion::encryption::SharedKeyProvider;
-use crate::redaction::phases::RedactionRegistries;
-use crate::redaction::RedactionConfig;
-use crate::core::RuntimeConfig;
+use crate::core::{RedactionContext, RedactionEngines, RuntimeConfig, SharedData};
 use crate::detection::DetectionState;
+use crate::document::provenance::AnyAudit;
+use crate::redaction::RedactionConfig;
+use crate::redaction::phases::RedactionRegistries;
 use crate::registry::Registry;
 
 const TARGET: &str = "nvisy_engine::pipeline::redaction::pipeline";
diff --git a/crates/nvisy-engine/tests/redaction_policy.rs b/crates/nvisy-engine/tests/redaction_policy.rs
index b8f3cf6b..1e72ab7e 100644
--- a/crates/nvisy-engine/tests/redaction_policy.rs
+++ b/crates/nvisy-engine/tests/redaction_policy.rs
@@ -118,7 +118,10 @@ fn action_redact_round_trips_through_json() {
         ..Default::default()
     });
     let json = serde_json::to_string(&action).expect("serialize");
-    assert!(json.contains("\"redact\""), "expected redact tag, got {json}");
+    assert!(
+        json.contains("\"redact\""),
+        "expected redact tag, got {json}"
+    );
     assert!(
         json.contains("\"text\""),
         "expected text operator, got {json}"
diff --git a/crates/nvisy-server/src/handler/request/detections.rs b/crates/nvisy-server/src/handler/request/detections.rs
index e68b05eb..379c3f04 100644
--- a/crates/nvisy-server/src/handler/request/detections.rs
+++ b/crates/nvisy-server/src/handler/request/detections.rs
@@ -13,7 +13,7 @@ use crate::handler::request::pagination::Pagination;
 #[derive(Debug, Deserialize, JsonSchema)]
 #[serde(rename_all = "camelCase")]
 pub struct NewDetection {
-    /// Policies to apply, in precedence order 
+    /// Policies to apply, in precedence order
     /// (index 0 is highest precedence).
     pub policies: Vec<Policy>,
     /// Content sources to ingest at the start of the pass.

From 111bbac0819834141943027dcfc8c460fbc510b7 Mon Sep 17 00:00:00 2001
From: Oleh Martsokha <o.martsokha@gmail.com>
Date: Sun, 14 Jun 2026 21:26:54 +0200
Subject: [PATCH 04/14] =?UTF-8?q?refactor(pattern,context):=20rename=20Pat?=
 =?UTF-8?q?tern=E2=86=92Regex,=20inline=20registries,=20normalize=20docs?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- nvisy-pattern: rename Detector→Pattern→Regex; inline PatternRegistry
  into PatternRecognizerBuilder; split CompiledPattern out; export
  built-in validators by bare-noun names (luhn, iban, ssn, phone, date);
  add Scoring::get + per-column resolution; convert pattern assets to
  TOML; normalize module/function docs (returns-form for predicates,
  reference-form doc-links, # Errors + code examples for public types).
- nvisy-context: extract registry/declaration into rule + wrapper;
  trim enhancer/matcher/tokens surface.
- nvisy-toolkit: drop stale PatternRegistry usage in pipeline example;
  fix broken rustdoc links in redaction module.
- nvisy-engine, nvisy-ner: knock-on updates for the new pattern and
  context surfaces.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
---
 Cargo.lock                                    |  10 +-
 crates/nvisy-context/Cargo.toml               |  14 +-
 crates/nvisy-context/src/declaration.rs       |  89 ---
 crates/nvisy-context/src/enhancer.rs          | 718 ++++++++++++------
 crates/nvisy-context/src/lib.rs               |  10 +-
 crates/nvisy-context/src/matcher.rs           | 117 ++-
 crates/nvisy-context/src/registry.rs          | 122 ---
 crates/nvisy-context/src/rule.rs              | 140 ++++
 crates/nvisy-context/src/tokens.rs            | 151 +---
 crates/nvisy-context/src/wrapper.rs           |  77 ++
 crates/nvisy-engine/Cargo.toml                |   1 -
 crates/nvisy-engine/src/core/context.rs       |  17 -
 .../nvisy-engine/src/detection/config/mod.rs  |  57 +-
 crates/nvisy-engine/src/detection/document.rs |   5 +-
 crates/nvisy-engine/src/detection/mod.rs      |   4 +-
 .../src/detection/phases/detection.rs         |  30 +-
 crates/nvisy-engine/src/detection/pipeline.rs |   5 +-
 crates/nvisy-ner/Cargo.toml                   |   1 -
 crates/nvisy-ner/src/nlp/capabilities.rs      |   2 +-
 crates/nvisy-ner/src/nlp/engine.rs            |  22 +-
 crates/nvisy-ner/src/nlp/mod.rs               |  18 +-
 crates/nvisy-ner/src/recognition/config.rs    |  12 -
 .../nvisy-ner/src/recognition/recognizer.rs   |  27 +-
 crates/nvisy-pattern/Cargo.toml               |   1 -
 crates/nvisy-pattern/README.md                |  41 +-
 .../dictionaries/general/languages.toml       |  15 +-
 .../assets/patterns/contact/email.toml        |   2 +
 .../assets/patterns/contact/phone.toml        |   7 +-
 .../assets/patterns/contact/url.toml          |   2 +
 .../assets/patterns/credentials/aws_key.toml  |   2 +
 .../patterns/credentials/generic_api_key.toml |   2 +
 .../patterns/credentials/github_token.toml    |   2 +
 .../patterns/credentials/private_key.toml     |   2 +
 .../patterns/credentials/stripe_key.toml      |   2 +
 .../patterns/finance/bitcoin_address.toml     |   2 +
 .../assets/patterns/finance/credit_card.toml  |   6 +-
 .../patterns/finance/ethereum_address.toml    |   2 +
 .../assets/patterns/finance/iban.toml         |   6 +-
 .../assets/patterns/finance/swift_code.toml   |   2 +
 .../patterns/finance/us_bank_routing.toml     |   2 +
 .../assets/patterns/identity/ssn.toml         |   6 +-
 .../patterns/identity/us_drivers_license.toml |   2 +
 .../assets/patterns/identity/us_passport.toml |   2 +
 .../patterns/identity/us_postal_code.toml     |   2 +
 .../assets/patterns/network/ipv4.toml         |   2 +
 .../assets/patterns/network/ipv6.toml         |   2 +
 .../assets/patterns/network/mac_address.toml  |   2 +
 .../patterns/personal/date_of_birth.toml      |   7 +-
 .../assets/patterns/personal/datetime.toml    |   6 +-
 crates/nvisy-pattern/src/lib.rs               |   4 +-
 .../nvisy-pattern/src/recognition/compiled.rs | 166 ++++
 .../src/recognition/dictionary.rs             | 174 +++--
 crates/nvisy-pattern/src/recognition/mod.rs   |  25 +-
 .../src/recognition/recognizer.rs             | 485 ++++++------
 crates/nvisy-pattern/src/recognition/regex.rs | 149 ++++
 .../src/recognition/regex_rule.rs             | 107 ---
 .../nvisy-pattern/src/recognition/registry.rs | 167 ----
 crates/nvisy-pattern/src/recognition/terms.rs | 163 ++--
 crates/nvisy-pattern/src/shipped/mod.rs       |   8 +-
 crates/nvisy-pattern/src/shipped/patterns.rs  |   2 +-
 crates/nvisy-pattern/src/validators/date.rs   |  41 +-
 crates/nvisy-pattern/src/validators/iban.rs   |  22 +-
 crates/nvisy-pattern/src/validators/luhn.rs   |  26 +-
 crates/nvisy-pattern/src/validators/mod.rs    |  44 +-
 crates/nvisy-pattern/src/validators/phone.rs  |  33 +-
 crates/nvisy-pattern/src/validators/ssn.rs    |  28 +-
 .../testdata/patterns/employee_id.toml        |   2 +
 .../testdata/patterns/product_codes.toml      |   2 +
 .../nvisy-pattern/tests/enhancer_roundtrip.rs |  80 +-
 .../nvisy-pattern/tests/shipped_detection.rs  |  15 +-
 crates/nvisy-pattern/tests/user_rules.rs      |  28 +-
 crates/nvisy-toolkit/Cargo.toml               |   2 +
 crates/nvisy-toolkit/examples/pipeline.rs     |   5 +-
 .../src/redaction/deanonymizer/mod.rs         |   3 +-
 crates/nvisy-toolkit/src/redaction/mod.rs     |  10 +-
 .../tests/fixtures/registries.rs              |  11 +-
 .../tests/recognition_registry.rs             |   7 +-
 77 files changed, 1891 insertions(+), 1696 deletions(-)
 delete mode 100644 crates/nvisy-context/src/declaration.rs
 delete mode 100644 crates/nvisy-context/src/registry.rs
 create mode 100644 crates/nvisy-context/src/rule.rs
 create mode 100644 crates/nvisy-context/src/wrapper.rs
 create mode 100644 crates/nvisy-pattern/src/recognition/compiled.rs
 create mode 100644 crates/nvisy-pattern/src/recognition/regex.rs
 delete mode 100644 crates/nvisy-pattern/src/recognition/regex_rule.rs
 delete mode 100644 crates/nvisy-pattern/src/recognition/registry.rs

diff --git a/Cargo.lock b/Cargo.lock
index bfed20b0..5d31d114 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -2894,12 +2894,10 @@ dependencies = [
 name = "nvisy-context"
 version = "0.1.0"
 dependencies = [
- "derive_builder",
+ "async-trait",
  "hipstr",
  "nvisy-core",
- "schemars",
- "serde",
- "thiserror",
+ "unicode-segmentation",
 ]
 
 [[package]]
@@ -2938,7 +2936,6 @@ dependencies = [
  "humantime-serde",
  "jiff",
  "nvisy-codec",
- "nvisy-context",
  "nvisy-core",
  "nvisy-engine",
  "nvisy-llm",
@@ -3007,7 +3004,6 @@ dependencies = [
  "bentoml",
  "derive_builder",
  "lingua",
- "nvisy-context",
  "nvisy-core",
  "serde",
  "tokio",
@@ -3041,7 +3037,6 @@ dependencies = [
  "nvisy-context",
  "nvisy-core",
  "regex",
- "schemars",
  "serde",
  "tokio",
  "toml",
@@ -3087,6 +3082,7 @@ dependencies = [
  "async-trait",
  "base64",
  "nvisy-codec",
+ "nvisy-context",
  "nvisy-core",
  "nvisy-fake",
  "nvisy-llm",
diff --git a/crates/nvisy-context/Cargo.toml b/crates/nvisy-context/Cargo.toml
index 8c53f2d1..3f564243 100644
--- a/crates/nvisy-context/Cargo.toml
+++ b/crates/nvisy-context/Cargo.toml
@@ -26,16 +26,14 @@ rustdoc-args = ["--cfg", "docsrs"]
 # Internal crates
 nvisy-core = { workspace = true, features = [] }
 
-# Serialization
-serde = { workspace = true, features = [] }
-schemars = { workspace = true, features = [] }
-
-# Derive macros and error handling
-derive_builder = { workspace = true, features = [] }
-thiserror = { workspace = true, features = [] }
-
 # Primitive datatypes (cheap-clone surface form on `Token`)
 hipstr = { workspace = true, features = [] }
 
+# Async runtime and parallelism
+async-trait = { workspace = true, features = [] }
+
+# Text processing (word-window walk for the substring fallback)
+unicode-segmentation = { workspace = true, features = [] }
+
 [dev-dependencies]
 nvisy-core = { workspace = true, features = ["test-utils"] }
diff --git a/crates/nvisy-context/src/declaration.rs b/crates/nvisy-context/src/declaration.rs
deleted file mode 100644
index 5984183c..00000000
--- a/crates/nvisy-context/src/declaration.rs
+++ /dev/null
@@ -1,89 +0,0 @@
-//! [`Context`]: per-source keyword-boost declaration.
-//!
-//! Carried by anything that declares context — per-rule for
-//! patterns (each `Regex`/`Dictionary` may declare one),
-//! per-recognizer for NER (a single `default_context` on
-//! `NerRecognizer`). The shape is identical regardless of who
-//! registers it; the difference is only *what name* gets stored
-//! against it in the [`ContextRegistry`].
-//!
-//! [`ContextRegistry`]: super::ContextRegistry
-//!
-//! `window` and `boost` are `Option<_>` so the common case is "I
-//! have keywords; use the enhancer's defaults." Override only when
-//! the source needs a different policy than the enhancer's global
-//! settings.
-
-use schemars::JsonSchema;
-use serde::{Deserialize, Serialize};
-
-/// Per-source context-boost declaration.
-///
-/// Anything that wants to participate in post-recognition keyword
-/// boosting registers one of these against its name in a
-/// [`ContextRegistry`].
-///
-/// [`ContextRegistry`]: super::ContextRegistry
-#[derive(Debug, Clone, PartialEq, Default, Serialize, Deserialize, JsonSchema)]
-#[serde(rename_all = "camelCase")]
-pub struct Context {
-    /// Keywords whose presence near a match boosts the entity's
-    /// confidence. Empty list means "registered, but no boost
-    /// possible" — the enhancer skips this source.
-    pub keywords: Vec<String>,
-    /// Override of the enhancer's default window (in bytes on each
-    /// side of the match). `None` defers to the enhancer's
-    /// configured default.
-    #[serde(default, skip_serializing_if = "Option::is_none")]
-    pub window: Option<usize>,
-    /// Override of the enhancer's default additive boost. `None`
-    /// defers to the enhancer's configured default.
-    #[serde(default, skip_serializing_if = "Option::is_none")]
-    pub boost: Option<f64>,
-}
-
-impl Context {
-    /// Construct with a keyword list. Window and boost default to
-    /// `None` (use the enhancer's defaults).
-    #[must_use]
-    pub fn new(keywords: impl IntoIterator<Item = impl Into<String>>) -> Self {
-        Self {
-            keywords: keywords.into_iter().map(Into::into).collect(),
-            window: None,
-            boost: None,
-        }
-    }
-
-    /// Override the enhancer's window setting for this source.
-    #[must_use]
-    pub fn with_window(mut self, window: usize) -> Self {
-        self.window = Some(window);
-        self
-    }
-
-    /// Override the enhancer's boost setting for this source.
-    #[must_use]
-    pub fn with_boost(mut self, boost: f64) -> Self {
-        self.boost = Some(boost);
-        self
-    }
-
-    /// Whether this context carries no boost-eligible keywords.
-    /// Empty contexts are skipped by the enhancer.
-    #[must_use]
-    pub fn is_empty(&self) -> bool {
-        self.keywords.is_empty()
-    }
-}
-
-impl<S: Into<String>> From<Vec<S>> for Context {
-    fn from(keywords: Vec<S>) -> Self {
-        Self::new(keywords)
-    }
-}
-
-impl<const N: usize> From<[&str; N]> for Context {
-    fn from(keywords: [&str; N]) -> Self {
-        Self::new(keywords)
-    }
-}
diff --git a/crates/nvisy-context/src/enhancer.rs b/crates/nvisy-context/src/enhancer.rs
index ab406d2d..f1eba2df 100644
--- a/crates/nvisy-context/src/enhancer.rs
+++ b/crates/nvisy-context/src/enhancer.rs
@@ -1,181 +1,191 @@
-//! [`ContextEnhancer`]: post-recognition keyword-boost pass for
-//! any [`Entity<Text>`] regardless of which recognizer produced it.
+//! [`Enhancer`]: post-recognition keyword-boost pass for any
+//! [`Entity<Text>`] regardless of which recognizer produced it.
 
-use derive_builder::{Builder, UninitializedFieldError};
-use nvisy_core::entity::{Entity, TrailStep};
-use nvisy_core::extraction::Artifacts;
+use std::collections::HashMap;
+
+use nvisy_core::entity::{Entity, EntityLabelRef, TrailStep};
 use nvisy_core::modality::Text;
-use nvisy_core::primitive::Confidence;
+use unicode_segmentation::UnicodeSegmentation;
+
+use super::matcher::KeywordMatcher;
+use super::rule::BoostRule;
+use super::tokens::Token;
 
-use super::Tokens;
-use super::matcher::{KeywordMatcher, SubstringMatcher};
-use super::registry::ContextRegistry;
+/// Source name stamped onto every refinement [`TrailStep`] the
+/// enhancer appends.
+const TRAIL_SOURCE: &str = "context";
 
-/// Post-recognition enhancer that boosts entity confidence when
-/// keywords declared by the source recognizer appear near the match.
+/// Post-recognition enhancer. Holds a label-keyed [`BoostRule`]
+/// map plus the keyword-matching strategy, and lifts the
+/// confidence of each text entity whose label has a rule and
+/// whose surrounding word window contains one of the rule's
+/// keywords.
 ///
-/// Construct via [`builder`]. The two required
-/// settings are [`default_window`] (in source-text bytes on each
-/// side of the match) and [`default_boost`] (the additive bump
-/// applied when a keyword fires). Per-source overrides on
-/// [`Context::window`] / [`Context::boost`] take precedence.
+/// Construct via [`Enhancer::new`]. Rules are passed in by value;
+/// duplicates for the same label are merged via
+/// [`BoostRule::merge`] (union of keywords; window radii / `boost`
+/// kept from the first-seen rule).
 ///
-/// The matcher strategy defaults to [`SubstringMatcher`] when not
-/// supplied. Wire [`LemmaMatcher`] instead when an upstream
-/// `NlpEngine` populates [`Tokens`] in `RecognizerInput.artifacts` and you
-/// want morphological-variant boosting.
+/// The matcher defaults are picked by the engine that constructs
+/// the enhancer: [`SubstringMatcher`] when no upstream NLP engine
+/// produces tokens, [`LemmaMatcher`] when one does.
 ///
-/// [`builder`]: Self::builder
-/// [`Context::window`]: super::Context::window
-/// [`Context::boost`]: super::Context::boost
-/// [`default_window`]: ContextEnhancerBuilder::with_default_window
-/// [`default_boost`]: ContextEnhancerBuilder::with_default_boost
+/// [`SubstringMatcher`]: super::SubstringMatcher
 /// [`LemmaMatcher`]: super::LemmaMatcher
-/// [`Tokens`]: super::Tokens
-#[derive(Builder)]
-#[builder(
-    name = "ContextEnhancerBuilder",
-    pattern = "owned",
-    setter(prefix = "with"),
-    build_fn(error = "ContextEnhancerBuilderError")
-)]
-pub struct ContextEnhancer {
-    /// Lookup table built at construction time. The enhancer reads
-    /// the source-recognizer / rule name off the entity's first
-    /// recognition step and looks it up here to find the declared
-    /// [`Context`].
-    ///
-    /// [`Context`]: super::Context
-    #[builder(setter(custom))]
-    registry: ContextRegistry,
-    /// Keyword-matching strategy (substring, lemma, custom).
-    /// Defaults to [`SubstringMatcher`] when omitted.
-    #[builder(
-        setter(custom),
-        default = "Box::new(SubstringMatcher) as Box<dyn KeywordMatcher>"
-    )]
+pub struct Enhancer {
+    rules: HashMap<EntityLabelRef, BoostRule>,
     matcher: Box<dyn KeywordMatcher>,
-    /// Default window radius (in source-text bytes on each side of
-    /// the match). Per-source [`Context::window`] overrides this.
-    ///
-    /// [`Context::window`]: super::Context::window
-    default_window: usize,
-    /// Default additive boost applied when a keyword fires.
-    /// Per-source [`Context::boost`] overrides this.
-    ///
-    /// [`Context::boost`]: super::Context::boost
-    default_boost: f64,
 }
 
-impl ContextEnhancer {
-    /// Start building a `ContextEnhancer`. Required:
-    /// [`with_registry`],
-    /// [`with_default_window`],
-    /// [`with_default_boost`].
+impl Enhancer {
+    /// Construct from a rule iterator and matcher. Rules sharing
+    /// the same label are merged via [`BoostRule::merge`].
+    pub fn new(
+        rules: impl IntoIterator<Item = BoostRule>,
+        matcher: Box<dyn KeywordMatcher>,
+    ) -> Self {
+        let mut map: HashMap<EntityLabelRef, BoostRule> = HashMap::new();
+        for rule in rules {
+            match map.get_mut(&rule.label) {
+                Some(existing) => existing.merge(rule),
+                None => {
+                    map.insert(rule.label.clone(), rule);
+                }
+            }
+        }
+        Self {
+            rules: map,
+            matcher,
+        }
+    }
+
+    /// `true` when no rules are registered. Engine code uses this
+    /// to short-circuit calls to [`enhance`] entirely.
     ///
-    /// [`with_registry`]: ContextEnhancerBuilder::with_registry
-    /// [`with_default_window`]: ContextEnhancerBuilder::with_default_window
-    /// [`with_default_boost`]: ContextEnhancerBuilder::with_default_boost
+    /// [`enhance`]: Self::enhance
     #[must_use]
-    pub fn builder() -> ContextEnhancerBuilder {
-        ContextEnhancerBuilder::default()
+    pub fn is_empty(&self) -> bool {
+        self.rules.is_empty()
     }
 
-    /// Borrow the underlying registry. Useful for diagnostics and
-    /// for engine code that wants to short-circuit when there are
-    /// no entries to boost against.
+    /// Number of distinct labels with rules.
     #[must_use]
-    pub fn registry(&self) -> &ContextRegistry {
-        &self.registry
+    pub fn len(&self) -> usize {
+        self.rules.len()
     }
 
-    /// Apply context-keyword boosting to `entities` in place.
-    ///
-    /// For each entity, looks at its first recognition step's
-    /// provenance to identify the source name, looks the name up
-    /// in the [`ContextRegistry`], walks the surrounding window
-    /// (token-based when [`Tokens`] are present in `artifacts` and
-    /// the matcher uses tokens, substring-based otherwise), and
-    /// bumps the confidence by the configured boost — capped at
-    /// `1.0`. A [`Refinement`] step is appended to the trail, and
-    /// the recognition step's `contextual` flag is set.
+    /// Apply boost rules to `entities` in place. For each entity:
+    /// look up the rule for its label, walk a window of
+    /// `prefix_words` words before and `suffix_words` words after
+    /// the entity's location, ask the matcher whether any keyword
+    /// fires, and on a hit lift confidence by the rule's `boost`
+    /// (saturating at the [`Confidence`] ceiling) plus append a
+    /// [`Refinement`] trail step.
     ///
-    /// Entities whose source isn't in the registry (or whose
-    /// declared context has an empty keyword list) pass through
-    /// unchanged.
+    /// `tokens` is the optional token artifact produced by an
+    /// upstream NLP engine. When present, words are counted
+    /// against the token stream; when absent, words are derived
+    /// from the source text via Unicode word segmentation.
     ///
+    /// [`Confidence`]: nvisy_core::primitive::Confidence
     /// [`Refinement`]: nvisy_core::entity::TrailStepKind::Refinement
-    pub fn enhance(&self, entities: &mut [Entity<Text>], text: &str, artifacts: &Artifacts) {
-        for entity in entities.iter_mut() {
-            self.enhance_one(entity, text, artifacts);
+    pub fn enhance(&self, entities: &mut [Entity<Text>], text: &str, tokens: Option<&[Token]>) {
+        if self.rules.is_empty() {
+            return;
+        }
+        for entity in entities {
+            self.enhance_one(entity, text, tokens);
         }
     }
 
-    fn enhance_one(&self, entity: &mut Entity<Text>, text: &str, artifacts: &Artifacts) {
-        let Some(name) = entity
-            .trail
-            .first()
-            .and_then(|s| s.provenance.name())
-            .map(str::to_owned)
-        else {
-            return;
-        };
-        let Some(ctx) = self.registry.get(&name) else {
+    fn enhance_one(&self, entity: &mut Entity<Text>, text: &str, tokens: Option<&[Token]>) {
+        let Some(rule) = self.rules.get(&entity.label) else {
             return;
         };
-        if ctx.keywords.is_empty() {
+        if rule.keywords.is_empty() {
             return;
         }
-        let window = ctx.window.unwrap_or(self.default_window);
-        let boost = ctx.boost.unwrap_or(self.default_boost);
 
         let start = entity.location.start;
         let end = entity.location.end;
-        let snippet = window_around(text, start, end, window);
-        let tokens_in_window = artifacts
-            .get::<Tokens>()
-            .map(|t| t.around(start..end, window));
-        // The matcher reads tokens by reference; wrap the in-window
-        // slice into a temporary owning `Tokens` only when one is
-        // present.
-        let owned_tokens;
-        let tokens_arg = match tokens_in_window {
-            Some(slice) if !slice.is_empty() => {
-                owned_tokens = Tokens::new(slice.to_vec());
-                Some(&owned_tokens)
-            }
-            _ => None,
+
+        // Prefer the token stream when the producer reached this
+        // entity. Fall back to the word-segmented substring window
+        // whenever the token slice would be empty — that covers
+        // `tokens: None`, `tokens: Some(&[])`, and the "tokens
+        // present but none overlap the entity" case (e.g. NLP
+        // engine only tokenized part of the document).
+        let token_slice = tokens
+            .map(|toks| slice_tokens_around(toks, start, end, rule.prefix_words, rule.suffix_words))
+            .unwrap_or(&[]);
+        let (snippet, tokens_in_window): (&str, &[Token]) = if token_slice.is_empty() {
+            let snippet = word_window(text, start, end, rule.prefix_words, rule.suffix_words);
+            (snippet, &[])
+        } else {
+            let snippet = token_span(text, token_slice, start, end);
+            (snippet, token_slice)
         };
-        if !self.matcher.any_match(snippet, tokens_arg, &ctx.keywords) {
+
+        if !self
+            .matcher
+            .any_match(snippet, tokens_in_window, &rule.keywords)
+        {
             return;
         }
 
         let original = entity.confidence;
-        let adjusted_raw = (original.get() + boost).clamp(0.0, 1.0);
-        let Some(adjusted) = Confidence::new(adjusted_raw) else {
+        let adjusted = original.saturating_add(rule.boost.get());
+        if adjusted == original {
             return;
-        };
-        entity.confidence = adjusted;
-
-        if let Some(step) = entity.trail.first_mut() {
-            step.provenance.mark_contextual();
         }
+        entity.confidence = adjusted;
 
         entity.trail.push(TrailStep::refinement(
-            "context-enhancer",
+            TRAIL_SOURCE,
             original,
             adjusted,
-            format!("context keyword near `{name}` (+{boost})"),
+            format!(
+                "context keyword near `{}` (+{:.3})",
+                entity.label.as_str(),
+                rule.boost.get(),
+            ),
         ));
     }
 }
 
-/// Borrow a `window`-radius slice of `text` centered on the entity
-/// location, clamped to the string bounds and snapped to UTF-8
-/// character boundaries.
-fn window_around(text: &str, start: usize, end: usize, window: usize) -> &str {
-    let lo = floor_char_boundary(text, start.saturating_sub(window));
-    let hi = ceil_char_boundary(text, end.saturating_add(window).min(text.len()));
+/// Walk `prefix` words before `[start, end)` and `suffix` words
+/// after, via Unicode word segmentation, and return the spanning
+/// substring (including any non-word whitespace and punctuation
+/// between words). Offsets past the end of `text` are clamped and
+/// snapped to char boundaries *before* slicing, so an out-of-range,
+/// mid-code-point, or inverted entity location cannot panic here.
+fn word_window(text: &str, start: usize, end: usize, prefix: usize, suffix: usize) -> &str {
+    let start = floor_char_boundary(text, start.min(text.len()));
+    let end = ceil_char_boundary(text, end.min(text.len())).max(start);
+
+    // `unicode_word_indices` yields `(byte_offset, word_str)` for
+    // every "word" (alphanumeric run) in source order. Take the
+    // last `prefix` on the prefix side, the first `suffix` on the
+    // suffix side, and compute the spanning byte range.
+    let prefix_words: Vec<(usize, &str)> = text[..start].unicode_word_indices().collect();
+    let prefix_take = prefix_words.len().saturating_sub(prefix);
+    let prefix_byte = prefix_words
+        .get(prefix_take)
+        .map(|(idx, _)| *idx)
+        .unwrap_or(start);
+
+    let suffix_byte = if suffix == 0 {
+        end
+    } else {
+        text[end..]
+            .unicode_word_indices()
+            .nth(suffix - 1)
+            .map(|(idx, word)| end + idx + word.len())
+            .unwrap_or(text.len())
+    };
+
+    let lo = floor_char_boundary(text, prefix_byte);
+    let hi = ceil_char_boundary(text, suffix_byte.min(text.len()));
     &text[lo..hi]
 }
 
@@ -193,171 +203,389 @@ fn ceil_char_boundary(s: &str, mut pos: usize) -> usize {
     pos
 }
 
-impl ContextEnhancerBuilder {
-    /// Attach the [`ContextRegistry`] the enhancer reads at boost
-    /// time. Required.
-    #[must_use]
-    pub fn with_registry(mut self, registry: ContextRegistry) -> Self {
-        self.registry = Some(registry);
-        self
+/// Slice tokens by *count*: take `prefix` tokens before the first
+/// token overlapping `[start, end)` and `suffix` tokens after the
+/// last. The returned slice is contiguous.
+fn slice_tokens_around(
+    tokens: &[Token],
+    start: usize,
+    end: usize,
+    prefix: usize,
+    suffix: usize,
+) -> &[Token] {
+    if tokens.is_empty() {
+        return &[];
     }
-
-    /// Override the keyword-matching strategy. Defaults to
-    /// [`SubstringMatcher`].
-    #[must_use]
-    pub fn with_matcher<M: KeywordMatcher + 'static>(mut self, matcher: M) -> Self {
-        self.matcher = Some(Box::new(matcher));
-        self
+    // First token whose `offset.end > start` overlaps or follows the entity.
+    let first_overlap = tokens.partition_point(|t| t.offset.end <= start);
+    // One past the last token whose `offset.start < end` overlaps or precedes the entity.
+    let last_overlap = tokens.partition_point(|t| t.offset.start < end);
+    let lo = first_overlap.saturating_sub(prefix);
+    let hi = (last_overlap + suffix).min(tokens.len());
+    if lo >= hi {
+        return &[];
     }
+    &tokens[lo..hi]
 }
 
-/// Error returned by [`ContextEnhancerBuilder::build`].
-#[derive(Debug, thiserror::Error)]
-#[error("context enhancer build failed: {0}")]
-pub struct ContextEnhancerBuilderError(String);
-
-impl From<UninitializedFieldError> for ContextEnhancerBuilderError {
-    fn from(err: UninitializedFieldError) -> Self {
-        Self(format!("missing field `{}`", err.field_name()))
-    }
+/// Spanning substring covering `tokens` plus the entity itself.
+/// Used to give the matcher a contiguous text window when slicing
+/// against the token stream.
+///
+/// Precondition: `tokens` is non-empty. Callers must take the
+/// `word_window` fallback path when their token slice is empty —
+/// see `Enhancer::enhance_one`.
+fn token_span<'a>(text: &'a str, tokens: &[Token], start: usize, end: usize) -> &'a str {
+    debug_assert!(!tokens.is_empty(), "token_span requires non-empty slice");
+    let lo = tokens[0].offset.start.min(start);
+    let hi = tokens[tokens.len() - 1].offset.end.max(end);
+    let lo = floor_char_boundary(text, lo.min(text.len()));
+    let hi = ceil_char_boundary(text, hi.min(text.len()));
+    &text[lo..hi]
 }
 
 #[cfg(test)]
 mod tests {
     use nvisy_core::entity::{
-        EntityLabelRef, ModelProvenance, PatternProvenance, TrailProvenance, TrailStepKind,
-        builtins,
+        EntityLabelRef, PatternProvenance, TrailProvenance, TrailStepKind, builtins,
     };
-    use nvisy_core::extraction::Artifacts;
     use nvisy_core::modality::{Text, TextLocation};
+    use nvisy_core::primitive::Confidence;
 
     use super::*;
-    use crate::Context;
-
-    fn pattern_entity(name: &str, span: std::ops::Range<usize>) -> Entity<Text> {
-        let confidence = Confidence::new(0.6).unwrap();
-        let provenance = TrailProvenance::Pattern(PatternProvenance::Regex {
-            name: name.to_owned(),
-            regex: None,
-            validator: None,
-            contextual: false,
-        });
-        let step = TrailStep::recognition(
-            "pattern",
-            confidence,
-            provenance,
-            format!("pattern `{name}` matched"),
-        );
-        Entity::builder()
-            .with_label(EntityLabelRef::from(builtins::GOVERNMENT_ID.name.clone()))
-            .with_trail(vec![step])
-            .with_confidence(confidence)
-            .with_location(TextLocation::new(span.start, span.end))
-            .build()
-            .expect("entity builds")
+    use crate::SubstringMatcher;
+
+    fn govid_label() -> EntityLabelRef {
+        builtins::GOVERNMENT_ID.label_ref()
     }
 
-    fn model_entity(name: &str, span: std::ops::Range<usize>) -> Entity<Text> {
-        let confidence = Confidence::new(0.5).unwrap();
-        let provenance = TrailProvenance::Model(ModelProvenance::new(name));
+    fn person_label() -> EntityLabelRef {
+        builtins::PERSON_NAME.label_ref()
+    }
+
+    fn entity(label: EntityLabelRef, start: usize, end: usize, score: f64) -> Entity<Text> {
+        let confidence = Confidence::new(score).unwrap();
         let step = TrailStep::recognition(
-            "ner",
+            "test",
             confidence,
-            provenance,
-            format!("model `{name}` matched"),
+            TrailProvenance::Pattern(PatternProvenance::DenyList),
+            "test fixture",
         );
         Entity::builder()
-            .with_label(EntityLabelRef::from(builtins::PERSON_NAME.name.clone()))
+            .with_label(label)
             .with_trail(vec![step])
             .with_confidence(confidence)
-            .with_location(TextLocation::new(span.start, span.end))
+            .with_location(TextLocation::new(start, end))
             .build()
             .expect("entity builds")
     }
 
-    fn enhancer(registry: ContextRegistry) -> ContextEnhancer {
-        ContextEnhancer::builder()
-            .with_registry(registry)
-            .with_default_window(80)
-            .with_default_boost(0.2)
-            .build()
-            .expect("enhancer builds")
+    fn enhancer(rules: Vec<BoostRule>) -> Enhancer {
+        Enhancer::new(rules, Box::new(SubstringMatcher))
+    }
+
+    fn rule(
+        label: EntityLabelRef,
+        keywords: &[&'static str],
+        prefix: usize,
+        suffix: usize,
+        boost: f64,
+    ) -> BoostRule {
+        BoostRule::new(
+            label,
+            keywords.iter().copied(),
+            prefix,
+            suffix,
+            Confidence::clamped(boost),
+        )
     }
 
     #[test]
-    fn boosts_pattern_entity_when_keyword_near() {
-        let registry =
-            ContextRegistry::new().with_entry("ssn", Context::new(["ssn", "social security"]));
-        let enhancer = enhancer(registry);
+    fn boosts_entity_when_keyword_in_word_window() {
+        let enhancer = enhancer(vec![rule(
+            govid_label(),
+            &["ssn", "social security"],
+            5,
+            5,
+            0.2,
+        )]);
         let text = "Your SSN: 123-45-6789";
-        let mut entities = vec![pattern_entity("ssn", 10..21)];
-        let before = entities[0].confidence.get();
-        enhancer.enhance(&mut entities, text, &Artifacts::new());
-        assert!(entities[0].confidence.get() > before);
+        let mut entities = vec![entity(govid_label(), 10, 21, 0.6)];
+        enhancer.enhance(&mut entities, text, None);
+        assert!(entities[0].confidence.get() > 0.6);
         assert!(
             entities[0]
                 .trail
                 .iter()
-                .any(|s| matches!(s.kind, TrailStepKind::Refinement))
+                .any(|s| matches!(s.kind, TrailStepKind::Refinement)),
         );
-        let TrailProvenance::Pattern(PatternProvenance::Regex { contextual, .. }) =
-            &entities[0].trail[0].provenance
-        else {
-            panic!("expected regex provenance");
-        };
-        assert!(contextual);
     }
 
     #[test]
-    fn boosts_model_entity_when_keyword_near() {
-        let registry =
-            ContextRegistry::new().with_entry("gliner", Context::new(["named", "called", "mr"]));
-        let enhancer = enhancer(registry);
-        let text = "Mr. Smith is named in the report.";
-        let mut entities = vec![model_entity("gliner", 4..9)];
+    fn boosts_entity_when_keyword_in_suffix() {
+        let enhancer = enhancer(vec![rule(govid_label(), &["social"], 0, 5, 0.2)]);
+        let text = "123-45-6789 (social security number)";
+        let mut entities = vec![entity(govid_label(), 0, 11, 0.6)];
+        enhancer.enhance(&mut entities, text, None);
+        assert!(
+            entities[0].confidence.get() > 0.6,
+            "trailing keyword within suffix window should boost",
+        );
+    }
+
+    #[test]
+    fn suffix_zero_ignores_trailing_keyword() {
+        // Prefix-only: trailing keyword must not boost.
+        let enhancer = enhancer(vec![rule(govid_label(), &["social"], 5, 0, 0.2)]);
+        let text = "123-45-6789 (social security number)";
+        let mut entities = vec![entity(govid_label(), 0, 11, 0.6)];
         let before = entities[0].confidence.get();
-        enhancer.enhance(&mut entities, text, &Artifacts::new());
-        assert!(entities[0].confidence.get() > before);
-        let TrailProvenance::Model(prov) = &entities[0].trail[0].provenance else {
-            panic!("expected model provenance");
-        };
-        assert!(prov.contextual);
+        enhancer.enhance(&mut entities, text, None);
+        assert_eq!(entities[0].confidence.get(), before);
     }
 
     #[test]
-    fn skips_entity_with_no_registered_source() {
-        let registry = ContextRegistry::new();
-        let enhancer = enhancer(registry);
-        let text = "Your SSN: 123-45-6789";
-        let mut entities = vec![pattern_entity("ssn", 10..21)];
+    fn skips_entity_with_no_rule_for_label() {
+        let enhancer = enhancer(vec![rule(govid_label(), &["ssn"], 5, 5, 0.2)]);
+        let text = "Mr. Smith is named in the report.";
+        let mut entities = vec![entity(person_label(), 4, 9, 0.5)];
         let before = entities[0].confidence.get();
-        enhancer.enhance(&mut entities, text, &Artifacts::new());
+        enhancer.enhance(&mut entities, text, None);
         assert_eq!(entities[0].confidence.get(), before);
     }
 
     #[test]
-    fn per_source_window_overrides_default() {
-        let registry =
-            ContextRegistry::new().with_entry("far", Context::new(["far_keyword"]).with_window(5));
-        let enhancer = enhancer(registry);
-        let text = "far_keyword                            XYZ here";
-        let mut entities = vec![pattern_entity("far", 39..42)];
+    fn window_bounds_the_search() {
+        // 2-word prefix / 2-word suffix: "far_keyword" is at the
+        // start; the entity is after many filler words.
+        let enhancer = enhancer(vec![rule(govid_label(), &["far_keyword"], 2, 2, 0.2)]);
+        let text = "far_keyword here is some filler between the keyword and XYZ here";
+        let xyz_start = text.find("XYZ").unwrap();
+        let xyz_end = xyz_start + "XYZ".len();
+        let mut entities = vec![entity(govid_label(), xyz_start, xyz_end, 0.6)];
         let before = entities[0].confidence.get();
-        enhancer.enhance(&mut entities, text, &Artifacts::new());
+        enhancer.enhance(&mut entities, text, None);
         assert_eq!(entities[0].confidence.get(), before);
     }
 
     #[test]
-    fn boost_caps_at_one() {
-        let registry =
-            ContextRegistry::new().with_entry("high", Context::new(["here"]).with_boost(0.9));
-        let enhancer = enhancer(registry);
+    fn boost_saturates_at_one() {
+        let enhancer = enhancer(vec![rule(govid_label(), &["here"], 5, 5, 0.9)]);
         let text = "the value is right here in plain sight";
-        let mut entity = pattern_entity("high", 16..21);
-        // Push base confidence to 0.95
-        entity.confidence = Confidence::new(0.95).unwrap();
-        let mut entities = vec![entity];
-        enhancer.enhance(&mut entities, text, &Artifacts::new());
+        let mut entities = vec![entity(govid_label(), 16, 21, 0.95)];
+        enhancer.enhance(&mut entities, text, None);
         assert!((entities[0].confidence.get() - 1.0).abs() < f64::EPSILON);
     }
+
+    #[test]
+    fn duplicate_label_rules_merge_keywords() {
+        // Two rules for the same label, each contributing a
+        // distinct keyword. The merged rule must trigger boosts
+        // for matches near keywords from *either* original source,
+        // proving the keyword union survived the merge (not just
+        // last-write-wins).
+        let make_enhancer = || {
+            enhancer(vec![
+                rule(govid_label(), &["ssn"], 5, 5, 0.2),
+                rule(govid_label(), &["tax id"], 5, 5, 0.2),
+            ])
+        };
+        assert_eq!(make_enhancer().len(), 1);
+
+        // Keyword only from the first rule.
+        let ssn_only = "ssn: 123-45-6789";
+        let ssn_entity_start = ssn_only.find("123").unwrap();
+        let ssn_entity_end = ssn_entity_start + "123-45-6789".len();
+        let mut from_first = vec![entity(govid_label(), ssn_entity_start, ssn_entity_end, 0.6)];
+        make_enhancer().enhance(&mut from_first, ssn_only, None);
+        assert!(
+            from_first[0].confidence.get() > 0.6,
+            "keyword `ssn` from the first rule must still boost after merge",
+        );
+
+        // Keyword only from the second rule.
+        let taxid_only = "tax id: 987-65-4329";
+        let tax_entity_start = taxid_only.find("987").unwrap();
+        let tax_entity_end = tax_entity_start + "987-65-4329".len();
+        let mut from_second = vec![entity(govid_label(), tax_entity_start, tax_entity_end, 0.6)];
+        make_enhancer().enhance(&mut from_second, taxid_only, None);
+        assert!(
+            from_second[0].confidence.get() > 0.6,
+            "keyword `tax id` from the second rule must still boost after merge",
+        );
+    }
+
+    #[test]
+    fn word_window_handles_unicode() {
+        // 3-word prefix reaches "café" past "naïve" and "resume".
+        let enhancer = enhancer(vec![rule(govid_label(), &["café"], 3, 0, 0.2)]);
+        let text = "café naïve resume — 123-45-6789";
+        let entity_start = text.find("123").unwrap();
+        let entity_end = entity_start + "123-45-6789".len();
+        let mut entities = vec![entity(govid_label(), entity_start, entity_end, 0.6)];
+        enhancer.enhance(&mut entities, text, None);
+        assert!(
+            entities[0].confidence.get() > 0.6,
+            "unicode word should be reachable within 3-word prefix",
+        );
+    }
+
+    #[test]
+    fn word_window_excludes_too_distant_unicode() {
+        // 2-word prefix: "café" is the 3rd word before the entity.
+        let enhancer = enhancer(vec![rule(govid_label(), &["café"], 2, 0, 0.2)]);
+        let text = "café naïve resume — 123-45-6789";
+        let entity_start = text.find("123").unwrap();
+        let entity_end = entity_start + "123-45-6789".len();
+        let mut entities = vec![entity(govid_label(), entity_start, entity_end, 0.6)];
+        let before = entities[0].confidence.get();
+        enhancer.enhance(&mut entities, text, None);
+        assert_eq!(entities[0].confidence.get(), before);
+    }
+
+    #[test]
+    fn empty_tokens_slice_matches_none_behaviour() {
+        // Keyword sits in the prefix word-window but outside the
+        // entity bytes. With the empty-slice fix, `Some(&[])` must
+        // not collapse the snippet to the entity bytes — it should
+        // fall back to the word-window path just like `None`.
+        let enhancer = enhancer(vec![rule(govid_label(), &["ssn"], 5, 5, 0.2)]);
+        let text = "Your SSN: 123-45-6789";
+        let mut from_none = vec![entity(govid_label(), 10, 21, 0.6)];
+        let mut from_empty = vec![entity(govid_label(), 10, 21, 0.6)];
+        enhancer.enhance(&mut from_none, text, None);
+        enhancer.enhance(&mut from_empty, text, Some(&[]));
+        assert_eq!(
+            from_none[0].confidence.get(),
+            from_empty[0].confidence.get(),
+            "Some(&[]) must behave identically to None",
+        );
+        assert!(
+            from_empty[0].confidence.get() > 0.6,
+            "empty tokens slice must still allow the word-window fallback to boost",
+        );
+    }
+
+    #[test]
+    fn token_path_counts_words_against_token_stream() {
+        // 1-word prefix, 0-word suffix: the only word the
+        // prefix reaches is the immediate predecessor token
+        // "Your". The tokenizer here treats "social security"
+        // as a single compound token outside the window, so the
+        // keyword "social security" must NOT fire — unlike a
+        // hypothetical caller that gave it the word-window path,
+        // which would split on whitespace.
+        let enhancer = enhancer(vec![rule(
+            govid_label(),
+            &["social security"],
+            1,
+            0,
+            0.2,
+        )]);
+        let text = "social security: Your 123-45-6789";
+        let entity_start = text.find("123").unwrap();
+        let entity_end = entity_start + "123-45-6789".len();
+        let tokens: Vec<Token> = vec![
+            Token::from_text("social security", 0..15),
+            Token::from_text("Your", 17..21),
+            Token::from_text("123-45-6789", 22..33),
+        ];
+        let mut entities = vec![entity(govid_label(), entity_start, entity_end, 0.6)];
+        let before = entities[0].confidence.get();
+        enhancer.enhance(&mut entities, text, Some(&tokens));
+        assert_eq!(
+            entities[0].confidence.get(),
+            before,
+            "1-word prefix should not reach the `social security` token two positions back",
+        );
+    }
+
+    #[test]
+    fn token_path_boosts_when_keyword_within_token_window() {
+        // Same tokens, 2-word prefix: now the `social security`
+        // token is reachable and the boost fires.
+        let enhancer = enhancer(vec![rule(
+            govid_label(),
+            &["social security"],
+            2,
+            0,
+            0.2,
+        )]);
+        let text = "social security: Your 123-45-6789";
+        let entity_start = text.find("123").unwrap();
+        let entity_end = entity_start + "123-45-6789".len();
+        let tokens: Vec<Token> = vec![
+            Token::from_text("social security", 0..15),
+            Token::from_text("Your", 17..21),
+            Token::from_text("123-45-6789", 22..33),
+        ];
+        let mut entities = vec![entity(govid_label(), entity_start, entity_end, 0.6)];
+        enhancer.enhance(&mut entities, text, Some(&tokens));
+        assert!(
+            entities[0].confidence.get() > 0.6,
+            "2-word prefix should reach the `social security` token",
+        );
+    }
+
+    #[test]
+    fn lemma_matcher_boosts_on_morphological_variant() {
+        // Substring matcher would miss `running` for keyword
+        // `run`. Lemma matcher reads the lemma directly off the
+        // token and boosts.
+        let enhancer = Enhancer::new(
+            vec![rule(govid_label(), &["run"], 5, 5, 0.2)],
+            Box::new(crate::LemmaMatcher),
+        );
+        let text = "They were running 123-45-6789 across the system";
+        let entity_start = text.find("123").unwrap();
+        let entity_end = entity_start + "123-45-6789".len();
+        let tokens: Vec<Token> = vec![
+            Token::from_text("They", 0..4),
+            Token::from_text("were", 5..9),
+            Token::from_text("running", 10..17).with_lemma("run"),
+            Token::from_text("123-45-6789", 18..29),
+            Token::from_text("across", 30..36),
+            Token::from_text("the", 37..40),
+            Token::from_text("system", 41..47),
+        ];
+        let mut entities = vec![entity(govid_label(), entity_start, entity_end, 0.6)];
+        enhancer.enhance(&mut entities, text, Some(&tokens));
+        assert!(
+            entities[0].confidence.get() > 0.6,
+            "lemma matcher should match `run` against the `running` token's lemma",
+        );
+        assert!(
+            entities[0]
+                .trail
+                .iter()
+                .any(|s| matches!(s.kind, TrailStepKind::Refinement)),
+        );
+    }
+
+    #[test]
+    fn tokens_with_no_overlap_fall_back_to_word_window() {
+        // Tokens cover the first half of the document; the entity
+        // is in the second half, outside any token's range.
+        // Without the fallback the token slice would be empty and
+        // the snippet would collapse to entity bytes. With the
+        // fallback, the word-window path reaches the keyword.
+        let enhancer = enhancer(vec![rule(govid_label(), &["ssn"], 5, 5, 0.2)]);
+        let text = "First half of the document. Your SSN: 123-45-6789";
+        let entity_start = text.find("123").unwrap();
+        let entity_end = entity_start + "123-45-6789".len();
+        // Tokens that cover only the first sentence.
+        let tokens: Vec<Token> = vec![
+            Token::from_text("First", 0..5),
+            Token::from_text("half", 6..10),
+            Token::from_text("of", 11..13),
+            Token::from_text("the", 14..17),
+            Token::from_text("document", 18..26),
+        ];
+        let mut entities = vec![entity(govid_label(), entity_start, entity_end, 0.6)];
+        enhancer.enhance(&mut entities, text, Some(&tokens));
+        assert!(
+            entities[0].confidence.get() > 0.6,
+            "tokens that don't overlap the entity must fall back to the word window",
+        );
+    }
 }
diff --git a/crates/nvisy-context/src/lib.rs b/crates/nvisy-context/src/lib.rs
index 2004d7c6..192796f2 100644
--- a/crates/nvisy-context/src/lib.rs
+++ b/crates/nvisy-context/src/lib.rs
@@ -2,14 +2,14 @@
 #![cfg_attr(docsrs, feature(doc_cfg))]
 #![doc = include_str!("../README.md")]
 
-mod declaration;
 mod enhancer;
 mod matcher;
-mod registry;
+mod rule;
 mod tokens;
+mod wrapper;
 
-pub use self::declaration::Context;
-pub use self::enhancer::{ContextEnhancer, ContextEnhancerBuilder, ContextEnhancerBuilderError};
+pub use self::enhancer::Enhancer;
 pub use self::matcher::{KeywordMatcher, LemmaMatcher, SubstringMatcher};
-pub use self::registry::ContextRegistry;
+pub use self::rule::{BoostRule, DEFAULT_BOOST, DEFAULT_PREFIX_WORDS, DEFAULT_SUFFIX_WORDS};
 pub use self::tokens::{Token, Tokens};
+pub use self::wrapper::Boosting;
diff --git a/crates/nvisy-context/src/matcher.rs b/crates/nvisy-context/src/matcher.rs
index 4cc939ac..a2cdb3c3 100644
--- a/crates/nvisy-context/src/matcher.rs
+++ b/crates/nvisy-context/src/matcher.rs
@@ -1,72 +1,68 @@
 //! [`KeywordMatcher`] strategy + the two shipped implementations.
 //!
 //! - [`SubstringMatcher`] — ASCII case-insensitive substring search
-//!   over the raw text window. The fallback when no [`Tokens`] are
-//!   present in `RecognizerInput.artifacts`.
+//!   over the raw text window. The fallback when no token artifact
+//!   is present on `RecognizerInput.artifacts`.
 //! - [`LemmaMatcher`] — matches keywords against lemmatized tokens
-//!   stamped on `RecognizerInput.artifacts` as a [`Tokens`] entry by an
-//!   upstream NLP engine. Recognizes morphological variants
-//!   ("running" → "run", "SSNs" → "ssn") that substring matching
-//!   misses, at the cost of needing a producer engine with
-//!   lemmatization.
+//!   the upstream NLP engine stamped on `RecognizerInput.artifacts`
+//!   as a [`Tokens`] entry. Recognizes morphological variants
+//!   ("running" → "run", "SSNs" → "ssn") that substring matching misses.
 //!
-//! Both implementations are stateless; the
-//! [`ContextEnhancer`] owns one as a
-//! configured strategy.
+//! Both implementations are stateless; the [`Enhancer`] owns one
+//! as a configured strategy.
 //!
 //! [`Tokens`]: super::Tokens
-//! [`ContextEnhancer`]: super::ContextEnhancer
+//! [`Enhancer`]: super::Enhancer
 
-use super::Tokens;
+use hipstr::HipStr;
 
-/// Decide whether any keyword from `keywords` fires within `window`.
+use super::Token;
+
+/// Decide whether any keyword from `keywords` fires within the
+/// candidate region around an entity match.
 ///
-/// The trait is the strategy slot that lets the enhancer swap raw
-/// substring matching for lemma-aware matching (or a third-party
+/// The strategy slot that lets the enhancer swap raw substring
+/// matching for lemma-aware matching (or a third-party
 /// fuzzy/word-boundary implementation) without changing its core
 /// pipeline.
 ///
 /// Implementations receive both a raw `window` slice of the source
-/// text (for substring strategies) and an optional `tokens` view
-/// (for token/lemma strategies). Either or both may be ignored.
+/// text (for substring strategies) and the `tokens` covering that
+/// same range (for token/lemma strategies). Either or both may be
+/// ignored; `tokens` is empty when no NLP engine produced a token
+/// artifact.
 pub trait KeywordMatcher: Send + Sync {
     /// `true` if at least one keyword from `keywords` appears in
-    /// the input. `window` is the raw text slice surrounding the
-    /// entity match; `tokens` is the subset of [`Tokens`] covering
-    /// that same range when an upstream NLP engine produced one,
-    /// `None` otherwise.
-    ///
-    /// [`Tokens`]: super::Tokens
-    fn any_match(&self, window: &str, tokens: Option<&Tokens>, keywords: &[String]) -> bool;
+    /// the input.
+    fn any_match(&self, window: &str, tokens: &[Token], keywords: &[HipStr<'static>]) -> bool;
 }
 
-/// ASCII case-insensitive substring matcher. The default — used
-/// whenever no [`Tokens`] were stamped on `RecognizerInput.artifacts`, or
-/// whenever the caller explicitly picks raw matching.
+/// ASCII case-insensitive substring matcher. The default —
+/// runs whenever no token artifact was stamped on
+/// `RecognizerInput.artifacts`, or whenever the caller explicitly
+/// picks raw matching.
 ///
 /// Fast, allocation-light, permissive: the keyword `"email"` fires
 /// inside `"MyEmailAddress"`. Ignores the `tokens` argument.
-///
-/// [`Tokens`]: super::Tokens
 #[derive(Debug, Clone, Copy, Default)]
 pub struct SubstringMatcher;
 
 impl KeywordMatcher for SubstringMatcher {
-    fn any_match(&self, window: &str, _tokens: Option<&Tokens>, keywords: &[String]) -> bool {
+    fn any_match(&self, window: &str, _tokens: &[Token], keywords: &[HipStr<'static>]) -> bool {
         let lowered = window.to_ascii_lowercase();
         keywords
             .iter()
-            .any(|kw| lowered.contains(&kw.to_ascii_lowercase()))
+            .any(|kw| lowered.contains(kw.as_str().to_ascii_lowercase().as_str()))
     }
 }
 
-/// Lemma-aware matcher. Compares each lemma in `tokens` against the
-/// keyword list with ASCII case-insensitive equality.
+/// Lemma-aware matcher. Compares each lemma in `tokens` against
+/// the keyword list with ASCII case-insensitive equality.
 ///
 /// Falls back to [`SubstringMatcher`] semantics when `tokens` is
-/// `None` (no shared NLP artifact was produced) so the enhancer
-/// can be wired uniformly regardless of whether a given scan had
-/// artifacts.
+/// empty (no shared NLP artifact was produced) so the enhancer
+/// runs uniformly regardless of whether the upstream pass emitted
+/// tokens.
 ///
 /// Recognizes morphological variants the substring matcher cannot:
 /// `"running" → "run"`, `"dogs" → "dog"`, `"SSNs" → "ssn"`. Cost
@@ -76,60 +72,59 @@ impl KeywordMatcher for SubstringMatcher {
 pub struct LemmaMatcher;
 
 impl KeywordMatcher for LemmaMatcher {
-    fn any_match(&self, window: &str, tokens: Option<&Tokens>, keywords: &[String]) -> bool {
-        let Some(tokens) = tokens else {
-            return SubstringMatcher.any_match(window, None, keywords);
-        };
-        let lowered_keywords: Vec<String> =
-            keywords.iter().map(|k| k.to_ascii_lowercase()).collect();
+    fn any_match(&self, window: &str, tokens: &[Token], keywords: &[HipStr<'static>]) -> bool {
+        if tokens.is_empty() {
+            return SubstringMatcher.any_match(window, tokens, keywords);
+        }
+        let lowered_keywords: Vec<String> = keywords
+            .iter()
+            .map(|k| k.as_str().to_ascii_lowercase())
+            .collect();
         tokens.iter().any(|tok| {
-            let lemma = tok.lemma.to_ascii_lowercase();
-            lowered_keywords.iter().any(|kw| kw == &lemma)
+            let lemma = tok.lemma.as_str().to_ascii_lowercase();
+            lowered_keywords.contains(&lemma)
         })
     }
 }
 
 #[cfg(test)]
 mod tests {
-    use super::super::Token;
     use super::*;
 
+    fn kws(items: &[&'static str]) -> Vec<HipStr<'static>> {
+        items.iter().copied().map(HipStr::from).collect()
+    }
+
     #[test]
     fn substring_matches_case_insensitively() {
         let m = SubstringMatcher;
-        assert!(m.any_match("Your SSN: 123", None, &["ssn".into()]));
-        assert!(m.any_match(
-            "the SOCIAL SECURITY number",
-            None,
-            &["social security".into()]
-        ));
-        assert!(!m.any_match("nothing here", None, &["ssn".into()]));
+        assert!(m.any_match("Your SSN: 123", &[], &kws(&["ssn"])));
+        assert!(m.any_match("the SOCIAL SECURITY number", &[], &kws(&["social security"])));
+        assert!(!m.any_match("nothing here", &[], &kws(&["ssn"])));
     }
 
     #[test]
     fn substring_is_permissive() {
         let m = SubstringMatcher;
-        assert!(m.any_match("MyEmailAddress", None, &["email".into()]));
+        assert!(m.any_match("MyEmailAddress", &[], &kws(&["email"])));
     }
 
     #[test]
     fn lemma_matches_morph_variants() {
-        // tokens with lemmatization: "running" → "run", "dogs" → "dog"
-        let tokens = Tokens::new(vec![
+        let tokens = vec![
             Token::from_text("the", 0..3),
             Token::from_text("running", 4..11).with_lemma("run"),
             Token::from_text("dogs", 12..16).with_lemma("dog"),
-        ]);
+        ];
         let m = LemmaMatcher;
-        assert!(m.any_match("", Some(&tokens), &["run".into()]));
-        assert!(m.any_match("", Some(&tokens), &["dog".into()]));
-        assert!(!m.any_match("", Some(&tokens), &["cat".into()]));
+        assert!(m.any_match("", &tokens, &kws(&["run"])));
+        assert!(m.any_match("", &tokens, &kws(&["dog"])));
+        assert!(!m.any_match("", &tokens, &kws(&["cat"])));
     }
 
     #[test]
     fn lemma_falls_back_to_substring_without_tokens() {
         let m = LemmaMatcher;
-        // No artifacts → fall back to substring matching.
-        assert!(m.any_match("Your SSN: 123", None, &["ssn".into()]));
+        assert!(m.any_match("Your SSN: 123", &[], &kws(&["ssn"])));
     }
 }
diff --git a/crates/nvisy-context/src/registry.rs b/crates/nvisy-context/src/registry.rs
deleted file mode 100644
index d0043bc9..00000000
--- a/crates/nvisy-context/src/registry.rs
+++ /dev/null
@@ -1,122 +0,0 @@
-//! [`ContextRegistry`]: the `name → Context` lookup the enhancer
-//! reads at boost time.
-//!
-//! The recognizer side of the pipeline (`PatternRecognizer`,
-//! `NerRecognizer`, …) registers one entry per
-//! source name — for patterns that's one entry per
-//! `Regex`/`Dictionary` rule; for NER it's typically one entry per
-//! recognizer keyed on the recognizer's name. The enhancer reads
-//! the entity's first-step provenance, pulls the name, and looks
-//! up the [`Context`] here.
-//!
-//! Last-write-wins on duplicate names: callers are responsible for
-//! choosing distinct keys when mixing per-rule and per-recognizer
-//! registrations.
-
-use std::collections::HashMap;
-
-use super::Context;
-
-/// Lookup table the [`ContextEnhancer`]
-/// reads at boost time.
-///
-/// Construct with [`new`], populate with
-/// [`with_entry`] /
-/// [`with_entries`], then hand to a
-/// [`ContextEnhancerBuilder`].
-///
-/// [`ContextEnhancer`]: super::ContextEnhancer
-/// [`new`]: Self::new
-/// [`with_entry`]: Self::with_entry
-/// [`with_entries`]: Self::with_entries
-/// [`ContextEnhancerBuilder`]: super::ContextEnhancerBuilder
-#[derive(Debug, Clone, Default)]
-pub struct ContextRegistry {
-    entries: HashMap<String, Context>,
-}
-
-impl ContextRegistry {
-    /// Empty registry.
-    #[must_use]
-    pub fn new() -> Self {
-        Self::default()
-    }
-
-    /// Register one entry. Last write wins on duplicate names.
-    #[must_use]
-    pub fn with_entry(mut self, name: impl Into<String>, context: Context) -> Self {
-        let context_name = name.into();
-        if !context.is_empty() {
-            self.entries.insert(context_name, context);
-        }
-        self
-    }
-
-    /// Register many entries.
-    #[must_use]
-    pub fn with_entries<I, S>(mut self, entries: I) -> Self
-    where
-        I: IntoIterator<Item = (S, Context)>,
-        S: Into<String>,
-    {
-        for (name, context) in entries {
-            let context_name = name.into();
-            if !context.is_empty() {
-                self.entries.insert(context_name, context);
-            }
-        }
-        self
-    }
-
-    /// Merge another registry into this one. Last-write-wins on
-    /// duplicate names. Used to combine per-source registries (e.g.
-    /// pattern registry + NER registry) into one enhancer input.
-    #[must_use]
-    pub fn merge(mut self, other: ContextRegistry) -> Self {
-        for (name, context) in other.entries {
-            self.entries.insert(name, context);
-        }
-        self
-    }
-
-    /// Look up the [`Context`] for `name`. Returns `None` when the
-    /// name was never registered or when the registered context
-    /// had an empty keyword list (which is treated as "not
-    /// registered" — see [`with_entry`]).
-    ///
-    /// [`with_entry`]: Self::with_entry
-    #[must_use]
-    pub fn get(&self, name: &str) -> Option<&Context> {
-        self.entries.get(name)
-    }
-
-    /// Number of registered names with non-empty contexts.
-    #[must_use]
-    pub fn len(&self) -> usize {
-        self.entries.len()
-    }
-
-    /// Whether the registry has no entries.
-    #[must_use]
-    pub fn is_empty(&self) -> bool {
-        self.entries.is_empty()
-    }
-}
-
-impl Extend<(String, Context)> for ContextRegistry {
-    fn extend<I: IntoIterator<Item = (String, Context)>>(&mut self, iter: I) {
-        for (name, context) in iter {
-            if !context.is_empty() {
-                self.entries.insert(name, context);
-            }
-        }
-    }
-}
-
-impl FromIterator<(String, Context)> for ContextRegistry {
-    fn from_iter<I: IntoIterator<Item = (String, Context)>>(iter: I) -> Self {
-        let mut registry = Self::new();
-        registry.extend(iter);
-        registry
-    }
-}
diff --git a/crates/nvisy-context/src/rule.rs b/crates/nvisy-context/src/rule.rs
new file mode 100644
index 00000000..7f88cf78
--- /dev/null
+++ b/crates/nvisy-context/src/rule.rs
@@ -0,0 +1,140 @@
+//! [`BoostRule`]: per-label keyword-boost rule.
+//!
+//! One rule per [`EntityLabelRef`] declares the keyword set that
+//! lifts confidence when one of those keywords appears within
+//! `prefix_words` words before or `suffix_words` words after an
+//! entity carrying that label. The window radii and the additive
+//! `boost` are resolved at rule construction time — there are no
+//! per-source overrides at apply time.
+//!
+//! Producers (the pattern crate today, future NER/LLM/custom
+//! recognizer authors) hand the engine a `Vec<BoostRule>` keyed by
+//! label. When several rules contribute to the same label (e.g.
+//! two different SSN detectors both contributing to
+//! `GOVERNMENT_ID`), the engine merges them by union of keywords —
+//! see [`BoostRule::merge`].
+//!
+//! [`EntityLabelRef`]: nvisy_core::entity::EntityLabelRef
+
+use std::collections::HashSet;
+
+use hipstr::HipStr;
+use nvisy_core::entity::EntityLabelRef;
+use nvisy_core::primitive::Confidence;
+
+/// Default window radius in words *before* an entity match, used by
+/// [`BoostRule::for_label`]. Mirrors Presidio's `context_prefix_count = 5`.
+pub const DEFAULT_PREFIX_WORDS: usize = 5;
+
+/// Default window radius in words *after* an entity match, used by
+/// [`BoostRule::for_label`]. Equal to [`DEFAULT_PREFIX_WORDS`] so trailing
+/// context like "123-45-6789 (social security)" boosts the same as
+/// leading context. Presidio defaults `context_suffix_count` to `0`; we
+/// pick symmetric defaults because operators rarely realize the
+/// asymmetry exists, and one-sided windows surprise people.
+pub const DEFAULT_SUFFIX_WORDS: usize = 5;
+
+/// Default additive boost (Presidio's `context_similarity_factor = 0.35`);
+/// [`BoostRule::for_label`] converts it with `Confidence::clamped`.
+pub const DEFAULT_BOOST: f64 = 0.35;
+
+/// Per-label keyword-boost rule the [`Enhancer`] applies at runtime.
+///
+/// [`Enhancer`]: super::Enhancer
+#[derive(Debug, Clone, PartialEq)]
+pub struct BoostRule {
+    /// Entity label this rule applies to. Each emitted
+    /// `Entity<Text>` whose [`label`] matches is checked against
+    /// this rule's keywords.
+    ///
+    /// [`label`]: nvisy_core::entity::Entity::label
+    pub label: EntityLabelRef,
+    /// Keywords whose presence near a match lifts the entity's
+    /// confidence. Stored as [`HipStr`] for cheap clones across
+    /// per-pass rule sets; [`BoostRule::merge`] appends to this list.
+    pub keywords: Vec<HipStr<'static>>,
+    /// Window radius in words *before* the entity's match.
+    /// Counted against the token artifact on
+    /// `RecognizerInput.artifacts` when present, or via Unicode
+    /// word segmentation of the source text otherwise.
+    pub prefix_words: usize,
+    /// Window radius in words *after* the entity's match. Words
+    /// are counted the same way as for [`prefix_words`].
+    ///
+    /// [`prefix_words`]: Self::prefix_words
+    pub suffix_words: usize,
+    /// Additive boost applied to the entity's confidence when a
+    /// keyword fires. Clamped at the [`Confidence`] ceiling on
+    /// apply.
+    pub boost: Confidence,
+}
+
+impl BoostRule {
+    /// Construct a rule for `label` with explicit window radii
+    /// and `boost`. Most callers want [`BoostRule::for_label`]
+    /// instead — it bakes in the default window / boost values.
+    #[must_use]
+    pub fn new(
+        label: EntityLabelRef,
+        keywords: impl IntoIterator<Item = impl Into<HipStr<'static>>>,
+        prefix_words: usize,
+        suffix_words: usize,
+        boost: Confidence,
+    ) -> Self {
+        Self {
+            label,
+            keywords: keywords.into_iter().map(Into::into).collect(),
+            prefix_words,
+            suffix_words,
+            boost,
+        }
+    }
+
+    /// Construct a rule for `label` using the crate's default
+    /// [`prefix_words`], [`suffix_words`], and [`boost`]
+    /// constants. The common case — recognizers building their
+    /// own boost rules from declared keywords don't need to
+    /// think about tuning knobs.
+    ///
+    /// [`prefix_words`]: DEFAULT_PREFIX_WORDS
+    /// [`suffix_words`]: DEFAULT_SUFFIX_WORDS
+    /// [`boost`]: DEFAULT_BOOST
+    #[must_use]
+    pub fn for_label(
+        label: EntityLabelRef,
+        keywords: impl IntoIterator<Item = impl Into<HipStr<'static>>>,
+    ) -> Self {
+        Self::new(
+            label,
+            keywords,
+            DEFAULT_PREFIX_WORDS,
+            DEFAULT_SUFFIX_WORDS,
+            Confidence::clamped(DEFAULT_BOOST),
+        )
+    }
+
+    /// Merge `other` into this rule by appending, in order, every
+    /// keyword of `other` that is not already present — including
+    /// repeats within `other` itself. Window radii and `boost` are
+    /// kept from `self` — callers that need different values per
+    /// source should construct independent rules and keep them separate.
+    ///
+    /// # Panics
+    ///
+    /// Debug-asserts when the labels differ. Merging across labels
+    /// is a caller bug — rules are keyed by label and the engine
+    /// looks them up by label.
+    pub fn merge(&mut self, other: BoostRule) {
+        debug_assert_eq!(
+            self.label, other.label,
+            "BoostRule::merge requires matching labels",
+        );
+        // Own the seen-set (`HipStr` clones are cheap) so it can grow
+        // while filtering: a keyword repeated inside `other` is then
+        // appended once rather than once per occurrence. Owning it also
+        // avoids holding a borrow of `self.keywords` across the extend.
+        let mut seen: HashSet<HipStr<'static>> = self.keywords.iter().cloned().collect();
+        let additions = other.keywords.into_iter().filter(|kw| seen.insert(kw.clone()));
+        self.keywords.extend(additions);
+    }
+}
diff --git a/crates/nvisy-context/src/tokens.rs b/crates/nvisy-context/src/tokens.rs
index 24181797..eb490adc 100644
--- a/crates/nvisy-context/src/tokens.rs
+++ b/crates/nvisy-context/src/tokens.rs
@@ -6,33 +6,29 @@
 //! text, and two precomputed predicates the enhancer reads
 //! (`is_stop`, `is_punct`).
 //!
-//! [`Tokens`] is the owning collection plus lookup helpers the
-//! enhancer uses: [`around`] gets the slice of tokens within a byte
-//! window, [`lemmas_in`] iterates lemmas covering a byte range.
-//! Both work in *source-text byte offsets* — the same coordinate
-//! space as [`Entity::location`] — so there's no coordinate
-//! translation at the call site.
+//! [`Tokens`] is the owning collection — a `Vec<Token>` newtype
+//! exposing iteration and length. The [`Enhancer`] slices the
+//! stream by *count* (prefix/suffix word radii) using its own
+//! internal helpers; the byte range carried on each [`Token`] is
+//! there for consumers that want to map a token back to its
+//! source-text substring.
 //!
-//! [`around`]: Tokens::around
-//! [`lemmas_in`]: Tokens::lemmas_in
-//! [`Entity::location`]: nvisy_core::entity::Entity::location
+//! [`Enhancer`]: super::Enhancer
 //!
-//! Tokens live next to the [`ContextEnhancer`] because that's the
-//! only consumer: the enhancer reads them off
+//! Tokens live next to the [`Enhancer`] because that's the only
+//! consumer: the enhancer reads them off
 //! `RecognizerInput::artifacts` to drive lemma-aware keyword
 //! matching. The producer (a tokenizer in some upstream NLP
 //! backend) only needs to know the type by name; the type itself
-//! belongs in the consumer's neighborhood.
+//! belongs in the consumer's neighbourhood.
 //!
 //! The shape is intentionally minimal. POS tags, morphology,
-//! dependency trees, and other heavier features are not part of the
-//! v1 surface; they get added as fields when a downstream consumer
-//! needs them. This keeps the artifact cheap for engines that don't
-//! produce them — `text == lemma`, `is_stop == false`,
+//! dependency trees, and other heavier features are not part of
+//! the v1 surface; they get added as fields when a downstream
+//! consumer needs them. This keeps the artifact cheap for engines
+//! that don't produce them — `text == lemma`, `is_stop == false`,
 //! `is_punct == false` are the defaults for a tokenizer-only
 //! engine.
-//!
-//! [`ContextEnhancer`]: super::ContextEnhancer
 
 use std::ops::Range;
 
@@ -99,17 +95,18 @@ impl Token {
     }
 }
 
-/// The owning token sequence carried by a
-/// [`RecognizerInput::artifacts`] bundle.
+/// Owning token sequence stamped on a
+/// [`RecognizerInput::artifacts`] bundle by an upstream NLP engine.
 ///
 /// [`RecognizerInput::artifacts`]: nvisy_core::recognition::RecognizerInput::artifacts
 ///
 /// Tokens are sorted by `offset.start` (producers should emit them
-/// in order; consumer-side code assumes this). The collection
-/// exposes byte-range lookup helpers the [`ContextEnhancer`] uses
-/// to pull lemmas around an entity match.
+/// in order; consumer-side code assumes this). The [`Enhancer`]
+/// borrows the underlying slice via [`as_slice`] and walks it by
+/// count when scoring the entity's neighbourhood.
 ///
-/// [`ContextEnhancer`]: super::ContextEnhancer
+/// [`Enhancer`]: super::Enhancer
+/// [`as_slice`]: Tokens::as_slice
 #[derive(Debug, Clone, Default, PartialEq, Eq)]
 pub struct Tokens(Vec<Token>);
 
@@ -150,47 +147,6 @@ impl Tokens {
     pub fn iter(&self) -> std::slice::Iter<'_, Token> {
         self.0.iter()
     }
-
-    /// Tokens overlapping `byte_range`, plus a `window`-byte
-    /// margin on each side.
-    ///
-    /// Used by the enhancer to grab the keyword neighborhood around
-    /// an entity match. Returns the contiguous sub-slice; tokens at
-    /// the boundary are included when their byte range overlaps the
-    /// expanded range.
-    ///
-    /// Cost is `O(log n)` for the start probe + linear over the
-    /// returned slice; the sequence is sorted so a binary search
-    /// suffices.
-    #[must_use]
-    pub fn around(&self, byte_range: Range<usize>, window: usize) -> &[Token] {
-        let lo = byte_range.start.saturating_sub(window);
-        let hi = byte_range.end.saturating_add(window);
-        self.in_range(lo..hi)
-    }
-
-    /// Lemmas of every token overlapping `byte_range`. Useful when
-    /// only the lemma strings are needed (e.g. for keyword
-    /// matching).
-    pub fn lemmas_in(&self, byte_range: Range<usize>) -> impl Iterator<Item = &str> {
-        self.in_range(byte_range).iter().map(|t| t.lemma.as_str())
-    }
-
-    /// Tokens fully contained within (or overlapping) `byte_range`.
-    /// Returned as a sub-slice — tokens with `offset.end > range.start`
-    /// and `offset.start < range.end` are included.
-    #[must_use]
-    pub fn in_range(&self, byte_range: Range<usize>) -> &[Token] {
-        if self.0.is_empty() || byte_range.start >= byte_range.end {
-            return &[];
-        }
-        let start = self.0.partition_point(|t| t.offset.end <= byte_range.start);
-        let end = self.0.partition_point(|t| t.offset.start < byte_range.end);
-        if start >= end {
-            return &[];
-        }
-        &self.0[start..end]
-    }
 }
 
 impl FromIterator<Token> for Tokens {
@@ -207,68 +163,3 @@ impl IntoIterator for Tokens {
         self.0.into_iter()
     }
 }
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-
-    fn t(text: &'static str, start: usize, end: usize) -> Token {
-        Token::from_text(text, start..end)
-    }
-
-    #[test]
-    fn in_range_returns_overlapping_tokens() {
-        let tokens = Tokens::new(vec![t("hello", 0, 5), t("world", 6, 11), t("foo", 12, 15)]);
-        // 4..7 overlaps "hello" and "world"
-        let got: Vec<&str> = tokens
-            .in_range(4..7)
-            .iter()
-            .map(|t| t.text.as_str())
-            .collect();
-        assert_eq!(got, vec!["hello", "world"]);
-    }
-
-    #[test]
-    fn around_extends_by_window() {
-        let tokens = Tokens::new(vec![
-            t("a", 0, 1),
-            t("b", 2, 3),
-            t("c", 4, 5),
-            t("d", 6, 7),
-            t("e", 8, 9),
-        ]);
-        // around 4..5 with window=2 → look at 2..7 → "b","c","d"
-        let got: Vec<&str> = tokens
-            .around(4..5, 2)
-            .iter()
-            .map(|t| t.text.as_str())
-            .collect();
-        assert_eq!(got, vec!["b", "c", "d"]);
-    }
-
-    #[test]
-    fn lemmas_in_yields_lemmas() {
-        let tokens = Tokens::new(vec![
-            t("running", 0, 7).with_lemma("run"),
-            t("dogs", 8, 12).with_lemma("dog"),
-        ]);
-        let got: Vec<&str> = tokens.lemmas_in(0..12).collect();
-        assert_eq!(got, vec!["run", "dog"]);
-    }
-
-    #[test]
-    fn in_range_empty_for_disjoint_range() {
-        let tokens = Tokens::new(vec![t("a", 0, 5)]);
-        assert!(tokens.in_range(10..20).is_empty());
-    }
-
-    #[test]
-    fn in_range_empty_for_inverted_range() {
-        let tokens = Tokens::new(vec![t("a", 0, 5)]);
-        let inverted = Range {
-            start: 5usize,
-            end: 3usize,
-        };
-        assert!(tokens.in_range(inverted).is_empty());
-    }
-}
diff --git a/crates/nvisy-context/src/wrapper.rs b/crates/nvisy-context/src/wrapper.rs
new file mode 100644
index 00000000..688f2838
--- /dev/null
+++ b/crates/nvisy-context/src/wrapper.rs
@@ -0,0 +1,77 @@
+//! [`Boosting`]: post-recognition keyword-boost wrapper for any
+//! [`EntityRecognizer<Text>`].
+//!
+//! Composes an inner recognizer with an [`Enhancer`]: the wrapper
+//! delegates `recognize` to the inner, then runs the enhancer
+//! over the produced entities. Equivalent to "the recognizer
+//! owns its boosting" without each recognizer reimplementing the
+//! enhancement step.
+//!
+//! Typical use:
+//!
+//! ```ignore
+//! let inner = MyRecognizer::new(...);
+//! let enhancer = Enhancer::new(rules, Box::new(SubstringMatcher));
+//! let recognizer = Boosting::new(inner, enhancer);
+//! ```
+//!
+//! The wrapper implements [`EntityRecognizer<Text>`] so the engine
+//! never has to know boosting happened.
+
+use nvisy_core::Result;
+use nvisy_core::modality::Text;
+use nvisy_core::recognition::{EntityRecognizer, RecognizerInput, RecognizerOutput};
+
+use super::Enhancer;
+use super::Tokens;
+
+/// Wraps an [`EntityRecognizer<Text>`] with a post-recognition
+/// [`Enhancer`] pass. Implements [`EntityRecognizer<Text>`] itself,
+/// so the wrapper is a drop-in replacement for the inner recognizer.
+///
+/// Assumes the inner recognizer emits entities whose byte offsets
+/// index into `input.data.text` (the standard
+/// [`EntityRecognizer<Text>`] contract). The wrapper hands that same
+/// `&str` to the enhancer; a recognizer that emitted offsets in a
+/// different coordinate space would make the enhancer score the wrong
+/// text, or panic if it slices at an out-of-range offset.
+pub struct Boosting<R> {
+    inner: R,
+    enhancer: Enhancer,
+}
+
+impl<R> Boosting<R> {
+    /// Pair `inner` with the `enhancer` that is run over the
+    /// entities `inner` produces from `recognize`.
+    pub fn new(inner: R, enhancer: Enhancer) -> Self {
+        Boosting { inner, enhancer }
+    }
+
+    /// Shared reference to the recognizer being wrapped.
+    pub fn inner(&self) -> &R {
+        &self.inner
+    }
+
+    /// Shared reference to the enhancer that post-processes the
+    /// inner recognizer's entities.
+    pub fn enhancer(&self) -> &Enhancer {
+        &self.enhancer
+    }
+}
+
+#[async_trait::async_trait]
+impl<R> EntityRecognizer<Text> for Boosting<R>
+where
+    R: EntityRecognizer<Text> + 'static,
+{
+    async fn recognize(&self, input: &RecognizerInput<Text>) -> Result<RecognizerOutput<Text>> {
+        let mut recognized = self.inner.recognize(input).await?;
+        // Only look up tokens and run the pass when the enhancer is non-empty.
+        if !self.enhancer.is_empty() {
+            let source = input.data.text.as_str();
+            let token_slice = input.artifacts.get::<Tokens>().map(Tokens::as_slice);
+            self.enhancer.enhance(&mut recognized.entities, source, token_slice);
+        }
+        Ok(recognized)
+    }
+}
diff --git a/crates/nvisy-engine/Cargo.toml b/crates/nvisy-engine/Cargo.toml
index cc4dcd90..97a99643 100644
--- a/crates/nvisy-engine/Cargo.toml
+++ b/crates/nvisy-engine/Cargo.toml
@@ -40,7 +40,6 @@ rustdoc-args = ["--cfg", "docsrs"]
 [dependencies]
 # Internal crates
 nvisy-codec = { workspace = true, features = ["text"] }
-nvisy-context = { workspace = true, features = [] }
 nvisy-core = { workspace = true, features = [] }
 nvisy-llm = { workspace = true, features = [] }
 nvisy-ner = { workspace = true, features = [] }
diff --git a/crates/nvisy-engine/src/core/context.rs b/crates/nvisy-engine/src/core/context.rs
index 84391b2c..d988fd44 100644
--- a/crates/nvisy-engine/src/core/context.rs
+++ b/crates/nvisy-engine/src/core/context.rs
@@ -17,7 +17,6 @@
 use std::num::NonZeroUsize;
 use std::sync::Arc;
 
-use nvisy_context::ContextEnhancer;
 use nvisy_toolkit::detection::RecognizerRegistry;
 use nvisy_toolkit::extraction::ExtractorRegistry;
 use tokio_util::sync::CancellationToken;
@@ -52,11 +51,6 @@ pub struct DetectionContext {
     /// engine-side detection-config template plus the request's
     /// label catalog.
     pub(crate) recognizer_registry: Arc<RecognizerRegistry>,
-    /// Post-recognition keyword-boost enhancer — built alongside
-    /// `recognizer_registry` from the same recognizer set. Shared
-    /// behind `Arc` so per-document phases borrow it without
-    /// cloning the embedded registry / matcher.
-    pub(crate) context_enhancer: Arc<ContextEnhancer>,
     pub(crate) concurrency: Option<NonZeroUsize>,
 }
 
@@ -67,7 +61,6 @@ pub struct DetectionContext {
 pub(crate) struct DetectionEngines {
     pub extraction_engine: ExtractorRegistry,
     pub recognizer_registry: Arc<RecognizerRegistry>,
-    pub context_enhancer: Arc<ContextEnhancer>,
 }
 
 impl DetectionContext {
@@ -82,14 +75,12 @@ impl DetectionContext {
         let DetectionEngines {
             extraction_engine,
             recognizer_registry,
-            context_enhancer,
         } = engines;
         Self {
             cancel,
             shared,
             extraction_engine,
             recognizer_registry,
-            context_enhancer,
             concurrency,
         }
     }
@@ -108,14 +99,6 @@ impl DetectionContext {
     pub(crate) fn recognizer_registry(&self) -> &Arc<RecognizerRegistry> {
         &self.recognizer_registry
     }
-
-    /// Per-request context-keyword enhancer borrowed by
-    /// [`DetectionPhase`].
-    ///
-    /// [`DetectionPhase`]: crate::detection::phases::detection::DetectionPhase
-    pub(crate) fn context_enhancer(&self) -> &Arc<ContextEnhancer> {
-        &self.context_enhancer
-    }
 }
 
 impl PhaseContext for DetectionContext {
diff --git a/crates/nvisy-engine/src/detection/config/mod.rs b/crates/nvisy-engine/src/detection/config/mod.rs
index 9dcd3a14..3f74c9ac 100644
--- a/crates/nvisy-engine/src/detection/config/mod.rs
+++ b/crates/nvisy-engine/src/detection/config/mod.rs
@@ -9,13 +9,17 @@
 //! currently wired — those modules are parked pending rework to
 //! implement [`EntityRecognizer<M>`] directly.
 //!
+//! Each recognizer owns its own post-recognition processing
+//! (boosting, deduplication-within-recognizer, validation post-pass).
+//! The engine orchestrates recognizers; it does not orchestrate
+//! recognizer-internal phases.
+//!
 //! [`RecognizerRegistry`]: nvisy_toolkit::detection::RecognizerRegistry
 //! [`EntityRecognizer<M>`]: nvisy_core::recognition::EntityRecognizer
 
 mod ner;
 mod pattern;
 
-use nvisy_context::{ContextEnhancer, ContextRegistry};
 #[cfg(not(feature = "bento"))]
 use nvisy_core::Error;
 use nvisy_core::Result;
@@ -25,7 +29,7 @@ use nvisy_ner::NerRecognizer;
 use nvisy_ner::backend::NoopBackend;
 #[cfg(feature = "bento")]
 use nvisy_ner::backend::{BentoBackend, BentoParams};
-use nvisy_pattern::{PatternRecognizer, PatternRegistry};
+use nvisy_pattern::PatternRecognizer;
 use nvisy_toolkit::detection::RecognizerRegistry;
 
 pub use self::ner::{NerBackend, NerDetection};
@@ -35,26 +39,6 @@ pub use self::pattern::PatternDetection;
 /// provenance on emitted entities).
 const NER_RECOGNIZER_NAME: &str = "ner";
 
-/// Engine-wide defaults for the post-recognition [`ContextEnhancer`].
-/// Mirrors Presidio's defaults (`context_similarity_factor = 0.35`,
-/// `context_prefix_count = ~5 words ≈ 50 bytes`).
-const ENHANCER_DEFAULT_WINDOW: usize = 50;
-const ENHANCER_DEFAULT_BOOST: f64 = 0.35;
-
-/// Bundle returned by [`DetectionConfig::build_for_request`]:
-/// the per-request recognizer registry plus the matching
-/// [`ContextEnhancer`] built from each recognizer's declared
-/// context keywords.
-pub struct DetectionResources {
-    /// Recognizers selected for this request.
-    pub recognizers: RecognizerRegistry,
-    /// Post-recognition keyword-boost enhancer for `Text`
-    /// entities. Always present; carries an empty registry when
-    /// no recognizer declared context keywords (cheap to skip
-    /// inside [`ContextEnhancer::enhance`]).
-    pub enhancer: ContextEnhancer,
-}
-
 /// Configuration for the [`RecognizerRegistry`].
 ///
 /// Each field maps to a `[detection.*]` section in `Nvisy.toml`.
@@ -92,19 +76,17 @@ impl DetectionConfig {
     /// Returns the first construction error encountered — pattern
     /// compile failure, NER backend init failure, or a
     /// config-selected backend whose feature wasn't compiled in.
-    pub fn build_for_request(&self, catalog: &EntityLabelCatalog) -> Result<DetectionResources> {
+    pub fn build_for_request(&self, catalog: &EntityLabelCatalog) -> Result<RecognizerRegistry> {
         let mut reg = RecognizerRegistry::new();
-        let mut context_registry = ContextRegistry::new();
 
         let pattern_cfg = self.pattern.clone().unwrap_or_default();
         if pattern_cfg.enabled {
-            let pattern_registry = PatternRegistry::builtin().filter_by_catalog(catalog);
-            if !pattern_registry.is_empty() {
-                context_registry = context_registry.merge(pattern_registry.context_registry());
-                let recognizer = PatternRecognizer::builder()
-                    .with_registry(pattern_registry)
-                    .build()?;
-                reg = reg.with_recognizer::<Text>(recognizer);
+            let builder = PatternRecognizer::builder()
+                .with_builtin_patterns()
+                .with_builtin_dictionaries()
+                .filter_by_catalog(catalog);
+            if !builder.is_empty() {
+                reg = reg.with_recognizer::<Text>(builder.build()?);
             }
         }
 
@@ -135,20 +117,9 @@ impl DetectionConfig {
                     ));
                 }
             };
-            context_registry = context_registry.merge(recognizer.context_registry());
             reg = reg.with_recognizer::<Text>(recognizer);
         }
 
-        let enhancer = ContextEnhancer::builder()
-            .with_registry(context_registry)
-            .with_default_window(ENHANCER_DEFAULT_WINDOW)
-            .with_default_boost(ENHANCER_DEFAULT_BOOST)
-            .build()
-            .expect("enhancer fields (window, boost, registry) all set");
-
-        Ok(DetectionResources {
-            recognizers: reg,
-            enhancer,
-        })
+        Ok(reg)
     }
 }
diff --git a/crates/nvisy-engine/src/detection/document.rs b/crates/nvisy-engine/src/detection/document.rs
index ab9f2207..6698dd26 100644
--- a/crates/nvisy-engine/src/detection/document.rs
+++ b/crates/nvisy-engine/src/detection/document.rs
@@ -26,10 +26,7 @@ impl DetectionDocumentPipeline {
     pub(super) fn from_context(ctx: &DetectionContext) -> Self {
         Self {
             extraction: ExtractionPhase::new(ctx.extraction_engine().clone()),
-            detection: DetectionPhase::new(
-                ctx.recognizer_registry().clone(),
-                ctx.context_enhancer().clone(),
-            ),
+            detection: DetectionPhase::new(ctx.recognizer_registry().clone()),
             deduplication: DeduplicationPhase::new(),
         }
     }
diff --git a/crates/nvisy-engine/src/detection/mod.rs b/crates/nvisy-engine/src/detection/mod.rs
index b0c2e6d0..31e50884 100644
--- a/crates/nvisy-engine/src/detection/mod.rs
+++ b/crates/nvisy-engine/src/detection/mod.rs
@@ -28,9 +28,7 @@ mod result;
 mod state;
 mod status;
 
-pub use self::config::{
-    DetectionConfig, DetectionResources, NerBackend, NerDetection, PatternDetection,
-};
+pub use self::config::{DetectionConfig, NerBackend, NerDetection, PatternDetection};
 pub use self::engine::DetectionEngine;
 pub use self::extraction::ExtractionConfig;
 #[cfg(feature = "image")]
diff --git a/crates/nvisy-engine/src/detection/phases/detection.rs b/crates/nvisy-engine/src/detection/phases/detection.rs
index 9857308b..ecbd163f 100644
--- a/crates/nvisy-engine/src/detection/phases/detection.rs
+++ b/crates/nvisy-engine/src/detection/phases/detection.rs
@@ -11,10 +11,8 @@
 
 use std::sync::Arc;
 
-use nvisy_context::ContextEnhancer;
 use nvisy_core::Result;
 use nvisy_core::entity::Entity;
-use nvisy_core::extraction::Artifacts;
 use nvisy_core::modality::{
     Audio, AudioLocation, Image, ImageLocation, Overlap, Tabular, TabularLocation, Text, TextData,
     TextLocation,
@@ -35,21 +33,20 @@ const TARGET: &str = "nvisy_engine::detection";
 ///
 /// Holds an `Arc<RecognizerRegistry>` so the registry is shared
 /// cheaply across per-document phases without cloning the
-/// underlying recognizer lists, plus an `Arc<ContextEnhancer>` for
-/// the post-recognition keyword-boost pass.
+/// underlying recognizer lists. Recognizers own any post-detection
+/// work they need (boosting, dedup, validation) — the engine just
+/// orchestrates the registry.
 ///
 /// [`EntityRecord`]: crate::document::provenance::EntityRecord
 pub struct DetectionPhase {
     registry: Arc<RecognizerRegistry>,
-    enhancer: Arc<ContextEnhancer>,
 }
 
 impl DetectionPhase {
-    /// Build the phase from the shared recognizer registry and
-    /// matching context enhancer. Called once per pipeline by the
-    /// pipeline orchestrator.
-    pub fn new(registry: Arc<RecognizerRegistry>, enhancer: Arc<ContextEnhancer>) -> Self {
-        Self { registry, enhancer }
+    /// Build the phase from the shared recognizer registry. Called
+    /// once per pipeline by the pipeline orchestrator.
+    pub fn new(registry: Arc<RecognizerRegistry>) -> Self {
+        Self { registry }
     }
 
     pub(crate) async fn apply_text(
@@ -88,7 +85,7 @@ impl DetectionPhase {
         let span = tracing::info_span!(target: TARGET, "phase", name = "detection.image");
         let run_id = ctx.shared().run_id;
         async move {
-            detect_text_blocks(&self.registry, &self.enhancer, &mut tree.root, run_id).await?;
+            detect_text_blocks(&self.registry, &mut tree.root, run_id).await?;
             detect_image_chunks(
                 &self.registry,
                 &mut tree.root,
@@ -115,7 +112,7 @@ impl DetectionPhase {
         let span = tracing::info_span!(target: TARGET, "phase", name = "detection.text_only");
         let run_id = ctx.shared().run_id;
         async move {
-            detect_text_blocks(&self.registry, &self.enhancer, doc, run_id).await?;
+            detect_text_blocks(&self.registry, doc, run_id).await?;
             Ok(())
         }
         .instrument(span)
@@ -127,7 +124,6 @@ impl DetectionPhase {
 /// text via [`ModalityBlock::scan_text`] (today: every modality).
 async fn detect_text_blocks<M>(
     registry: &RecognizerRegistry,
-    enhancer: &ContextEnhancer,
     doc: &mut Document<M>,
     run_id: uuid::Uuid,
 ) -> Result<()>
@@ -154,13 +150,7 @@ where
         let mut input = RecognizerInput::new(TextData::new(text.to_owned()));
         input.correlation_id = Some(run_id);
 
-        let mut detected = registry.run::<Text>(input).await?;
-        // Apply context-keyword boosting in block-local coordinates,
-        // before lifting to modality-absolute locations. The shared
-        // NLP-pass producer hasn't been wired into the detection
-        // pipeline yet, so we pass an empty `Artifacts` — the
-        // enhancer's substring path runs without it.
-        enhancer.enhance(&mut detected, text, &Artifacts::new());
+        let detected = registry.run::<Text>(input).await?;
         for entity in detected {
             let Some(location) =
                 M::lift_from_block(&block.spans, entity.location.start, entity.location.end)
diff --git a/crates/nvisy-engine/src/detection/pipeline.rs b/crates/nvisy-engine/src/detection/pipeline.rs
index be2c0118..43fdd871 100644
--- a/crates/nvisy-engine/src/detection/pipeline.rs
+++ b/crates/nvisy-engine/src/detection/pipeline.rs
@@ -132,12 +132,12 @@ impl DetectionPipeline {
     ) -> Result<(Vec<AnyAudit>, u64, DetectionStatus), Error> {
         let actor_id = prepared.actor_id;
 
-        let (recognizer_registry, context_enhancer) = match self
+        let recognizer_registry = match self
             .state
             .detection_config
             .build_for_request(&prepared.catalog)
         {
-            Ok(r) => (Arc::new(r.recognizers), Arc::new(r.enhancer)),
+            Ok(r) => Arc::new(r),
             Err(e) => {
                 self.detections.fail(self.detection_id, e.to_string()).await;
                 return Err(e);
@@ -161,7 +161,6 @@ impl DetectionPipeline {
         let engines = DetectionEngines {
             extraction_engine: (*self.state.extraction_engine).clone(),
             recognizer_registry,
-            context_enhancer,
         };
         let concurrency = self.base_config.effective_concurrency();
         let ctx = DetectionContext::new(cancel, Arc::new(shared_data), engines, concurrency);
diff --git a/crates/nvisy-ner/Cargo.toml b/crates/nvisy-ner/Cargo.toml
index 6758bc16..a6e3739e 100644
--- a/crates/nvisy-ner/Cargo.toml
+++ b/crates/nvisy-ner/Cargo.toml
@@ -32,7 +32,6 @@ rustdoc-args = ["--cfg", "docsrs"]
 
 [dependencies]
 # Internal crates
-nvisy-context = { workspace = true, features = [] }
 nvisy-core = { workspace = true, features = [] }
 
 # Serialization
diff --git a/crates/nvisy-ner/src/nlp/capabilities.rs b/crates/nvisy-ner/src/nlp/capabilities.rs
index 4e4143af..49d68ef3 100644
--- a/crates/nvisy-ner/src/nlp/capabilities.rs
+++ b/crates/nvisy-ner/src/nlp/capabilities.rs
@@ -4,7 +4,7 @@
 //! Composition-time contract between an `NlpEngine` and the
 //! recognizers / enhancer that read its artifacts. Lets the engine
 //! orchestrator refuse impossible asks at construction time — e.g.
-//! wiring a lemma-aware `ContextEnhancer` to a tokenizer-only
+//! wiring a lemma-aware enhancer to a tokenizer-only
 //! engine that doesn't produce lemmas.
 //!
 //! Booleans rather than an enum because capabilities are
diff --git a/crates/nvisy-ner/src/nlp/engine.rs b/crates/nvisy-ner/src/nlp/engine.rs
index 2ffa1779..2766e9e5 100644
--- a/crates/nvisy-ner/src/nlp/engine.rs
+++ b/crates/nvisy-ner/src/nlp/engine.rs
@@ -1,26 +1,22 @@
 //! [`NlpEngine`]: the producer-side trait that builds the
 //! shared-NLP-pass [`TypeMap`] for one or more texts.
 //!
-//! Engines stamp typed enrichment entries —
-//! [`LanguageDetections`] and [`Tokens`] — into the returned
-//! `TypeMap`. An orchestrator that wants shared NLP runs
-//! `process` once per scan, wraps the result in [`Artifacts`], and
-//! attaches it to each [`RecognizerInput`] via
+//! Engines stamp typed enrichment entries (`LanguageDetections`
+//! today; token artifacts when the upstream service supports
+//! them) into the returned `TypeMap`. An orchestrator that wants
+//! shared NLP runs `process` once per scan, wraps the result in
+//! [`Artifacts`], and attaches it to each [`RecognizerInput`] via
 //! [`RecognizerInput::with_artifacts`].
 //!
-//! [`LanguageDetections`]: nvisy_core::primitive::LanguageDetections
-//! [`Tokens`]: nvisy_context::Tokens
 //! [`Artifacts`]: nvisy_core::extraction::Artifacts
 //! [`RecognizerInput`]: nvisy_core::recognition::RecognizerInput
 //! [`RecognizerInput::with_artifacts`]: nvisy_core::recognition::RecognizerInput::with_artifacts
 //!
 //! Pluggable so different deployment shapes (pure language
-//! detection, hosted full-NLP service, future in-process model) can
-//! be wired interchangeably. The orchestrator calls `process` (or
-//! `process_batch`) once per scan; recognizers and the
-//! [`ContextEnhancer`] borrow the resulting map by reference.
-//!
-//! [`ContextEnhancer`]: nvisy_context::ContextEnhancer
+//! detection, hosted full-NLP service, future in-process model)
+//! can be wired interchangeably. The orchestrator calls `process`
+//! (or `process_batch`) once per scan; recognizers and the
+//! keyword-boost enhancer borrow the resulting map by reference.
 
 use nvisy_core::Result;
 use nvisy_core::primitive::LanguageTag;
diff --git a/crates/nvisy-ner/src/nlp/mod.rs b/crates/nvisy-ner/src/nlp/mod.rs
index 77fef86d..ab265acc 100644
--- a/crates/nvisy-ner/src/nlp/mod.rs
+++ b/crates/nvisy-ner/src/nlp/mod.rs
@@ -1,13 +1,13 @@
 //! Producer side of the shared-NLP-pass primitive.
 //!
-//! Consumer-side types live in `nvisy-core` so any text consumer
-//! (pattern recognizers, NER adapters, context enhancer) can read
-//! them without depending on this crate:
-//! [`LanguageDetections`] sits with the language primitives;
-//! [`Tokens`] sits next to the [`ContextEnhancer`] that consumes
-//! it. This module declares the [`NlpEngine`] trait and the
-//! engines that produce those artifacts into the shared `TypeMap`
-//! stamped on `RecognizerInput.artifacts`.
+//! Consumer-side types live in their natural crates so any text
+//! consumer can read them without depending on this one:
+//! [`LanguageDetections`] sits with the language primitives in
+//! `nvisy-core`; the optional token artifact lives in
+//! `nvisy-context` next to its only consumer (the keyword-boost
+//! `Enhancer`). This module declares the [`NlpEngine`] trait and
+//! the engines that produce those artifacts into the shared
+//! `TypeMap` stamped on `RecognizerInput.artifacts`.
 //!
 //! One engine ships today:
 //! - [`LinguaNlpEngine`] — language-only NLP, backed by the
@@ -21,9 +21,7 @@
 //! The trait is async because realistic implementations are
 //! HTTP-bound or otherwise yield.
 //!
-//! [`Tokens`]: nvisy_context::Tokens
 //! [`LanguageDetections`]: nvisy_core::primitive::LanguageDetections
-//! [`ContextEnhancer`]: nvisy_context::ContextEnhancer
 //! [`lingua`]: https://crates.io/crates/lingua
 //! [`NerBackend`]: crate::backend::NerBackend
 //! [`NerRecognizer`]: crate::NerRecognizer
diff --git a/crates/nvisy-ner/src/recognition/config.rs b/crates/nvisy-ner/src/recognition/config.rs
index a50c7b6f..c8af8c87 100644
--- a/crates/nvisy-ner/src/recognition/config.rs
+++ b/crates/nvisy-ner/src/recognition/config.rs
@@ -55,16 +55,6 @@ pub struct NerModel {
     /// Alignment policy for sub-word predictions. Same advisory
     /// status as `aggregation`.
     pub alignment: AlignmentMode,
-    /// Per-recognizer context-keyword list for the post-recognition
-    /// [`ContextEnhancer`].
-    /// Empty when the recognizer doesn't participate in boosting.
-    /// Each emitted entity's source name keys the lookup, so the
-    /// recognizer's [`name`] is used
-    /// as the registration key.
-    ///
-    /// [`ContextEnhancer`]: nvisy_context::ContextEnhancer
-    /// [`name`]: super::NerRecognizer::name
-    pub default_context: Vec<String>,
 }
 
 impl Default for NerModel {
@@ -77,7 +67,6 @@ impl Default for NerModel {
             low_score_multiplier: 0.4,
             aggregation: AggregationStrategy::Max,
             alignment: AlignmentMode::Expand,
-            default_context: Vec::new(),
         }
     }
 }
@@ -108,7 +97,6 @@ impl NerModelBuilder {
                 .unwrap_or(defaults.low_score_multiplier),
             aggregation: self.aggregation.unwrap_or(defaults.aggregation),
             alignment: self.alignment.unwrap_or(defaults.alignment),
-            default_context: self.default_context.unwrap_or(defaults.default_context),
         }
     }
 }
diff --git a/crates/nvisy-ner/src/recognition/recognizer.rs b/crates/nvisy-ner/src/recognition/recognizer.rs
index bbdca67b..1c4647e6 100644
--- a/crates/nvisy-ner/src/recognition/recognizer.rs
+++ b/crates/nvisy-ner/src/recognition/recognizer.rs
@@ -18,7 +18,6 @@
 use std::sync::Arc;
 
 use derive_builder::Builder;
-use nvisy_context::{Context, ContextRegistry};
 use nvisy_core::entity::{Entity, EntityLabelRef, ModelProvenance, TrailProvenance, TrailStep};
 use nvisy_core::modality::{Text, TextLocation};
 use nvisy_core::primitive::Confidence;
@@ -37,12 +36,8 @@ use crate::backend::{NerBackend, NerRequest, RawNerSpan};
     build_fn(error = "Error", name = "try_build", private)
 )]
 pub struct NerRecognizer {
-    /// Recognizer name. Surfaced in trail provenance and used as
-    /// the key the [`ContextEnhancer`] looks up to find the
-    /// recognizer's [`default_context`].
-    ///
-    /// [`ContextEnhancer`]: nvisy_context::ContextEnhancer
-    /// [`default_context`]: NerModel::default_context
+    /// Recognizer name. Surfaced in trail provenance on every
+    /// emitted entity.
     name: String,
     /// Backend that turns `(text, kinds)` into raw spans. Required.
     /// Set via [`with_engine`], which accepts any concrete
@@ -92,24 +87,6 @@ impl NerRecognizer {
         &self.model
     }
 
-    /// Build a [`ContextRegistry`] containing this recognizer's
-    /// [`default_context`] keyed on the recognizer's name. Returns
-    /// an empty registry when no keywords were declared.
-    ///
-    /// Mirrors `PatternRegistry::context_registry` so engine code
-    /// can merge per-recognizer contexts from every text-modality
-    /// recognizer into one enhancer input without duplicating the
-    /// keyword data.
-    ///
-    /// [`default_context`]: NerModel::default_context
-    #[must_use]
-    pub fn context_registry(&self) -> ContextRegistry {
-        ContextRegistry::new().with_entry(
-            self.name.clone(),
-            Context::new(self.model.default_context.iter().cloned()),
-        )
-    }
-
     fn build_entity(&self, span: &RawNerSpan, label: EntityLabelRef) -> Entity<Text> {
         let raw_confidence =
             Confidence::try_clamped(span.score).unwrap_or(self.model.default_score);
diff --git a/crates/nvisy-pattern/Cargo.toml b/crates/nvisy-pattern/Cargo.toml
index 1c17cc80..d89d43fc 100644
--- a/crates/nvisy-pattern/Cargo.toml
+++ b/crates/nvisy-pattern/Cargo.toml
@@ -29,7 +29,6 @@ nvisy-core = { workspace = true, features = [] }
 
 # Serialization
 serde = { workspace = true, features = [] }
-schemars = { workspace = true, features = [] }
 toml = { workspace = true, features = ["parse"] }
 
 # Derive macros and error handling
diff --git a/crates/nvisy-pattern/README.md b/crates/nvisy-pattern/README.md
index 8f86d1ee..7d299119 100644
--- a/crates/nvisy-pattern/README.md
+++ b/crates/nvisy-pattern/README.md
@@ -2,20 +2,41 @@
 
 [![Build](https://img.shields.io/github/actions/workflow/status/nvisycom/runtime/build.yml?branch=main&label=build%20%26%20test&style=flat-square)](https://github.com/nvisycom/runtime/actions/workflows/build.yml)
 
-Built-in patterns, dictionaries, and validators for PII/PHI detection in the
+Regex and dictionary recognizers for PII / PHI detection in the
 Nvisy runtime.
 
 ## Overview
 
-A pre-compiled pattern engine for PII/PHI detection. Each scan runs
-regex (`RegexSet`-prefiltered), dictionary lookup (Aho-Corasick),
-and deny-list injection. Built-in patterns and dictionaries live as
-JSON under `assets/` and are embedded at compile time.
-
-Per-scan inputs (allow / deny lists, context-keyword hints,
-caller-supplied ad-hoc patterns) flow through `PatternContext` without
-rebuilding the engine. Regex patterns can opt into post-match
-validation by name (e.g. `"luhn"`, `"ssn"`, `"iban"`).
+`PatternRecognizer` compiles a set of `Regex` rules (each holding
+one or more regex `Variant`s, a Presidio-shaped multi-strategy
+group) and `Dictionary` term lists into pooled scanners — one
+shared `regex::RegexSet` for the regex side and one shared
+`aho_corasick::AhoCorasick` automaton for the literal side. A
+single walk over the input runs both scanners and emits
+`Entity<Text>` values in modality-local byte coordinates.
+
+Each rule may declare per-label context keywords; the recognizer
+wraps itself in a `nvisy_context::Boosting` layer at build time
+that lifts confidence on matches whose neighbourhood contains a
+declared keyword.
+
+The built-in pattern + dictionary set lives as TOML under
+`assets/` and is embedded at compile time. The recognizer's
+builder accepts both built-ins and user-supplied rules:
+
+```rust
+use nvisy_pattern::PatternRecognizer;
+
+let recognizer = PatternRecognizer::builder()
+    .with_builtin_patterns()
+    .with_builtin_dictionaries()
+    .build()
+    .expect("built-in recognizer builds");
+```
+
+Regex variants can opt into a post-match validator by name
+(`"luhn"`, `"ssn"`, `"iban"`, `"phone"`, `"date"`); custom
+validators can be registered via `ValidatorRegistry::with`.
 
 ## Documentation
 
diff --git a/crates/nvisy-pattern/assets/dictionaries/general/languages.toml b/crates/nvisy-pattern/assets/dictionaries/general/languages.toml
index d356fef1..0a7e0aee 100644
--- a/crates/nvisy-pattern/assets/dictionaries/general/languages.toml
+++ b/crates/nvisy-pattern/assets/dictionaries/general/languages.toml
@@ -1,12 +1,7 @@
 name = "languages"
 label = "language"
-score = 0.85
-# Per-CSV-column overrides:
-#   column 0 = long-form names (`English`, `Spanish`, ...) — high
-#             confidence; collisions with everyday words are rare.
-#   column 1 = ISO 639-1 codes (`en`, `es`, ...) — low confidence;
-#             two-letter codes routinely collide with English words
-#             like `or` (Odia), `it` (Italian), `am` (Amharic).
-#             Below the dedup default threshold of 0.5 so they
-#             drop unless an operator explicitly lowers the floor.
-column_scores = [0.85, 0.30]
+
+# column 0 = long-form names (`English`, `Spanish`, ...)
+# column 1 = ISO 639-1 codes (`en`, `es`, ...); scored low: they collide with common words
+# column 2 = alternate long-form names (`Farsi` for Persian)
+score = [0.85, 0.30, 0.85]
diff --git a/crates/nvisy-pattern/assets/patterns/contact/email.toml b/crates/nvisy-pattern/assets/patterns/contact/email.toml
index 13e70bcf..fb37ff45 100644
--- a/crates/nvisy-pattern/assets/patterns/contact/email.toml
+++ b/crates/nvisy-pattern/assets/patterns/contact/email.toml
@@ -1,4 +1,6 @@
 name = "email"
 label = "email_address"
+
+[[variants]]
 regex = "\\b[a-zA-Z0-9._%+\\-]+@[a-zA-Z0-9.\\-]+\\.[a-zA-Z]{2,}\\b"
 score = 0.95
diff --git a/crates/nvisy-pattern/assets/patterns/contact/phone.toml b/crates/nvisy-pattern/assets/patterns/contact/phone.toml
index b2e1faf5..01df2224 100644
--- a/crates/nvisy-pattern/assets/patterns/contact/phone.toml
+++ b/crates/nvisy-pattern/assets/patterns/contact/phone.toml
@@ -1,9 +1,8 @@
 name = "phone"
 label = "phone_number"
+context = ["phone", "call", "mobile", "tel", "fax", "contact"]
+
+[[variants]]
 regex = "(?:\\+\\d{1,3}[\\s.\\-]?)?\\(?\\d{2,4}\\)?[\\s.\\-]?\\d{3,4}[\\s.\\-]?\\d{4}\\b"
 score = 0.8
 validator = "phone"
-
-[context]
-keywords = ["phone", "call", "mobile", "tel", "fax", "contact"]
-penalty = 0.15
diff --git a/crates/nvisy-pattern/assets/patterns/contact/url.toml b/crates/nvisy-pattern/assets/patterns/contact/url.toml
index 24c3c9a2..ec11fcee 100644
--- a/crates/nvisy-pattern/assets/patterns/contact/url.toml
+++ b/crates/nvisy-pattern/assets/patterns/contact/url.toml
@@ -1,4 +1,6 @@
 name = "url"
 label = "url"
+
+[[variants]]
 regex = "\\bhttps?://[^\\s/$.?#][^\\s]*\\b"
 score = 0.9
diff --git a/crates/nvisy-pattern/assets/patterns/credentials/aws_key.toml b/crates/nvisy-pattern/assets/patterns/credentials/aws_key.toml
index 2748a222..189aacc9 100644
--- a/crates/nvisy-pattern/assets/patterns/credentials/aws_key.toml
+++ b/crates/nvisy-pattern/assets/patterns/credentials/aws_key.toml
@@ -1,4 +1,6 @@
 name = "aws-key"
 label = "api_key"
+
+[[variants]]
 regex = "\\bAKIA[0-9A-Z]{16}\\b"
 score = 0.95
diff --git a/crates/nvisy-pattern/assets/patterns/credentials/generic_api_key.toml b/crates/nvisy-pattern/assets/patterns/credentials/generic_api_key.toml
index 4c851fa1..be69abc5 100644
--- a/crates/nvisy-pattern/assets/patterns/credentials/generic_api_key.toml
+++ b/crates/nvisy-pattern/assets/patterns/credentials/generic_api_key.toml
@@ -1,4 +1,6 @@
 name = "generic-api-key"
 label = "api_key"
+
+[[variants]]
 regex = "(?i)(?:api[_\\-]?key|api[_\\-]?secret|access[_\\-]?token|secret[_\\-]?key|bearer)\\s*[:=]\\s*[\"']?([a-zA-Z0-9_\\-]{20,})[\"']?"
 score = 0.7
diff --git a/crates/nvisy-pattern/assets/patterns/credentials/github_token.toml b/crates/nvisy-pattern/assets/patterns/credentials/github_token.toml
index 39c9bb1c..ba247e60 100644
--- a/crates/nvisy-pattern/assets/patterns/credentials/github_token.toml
+++ b/crates/nvisy-pattern/assets/patterns/credentials/github_token.toml
@@ -1,4 +1,6 @@
 name = "github-token"
 label = "auth_token"
+
+[[variants]]
 regex = "\\bgh[pousr]_[a-zA-Z0-9]{36}\\b"
 score = 0.95
diff --git a/crates/nvisy-pattern/assets/patterns/credentials/private_key.toml b/crates/nvisy-pattern/assets/patterns/credentials/private_key.toml
index cdaff752..61d6977e 100644
--- a/crates/nvisy-pattern/assets/patterns/credentials/private_key.toml
+++ b/crates/nvisy-pattern/assets/patterns/credentials/private_key.toml
@@ -1,4 +1,6 @@
 name = "private-key"
 label = "private_key"
+
+[[variants]]
 regex = "-----BEGIN (?:RSA |EC |DSA |OPENSSH )?PRIVATE KEY-----"
 score = 0.98
diff --git a/crates/nvisy-pattern/assets/patterns/credentials/stripe_key.toml b/crates/nvisy-pattern/assets/patterns/credentials/stripe_key.toml
index 73437d3b..127517f6 100644
--- a/crates/nvisy-pattern/assets/patterns/credentials/stripe_key.toml
+++ b/crates/nvisy-pattern/assets/patterns/credentials/stripe_key.toml
@@ -1,4 +1,6 @@
 name = "stripe-key"
 label = "api_key"
+
+[[variants]]
 regex = "\\bsk_(live|test)_[a-zA-Z0-9]{24,}\\b"
 score = 0.95
diff --git a/crates/nvisy-pattern/assets/patterns/finance/bitcoin_address.toml b/crates/nvisy-pattern/assets/patterns/finance/bitcoin_address.toml
index 23f78873..a68c435a 100644
--- a/crates/nvisy-pattern/assets/patterns/finance/bitcoin_address.toml
+++ b/crates/nvisy-pattern/assets/patterns/finance/bitcoin_address.toml
@@ -1,4 +1,6 @@
 name = "bitcoin-address"
 label = "crypto_address"
+
+[[variants]]
 regex = "\\b(?:bc1[a-z0-9]{25,39}|[13][a-km-zA-HJ-NP-Z1-9]{25,34})\\b"
 score = 0.85
diff --git a/crates/nvisy-pattern/assets/patterns/finance/credit_card.toml b/crates/nvisy-pattern/assets/patterns/finance/credit_card.toml
index 9d73cd20..78b3325a 100644
--- a/crates/nvisy-pattern/assets/patterns/finance/credit_card.toml
+++ b/crates/nvisy-pattern/assets/patterns/finance/credit_card.toml
@@ -1,8 +1,8 @@
 name = "credit-card"
 label = "payment_card"
+context = ["card", "credit", "debit", "payment", "visa", "mastercard", "amex"]
+
+[[variants]]
 regex = "\\b(?:\\d[ \\-]*?){13,19}\\b"
 score = 0.85
 validator = "luhn"
-
-[context]
-keywords = ["card", "credit", "debit", "payment", "visa", "mastercard", "amex"]
diff --git a/crates/nvisy-pattern/assets/patterns/finance/ethereum_address.toml b/crates/nvisy-pattern/assets/patterns/finance/ethereum_address.toml
index 02fa0939..2860d8a4 100644
--- a/crates/nvisy-pattern/assets/patterns/finance/ethereum_address.toml
+++ b/crates/nvisy-pattern/assets/patterns/finance/ethereum_address.toml
@@ -1,4 +1,6 @@
 name = "ethereum-address"
 label = "crypto_address"
+
+[[variants]]
 regex = "\\b0x[0-9a-fA-F]{40}\\b"
 score = 0.85
diff --git a/crates/nvisy-pattern/assets/patterns/finance/iban.toml b/crates/nvisy-pattern/assets/patterns/finance/iban.toml
index 364dff6c..7256b240 100644
--- a/crates/nvisy-pattern/assets/patterns/finance/iban.toml
+++ b/crates/nvisy-pattern/assets/patterns/finance/iban.toml
@@ -1,8 +1,8 @@
 name = "iban"
 label = "iban"
+context = ["iban", "bank", "account", "transfer", "swift"]
+
+[[variants]]
 regex = "\\b[A-Z]{2}\\d{2}\\s?[A-Z0-9]{4}\\s?(?:\\d{4}\\s?){2,7}\\d{1,4}\\b"
 score = 0.85
 validator = "iban"
-
-[context]
-keywords = ["iban", "bank", "account", "transfer", "swift"]
diff --git a/crates/nvisy-pattern/assets/patterns/finance/swift_code.toml b/crates/nvisy-pattern/assets/patterns/finance/swift_code.toml
index 39b5c508..7147b65c 100644
--- a/crates/nvisy-pattern/assets/patterns/finance/swift_code.toml
+++ b/crates/nvisy-pattern/assets/patterns/finance/swift_code.toml
@@ -1,4 +1,6 @@
 name = "swift-code"
 label = "swift_code"
+
+[[variants]]
 regex = "\\b[A-Z]{4}[A-Z]{2}[A-Z0-9]{2}(?:[A-Z0-9]{3})?\\b"
 score = 0.7
diff --git a/crates/nvisy-pattern/assets/patterns/finance/us_bank_routing.toml b/crates/nvisy-pattern/assets/patterns/finance/us_bank_routing.toml
index b6fadd82..12010716 100644
--- a/crates/nvisy-pattern/assets/patterns/finance/us_bank_routing.toml
+++ b/crates/nvisy-pattern/assets/patterns/finance/us_bank_routing.toml
@@ -1,4 +1,6 @@
 name = "us-bank-routing"
 label = "bank_routing"
+
+[[variants]]
 regex = "\\b(?:0[1-9]|[12]\\d|3[0-2])\\d{7}\\b"
 score = 0.5
diff --git a/crates/nvisy-pattern/assets/patterns/identity/ssn.toml b/crates/nvisy-pattern/assets/patterns/identity/ssn.toml
index 17028ed5..f2076b26 100644
--- a/crates/nvisy-pattern/assets/patterns/identity/ssn.toml
+++ b/crates/nvisy-pattern/assets/patterns/identity/ssn.toml
@@ -1,8 +1,8 @@
 name = "ssn"
 label = "government_id"
+context = ["social security", "ssn", "tax id", "taxpayer identification"]
+
+[[variants]]
 regex = "\\b(\\d{3})-(\\d{2})-(\\d{4})\\b"
 score = 0.9
 validator = "ssn"
-
-[context]
-keywords = ["social security", "ssn", "tax id", "taxpayer identification"]
diff --git a/crates/nvisy-pattern/assets/patterns/identity/us_drivers_license.toml b/crates/nvisy-pattern/assets/patterns/identity/us_drivers_license.toml
index 0720e2b9..873af318 100644
--- a/crates/nvisy-pattern/assets/patterns/identity/us_drivers_license.toml
+++ b/crates/nvisy-pattern/assets/patterns/identity/us_drivers_license.toml
@@ -1,4 +1,6 @@
 name = "us-drivers-license"
 label = "drivers_license"
+
+[[variants]]
 regex = "\\b[A-Z]\\d{3}-\\d{4}-\\d{4}\\b"
 score = 0.4
diff --git a/crates/nvisy-pattern/assets/patterns/identity/us_passport.toml b/crates/nvisy-pattern/assets/patterns/identity/us_passport.toml
index 48da58bd..d7087d83 100644
--- a/crates/nvisy-pattern/assets/patterns/identity/us_passport.toml
+++ b/crates/nvisy-pattern/assets/patterns/identity/us_passport.toml
@@ -1,4 +1,6 @@
 name = "us-passport"
 label = "passport_number"
+
+[[variants]]
 regex = "\\b[A-Z]\\d{8}\\b"
 score = 0.5
diff --git a/crates/nvisy-pattern/assets/patterns/identity/us_postal_code.toml b/crates/nvisy-pattern/assets/patterns/identity/us_postal_code.toml
index adf40e1d..737b391f 100644
--- a/crates/nvisy-pattern/assets/patterns/identity/us_postal_code.toml
+++ b/crates/nvisy-pattern/assets/patterns/identity/us_postal_code.toml
@@ -1,4 +1,6 @@
 name = "us-postal-code"
 label = "postal_code"
+
+[[variants]]
 regex = "\\b\\d{5}(?:-\\d{4})?\\b"
 score = 0.5
diff --git a/crates/nvisy-pattern/assets/patterns/network/ipv4.toml b/crates/nvisy-pattern/assets/patterns/network/ipv4.toml
index 914c6b46..d64403dd 100644
--- a/crates/nvisy-pattern/assets/patterns/network/ipv4.toml
+++ b/crates/nvisy-pattern/assets/patterns/network/ipv4.toml
@@ -1,4 +1,6 @@
 name = "ipv4"
 label = "ip_address"
+
+[[variants]]
 regex = "\\b(?:(?:25[0-5]|2[0-4]\\d|[01]?\\d\\d?)\\.){3}(?:25[0-5]|2[0-4]\\d|[01]?\\d\\d?)\\b"
 score = 0.75
diff --git a/crates/nvisy-pattern/assets/patterns/network/ipv6.toml b/crates/nvisy-pattern/assets/patterns/network/ipv6.toml
index 0107ad00..dfc12ecd 100644
--- a/crates/nvisy-pattern/assets/patterns/network/ipv6.toml
+++ b/crates/nvisy-pattern/assets/patterns/network/ipv6.toml
@@ -1,4 +1,6 @@
 name = "ipv6"
 label = "ip_address"
+
+[[variants]]
 regex = "\\b(?:[0-9a-fA-F]{1,4}:){7}[0-9a-fA-F]{1,4}\\b|(?:[0-9a-fA-F]{1,4}:){1,7}:|::(?:[0-9a-fA-F]{1,4}:){0,5}[0-9a-fA-F]{1,4}\\b"
 score = 0.75
diff --git a/crates/nvisy-pattern/assets/patterns/network/mac_address.toml b/crates/nvisy-pattern/assets/patterns/network/mac_address.toml
index 2766fc31..fcca5944 100644
--- a/crates/nvisy-pattern/assets/patterns/network/mac_address.toml
+++ b/crates/nvisy-pattern/assets/patterns/network/mac_address.toml
@@ -1,4 +1,6 @@
 name = "mac-address"
 label = "mac_address"
+
+[[variants]]
 regex = "\\b(?:[0-9A-Fa-f]{2}[:\\-]){5}[0-9A-Fa-f]{2}\\b"
 score = 0.85
diff --git a/crates/nvisy-pattern/assets/patterns/personal/date_of_birth.toml b/crates/nvisy-pattern/assets/patterns/personal/date_of_birth.toml
index 8bafa63f..c88f21b9 100644
--- a/crates/nvisy-pattern/assets/patterns/personal/date_of_birth.toml
+++ b/crates/nvisy-pattern/assets/patterns/personal/date_of_birth.toml
@@ -1,9 +1,8 @@
 name = "date_of_birth"
 label = "date_of_birth"
+context = ["birth", "born", "dob", "birthday"]
+
+[[variants]]
 regex = "\\b(?:(?:0[1-9]|1[0-2]|[1-9])[/\\-](?:0[1-9]|[12]\\d|3[01]|[1-9])[/\\-](?:19|20)\\d{2}|(?:19|20)\\d{2}[/\\-](?:0[1-9]|1[0-2])[/\\-](?:0[1-9]|[12]\\d|3[01]))\\b"
 score = 0.6
 validator = "date"
-
-[context]
-keywords = ["birth", "born", "dob", "birthday"]
-penalty = 0.1
diff --git a/crates/nvisy-pattern/assets/patterns/personal/datetime.toml b/crates/nvisy-pattern/assets/patterns/personal/datetime.toml
index 10ed0de4..c1e00f7a 100644
--- a/crates/nvisy-pattern/assets/patterns/personal/datetime.toml
+++ b/crates/nvisy-pattern/assets/patterns/personal/datetime.toml
@@ -1,7 +1,7 @@
 name = "datetime"
 label = "date_time"
+context = ["timestamp", "created", "modified", "logged", "at", "time"]
+
+[[variants]]
 regex = "\\b(?:19|20)\\d{2}[/\\-](?:0[1-9]|1[0-2])[/\\-](?:0[1-9]|[12]\\d|3[01])[T ](?:[01]\\d|2[0-3]):[0-5]\\d(?::[0-5]\\d)?(?:Z|[+\\-]\\d{2}:?\\d{2})?\\b"
 score = 0.7
-
-[context]
-keywords = ["timestamp", "created", "modified", "logged", "at", "time"]
diff --git a/crates/nvisy-pattern/src/lib.rs b/crates/nvisy-pattern/src/lib.rs
index 3222cca9..ed069016 100644
--- a/crates/nvisy-pattern/src/lib.rs
+++ b/crates/nvisy-pattern/src/lib.rs
@@ -7,7 +7,7 @@ mod shipped;
 pub mod validators;
 
 pub use self::recognition::{
-    Dictionary, DictionaryBuilder, PatternRecognizer, PatternRecognizerBuilder, PatternRegistry,
-    Regex, RegexBuilder, Terms,
+    Dictionary, DictionaryBuilder, PatternRecognizer, PatternRecognizerBuilder, Regex,
+    RegexBuilder, Scoring, Term, Terms, Variant, VariantBuilder,
 };
 pub use self::shipped::{dictionaries, patterns};
diff --git a/crates/nvisy-pattern/src/recognition/compiled.rs b/crates/nvisy-pattern/src/recognition/compiled.rs
new file mode 100644
index 00000000..1283025f
--- /dev/null
+++ b/crates/nvisy-pattern/src/recognition/compiled.rs
@@ -0,0 +1,166 @@
+//! Compiled, recognizer-ready forms of [`Regex`] rules and
+//! [`Dictionary`]s.
+//!
+//! [`PatternRecognizerBuilder::build`] compiles each regex variant
+//! into a [`::regex::Regex`] and folds every dictionary's terms
+//! into a shared [`AhoCorasick`] automaton, then stores the
+//! per-rule emission metadata next to those scanners. This module
+//! holds the per-rule metadata structs ([`CompiledPattern`],
+//! [`CompiledDictionary`]) and their `build_entity` constructors —
+//! the bits that turn a regex / Aho-Corasick hit into an
+//! `Entity<Text>`.
+//!
+//! [`Regex`]: super::Regex
+//! [`Dictionary`]: super::Dictionary
+//! [`AhoCorasick`]: aho_corasick::AhoCorasick
+//! [`PatternRecognizerBuilder::build`]: super::PatternRecognizerBuilder::build
+
+use std::sync::Arc;
+
+use nvisy_core::entity::{Entity, EntityLabelRef, PatternProvenance, TrailProvenance, TrailStep};
+use nvisy_core::modality::{Text, TextLocation};
+use nvisy_core::primitive::{Confidence, LanguageTag};
+use regex::Regex;
+
+use crate::validators::Validator;
+
+/// One compiled regex slot: a single `(pattern, variant)` pair,
+/// keyed in the shared `RegexSet` by its position in
+/// `PatternRecognizer.patterns`. Pattern-level metadata (name,
+/// label, languages) is repeated across the pattern's variants so
+/// the dispatch loop has everything it needs without a second
+/// indirection.
+///
+/// `context` is intentionally not stored on compiled state — the
+/// recognizer's wrapping `Boosting` layer harvests keywords from
+/// the source patterns at build time.
+pub(super) struct CompiledPattern {
+    /// Pattern name (e.g. `"ssn"`). Surfaced in trail provenance.
+    pub pattern_name: String,
+    pub label: EntityLabelRef,
+    pub regex: Regex,
+    pub score: Confidence,
+    pub validator: Option<Arc<dyn Validator>>,
+    /// Languages the parent pattern applies to.
+    /// Empty means "any language".
+    pub languages: Vec<LanguageTag>,
+}
+
+impl CompiledPattern {
+    /// Emit an `Entity<Text>` for a regex match at `[start, end)`
+    /// in modality-local byte coordinates. The recognizer phase
+    /// lifts the location to absolute document coordinates after
+    /// dispatch.
+    pub(super) fn build_entity(&self, start: usize, end: usize) -> Entity<Text> {
+        let provenance = TrailProvenance::Pattern(PatternProvenance::Regex {
+            name: self.pattern_name.clone(),
+            regex: Some(self.regex.as_str().to_owned()),
+            validator: self.validator.as_ref().map(|_| self.pattern_name.clone()),
+            contextual: false,
+        });
+        let step = TrailStep::recognition(
+            "pattern",
+            self.score,
+            provenance,
+            format!("pattern `{}` matched", self.pattern_name),
+        );
+        Entity::builder()
+            .with_label(self.label.clone())
+            .with_trail(vec![step])
+            .with_confidence(self.score)
+            .with_location(TextLocation::new(start, end))
+            .build()
+            .expect("required fields provided")
+    }
+}
+
+/// Source of truth for one runtime dictionary: its term range
+/// inside the shared Aho-Corasick automaton, plus per-dictionary
+/// emission metadata.
+pub(super) struct CompiledDictionary {
+    pub name: String,
+    pub label: EntityLabelRef,
+    /// First term-id (inclusive) for this dictionary inside the
+    /// shared automaton.
+    pub term_start: usize,
+    /// One past the last term-id for this dictionary inside the
+    /// shared automaton.
+    pub term_end: usize,
+    /// Per-term confidence, indexed by `term_id - term_start`.
+    /// Resolved at recognizer-build time from the dictionary's
+    /// `scoring` policy and any per-term overrides.
+    pub term_scores: Vec<Confidence>,
+    /// Languages this dictionary applies to. Empty means "any
+    /// language".
+    pub languages: Vec<LanguageTag>,
+    /// Reject matches whose immediate neighbours are word
+    /// characters (alphanumeric or `_`). Mirrors regex `\b`.
+    pub word_boundary: bool,
+}
+
+impl CompiledDictionary {
+    /// Emit an `Entity<Text>` for an Aho-Corasick hit at
+    /// `[start, end)` in modality-local byte coordinates. `score`
+    /// is the per-term confidence resolved at recognizer-build
+    /// time (the dictionary's `scoring` policy or per-term
+    /// override).
+    pub(super) fn build_entity(&self, score: Confidence, start: usize, end: usize) -> Entity<Text> {
+        let provenance = TrailProvenance::Pattern(PatternProvenance::Dictionary {
+            name: self.name.clone(),
+            contextual: false,
+        });
+        let step = TrailStep::recognition(
+            "pattern",
+            score,
+            provenance,
+            format!("dictionary `{}` matched", self.name),
+        );
+        Entity::builder()
+            .with_label(self.label.clone())
+            .with_trail(vec![step])
+            .with_confidence(score)
+            .with_location(TextLocation::new(start, end))
+            .build()
+            .expect("required fields provided")
+    }
+}
+
+/// Mirror of regex `\b` for the byte range `text[start..end]`:
+/// the immediate neighbour characters (or start/end of input)
+/// must not be word characters. A word character here is Unicode
+/// alphanumeric or `_`, matching the conventional regex
+/// definition.
+///
+/// Inspects whole neighbouring `char`s rather than raw bytes, so
+/// a multibyte codepoint such as `é` is classified as one char.
+/// `start` / `end` must lie on `char` boundaries or slicing panics.
+pub(super) fn has_word_boundaries(text: &str, start: usize, end: usize) -> bool {
+    let left_is_word = text[..start].chars().next_back().is_some_and(is_word_char);
+    let right_is_word = text[end..].chars().next().is_some_and(is_word_char);
+    !left_is_word && !right_is_word
+}
+
+fn is_word_char(c: char) -> bool {
+    c.is_alphanumeric() || c == '_'
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn has_word_boundaries_handles_edges_and_unicode() {
+        // Match touches both edges of the input → boundaries OK.
+        assert!(has_word_boundaries("hello", 0, 5));
+        // Match preceded by a word char → not a boundary.
+        assert!(!has_word_boundaries("example", 5, 7));
+        // Match followed by a word char → not a boundary.
+        assert!(!has_word_boundaries("amount", 0, 2));
+        // Space surround → boundaries OK.
+        assert!(has_word_boundaries(" am ", 1, 3));
+        // Unicode word char on the left (`é` ends at byte 5) → not a boundary.
+        assert!(!has_word_boundaries("café_am", 5, 7));
+        // Punctuation around → boundaries OK.
+        assert!(has_word_boundaries("(am)", 1, 3));
+    }
+}
diff --git a/crates/nvisy-pattern/src/recognition/dictionary.rs b/crates/nvisy-pattern/src/recognition/dictionary.rs
index 7875c2a2..bf20a1e0 100644
--- a/crates/nvisy-pattern/src/recognition/dictionary.rs
+++ b/crates/nvisy-pattern/src/recognition/dictionary.rs
@@ -1,40 +1,111 @@
 //! [`Dictionary`]: literal-term detection rule.
 //!
 //! A dictionary scans for a fixed list of literal strings using an
-//! Aho-Corasick automaton. Compared with [`Pattern`], a dictionary
+//! Aho-Corasick automaton. Compared with [`Regex`], a dictionary
 //! has no regex engine, no validator, and a single shared confidence
 //! score applied to every match.
 //!
-//! Construct via:
+//! Construct via [`Dictionary::builder`] for the chainable style or
+//! [`Dictionary::from_toml`] for a self-contained TOML source.
 //!
-//! - [`Dictionary::builder`] — chainable, ground-up
-//! - [`Dictionary::from_toml`] — self-contained TOML
+//! Term sources are first-class — see [`Terms`] for [`from_text`]
+//! and [`from_csv`] constructors. The builder's [`with_terms`]
+//! setter accepts anything convertible to [`Terms`].
 //!
-//! Term sources are first-class — see [`Terms`] for
-//! [`from_text`] and
-//! [`from_csv`] constructors. The builder's
-//! [`with_terms`] setter accepts
-//! anything convertible to [`Terms`].
-//!
-//! [`Pattern`]: crate::Pattern
+//! [`Regex`]: crate::Regex
 //! [`Terms`]: crate::Terms
 //! [`from_text`]: crate::Terms::from_text
 //! [`from_csv`]: crate::Terms::from_csv
 //! [`with_terms`]: DictionaryBuilder::with_terms
 
 use derive_builder::Builder;
-use nvisy_context::Context;
 use nvisy_core::Error;
 use nvisy_core::entity::EntityLabelRef;
 use nvisy_core::primitive::{Confidence, LanguageTag};
-use schemars::JsonSchema;
-use serde::{Deserialize, Serialize};
+use serde::Deserialize;
 
 use super::terms::Terms;
 
+/// Confidence policy for a [`Dictionary`]'s matches.
+///
+/// Either every term gets the same score ([`Uniform`]), or scores
+/// are picked per CSV source column ([`PerColumn`]). The untagged
+/// serde representation accepts a bare number for the uniform
+/// case and an array for the per-column case:
+///
+/// ```toml
+/// score = 0.9              # Uniform
+/// score = [0.85, 0.30]     # PerColumn
+/// ```
+///
+/// [`Uniform`]: Scoring::Uniform
+/// [`PerColumn`]: Scoring::PerColumn
+#[derive(Debug, Clone, PartialEq, Deserialize)]
+#[serde(untagged)]
+pub enum Scoring {
+    /// Single confidence stamped on every match. The common case.
+    Uniform(Confidence),
+    /// Per-column confidence vector. `[i]` is the confidence
+    /// stamped on every term whose source CSV column was `i`. A
+    /// term with no score of its own whose column is absent or
+    /// past the end of this vec is a recognizer-build error.
+    PerColumn(Vec<Confidence>),
+}
+
+impl Scoring {
+    /// Check that the policy can resolve at least one score.
+    /// Only an empty `PerColumn` vector fails: it has no entry
+    /// for any column. The recognizer runs this at build time
+    /// and reports a failure as a configuration error.
+    ///
+    /// # Errors
+    ///
+    /// Returns a static message describing why the policy is invalid.
+    pub fn validate(&self) -> Result<(), &'static str> {
+        // An empty per-column vector is the only malformed shape.
+        let unresolvable = matches!(self, Self::PerColumn(scores) if scores.is_empty());
+        if unresolvable {
+            return Err("PerColumn scoring with no scores can never resolve");
+        }
+        // `Uniform`, or `PerColumn` with at least one entry.
+        Ok(())
+    }
+
+    /// Look up the confidence for a term's source `column`.
+    /// `Uniform` yields its single score whatever `column` is.
+    /// `PerColumn` yields the entry at that index, and `None`
+    /// when `column` is `None` or indexes past the vector's end.
+    /// What to do on `None` (per-term override, hard error,
+    /// default constant, ...) is left to the caller.
+    #[must_use]
+    pub fn get(&self, column: Option<u16>) -> Option<Confidence> {
+        match (self, column) {
+            (Self::Uniform(uniform), _) => Some(*uniform),
+            (Self::PerColumn(by_column), index) => by_column.get(usize::from(index?)).copied(),
+        }
+    }
+}
+
+impl Default for Scoring {
+    fn default() -> Self {
+        Self::Uniform(Confidence::MAX)
+    }
+}
+
 /// Literal-term detection rule.
-#[derive(Debug, Clone, PartialEq, Builder)]
-#[derive(Serialize, Deserialize, JsonSchema)]
+///
+/// ```
+/// use nvisy_core::entity::builtins;
+/// use nvisy_pattern::{Dictionary, Terms};
+///
+/// let dictionary = Dictionary::builder()
+///     .with_name("nationalities")
+///     .with_label(builtins::NATIONALITY.label_ref())
+///     .with_terms(Terms::from(["German", "French", "Italian"]))
+///     .build()
+///     .expect("nationalities dictionary builds");
+/// ```
+#[derive(Debug, Clone, PartialEq, Builder, Deserialize)]
 #[builder(
     name = "DictionaryBuilder",
     pattern = "owned",
@@ -46,49 +117,39 @@ pub struct Dictionary {
     pub name: String,
     /// Entity label every match emits.
     pub label: EntityLabelRef,
-    /// Literal terms to scan for. The recognizer compiles these into
-    /// an Aho-Corasick automaton at build time.
+    /// Literal terms to scan for. The recognizer compiles these
+    /// into an Aho-Corasick automaton at build time.
     pub terms: Terms,
-    /// Confidence score stamped on every match before any boost.
-    #[builder(default = "Confidence::MAX")]
-    pub score: Confidence,
-    /// Optional context keywords carried through to emitted entities
-    /// for a downstream enhancer to apply boosts.
+    /// Confidence policy: uniform across every term, or per CSV
+    /// source column. Defaults to [`Scoring::Uniform`] with
+    /// [`Confidence::MAX`].
     #[builder(default)]
-    #[serde(default, skip_serializing_if = "context_is_default")]
-    pub context: Context,
+    #[serde(default, rename = "score")]
+    pub scoring: Scoring,
+    /// Context keywords that lift confidence when one of them
+    /// appears near a match. Harvested at recognizer-build time
+    /// into a per-label `BoostRule` from `nvisy-context`; the
+    /// recognizer itself never reads this field.
+    #[builder(default)]
+    #[serde(default)]
+    pub context: Vec<String>,
     /// Languages the dictionary applies to (BCP-47 tags). An empty
-    /// list (the default) means the dictionary applies regardless of
-    /// language; otherwise the recognizer skips this dictionary when
-    /// the per-call language hint is set to a tag not in this list.
+    /// list (the default) means the dictionary applies regardless
+    /// of language; otherwise the recognizer skips this dictionary
+    /// when the per-call language hint is set to a tag not in this
+    /// list.
     #[builder(default)]
-    #[serde(default, skip_serializing_if = "Vec::is_empty")]
-    #[schemars(with = "Vec<String>")]
+    #[serde(default)]
     pub languages: Vec<LanguageTag>,
     /// Require word-boundary surroundings on every match. With the
-    /// default of `true`, a term `"am"` matches the word `"am"` but
-    /// not the `"am"` inside `"example"`. Word characters are
+    /// default of `true`, a term `"am"` matches the word `"am"`
+    /// but not the `"am"` inside `"example"`. Word characters are
     /// alphanumerics and `_` (Unicode-aware). Set to `false` for
     /// dictionaries that genuinely want substring matching (e.g.
     /// scanning for embedded credentials inside arbitrary tokens).
     #[builder(default = "true")]
     #[serde(default = "default_word_boundary")]
     pub word_boundary: bool,
-    /// Per-column confidence overrides for terms loaded from a
-    /// multi-column CSV. `column_scores[i]` is the confidence
-    /// stamped on every term whose source column was `i`; terms
-    /// from a column past the end of this vec fall back to the
-    /// dictionary's default `score`. Useful when one column
-    /// carries unambiguous long-form names (`English`, `Spanish`)
-    /// and another carries short codes (`en`, `es`) that collide
-    /// with common words.
-    ///
-    /// Empty (the default) means "use `score` for every match",
-    /// preserving the historical behaviour of single-confidence
-    /// dictionaries.
-    #[builder(default)]
-    #[serde(default, skip_serializing_if = "Vec::is_empty")]
-    pub column_scores: Vec<Confidence>,
 }
 
 fn default_word_boundary() -> bool {
@@ -143,8 +204,8 @@ impl Dictionary {
         let mut builder = Dictionary::builder()
             .with_name(metadata.name)
             .with_label(metadata.label);
-        if let Some(score) = metadata.score {
-            builder = builder.with_score(score);
+        if let Some(scoring) = metadata.score {
+            builder = builder.with_scoring(scoring);
         }
         if let Some(context) = metadata.context {
             builder = builder.with_context(context);
@@ -152,29 +213,20 @@ impl Dictionary {
         if let Some(wb) = metadata.word_boundary {
             builder = builder.with_word_boundary(wb);
         }
-        if let Some(cs) = metadata.column_scores {
-            builder = builder.with_column_scores(cs);
-        }
         Ok(builder)
     }
 }
 
 /// Wire shape for the dictionary metadata sidecar TOML — every
 /// field [`Dictionary`] carries except `terms`.
-#[derive(Debug, Clone, Serialize, Deserialize)]
+#[derive(Debug, Clone, Deserialize)]
 struct DictionaryMetadata {
     name: String,
     label: EntityLabelRef,
     #[serde(default)]
-    score: Option<Confidence>,
+    score: Option<Scoring>,
     #[serde(default)]
-    context: Option<Context>,
+    context: Option<Vec<String>>,
     #[serde(default)]
     word_boundary: Option<bool>,
-    #[serde(default)]
-    column_scores: Option<Vec<Confidence>>,
-}
-
-fn context_is_default(ctx: &Context) -> bool {
-    ctx.is_empty() && ctx.window.is_none() && ctx.boost.is_none()
 }
diff --git a/crates/nvisy-pattern/src/recognition/mod.rs b/crates/nvisy-pattern/src/recognition/mod.rs
index 0ce29c61..d6d2d18e 100644
--- a/crates/nvisy-pattern/src/recognition/mod.rs
+++ b/crates/nvisy-pattern/src/recognition/mod.rs
@@ -1,19 +1,18 @@
-//! Recognition primitives — the rule shapes ([`Regex`],
-//! [`Dictionary`]), their building blocks ([`Terms`] plus
-//! [`Context`] from `nvisy-context`),
-//! the [`PatternRegistry`] that bundles them, and the runtime
-//! [`PatternRecognizer`] that compiles them into pooled scanners.
-//!
-//! [`Context`]: nvisy_context::Context
+//! Recognition primitives — the rule shapes ([`Regex`] + its
+//! [`Variant`]s, [`Dictionary`]), their building blocks ([`Terms`]),
+//! and the runtime [`PatternRecognizer`] that compiles them into
+//! pooled scanners. Per-rule and per-dictionary `context` keyword
+//! lists are harvested by the recognizer at build time into a
+//! wrapping `Boosting` layer that applies post-recognition keyword
+//! boosts.
 
+mod compiled;
 mod dictionary;
 mod recognizer;
-mod regex_rule;
-mod registry;
+mod regex;
 mod terms;
 
-pub use self::dictionary::{Dictionary, DictionaryBuilder};
+pub use self::dictionary::{Dictionary, DictionaryBuilder, Scoring};
 pub use self::recognizer::{PatternRecognizer, PatternRecognizerBuilder};
-pub use self::regex_rule::{Regex, RegexBuilder};
-pub use self::registry::PatternRegistry;
-pub use self::terms::Terms;
+pub use self::regex::{Regex, RegexBuilder, Variant, VariantBuilder};
+pub use self::terms::{Term, Terms};
diff --git a/crates/nvisy-pattern/src/recognition/recognizer.rs b/crates/nvisy-pattern/src/recognition/recognizer.rs
index ce987ee3..01bc5533 100644
--- a/crates/nvisy-pattern/src/recognition/recognizer.rs
+++ b/crates/nvisy-pattern/src/recognition/recognizer.rs
@@ -1,5 +1,5 @@
-//! [`PatternRecognizer`]: compiles a [`PatternRegistry`] into pooled
-//! scanners and implements [`EntityRecognizer<Text>`].
+//! [`PatternRecognizer`]: compiles patterns and dictionaries into
+//! pooled scanners and implements [`EntityRecognizer<Text>`].
 //!
 //! The internal split is intentional: regex patterns go into a
 //! single [`regex::RegexSet`] for a one-pass scan across every
@@ -7,64 +7,41 @@
 //! [`aho_corasick::AhoCorasick`] automaton for a one-pass scan
 //! across every literal. Both passes share one walk over the input
 //! and emit entities in modality-local byte coordinates.
-
-use std::sync::Arc;
+//!
+//! Construction is builder-driven: [`PatternRecognizer::builder`]
+//! returns a [`PatternRecognizerBuilder`] that accumulates patterns,
+//! dictionaries, and (optionally) a custom validator registry, then
+//! compiles everything into the scanners on [`build`]. The shipped
+//! built-in pattern + dictionary set is [`PatternRecognizerBuilder::builtin`].
+//!
+//! [`build`]: PatternRecognizerBuilder::build
 
 use aho_corasick::{AhoCorasick, MatchKind};
-use nvisy_core::entity::{Entity, EntityLabelRef, PatternProvenance, TrailProvenance, TrailStep};
-use nvisy_core::modality::{Text, TextLocation};
-use nvisy_core::primitive::{Confidence, LanguageTag};
+use nvisy_context::{BoostRule, Boosting, Enhancer, SubstringMatcher};
+use nvisy_core::entity::{Entity, EntityLabelCatalog, EntityLabelRef};
+use nvisy_core::modality::Text;
 use nvisy_core::recognition::{EntityRecognizer, RecognizerInput, RecognizerOutput};
 use nvisy_core::{Error, Result};
-use regex::{Regex, RegexSet};
+use regex::RegexSet;
 
-use super::registry::PatternRegistry;
-use crate::validators::{Validator, ValidatorRegistry};
+use super::compiled::{CompiledDictionary, CompiledPattern, has_word_boundaries};
+use super::dictionary::Dictionary;
+use super::regex::Regex;
+use crate::shipped;
+use crate::validators::ValidatorRegistry;
 
-/// Source of truth for one runtime pattern: the regex compiled
-/// once, plus the metadata needed to emit entities.
+/// Runtime text recognizer composed of one regex pool and one
+/// Aho-Corasick automaton.
 ///
-/// `context` is intentionally not stored on the compiled state —
-/// the recognizer never reads it; the [`ContextEnhancer`] looks it
-/// up directly on the [`PatternRegistry`] at boost time.
+/// ```
+/// use nvisy_pattern::PatternRecognizer;
 ///
-/// [`ContextEnhancer`]: nvisy_context::ContextEnhancer
-struct CompiledPattern {
-    name: String,
-    label: EntityLabelRef,
-    regex: Regex,
-    raw_regex: String,
-    score: Confidence,
-    validator: Option<Arc<dyn Validator>>,
-    /// Languages this pattern applies to. Empty means "any language".
-    languages: Vec<LanguageTag>,
-}
-
-/// Source of truth for one runtime dictionary: its term range
-/// inside the shared Aho-Corasick automaton, plus per-dictionary
-/// emission metadata.
-struct CompiledDictionary {
-    name: String,
-    label: EntityLabelRef,
-    /// First term-id (inclusive) for this dictionary inside the
-    /// shared automaton.
-    term_start: usize,
-    /// One past the last term-id for this dictionary inside the
-    /// shared automaton.
-    term_end: usize,
-    /// Per-term confidence, indexed by `term_id - term_start`.
-    /// Resolved at compile time from the dictionary's
-    /// `column_scores` override (when set) or its default `score`.
-    term_scores: Vec<Confidence>,
-    /// Languages this dictionary applies to. Empty means "any
-    /// language".
-    languages: Vec<LanguageTag>,
-    /// Reject matches whose immediate neighbours are word
-    /// characters (alphanumeric or `_`). Mirrors regex `\b`.
-    word_boundary: bool,
-}
-
-/// Composes a [`PatternRegistry`] into a single text recognizer.
+/// let recognizer = PatternRecognizer::builder()
+///     .with_builtin_patterns()
+///     .with_builtin_dictionaries()
+///     .build()
+///     .expect("built-in recognizer builds");
+/// ```
 pub struct PatternRecognizer {
     patterns: Vec<CompiledPattern>,
     regex_set: Option<RegexSet>,
@@ -73,28 +50,77 @@ pub struct PatternRecognizer {
 }
 
 impl PatternRecognizer {
-    /// Start assembling a recognizer. Required: a registry, supplied
-    /// via [`with_registry`].
+    /// Start an empty builder. Register at least one pattern or
+    /// dictionary before [`build`]: an empty builder still builds,
+    /// but into a recognizer that always emits zero entities.
     ///
-    /// [`with_registry`]: PatternRecognizerBuilder::with_registry
+    /// [`build`]: PatternRecognizerBuilder::build
     #[must_use]
     pub fn builder() -> PatternRecognizerBuilder {
         PatternRecognizerBuilder::default()
     }
+
+    fn dictionary_owning_term(&self, term_id: usize) -> Option<&CompiledDictionary> {
+        self.dictionaries
+            .iter()
+            .find(|d| term_id >= d.term_start && term_id < d.term_end)
+    }
 }
 
-/// Builder for [`PatternRecognizer`].
-#[derive(Default)]
+/// Accumulates patterns, dictionaries, and a validator registry,
+/// then compiles them into a [`PatternRecognizer`] wrapped in a
+/// [`Boosting`] layer.
+#[derive(Debug, Clone, Default)]
 pub struct PatternRecognizerBuilder {
-    registry: Option<PatternRegistry>,
+    patterns: Vec<Regex>,
+    dictionaries: Vec<Dictionary>,
     validators: Option<ValidatorRegistry>,
 }
 
 impl PatternRecognizerBuilder {
-    /// Attach the pattern + dictionary registry to compile.
+    /// Construct an empty builder.
     #[must_use]
-    pub fn with_registry(mut self, registry: PatternRegistry) -> Self {
-        self.registry = Some(registry);
+    pub fn new() -> Self {
+        Self::default()
+    }
+
+    /// Pre-seed with the shipped built-in pattern + dictionary set.
+    /// Shorthand for
+    /// `Self::new().with_builtin_patterns().with_builtin_dictionaries()`.
+    #[must_use]
+    pub fn builtin() -> Self {
+        Self::new()
+            .with_builtin_patterns()
+            .with_builtin_dictionaries()
+    }
+
+    /// Register one pattern. Patterns accumulate in registration
+    /// order.
+    #[must_use]
+    pub fn with_pattern(mut self, pattern: Regex) -> Self {
+        self.patterns.push(pattern);
+        self
+    }
+
+    /// Register one dictionary. Dictionaries accumulate in
+    /// registration order.
+    #[must_use]
+    pub fn with_dictionary(mut self, dictionary: Dictionary) -> Self {
+        self.dictionaries.push(dictionary);
+        self
+    }
+
+    /// Register every shipped built-in pattern.
+    #[must_use]
+    pub fn with_builtin_patterns(mut self) -> Self {
+        self.patterns.extend(shipped::patterns::all());
+        self
+    }
+
+    /// Register every shipped built-in dictionary.
+    #[must_use]
+    pub fn with_builtin_dictionaries(mut self) -> Self {
+        self.dictionaries.extend(shipped::dictionaries::all());
         self
     }
 
@@ -106,52 +132,109 @@ impl PatternRecognizerBuilder {
         self
     }
 
+    /// Keep only the patterns and dictionaries whose `label`
+    /// resolves in `catalog`; everything else is discarded.
+    /// Intended for deriving a per-request recognizer from a
+    /// workspace-wide template so undeclared labels never run.
+    #[must_use]
+    pub fn filter_by_catalog(mut self, catalog: &EntityLabelCatalog) -> Self {
+        // One membership test shared by both rule kinds.
+        let declared = |label: &EntityLabelRef| catalog.lookup(label.as_str()).is_some();
+        self.patterns.retain(|rule| declared(&rule.label));
+        self.dictionaries.retain(|dict| declared(&dict.label));
+        self
+    }
+
+    /// `true` when the builder has no patterns and no
+    /// dictionaries. Engine code uses this to skip the
+    /// per-request recognizer entirely when the catalog filter
+    /// dropped every rule.
+    #[must_use]
+    pub fn is_empty(&self) -> bool {
+        self.patterns.is_empty() && self.dictionaries.is_empty()
+    }
+
+    /// Borrow the accumulated patterns.
+    #[must_use]
+    pub fn patterns(&self) -> &[Regex] {
+        &self.patterns
+    }
+
+    /// Borrow the accumulated dictionaries.
+    #[must_use]
+    pub fn dictionaries(&self) -> &[Dictionary] {
+        &self.dictionaries
+    }
+
     /// Compile every registered pattern and dictionary into the
-    /// pooled scanners.
+    /// pooled scanners and wrap the recognizer in a [`Boosting`]
+    /// layer carrying per-label keyword boosts harvested from the
+    /// same set of rules.
     ///
     /// # Errors
     ///
-    /// Returns an error when no registry was supplied, when a
-    /// pattern's regex fails to compile, when a pattern references
-    /// an unknown validator name, or when the shared automata
-    /// cannot be constructed.
-    pub fn build(self) -> Result<PatternRecognizer> {
-        let registry = self.registry.ok_or_else(|| {
-            Error::validation(
-                "PatternRecognizer requires a registry — call `with_registry` first",
-                "nvisy-pattern",
-            )
-        })?;
-        let validators = self.validators.unwrap_or_else(ValidatorRegistry::builtin);
-        let mut compiled_patterns = Vec::with_capacity(registry.patterns().len());
-        let mut regex_sources = Vec::with_capacity(registry.patterns().len());
-
-        for pattern in registry.patterns() {
-            let regex = Regex::new(&pattern.regex).map_err(|e| {
-                Error::validation(
-                    format!("pattern `{}`: invalid regex: {e}", pattern.name),
-                    "nvisy-pattern",
-                )
-            })?;
-            let validator = match pattern.validator.as_deref() {
-                None => None,
-                Some(name) => Some(validators.resolve(name).ok_or_else(|| {
+    /// Returns a validation error when a pattern variant's regex
+    /// fails to compile, when a variant references an unknown
+    /// validator name, when a dictionary's `scoring` is invalid
+    /// or under-declared for some term's source column, or when
+    /// the shared automata cannot be constructed.
+    pub fn build(mut self) -> Result<Boosting<PatternRecognizer>> {
+        // Move the caller's registry out rather than cloning it;
+        // nothing below reads `self.validators` again.
+        let custom = self.validators.take();
+        let validators = custom.unwrap_or_else(ValidatorRegistry::builtin);
+        let (compiled_patterns, regex_set) = self.compile_patterns(&validators)?;
+        let (compiled_dicts, aho) = self.compile_dictionaries()?;
+        let enhancer = self.build_enhancer();
+
+        let recognizer = PatternRecognizer {
+            patterns: compiled_patterns,
+            regex_set,
+            dictionaries: compiled_dicts,
+            aho,
+        };
+
+        Ok(Boosting::new(recognizer, enhancer))
+    }
+
+    /// Compile every `(pattern, variant)` pair into a
+    /// [`CompiledPattern`] keyed by its slot in the shared
+    /// [`RegexSet`].
+    fn compile_patterns(
+        &self,
+        validators: &ValidatorRegistry,
+    ) -> Result<(Vec<CompiledPattern>, Option<RegexSet>)> {
+        let variant_total: usize = self.patterns.iter().map(|p| p.variants.len()).sum();
+        let mut compiled = Vec::with_capacity(variant_total);
+        let mut regex_sources = Vec::with_capacity(variant_total);
+
+        for pattern in &self.patterns {
+            for variant in &pattern.variants {
+                let regex = ::regex::Regex::new(&variant.regex).map_err(|e| {
                     Error::validation(
-                        format!("pattern `{}`: unknown validator `{}`", pattern.name, name),
+                        format!("pattern `{}`: invalid regex: {e}", pattern.name),
                         "nvisy-pattern",
                     )
-                })?),
-            };
-            regex_sources.push(pattern.regex.clone());
-            compiled_patterns.push(CompiledPattern {
-                name: pattern.name.clone(),
-                label: pattern.label.clone(),
-                regex,
-                raw_regex: pattern.regex.clone(),
-                score: pattern.score,
-                validator,
-                languages: pattern.languages.clone(),
-            });
+                })?;
+                let validator = match variant.validator.as_deref() {
+                    None => None,
+                    Some(name) => Some(validators.resolve(name).ok_or_else(|| {
+                        Error::validation(
+                            format!("pattern `{}`: unknown validator `{}`", pattern.name, name),
+                            "nvisy-pattern",
+                        )
+                    })?),
+                };
+                regex_sources.push(variant.regex.clone());
+                compiled.push(CompiledPattern {
+                    pattern_name: pattern.name.clone(),
+                    label: pattern.label.clone(),
+                    regex,
+                    score: variant.score,
+                    validator,
+                    languages: pattern.languages.clone(),
+                });
+            }
         }
 
         let regex_set = if regex_sources.is_empty() {
@@ -161,25 +244,55 @@ impl PatternRecognizerBuilder {
                 Error::validation(format!("compiling regex set: {e}"), "nvisy-pattern")
             })?)
         };
+        Ok((compiled, regex_set))
+    }
 
-        let mut compiled_dicts = Vec::with_capacity(registry.dictionaries().len());
+    /// Compile every dictionary into a [`CompiledDictionary`]
+    /// with its term-id range inside the shared Aho-Corasick
+    /// automaton, plus per-term confidences resolved from the
+    /// dictionary's `scoring` policy (with per-term overrides
+    /// taking precedence).
+    fn compile_dictionaries(&self) -> Result<(Vec<CompiledDictionary>, Option<AhoCorasick>)> {
+        let mut compiled = Vec::with_capacity(self.dictionaries.len());
         let mut all_terms: Vec<String> = Vec::new();
-        for dict in registry.dictionaries() {
+
+        for dict in &self.dictionaries {
+            if let Err(reason) = dict.scoring.validate() {
+                return Err(Error::validation(
+                    format!("dictionary `{}`: {reason}", dict.name),
+                    "nvisy-pattern",
+                ));
+            }
             let term_start = all_terms.len();
             let mut term_scores = Vec::with_capacity(dict.terms.len());
             for entry in dict.terms.entries() {
                 all_terms.push(entry.term.clone());
-                // Resolve column → score. Out-of-range columns fall
-                // back to the dictionary's default score.
-                let score = dict
-                    .column_scores
-                    .get(entry.column as usize)
-                    .copied()
-                    .unwrap_or(dict.score);
+                // Per-term `score` wins when set; otherwise ask
+                // the dictionary's `Scoring` to resolve against
+                // the term's source column. `None` means the
+                // column didn't map to a declared score —
+                // surfaced as a hard build error so silent
+                // misconfiguration can't happen.
+                let score = entry
+                    .score
+                    .or_else(|| dict.scoring.get(entry.column))
+                    .ok_or_else(|| {
+                        let column_desc = entry
+                            .column
+                            .map_or_else(|| "no column".to_owned(), |c| format!("column {c}"));
+                        Error::validation(
+                            format!(
+                                "dictionary `{}`: term `{}` ({column_desc}) has no score in \
+                                 dictionary scoring",
+                                dict.name, entry.term,
+                            ),
+                            "nvisy-pattern",
+                        )
+                    })?;
                 term_scores.push(score);
             }
             let term_end = all_terms.len();
-            compiled_dicts.push(CompiledDictionary {
+            compiled.push(CompiledDictionary {
                 name: dict.name.clone(),
                 label: dict.label.clone(),
                 term_start,
@@ -196,12 +309,12 @@ impl PatternRecognizerBuilder {
             Some(
                 AhoCorasick::builder()
                     .ascii_case_insensitive(false)
-                    // Longest-match-at-position: when both `en` and
-                    // `English` start at the same offset, return
-                    // `English`. Without this, the short ISO code
-                    // would win and word-boundary post-filtering
-                    // would then reject it, dropping the legitimate
-                    // long-form match.
+                    // Longest-match-at-position: when both `en`
+                    // and `English` start at the same offset,
+                    // return `English`. Without this, the short
+                    // ISO code would win and word-boundary
+                    // post-filtering would then reject it,
+                    // dropping the legitimate long-form match.
                     .match_kind(MatchKind::LeftmostLongest)
                     .build(&all_terms)
                     .map_err(|e| {
@@ -212,13 +325,33 @@ impl PatternRecognizerBuilder {
                     })?,
             )
         };
+        Ok((compiled, aho))
+    }
 
-        Ok(PatternRecognizer {
-            patterns: compiled_patterns,
-            regex_set,
-            dictionaries: compiled_dicts,
-            aho,
-        })
+    /// Assemble the wrapping [`Enhancer`]: one [`BoostRule`] per
+    /// `(label, keywords)` pair that a pattern or dictionary declares.
+    fn build_enhancer(&self) -> Enhancer {
+        let mut boost_rules: Vec<BoostRule> = Vec::new();
+        for (label, keywords) in self.context_keywords() {
+            boost_rules.push(BoostRule::for_label(label.clone(), keywords.iter().cloned()));
+        }
+        Enhancer::new(boost_rules, Box::new(SubstringMatcher))
+    }
+
+    /// Walk the patterns first, then the dictionaries, yielding
+    /// `(label, keywords)` for each one whose context is non-empty.
+    fn context_keywords(&self) -> impl Iterator<Item = (&EntityLabelRef, &[String])> {
+        let from_patterns = self
+            .patterns
+            .iter()
+            .map(|rule| (&rule.label, rule.context.as_slice()));
+        let from_dictionaries = self
+            .dictionaries
+            .iter()
+            .map(|dict| (&dict.label, dict.context.as_slice()));
+        from_patterns
+            .chain(from_dictionaries)
+            .filter(|(_, keywords)| !keywords.is_empty())
+    }
 }
 
@@ -226,7 +359,7 @@ impl PatternRecognizerBuilder {
 impl EntityRecognizer<Text> for PatternRecognizer {
     async fn recognize(&self, input: &RecognizerInput<Text>) -> Result<RecognizerOutput<Text>> {
         let text = input.data.text.as_str();
-        let mut entities = Vec::new();
+        let mut entities: Vec<Entity<Text>> = Vec::new();
 
         if let Some(set) = self.regex_set.as_ref() {
             for pattern_id in set.matches(text).into_iter() {
@@ -240,7 +373,7 @@ impl EntityRecognizer<Text> for PatternRecognizer {
                     {
                         continue;
                     }
-                    entities.push(build_pattern_entity(pat, m.start(), m.end()));
+                    entities.push(pat.build_entity(m.start(), m.end()));
                 }
             }
         }
@@ -258,7 +391,7 @@ impl EntityRecognizer<Text> for PatternRecognizer {
                     continue;
                 }
                 let score = dict.term_scores[term_id - dict.term_start];
-                entities.push(build_dictionary_entity(dict, score, mat.start(), mat.end()));
+                entities.push(dict.build_entity(score, mat.start(), mat.end()));
             }
         }
 
@@ -266,88 +399,14 @@ impl EntityRecognizer<Text> for PatternRecognizer {
     }
 }
 
-impl PatternRecognizer {
-    fn dictionary_owning_term(&self, term_id: usize) -> Option<&CompiledDictionary> {
-        self.dictionaries
-            .iter()
-            .find(|d| term_id >= d.term_start && term_id < d.term_end)
-    }
-}
-
-fn build_pattern_entity(pat: &CompiledPattern, start: usize, end: usize) -> Entity<Text> {
-    let provenance = TrailProvenance::Pattern(PatternProvenance::Regex {
-        name: pat.name.clone(),
-        regex: Some(pat.raw_regex.clone()),
-        validator: pat.validator.as_ref().map(|_| pat.name.clone()),
-        contextual: false,
-    });
-    let step = TrailStep::recognition(
-        "pattern",
-        pat.score,
-        provenance,
-        format!("pattern `{}` matched", pat.name),
-    );
-    Entity::builder()
-        .with_label(pat.label.clone())
-        .with_trail(vec![step])
-        .with_confidence(pat.score)
-        .with_location(TextLocation::new(start, end))
-        .build()
-        .expect("required fields provided")
-}
-
-/// Mirror of regex `\b` for the byte range `text[start..end]`:
-/// the immediate neighbour characters (or start/end of input) must
-/// not be word characters. A word character here is Unicode
-/// alphanumeric or `_`, matching the conventional regex definition.
-///
-/// Operates on `char` boundaries, not raw bytes, so multibyte
-/// codepoints don't trigger false rejections (`é` is one char, not
-/// two).
-fn has_word_boundaries(text: &str, start: usize, end: usize) -> bool {
-    let left_is_word = text[..start].chars().next_back().is_some_and(is_word_char);
-    let right_is_word = text[end..].chars().next().is_some_and(is_word_char);
-    !left_is_word && !right_is_word
-}
-
-fn is_word_char(c: char) -> bool {
-    c.is_alphanumeric() || c == '_'
-}
-
-fn build_dictionary_entity(
-    dict: &CompiledDictionary,
-    score: Confidence,
-    start: usize,
-    end: usize,
-) -> Entity<Text> {
-    let provenance = TrailProvenance::Pattern(PatternProvenance::Dictionary {
-        name: dict.name.clone(),
-        contextual: false,
-    });
-    let step = TrailStep::recognition(
-        "pattern",
-        score,
-        provenance,
-        format!("dictionary `{}` matched", dict.name),
-    );
-    Entity::builder()
-        .with_label(dict.label.clone())
-        .with_trail(vec![step])
-        .with_confidence(score)
-        .with_location(TextLocation::new(start, end))
-        .build()
-        .expect("required fields provided")
-}
-
 #[cfg(test)]
 mod tests {
-    use nvisy_core::entity::builtins;
-    use nvisy_core::modality::TextData;
+    use nvisy_core::entity::{Entity, EntityLabelRef, builtins};
+    use nvisy_core::modality::{Text, TextData};
     use nvisy_core::recognition::RecognizerInput;
 
     use super::*;
     use crate::Dictionary;
-    use crate::recognition::registry::PatternRegistry;
     use crate::recognition::terms::Terms;
 
     fn dict(name: &str, terms: &[&str], word_boundary: bool) -> Dictionary {
@@ -360,7 +419,7 @@ mod tests {
             .expect("dictionary builds")
     }
 
-    async fn run(recognizer: &PatternRecognizer, text: &str) -> Vec<Entity<Text>> {
+    async fn run(recognizer: &impl EntityRecognizer<Text>, text: &str) -> Vec<Entity<Text>> {
         let input = RecognizerInput::new(TextData::new(text.to_owned()));
         recognizer
             .recognize(&input)
@@ -371,9 +430,8 @@ mod tests {
 
     #[tokio::test]
     async fn word_boundary_rejects_substring_matches() {
-        let registry = PatternRegistry::new().with_dictionary(dict("langs", &["am", "or"], true));
         let recognizer = PatternRecognizer::builder()
-            .with_registry(registry)
+            .with_dictionary(dict("langs", &["am", "or"], true))
             .build()
             .expect("recognizer builds");
 
@@ -391,29 +449,12 @@ mod tests {
 
     #[tokio::test]
     async fn word_boundary_disabled_keeps_substring_matches() {
-        let registry = PatternRegistry::new().with_dictionary(dict("langs", &["am"], false));
         let recognizer = PatternRecognizer::builder()
-            .with_registry(registry)
+            .with_dictionary(dict("langs", &["am"], false))
             .build()
             .expect("recognizer builds");
 
         let entities = run(&recognizer, "example").await;
         assert_eq!(entities.len(), 1, "substring match must be kept");
     }
-
-    #[test]
-    fn has_word_boundaries_handles_edges_and_unicode() {
-        // Match touches both edges of the input → boundaries OK.
-        assert!(has_word_boundaries("hello", 0, 5));
-        // Match preceded by a word char → not a boundary.
-        assert!(!has_word_boundaries("example", 5, 7));
-        // Match followed by a word char → not a boundary.
-        assert!(!has_word_boundaries("amount", 0, 2));
-        // Space surround → boundaries OK.
-        assert!(has_word_boundaries(" am ", 1, 3));
-        // Unicode word char on the left → not a boundary.
-        assert!(!has_word_boundaries("café_am", 5, 7));
-        // Punctuation around → boundaries OK.
-        assert!(has_word_boundaries("(am)", 1, 3));
-    }
 }
diff --git a/crates/nvisy-pattern/src/recognition/regex.rs b/crates/nvisy-pattern/src/recognition/regex.rs
new file mode 100644
index 00000000..0d762fcc
--- /dev/null
+++ b/crates/nvisy-pattern/src/recognition/regex.rs
@@ -0,0 +1,149 @@
+//! [`Regex`]: per-label regex-based detection rule.
+//!
+//! A `Regex` rule bundles one entity label, its context-keyword
+//! list, and one or more [`Variant`]s. Each variant carries its
+//! own regex source, emission score, and optional named
+//! validator. All variants under one rule emit the same label.
+//!
+//! Construct via [`Regex::builder`] for the chainable style or
+//! [`Regex::from_toml`] when loading a definition file.
+
+use derive_builder::Builder;
+use nvisy_core::Error;
+use nvisy_core::entity::EntityLabelRef;
+use nvisy_core::primitive::{Confidence, LanguageTag};
+use serde::Deserialize;
+
+/// One regex variant inside a [`Regex`] rule. Carries the regex
+/// source, the emission confidence stamped on every match, and the
+/// optional validator name resolved at recognizer-build time.
+#[derive(Debug, Clone, PartialEq, Builder, Deserialize)]
+#[builder(
+    name = "VariantBuilder",
+    pattern = "owned",
+    setter(into, strip_option, prefix = "with"),
+    build_fn(error = "Error", validate = "VariantBuilder::validate")
+)]
+pub struct Variant {
+    /// Regex source. Compiled to a [`::regex::Regex`] by [`PatternRecognizer::build`];
+    /// the builder also rejects sources that fail to compile, deserialization does not.
+    ///
+    /// [`PatternRecognizer::build`]: super::PatternRecognizer
+    pub regex: String,
+    /// Confidence score stamped on every match this variant emits
+    /// before any post-recognition boost.
+    #[builder(default = "Confidence::MAX")]
+    pub score: Confidence,
+    /// Optional validator name. Resolved at recognizer-build time
+    /// against the [`ValidatorRegistry`]; matches that fail
+    /// validation are dropped.
+    ///
+    /// [`ValidatorRegistry`]: crate::validators::ValidatorRegistry
+    #[builder(default)]
+    #[serde(default)]
+    pub validator: Option<String>,
+}
+
+impl Variant {
+    /// Start a chainable builder. Required field: `regex`.
+    #[must_use]
+    pub fn builder() -> VariantBuilder {
+        VariantBuilder::default()
+    }
+}
+
+impl VariantBuilder {
+    fn validate(&self) -> Result<(), Error> {
+        if let Some(regex) = self.regex.as_ref()
+            && let Err(e) = ::regex::Regex::new(regex)
+        {
+            return Err(Error::validation(
+                format!("invalid regex: {e}"),
+                "nvisy-pattern",
+            ));
+        }
+        Ok(())
+    }
+}
+
+/// Regex-based detection rule: one label, optional boost
+/// keywords, one or more [`Variant`]s. Matches the Presidio
+/// "pattern recognizer" shape — multiple regex strategies for one
+/// entity type, plus a shared context keyword list.
+///
+/// ```
+/// use nvisy_core::entity::builtins;
+/// use nvisy_core::primitive::Confidence;
+/// use nvisy_pattern::{Regex, Variant};
+///
+/// let variant = Variant::builder()
+///     .with_regex(r"\b\d{3}-\d{2}-\d{4}\b")
+///     .with_score(Confidence::clamped(0.9))
+///     .with_validator("ssn")
+///     .build()
+///     .expect("ssn variant builds");
+///
+/// let ssn = Regex::builder()
+///     .with_name("ssn")
+///     .with_label(builtins::GOVERNMENT_ID.label_ref())
+///     .with_context(vec!["ssn".to_owned(), "social security".to_owned()])
+///     .with_variants(vec![variant])
+///     .build()
+///     .expect("ssn rule builds");
+/// ```
+#[derive(Debug, Clone, PartialEq, Builder, Deserialize)]
+#[builder(
+    name = "RegexBuilder",
+    pattern = "owned",
+    setter(into, strip_option, prefix = "with"),
+    build_fn(error = "Error")
+)]
+pub struct Regex {
+    /// Human-readable identifier (e.g. `"ssn"`, `"credit_card"`).
+    /// Surfaced in trail steps so downstream consumers can see
+    /// which rule matched.
+    pub name: String,
+    /// Entity label every variant emits.
+    pub label: EntityLabelRef,
+    /// Context keywords that lift confidence when one of them
+    /// appears near a match. Harvested by [`PatternRecognizer`]
+    /// into a per-label boost rule; rules themselves never read
+    /// this field.
+    ///
+    /// [`PatternRecognizer`]: super::PatternRecognizer
+    #[builder(default)]
+    #[serde(default)]
+    pub context: Vec<String>,
+    /// Regex variants. At least one is required for the rule to
+    /// produce any matches; the recognizer skips rules with no
+    /// variants.
+    pub variants: Vec<Variant>,
+    /// Languages this rule applies to (BCP-47 tags). An empty
+    /// list (the default) means the rule applies regardless of
+    /// language; otherwise the recognizer skips this rule when
+    /// the per-call language hint is set to a tag not in this
+    /// list.
+    #[builder(default)]
+    #[serde(default)]
+    pub languages: Vec<LanguageTag>,
+}
+
+impl Regex {
+    /// Start a chainable builder. Required fields: `name`,
+    /// `label`, `variants`.
+    #[must_use]
+    pub fn builder() -> RegexBuilder {
+        RegexBuilder::default()
+    }
+
+    /// Parse a regex rule from a TOML string.
+    ///
+    /// # Errors
+    ///
+    /// Returns a validation error when the TOML is malformed or
+    /// missing required fields.
+    pub fn from_toml(raw: &str) -> Result<Self, Error> {
+        toml::from_str(raw)
+            .map_err(|e| Error::validation(format!("regex rule TOML: {e}"), "nvisy-pattern"))
+    }
+}
diff --git a/crates/nvisy-pattern/src/recognition/regex_rule.rs b/crates/nvisy-pattern/src/recognition/regex_rule.rs
deleted file mode 100644
index 55f303ca..00000000
--- a/crates/nvisy-pattern/src/recognition/regex_rule.rs
+++ /dev/null
@@ -1,107 +0,0 @@
-//! [`Regex`]: regex-backed detection rule.
-//!
-//! A regex rule bundles a regular expression with the entity kind
-//! it detects, an emission confidence score, optional context
-//! keywords that downstream enhancers can boost on, and an optional
-//! named validator (Luhn, IBAN, …) the recognizer runs over each
-//! match before emitting an entity.
-//!
-//! Construct via [`Regex::builder`] for the chainable style or
-//! [`Regex::from_toml`] when loading a definition file.
-
-use derive_builder::Builder;
-use nvisy_context::Context;
-use nvisy_core::Error;
-use nvisy_core::entity::EntityLabelRef;
-use nvisy_core::primitive::{Confidence, LanguageTag};
-use schemars::JsonSchema;
-use serde::{Deserialize, Serialize};
-
-/// Regex-backed detection rule.
-///
-/// Identical fields whether built via [`RegexBuilder`] or loaded
-/// from a TOML file via [`Regex::from_toml`].
-#[derive(Debug, Clone, PartialEq, Builder)]
-#[derive(Serialize, Deserialize, JsonSchema)]
-#[builder(
-    name = "RegexBuilder",
-    pattern = "owned",
-    setter(into, strip_option, prefix = "with"),
-    build_fn(error = "Error", validate = "RegexBuilder::validate")
-)]
-pub struct Regex {
-    /// Human-readable identifier (e.g. `"ssn"`, `"credit_card"`).
-    /// Surfaced in trail steps so downstream consumers can see
-    /// which rule matched.
-    pub name: String,
-    /// Entity label every match emits.
-    pub label: EntityLabelRef,
-    /// Regex source. Compiled to a [`regex::Regex`] by
-    /// [`PatternRecognizer::build`]; shape
-    /// errors there, not here.
-    ///
-    /// [`PatternRecognizer::build`]: super::PatternRecognizer
-    pub regex: String,
-    /// Confidence score stamped on every match before any boost.
-    #[builder(default = "Confidence::MAX")]
-    pub score: Confidence,
-    /// Optional context keywords. Carried through to emitted
-    /// entities so a downstream enhancer can apply boosts.
-    #[builder(default)]
-    #[serde(default, skip_serializing_if = "context_is_default")]
-    pub context: Context,
-    /// Optional validator name. Resolved at recognizer build time
-    /// against the [`ValidatorRegistry`].
-    /// Matches that fail validation are dropped.
-    ///
-    /// [`ValidatorRegistry`]: crate::validators::ValidatorRegistry
-    #[builder(default)]
-    #[serde(default, skip_serializing_if = "Option::is_none")]
-    pub validator: Option<String>,
-    /// Languages the rule applies to (BCP-47 tags). An empty list
-    /// (the default) means the rule applies regardless of language;
-    /// otherwise the recognizer skips this rule when the per-call
-    /// language hint is set to a tag not in this list.
-    #[builder(default)]
-    #[serde(default, skip_serializing_if = "Vec::is_empty")]
-    #[schemars(with = "Vec<String>")]
-    pub languages: Vec<LanguageTag>,
-}
-
-impl Regex {
-    /// Start a chainable builder. Required fields: `name`,
-    /// `label`, `regex`.
-    #[must_use]
-    pub fn builder() -> RegexBuilder {
-        RegexBuilder::default()
-    }
-
-    /// Parse a regex rule from a TOML string.
-    ///
-    /// # Errors
-    ///
-    /// Returns a validation error when the TOML is malformed or
-    /// missing required fields.
-    pub fn from_toml(raw: &str) -> Result<Self, Error> {
-        toml::from_str(raw)
-            .map_err(|e| Error::validation(format!("regex TOML: {e}"), "nvisy-pattern"))
-    }
-}
-
-impl RegexBuilder {
-    fn validate(&self) -> Result<(), Error> {
-        if let Some(regex) = self.regex.as_ref()
-            && let Err(e) = ::regex::Regex::new(regex)
-        {
-            return Err(Error::validation(
-                format!("invalid regex: {e}"),
-                "nvisy-pattern",
-            ));
-        }
-        Ok(())
-    }
-}
-
-fn context_is_default(ctx: &Context) -> bool {
-    ctx.is_empty() && ctx.window.is_none() && ctx.boost.is_none()
-}
diff --git a/crates/nvisy-pattern/src/recognition/registry.rs b/crates/nvisy-pattern/src/recognition/registry.rs
deleted file mode 100644
index c763661a..00000000
--- a/crates/nvisy-pattern/src/recognition/registry.rs
+++ /dev/null
@@ -1,167 +0,0 @@
-//! [`PatternRegistry`]: a curated bundle of [`Regex`]es and
-//! [`Dictionary`]s that downstream consumers borrow.
-//!
-//! Both [`PatternRecognizer`] and the shared [`ContextEnhancer`]
-//! consume a registry — the recognizer compiles its rules into
-//! pooled scanners; the enhancer reads per-rule context keywords
-//! via [`PatternRegistry::context_registry`].
-//!
-//! Centralising the rule set here means no duplication of
-//! [`Regex`] / [`Dictionary`] storage between the two consumers.
-//!
-//! [`PatternRecognizer`]: super::PatternRecognizer
-//! [`ContextEnhancer`]: nvisy_context::ContextEnhancer
-
-use nvisy_context::ContextRegistry;
-use nvisy_core::entity::EntityLabelCatalog;
-
-use super::dictionary::Dictionary;
-use super::regex_rule::Regex;
-use crate::shipped;
-
-/// Bundle of regexes and dictionaries shared by every downstream
-/// consumer.
-///
-/// Cheap to clone (`Vec` of small structs). Construct via
-/// [`PatternRegistry::new`] for an empty registry,
-/// [`PatternRegistry::builtin`] for the shipped registry (every
-/// built-in regex + dictionary), or chain [`with_pattern`] /
-/// [`with_dictionary`] / [`with_builtin_patterns`] /
-/// [`with_builtin_dictionaries`] to mix custom rules in.
-///
-/// [`with_pattern`]: PatternRegistry::with_pattern
-/// [`with_dictionary`]: PatternRegistry::with_dictionary
-/// [`with_builtin_patterns`]: PatternRegistry::with_builtin_patterns
-/// [`with_builtin_dictionaries`]: PatternRegistry::with_builtin_dictionaries
-#[derive(Debug, Clone, Default)]
-pub struct PatternRegistry {
-    regexes: Vec<Regex>,
-    dictionaries: Vec<Dictionary>,
-}
-
-impl PatternRegistry {
-    /// Construct an empty registry.
-    #[must_use]
-    pub fn new() -> Self {
-        Self::default()
-    }
-
-    /// Construct the shipped registry: every built-in regex pattern
-    /// and every built-in dictionary, in registration order.
-    /// Shorthand for `PatternRegistry::new().with_builtin_patterns().with_builtin_dictionaries()`.
-    #[must_use]
-    pub fn builtin() -> Self {
-        Self::new()
-            .with_builtin_patterns()
-            .with_builtin_dictionaries()
-    }
-
-    /// Register one regex. Call once per regex; the registry
-    /// accumulates them in registration order.
-    #[must_use]
-    pub fn with_pattern(mut self, regex: Regex) -> Self {
-        self.regexes.push(regex);
-        self
-    }
-
-    /// Register one dictionary. Call once per dictionary; the
-    /// registry accumulates them in registration order.
-    #[must_use]
-    pub fn with_dictionary(mut self, dictionary: Dictionary) -> Self {
-        self.dictionaries.push(dictionary);
-        self
-    }
-
-    /// Register every shipped built-in regex pattern in registration
-    /// order. Replaces the common `for p in patterns::all() { reg =
-    /// reg.with_pattern(p); }` boilerplate.
-    #[must_use]
-    pub fn with_builtin_patterns(mut self) -> Self {
-        self.regexes.extend(shipped::patterns::all());
-        self
-    }
-
-    /// Register every shipped built-in dictionary in registration
-    /// order. Replaces the common `dictionaries::all().into_iter()
-    /// .fold(reg, PatternRegistry::with_dictionary)` boilerplate.
-    #[must_use]
-    pub fn with_builtin_dictionaries(mut self) -> Self {
-        self.dictionaries.extend(shipped::dictionaries::all());
-        self
-    }
-
-    /// Borrow the registered regexes.
-    #[must_use]
-    pub fn patterns(&self) -> &[Regex] {
-        &self.regexes
-    }
-
-    /// Borrow the registered dictionaries.
-    #[must_use]
-    pub fn dictionaries(&self) -> &[Dictionary] {
-        &self.dictionaries
-    }
-
-    /// Drop every regex and dictionary whose `label` is not
-    /// registered in `catalog`. Used to build a per-request
-    /// registry from the workspace template — patterns that would
-    /// emit labels no policy declared never run.
-    #[must_use]
-    pub fn filter_by_catalog(mut self, catalog: &EntityLabelCatalog) -> Self {
-        self.regexes
-            .retain(|r| catalog.lookup(r.label.as_str()).is_some());
-        self.dictionaries
-            .retain(|d| catalog.lookup(d.label.as_str()).is_some());
-        self
-    }
-
-    /// `true` when the registry has no regexes and no dictionaries.
-    #[must_use]
-    pub fn is_empty(&self) -> bool {
-        self.regexes.is_empty() && self.dictionaries.is_empty()
-    }
-
-    /// Build a [`ContextRegistry`] containing every per-rule
-    /// context keyword declaration in this registry.
-    ///
-    /// Each [`Regex`] and [`Dictionary`] that declares a non-empty
-    /// context contributes one entry, keyed on its rule name.
-    /// Rules without context declarations are skipped.
-    ///
-    /// Use this to wire the
-    /// [`ContextEnhancer`]
-    /// against the same source of truth the recognizer compiles
-    /// from — no duplication of keyword data between rule
-    /// registration and enhancer construction.
-    ///
-    /// [`ContextEnhancer`]: nvisy_context::ContextEnhancer
-    #[must_use]
-    pub fn context_registry(&self) -> ContextRegistry {
-        let mut registry = ContextRegistry::new();
-        for r in &self.regexes {
-            registry = registry.with_entry(r.name.clone(), r.context.clone());
-        }
-        for d in &self.dictionaries {
-            registry = registry.with_entry(d.name.clone(), d.context.clone());
-        }
-        registry
-    }
-}
-
-impl FromIterator<Regex> for PatternRegistry {
-    fn from_iter<I: IntoIterator<Item = Regex>>(iter: I) -> Self {
-        Self {
-            regexes: iter.into_iter().collect(),
-            dictionaries: Vec::new(),
-        }
-    }
-}
-
-impl FromIterator<Dictionary> for PatternRegistry {
-    fn from_iter<I: IntoIterator<Item = Dictionary>>(iter: I) -> Self {
-        Self {
-            regexes: Vec::new(),
-            dictionaries: iter.into_iter().collect(),
-        }
-    }
-}
diff --git a/crates/nvisy-pattern/src/recognition/terms.rs b/crates/nvisy-pattern/src/recognition/terms.rs
index 7c17ddd1..d59ec141 100644
--- a/crates/nvisy-pattern/src/recognition/terms.rs
+++ b/crates/nvisy-pattern/src/recognition/terms.rs
@@ -17,33 +17,86 @@
 use std::io::Cursor;
 
 use nvisy_core::Error;
-use schemars::JsonSchema;
-use serde::{Deserialize, Serialize};
+use nvisy_core::primitive::Confidence;
+use serde::Deserialize;
 
-/// Literal term list. Each term carries the **column index** it
-/// came from (CSV column number, 0-based; non-CSV sources always
-/// use column `0`). The column index is the join key for
-/// [`Dictionary::column_scores`] per-column overrides.
+/// Literal term list. Each [`Term`] carries an optional source
+/// column (set by [`Terms::from_csv`]) plus an optional per-term
+/// score override. The column index is the join key for
+/// [`Dictionary::scoring`] when it's [`Scoring::PerColumn`].
 ///
-/// JSON-transparent: serialises to / deserialises from a JSON array
-/// of `[term, column]` pairs.
-///
-/// [`Dictionary::column_scores`]: crate::Dictionary::column_scores
-#[derive(Debug, Clone, PartialEq, Default, Serialize, Deserialize, JsonSchema)]
+/// [`Dictionary::scoring`]: crate::Dictionary::scoring
+/// [`Scoring::PerColumn`]: crate::Scoring::PerColumn
+#[derive(Debug, Clone, PartialEq, Default, Deserialize)]
 #[serde(transparent)]
-pub struct Terms(Vec<TermEntry>);
+pub struct Terms(Vec<Term>);
 
-/// One entry in a [`Terms`] list: the literal plus the column it
-/// was loaded from. Serde-renamed so the wire shape is the compact
-/// tuple `[term, column]` rather than a verbose object.
-#[derive(Debug, Clone, PartialEq, Serialize, Deserialize, JsonSchema)]
-pub struct TermEntry {
+/// One entry in a [`Terms`] list: the literal, the column it was
+/// loaded from (when applicable), and an optional explicit score
+/// that overrides the dictionary's [`Scoring`] policy for this
+/// term.
+///
+/// Per-term score is `None` for the common path — the dictionary's
+/// [`Scoring`] resolves the per-match score from the column.
+/// Set `score` only for one-off exceptions (e.g. a term known to
+/// be high-confidence even though its column is generally noisy).
+///
+/// Per-term column is `None` for non-CSV sources (plain text
+/// lists, the `From<Vec<String>>` / array impls). `Some(i)` flags
+/// a CSV cell from column `i`; the dictionary's
+/// [`Scoring::PerColumn`] uses it to pick the per-column score.
+///
+/// [`Scoring`]: crate::Scoring
+/// [`Scoring::PerColumn`]: crate::Scoring::PerColumn
+#[derive(Debug, Clone, PartialEq, Deserialize)]
+pub struct Term {
     /// The literal scanned for.
     pub term: String,
-    /// CSV column the term came from (0-based). `0` for any
-    /// non-CSV source.
+    /// CSV column the term came from. `None` for non-CSV
+    /// sources; `Some(i)` for the cell at column `i` of a CSV.
     #[serde(default)]
-    pub column: u16,
+    pub column: Option<u16>,
+    /// Optional per-term score override. When `Some`, the
+    /// recognizer stamps this score on every match of this term;
+    /// when `None`, falls back to the dictionary's [`Scoring`]
+    /// policy resolved against [`column`].
+    ///
+    /// [`Scoring`]: crate::Scoring
+    /// [`column`]: Self::column
+    #[serde(default)]
+    pub score: Option<Confidence>,
+}
+
+impl Term {
+    /// Construct a term with no column and no per-term score
+    /// override. The common path for plain-text sources and
+    /// programmatic `From<…>` constructions.
+    #[must_use]
+    pub fn new(term: impl Into<String>) -> Self {
+        Self {
+            term: term.into(),
+            column: None,
+            score: None,
+        }
+    }
+
+    /// Attach a CSV source-column index, used by the dictionary's
+    /// [`Scoring::PerColumn`] to pick a per-column score.
+    ///
+    /// [`Scoring::PerColumn`]: crate::Scoring::PerColumn
+    #[must_use]
+    pub fn with_column(mut self, column: u16) -> Self {
+        self.column = Some(column);
+        self
+    }
+
+    /// Set an explicit per-term score, overriding the dictionary's
+    /// column-resolved score for this term.
+    #[must_use]
+    pub fn with_score(mut self, score: Confidence) -> Self {
+        self.score = Some(score);
+        self
+    }
 }
 
 impl Terms {
@@ -55,7 +108,7 @@ impl Terms {
 
     /// Borrow the inner entries.
     #[must_use]
-    pub fn entries(&self) -> &[TermEntry] {
+    pub fn entries(&self) -> &[Term] {
         &self.0
     }
 
@@ -73,44 +126,42 @@ impl Terms {
 
     /// Consume into the inner entries.
     #[must_use]
-    pub fn into_inner(self) -> Vec<TermEntry> {
+    pub fn into_inner(self) -> Vec<Term> {
         self.0
     }
 
     /// Parse terms from plain-text bytes — one term per line.
-    /// Each line is trimmed; empty lines and lines starting with `#`
-    /// are skipped. Every term gets column `0`.
+    /// Each line is trimmed; empty lines and lines starting with
+    /// `#` are skipped. Plain-text terms carry no column.
     ///
     /// # Errors
     ///
-    /// Returns a validation error when the input is not valid UTF-8.
+    /// Returns a validation error when the input is not valid
+    /// UTF-8.
     pub fn from_text(bytes: &[u8]) -> Result<Self, Error> {
         let text = std::str::from_utf8(bytes)
             .map_err(|e| Error::validation(format!("terms text: {e}"), "nvisy-pattern"))?;
-        let entries: Vec<TermEntry> = text
+        let entries: Vec<Term> = text
             .lines()
             .map(str::trim)
             .filter(|line| !line.is_empty() && !line.starts_with('#'))
-            .map(|line| TermEntry {
-                term: line.to_owned(),
-                column: 0,
-            })
+            .map(Term::new)
             .collect();
         Ok(Self(entries))
     }
 
-    /// Parse terms from CSV bytes. Every non-empty cell across every
-    /// row becomes a term, and each term remembers the (0-based)
-    /// column index it came from so a [`Dictionary`] can apply
-    /// per-column confidence overrides via
-    /// [`Dictionary::column_scores`].
+    /// Parse terms from CSV bytes. Every non-empty cell across every row
+    /// becomes a term, and each term remembers the (0-based) column index
+    /// it came from so a [`Dictionary`] can apply per-column confidence
+    /// overrides via [`Scoring::PerColumn`]. Column indices that do not
+    /// fit in a `u16` are clamped to `u16::MAX`.
     ///
     /// # Errors
     ///
     /// Returns a validation error when the CSV is malformed.
     ///
     /// [`Dictionary`]: crate::Dictionary
-    /// [`Dictionary::column_scores`]: crate::Dictionary::column_scores
+    /// [`Scoring::PerColumn`]: crate::Scoring::PerColumn
     pub fn from_csv(bytes: &[u8]) -> Result<Self, Error> {
         let mut reader = csv::ReaderBuilder::new()
             .has_headers(false)
@@ -123,10 +174,8 @@ impl Terms {
             for (col_idx, cell) in row.iter().enumerate() {
                 let trimmed = cell.trim();
                 if !trimmed.is_empty() {
-                    entries.push(TermEntry {
-                        term: trimmed.to_owned(),
-                        column: u16::try_from(col_idx).unwrap_or(u16::MAX),
-                    });
+                    let column = u16::try_from(col_idx).unwrap_or(u16::MAX);
+                    entries.push(Term::new(trimmed).with_column(column));
                 }
             }
         }
@@ -136,50 +185,24 @@ impl Terms {
 
 impl From<Vec<String>> for Terms {
     fn from(terms: Vec<String>) -> Self {
-        Self(
-            terms
-                .into_iter()
-                .map(|term| TermEntry { term, column: 0 })
-                .collect(),
-        )
+        Self(terms.into_iter().map(Term::new).collect())
     }
 }
 
 impl From<&[&str]> for Terms {
     fn from(terms: &[&str]) -> Self {
-        Self(
-            terms
-                .iter()
-                .map(|s| TermEntry {
-                    term: (*s).to_owned(),
-                    column: 0,
-                })
-                .collect(),
-        )
+        Self(terms.iter().copied().map(Term::new).collect())
     }
 }
 
 impl<const N: usize> From<[&str; N]> for Terms {
     fn from(terms: [&str; N]) -> Self {
-        Self(
-            terms
-                .iter()
-                .map(|s| TermEntry {
-                    term: (*s).to_owned(),
-                    column: 0,
-                })
-                .collect(),
-        )
+        Self(terms.iter().copied().map(Term::new).collect())
     }
 }
 
 impl<const N: usize> From<[String; N]> for Terms {
     fn from(terms: [String; N]) -> Self {
-        Self(
-            terms
-                .into_iter()
-                .map(|term| TermEntry { term, column: 0 })
-                .collect(),
-        )
+        Self(terms.into_iter().map(Term::new).collect())
     }
 }
diff --git a/crates/nvisy-pattern/src/shipped/mod.rs b/crates/nvisy-pattern/src/shipped/mod.rs
index faec8ff8..062acea8 100644
--- a/crates/nvisy-pattern/src/shipped/mod.rs
+++ b/crates/nvisy-pattern/src/shipped/mod.rs
@@ -2,10 +2,10 @@
 //! crate.
 //!
 //! Each accessor parses an asset file embedded via
-//! [`include_bytes!`] and returns a fresh [`Regex`] or
-//! [`Dictionary`]. Metadata for dictionaries (entity kind, score,
-//! context) is split into a JSON sidecar paired with a CSV / TXT
-//! term source; regex rules are self-contained JSON.
+//! [`include_str!`] and returns a fresh [`Regex`] or
+//! [`Dictionary`]. Metadata for dictionaries (entity label, score,
+//! context) is split into a TOML sidecar paired with a CSV / TXT
+//! term source; regex rules are self-contained TOML.
 //!
 //! Use [`patterns::all`] and [`dictionaries::all`] to load the
 //! complete shipped set, or pick individual accessors.
diff --git a/crates/nvisy-pattern/src/shipped/patterns.rs b/crates/nvisy-pattern/src/shipped/patterns.rs
index 09b6a36c..f9fc4140 100644
--- a/crates/nvisy-pattern/src/shipped/patterns.rs
+++ b/crates/nvisy-pattern/src/shipped/patterns.rs
@@ -119,7 +119,7 @@ shipped_pattern!(
     fn datetime from "personal/datetime.toml"
 );
 
-/// Every built-in regex pattern shipped by this crate, in arbitrary
+/// Every built-in pattern shipped by this crate, in arbitrary
 /// stable order.
 #[must_use]
 pub fn all() -> Vec<Regex> {
diff --git a/crates/nvisy-pattern/src/validators/date.rs b/crates/nvisy-pattern/src/validators/date.rs
index 69a38f4f..bcf30246 100644
--- a/crates/nvisy-pattern/src/validators/date.rs
+++ b/crates/nvisy-pattern/src/validators/date.rs
@@ -3,13 +3,12 @@
 //! Validates that a regex-matched date string represents a real calendar
 //! date. Supports multiple common formats.
 
-/// Validate a date string in common formats.
+/// Return `true` if `value` is a real calendar date in one of the
+/// supported written formats.
 ///
 /// Supported: `MM/DD/YYYY`, `DD/MM/YYYY`, `YYYY-MM-DD`, `YYYY/MM/DD`
-/// (with `/` or `-` separators).
-///
-/// Checks that the date is a real calendar date (accounts for leap years)
-/// and that the year is in 1900:2100.
+/// (with `/` or `-` separators). Leap years are honoured and the
+/// year must fall in `1900..=2100`.
 ///
 /// # Ambiguity
 ///
@@ -18,7 +17,7 @@
 /// back to `DD/MM/YYYY` if the first part is not a valid month. This
 /// is a format-level structural check — locale disambiguation is out
 /// of scope.
-pub fn validate_date(value: &str) -> bool {
+pub fn date(value: &str) -> bool {
     let parts: Vec<&str> = value.split(['/', '-']).collect();
     if parts.len() != 3 {
         return false;
@@ -94,47 +93,47 @@ mod tests {
 
     #[test]
     fn mm_dd_yyyy() {
-        assert!(validate_date("01/15/1990"));
-        assert!(validate_date("12-31-2000"));
+        assert!(date("01/15/1990"));
+        assert!(date("12-31-2000"));
     }
 
     #[test]
     fn yyyy_mm_dd() {
-        assert!(validate_date("1990-01-15"));
-        assert!(validate_date("2000/12/31"));
+        assert!(date("1990-01-15"));
+        assert!(date("2000/12/31"));
     }
 
     #[test]
     fn leap_year() {
-        assert!(validate_date("02/29/2000"));
-        assert!(validate_date("2000-02-29"));
-        assert!(!validate_date("02/29/2001"));
+        assert!(date("02/29/2000"));
+        assert!(date("2000-02-29"));
+        assert!(!date("02/29/2001"));
     }
 
     #[test]
     fn invalid_day() {
-        assert!(!validate_date("04/31/1990"));
-        assert!(!validate_date("01/32/1990"));
-        assert!(!validate_date("01/00/1990"));
+        assert!(!date("04/31/1990"));
+        assert!(!date("01/32/1990"));
+        assert!(!date("01/00/1990"));
     }
 
     #[test]
     fn invalid_month() {
         // 13/01/1990 is valid as DD/MM/YYYY (Jan 13)
-        assert!(validate_date("13/01/1990"));
+        assert!(date("13/01/1990"));
         // YYYY-MM-DD format: month 13 is invalid
-        assert!(!validate_date("1990-13-01"));
+        assert!(!date("1990-13-01"));
     }
 
     #[test]
     fn invalid_year() {
-        assert!(!validate_date("01/01/1899"));
-        assert!(!validate_date("1899-01-01"));
+        assert!(!date("01/01/1899"));
+        assert!(!date("1899-01-01"));
     }
 
     #[test]
     fn dd_mm_yyyy_ambiguous() {
         // 15/01/1990: first part > 12 so must be DD/MM
-        assert!(validate_date("15/01/1990"));
+        assert!(date("15/01/1990"));
     }
 }
diff --git a/crates/nvisy-pattern/src/validators/iban.rs b/crates/nvisy-pattern/src/validators/iban.rs
index 52faf668..0df4a542 100644
--- a/crates/nvisy-pattern/src/validators/iban.rs
+++ b/crates/nvisy-pattern/src/validators/iban.rs
@@ -7,7 +7,7 @@
 /// Return `true` if `value` passes the ISO 13616 mod-97 IBAN check.
 ///
 /// Whitespace and dashes are stripped before validation.
-pub fn validate_iban(value: &str) -> bool {
+pub fn iban(value: &str) -> bool {
     let cleaned: String = value
         .chars()
         .filter(|c| !c.is_ascii_whitespace() && *c != '-')
@@ -53,31 +53,31 @@ mod tests {
     #[test]
     fn valid_ibans() {
         // GB, DE, FR examples from Wikipedia.
-        assert!(validate_iban("GB29 NWBK 6016 1331 9268 19"));
-        assert!(validate_iban("DE89370400440532013000"));
-        assert!(validate_iban("FR76 3000 6000 0112 3456 7890 189"));
+        assert!(iban("GB29 NWBK 6016 1331 9268 19"));
+        assert!(iban("DE89370400440532013000"));
+        assert!(iban("FR76 3000 6000 0112 3456 7890 189"));
     }
 
     #[test]
     fn invalid_check_digits() {
-        assert!(!validate_iban("GB29 NWBK 6016 1331 9268 18"));
-        assert!(!validate_iban("DE00370400440532013000"));
+        assert!(!iban("GB29 NWBK 6016 1331 9268 18"));
+        assert!(!iban("DE00370400440532013000"));
     }
 
     #[test]
     fn too_short() {
-        assert!(!validate_iban("GB29"));
-        assert!(!validate_iban(""));
+        assert!(!iban("GB29"));
+        assert!(!iban(""));
     }
 
     #[test]
     fn non_alphanumeric() {
-        assert!(!validate_iban("GB29!NWBK60161331926819"));
+        assert!(!iban("GB29!NWBK60161331926819"));
     }
 
     #[test]
     fn strips_whitespace_and_dashes() {
-        assert!(validate_iban("GB29-NWBK-6016-1331-9268-19"));
-        assert!(validate_iban("  GB29 NWBK 6016 1331 9268 19  "));
+        assert!(iban("GB29-NWBK-6016-1331-9268-19"));
+        assert!(iban("  GB29 NWBK 6016 1331 9268 19  "));
     }
 }
diff --git a/crates/nvisy-pattern/src/validators/luhn.rs b/crates/nvisy-pattern/src/validators/luhn.rs
index 88cb5146..40bb5bc0 100644
--- a/crates/nvisy-pattern/src/validators/luhn.rs
+++ b/crates/nvisy-pattern/src/validators/luhn.rs
@@ -15,7 +15,7 @@
 ///
 /// Returns `false` if the input is empty or contains characters other
 /// than digits, spaces, and dashes.
-pub fn luhn_check(num: &str) -> bool {
+pub fn luhn(num: &str) -> bool {
     if num.is_empty() {
         return false;
     }
@@ -56,41 +56,41 @@ mod tests {
 
     #[test]
     fn valid_card_numbers() {
-        assert!(luhn_check("4539 1488 0343 6467"));
-        assert!(luhn_check("4539148803436467"));
-        assert!(luhn_check("4539-1488-0343-6467"));
+        assert!(luhn("4539 1488 0343 6467"));
+        assert!(luhn("4539148803436467"));
+        assert!(luhn("4539-1488-0343-6467"));
     }
 
     #[test]
     fn invalid_card_numbers() {
-        assert!(!luhn_check("4539 1488 0343 6466"));
-        assert!(!luhn_check("1234567890123456"));
+        assert!(!luhn("4539 1488 0343 6466"));
+        assert!(!luhn("1234567890123456"));
     }
 
     #[test]
     fn empty_input() {
-        assert!(!luhn_check(""));
+        assert!(!luhn(""));
     }
 
     #[test]
     fn non_digit_input() {
-        assert!(!luhn_check("abcdef"));
+        assert!(!luhn("abcdef"));
     }
 
     #[test]
     fn mixed_alpha_digit_rejected() {
-        assert!(!luhn_check("45abc39"));
-        assert!(!luhn_check("4539 14X8 0343 6467"));
+        assert!(!luhn("45abc39"));
+        assert!(!luhn("4539 14X8 0343 6467"));
     }
 
     #[test]
     fn single_zero() {
-        assert!(luhn_check("0"));
+        assert!(luhn("0"));
     }
 
     #[test]
     fn only_separators_rejected() {
-        assert!(!luhn_check("   "));
-        assert!(!luhn_check("---"));
+        assert!(!luhn("   "));
+        assert!(!luhn("---"));
     }
 }
diff --git a/crates/nvisy-pattern/src/validators/mod.rs b/crates/nvisy-pattern/src/validators/mod.rs
index 991f8422..f384b762 100644
--- a/crates/nvisy-pattern/src/validators/mod.rs
+++ b/crates/nvisy-pattern/src/validators/mod.rs
@@ -1,9 +1,10 @@
 //! Post-match validators for detected entity values.
 //!
-//! A [`Regex`] can reference a validator by name (e.g.
-//! `validator: Some("luhn")`) to reduce false positives. At
-//! [`PatternRecognizer::build`] time the name is resolved against a
-//! [`ValidatorRegistry`] to a concrete validation function.
+//! A [`Variant`] inside a [`Regex`] rule can reference a validator
+//! by name (e.g. `validator: Some("luhn")`) to reduce false
+//! positives. At [`PatternRecognizer::build`] time the name is
+//! resolved against a [`ValidatorRegistry`] to a concrete
+//! validation function.
 //!
 //! The default [`ValidatorRegistry::builtin`] ships with five
 //! validators — `luhn`, `iban`, `ssn`, `phone`, `date`. Consumers
@@ -11,6 +12,7 @@
 //! [`ValidatorRegistry::with`] before handing it to the recognizer
 //! builder.
 //!
+//! [`Variant`]: crate::Variant
 //! [`Regex`]: crate::Regex
 //! [`PatternRecognizer::build`]: crate::PatternRecognizer
 
@@ -20,16 +22,16 @@ mod luhn;
 mod phone;
 mod ssn;
 
+pub use self::date::date;
+pub use self::iban::iban;
+pub use self::luhn::luhn;
+pub use self::phone::phone;
+pub use self::ssn::ssn;
+
 use std::borrow::Cow;
 use std::collections::HashMap;
 use std::sync::Arc;
 
-use self::date::validate_date;
-use self::iban::validate_iban;
-use self::luhn::luhn_check;
-use self::phone::validate_phone;
-use self::ssn::validate_ssn;
-
 /// Post-match validator: returns `true` when `matched` passes the
 /// validator's check.
 ///
@@ -51,15 +53,15 @@ where
     }
 }
 
-/// Resolves validator names referenced in [`Regex`] definitions to
-/// concrete [`Validator`] implementations.
+/// Resolves validator names referenced in [`Variant`] definitions
+/// to concrete [`Validator`] implementations.
 ///
 /// Keys are [`Cow<'static, str>`] so the built-in registrations skip
 /// any allocation (`&'static str` literal → borrowed variant) while
 /// caller-supplied names that aren't `'static` (e.g. dynamically
 /// constructed at runtime) still flow through as owned `String`s.
 ///
-/// [`Regex`]: crate::Regex
+/// [`Variant`]: crate::Variant
 #[derive(Clone, Default)]
 pub struct ValidatorRegistry {
     table: HashMap<Cow<'static, str>, Arc<dyn Validator>>,
@@ -74,16 +76,18 @@ impl ValidatorRegistry {
         Self::default()
     }
 
-    /// Registry pre-loaded with every built-in validator: `luhn`,
-    /// `iban`, `ssn`, `phone`, `date`.
+    /// Registry pre-loaded with every built-in validator: [`luhn`],
+    /// [`iban`], [`ssn`], [`phone`], [`date`]. Each is also
+    /// re-exported individually from this module so consumers can
+    /// mix-and-match without taking all five.
     #[must_use]
     pub fn builtin() -> Self {
         Self::empty()
-            .with("luhn", luhn_check)
-            .with("iban", validate_iban)
-            .with("ssn", validate_ssn)
-            .with("phone", validate_phone)
-            .with("date", validate_date)
+            .with("luhn", luhn)
+            .with("iban", iban)
+            .with("ssn", ssn)
+            .with("phone", phone)
+            .with("date", date)
     }
 
     /// Register `validator` under `name`. Overwrites any previous
diff --git a/crates/nvisy-pattern/src/validators/phone.rs b/crates/nvisy-pattern/src/validators/phone.rs
index cf7ed377..d503ba1f 100644
--- a/crates/nvisy-pattern/src/validators/phone.rs
+++ b/crates/nvisy-pattern/src/validators/phone.rs
@@ -3,15 +3,16 @@
 //! Validates that a regex-matched phone number has a plausible structure:
 //! correct digit count and no obviously invalid prefixes.
 
-/// Validate a phone number matched by regex.
+/// Return `true` if `value` has a plausible phone-number structure.
 ///
 /// Strips all non-digit characters, then checks:
+///
 /// - 7 to 15 digits (ITU-T E.164 range)
 /// - When the original begins with `+` (explicit E.164), the digits
 ///   must not start with 0 (no country code is `0…`). National formats
 ///   such as UK `020 7946 0958` keep their trunk-prefix zero and remain
 ///   valid.
-pub fn validate_phone(value: &str) -> bool {
+pub fn phone(value: &str) -> bool {
     let digits: String = value.chars().filter(|c| c.is_ascii_digit()).collect();
     let len = digits.len();
 
@@ -32,44 +33,44 @@ mod tests {
 
     #[test]
     fn valid_us_numbers() {
-        assert!(validate_phone("+1-555-123-4567"));
-        assert!(validate_phone("(555) 123-4567"));
-        assert!(validate_phone("555.123.4567"));
-        assert!(validate_phone("5551234567"));
+        assert!(phone("+1-555-123-4567"));
+        assert!(phone("(555) 123-4567"));
+        assert!(phone("555.123.4567"));
+        assert!(phone("5551234567"));
     }
 
     #[test]
     fn valid_international() {
-        assert!(validate_phone("+44 20 7946 0958"));
-        assert!(validate_phone("+49 30 12345678"));
-        assert!(validate_phone("+81 3 1234 5678"));
+        assert!(phone("+44 20 7946 0958"));
+        assert!(phone("+49 30 12345678"));
+        assert!(phone("+81 3 1234 5678"));
     }
 
     #[test]
     fn too_few_digits() {
-        assert!(!validate_phone("12345"));
-        assert!(!validate_phone("123-45"));
+        assert!(!phone("12345"));
+        assert!(!phone("123-45"));
     }
 
     #[test]
     fn too_many_digits() {
-        assert!(!validate_phone("1234567890123456"));
+        assert!(!phone("1234567890123456"));
     }
 
     #[test]
     fn e164_starting_with_zero_rejected() {
-        assert!(!validate_phone("+0123456789012"));
+        assert!(!phone("+0123456789012"));
     }
 
     #[test]
     fn national_format_with_trunk_zero_accepted() {
         // UK national format keeps the leading 0 trunk prefix.
-        assert!(validate_phone("020 7946 0958"));
-        assert!(validate_phone("0207946 0958"));
+        assert!(phone("020 7946 0958"));
+        assert!(phone("0207946 0958"));
     }
 
     #[test]
     fn local_number_with_seven_digits() {
-        assert!(validate_phone("123-4567"));
+        assert!(phone("123-4567"));
     }
 }
diff --git a/crates/nvisy-pattern/src/validators/ssn.rs b/crates/nvisy-pattern/src/validators/ssn.rs
index 6732a0d4..46258064 100644
--- a/crates/nvisy-pattern/src/validators/ssn.rs
+++ b/crates/nvisy-pattern/src/validators/ssn.rs
@@ -10,7 +10,7 @@
 /// format.
 ///
 /// This is a format check, not a verification against SSA records.
-pub fn validate_ssn(value: &str) -> bool {
+pub fn ssn(value: &str) -> bool {
     let parts: Vec<&str> = value.split('-').collect();
     if parts.len() != 3 {
         return false;
@@ -36,42 +36,42 @@ mod tests {
 
     #[test]
     fn valid() {
-        assert!(validate_ssn("123-45-6789"));
-        assert!(validate_ssn("001-01-0001"));
-        assert!(validate_ssn("899-99-9999"));
+        assert!(ssn("123-45-6789"));
+        assert!(ssn("001-01-0001"));
+        assert!(ssn("899-99-9999"));
     }
 
     #[test]
     fn invalid_area_zero() {
-        assert!(!validate_ssn("000-45-6789"));
+        assert!(!ssn("000-45-6789"));
     }
 
     #[test]
     fn invalid_area_666() {
-        assert!(!validate_ssn("666-45-6789"));
+        assert!(!ssn("666-45-6789"));
     }
 
     #[test]
     fn invalid_area_900_plus() {
-        assert!(!validate_ssn("900-45-6789"));
-        assert!(!validate_ssn("999-45-6789"));
+        assert!(!ssn("900-45-6789"));
+        assert!(!ssn("999-45-6789"));
     }
 
     #[test]
     fn invalid_group_zero() {
-        assert!(!validate_ssn("123-00-6789"));
+        assert!(!ssn("123-00-6789"));
     }
 
     #[test]
     fn invalid_serial_zero() {
-        assert!(!validate_ssn("123-45-0000"));
+        assert!(!ssn("123-45-0000"));
     }
 
     #[test]
     fn wrong_format() {
-        assert!(!validate_ssn("12345-6789"));
-        assert!(!validate_ssn("123456789"));
-        assert!(!validate_ssn("abc-de-fghi"));
-        assert!(!validate_ssn(""));
+        assert!(!ssn("12345-6789"));
+        assert!(!ssn("123456789"));
+        assert!(!ssn("abc-de-fghi"));
+        assert!(!ssn(""));
     }
 }
diff --git a/crates/nvisy-pattern/testdata/patterns/employee_id.toml b/crates/nvisy-pattern/testdata/patterns/employee_id.toml
index 19949064..b959d33e 100644
--- a/crates/nvisy-pattern/testdata/patterns/employee_id.toml
+++ b/crates/nvisy-pattern/testdata/patterns/employee_id.toml
@@ -1,4 +1,6 @@
 name = "internal-employee-id"
 label = "internal_id"
+
+[[variants]]
 regex = "\\bEMP-\\d{5}\\b"
 score = 0.95
diff --git a/crates/nvisy-pattern/testdata/patterns/product_codes.toml b/crates/nvisy-pattern/testdata/patterns/product_codes.toml
index 29f74865..87e7941b 100644
--- a/crates/nvisy-pattern/testdata/patterns/product_codes.toml
+++ b/crates/nvisy-pattern/testdata/patterns/product_codes.toml
@@ -1,4 +1,6 @@
 name = "internal-product-code"
 label = "internal_id"
+
+[[variants]]
 regex = "\\b(?:WIDGET-\\d{3}|SPROCKET-\\d{2}|GADGET-X\\d)\\b"
 score = 0.9
diff --git a/crates/nvisy-pattern/tests/enhancer_roundtrip.rs b/crates/nvisy-pattern/tests/enhancer_roundtrip.rs
index dbc09cbc..6c1fca87 100644
--- a/crates/nvisy-pattern/tests/enhancer_roundtrip.rs
+++ b/crates/nvisy-pattern/tests/enhancer_roundtrip.rs
@@ -1,103 +1,77 @@
-//! End-to-end: feed real input through the
-//! recognizer → [`ContextEnhancer`] handoff, and verify that
-//! confidence is boosted, the recognition step's `contextual` flag is
-//! set, and a [`Refinement`]
-//! step is appended only for matches that had a nearby keyword.
+//! End-to-end: feed real input through a [`Regex`] →
+//! [`PatternRecognizer`] (wrapped in [`Boosting`]) and verify
+//! that confidence is boosted, and a [`Refinement`] step is
+//! appended only for matches that had a nearby keyword.
 //!
 //! [`Refinement`]: nvisy_core::entity::TrailStepKind::Refinement
+//! [`Boosting`]: nvisy_context::Boosting
 
-use nvisy_context::{Context, ContextEnhancer};
-use nvisy_core::entity::{PatternProvenance, TrailProvenance, TrailStepKind, builtins};
-use nvisy_core::extraction::Artifacts;
+use nvisy_core::entity::{TrailStepKind, builtins};
 use nvisy_core::modality::TextData;
 use nvisy_core::primitive::Confidence;
 use nvisy_core::recognition::{EntityRecognizer, RecognizerInput};
-use nvisy_pattern::{PatternRecognizer, PatternRegistry, Regex};
+use nvisy_pattern::{Regex, PatternRecognizer, Variant};
 
 #[tokio::test]
 async fn enhancer_boosts_matches_near_keyword_only() {
-    let ssn = Regex::builder()
-        .with_name("ssn")
-        .with_label(builtins::GOVERNMENT_ID.label_ref())
+    let variant = Variant::builder()
         .with_regex(r"\b\d{3}-\d{2}-\d{4}\b")
         .with_score(Confidence::clamped(0.6))
-        .with_context(Context::new(["ssn", "social security"]))
+        .build()
+        .expect("ssn variant builds");
+    let regex = Regex::builder()
+        .with_name("ssn")
+        .with_label(builtins::GOVERNMENT_ID.label_ref())
+        .with_context(vec!["ssn".to_owned(), "social security".to_owned()])
+        .with_variants(vec![variant])
         .build()
         .expect("ssn regex builds");
 
-    let registry = PatternRegistry::new().with_pattern(ssn);
     let recognizer = PatternRecognizer::builder()
-        .with_registry(registry.clone())
+        .with_pattern(regex)
         .build()
         .expect("recognizer builds");
 
     // Two SSN-shaped numbers: one near the keyword, one not.
     let text = "First SSN: 123-45-6789. Unrelated number 987-65-4329 elsewhere.";
     let input = RecognizerInput::new(TextData::new(text.to_owned()));
-    let mut entities = recognizer
+    let entities = recognizer
         .recognize(&input)
         .await
         .expect("recognize")
         .entities;
     assert_eq!(entities.len(), 2, "two SSN matches expected");
 
-    // Snapshot base confidences keyed by match text so we can compare
-    // before vs after.
-    let mut before: std::collections::HashMap<String, f64> = std::collections::HashMap::new();
-    for e in &entities {
-        before.insert(
-            text[e.location.start..e.location.end].to_owned(),
-            e.confidence.get(),
-        );
-    }
-
-    let enhancer = ContextEnhancer::builder()
-        .with_registry(registry.context_registry())
-        .with_default_window(20)
-        .with_default_boost(0.3)
-        .build()
-        .expect("enhancer builds");
-    enhancer.enhance(&mut entities, text, &Artifacts::new());
-
-    // First match has `SSN:` within the 20-byte window → boosted.
+    // First match has `SSN:` within the default 5-word prefix/suffix
+    // window and gets boosted by the Boosting<PatternRecognizer> wrapper.
     let near = entities
         .iter()
         .find(|e| &text[e.location.start..e.location.end] == "123-45-6789")
         .expect("near match present");
     assert!(
-        near.confidence.get() > before["123-45-6789"],
-        "near-keyword match should be boosted"
+        near.confidence.get() > 0.6,
+        "near-keyword match should be boosted",
     );
     assert!(
         near.trail
             .iter()
             .any(|s| matches!(s.kind, TrailStepKind::Refinement)),
-        "near-keyword match should have a Refinement step"
-    );
-    let TrailProvenance::Pattern(PatternProvenance::Regex { contextual, .. }) =
-        &near.trail[0].provenance
-    else {
-        panic!("expected regex provenance on recognition step");
-    };
-    assert!(
-        *contextual,
-        "contextual flag should be set on recognition step"
+        "near-keyword match should have a Refinement step",
     );
 
-    // Second match is well outside the 20-byte window → untouched.
+    // Second match is well outside the window → untouched.
     let far = entities
         .iter()
         .find(|e| &text[e.location.start..e.location.end] == "987-65-4329")
         .expect("far match present");
-    assert_eq!(
-        far.confidence.get(),
-        before["987-65-4329"],
-        "far-from-keyword match should not be boosted"
+    assert!(
+        (far.confidence.get() - 0.6).abs() < f64::EPSILON,
+        "far-from-keyword match should not be boosted",
     );
     assert!(
         !far.trail
             .iter()
             .any(|s| matches!(s.kind, TrailStepKind::Refinement)),
-        "far-from-keyword match should have no Refinement step"
+        "far-from-keyword match should have no Refinement step",
     );
 }
diff --git a/crates/nvisy-pattern/tests/shipped_detection.rs b/crates/nvisy-pattern/tests/shipped_detection.rs
index c6374987..1f6a30f9 100644
--- a/crates/nvisy-pattern/tests/shipped_detection.rs
+++ b/crates/nvisy-pattern/tests/shipped_detection.rs
@@ -10,17 +10,14 @@
 use nvisy_core::entity::{Entity, EntityLabelRef, builtins};
 use nvisy_core::modality::{Text, TextData};
 use nvisy_core::recognition::{EntityRecognizer, RecognizerInput};
-use nvisy_pattern::{PatternRecognizer, PatternRegistry};
-
-fn shipped_recognizer() -> PatternRecognizer {
-    PatternRecognizer::builder()
-        .with_registry(PatternRegistry::builtin())
-        .build()
-        .expect("shipped recognizer builds")
-}
+use nvisy_pattern::PatternRecognizer;
 
 async fn scan(text: &str) -> (String, Vec<Entity<Text>>) {
-    let recognizer = shipped_recognizer();
+    let recognizer = PatternRecognizer::builder()
+        .with_builtin_patterns()
+        .with_builtin_dictionaries()
+        .build()
+        .expect("shipped recognizer builds");
     let input = RecognizerInput::new(TextData::new(text.to_owned()));
     let entities = recognizer
         .recognize(&input)
diff --git a/crates/nvisy-pattern/tests/user_rules.rs b/crates/nvisy-pattern/tests/user_rules.rs
index 6e78c8ac..38dcee10 100644
--- a/crates/nvisy-pattern/tests/user_rules.rs
+++ b/crates/nvisy-pattern/tests/user_rules.rs
@@ -1,20 +1,21 @@
-//! End-to-end: load user-supplied rules from the on-disk wire shape
-//! (`testdata/patterns/*.toml`, `testdata/dictionaries/*.{toml,csv}`)
-//! through [`Regex::from_toml`], [`Dictionary::metadata_from_toml`],
-//! and [`Terms::from_csv`], mix them with shipped patterns, and
+//! End-to-end: load user-supplied patterns from the on-disk wire
+//! shape (`testdata/patterns/*.toml`,
+//! `testdata/dictionaries/*.{toml,csv}`) through
+//! [`Regex::from_toml`], [`Dictionary::metadata_from_toml`], and
+//! [`Terms::from_csv`], mix them with shipped patterns, and
 //! confirm a real internal-handoff document yields the custom
 //! entities.
 
 use nvisy_core::entity::builtins;
 use nvisy_core::modality::TextData;
 use nvisy_core::recognition::{EntityRecognizer, RecognizerInput};
-use nvisy_pattern::{Dictionary, PatternRecognizer, PatternRegistry, Regex, Terms};
+use nvisy_pattern::{Dictionary, Regex, PatternRecognizer, Terms};
 
 #[tokio::test]
 async fn user_toml_rules_load_and_detect() {
     let employee_id = Regex::from_toml(include_str!("../testdata/patterns/employee_id.toml"))
         .expect("employee_id.toml parses");
-    let product_code_regex =
+    let product_code_pattern =
         Regex::from_toml(include_str!("../testdata/patterns/product_codes.toml"))
             .expect("product_codes.toml parses");
 
@@ -30,15 +31,12 @@ async fn user_toml_rules_load_and_detect() {
     // 4 rows × 3 columns; every non-empty cell becomes a term.
     assert_eq!(product_code_dict.terms.len(), 12);
 
-    // Mix user rules with shipped (so the input also sees email etc.).
-    let registry = PatternRegistry::new()
+    // Mix user patterns with shipped (so the input also sees email etc.).
+    let recognizer = PatternRecognizer::builder()
         .with_pattern(employee_id)
-        .with_pattern(product_code_regex)
+        .with_pattern(product_code_pattern)
         .with_dictionary(product_code_dict)
-        .with_builtin_patterns();
-
-    let recognizer = PatternRecognizer::builder()
-        .with_registry(registry)
+        .with_builtin_patterns()
         .build()
         .expect("recognizer builds");
 
@@ -82,12 +80,12 @@ async fn user_toml_rules_load_and_detect() {
         "expected dictionary alias/full-name hit, got {emp_hits:?}"
     );
 
-    // Shipped email pattern fires too — proves user + shipped coexist.
+    // Shipped email regex fires too — proves user + shipped coexist.
     assert!(
         entities
             .iter()
             .any(|e| e.label == builtins::EMAIL_ADDRESS.label_ref()
                 && &text[e.location.start..e.location.end] == "counsel@example.com"),
-        "expected shipped email pattern to fire alongside user rules"
+        "expected shipped email regex to fire alongside user rules"
     );
 }
diff --git a/crates/nvisy-toolkit/Cargo.toml b/crates/nvisy-toolkit/Cargo.toml
index 88d17f47..3a485c61 100644
--- a/crates/nvisy-toolkit/Cargo.toml
+++ b/crates/nvisy-toolkit/Cargo.toml
@@ -80,6 +80,8 @@ unicode-normalization = { workspace = true, features = [] }
 [dev-dependencies]
 # Internal test utilities (Entity::test_builder, …).
 nvisy-core = { workspace = true, features = ["test-utils"] }
+# Boosting<R> wrapper type returned by PatternRecognizer::build().
+nvisy-context = { workspace = true, features = [] }
 # Codec front door for E2E pipeline tests. Production builds pull
 # nvisy-codec via the toolkit's per-modality features; the dev
 # entry pins txt/csv/json so the pipeline_*.rs tests compile.
diff --git a/crates/nvisy-toolkit/examples/pipeline.rs b/crates/nvisy-toolkit/examples/pipeline.rs
index 9a6a0656..d4703f4f 100644
--- a/crates/nvisy-toolkit/examples/pipeline.rs
+++ b/crates/nvisy-toolkit/examples/pipeline.rs
@@ -26,7 +26,7 @@ use nvisy_core::modality::{Text, TextData};
 use nvisy_core::primitive::ConfidenceThreshold;
 use nvisy_core::recognition::RecognizerInput;
 use nvisy_core::redaction::RedactAt;
-use nvisy_pattern::{PatternRecognizer, PatternRegistry};
+use nvisy_pattern::PatternRecognizer;
 use nvisy_toolkit::deduplication::{LayerContext, LayerParams, LayerPipeline};
 use nvisy_toolkit::detection::RecognizerRegistry;
 use nvisy_toolkit::redaction::RedactionRegistry;
@@ -54,7 +54,8 @@ async fn main() -> Result<()> {
     // services. Add NER / LLM recognizers with extra
     // `.with_recognizer(...)` calls.
     let pattern = PatternRecognizer::builder()
-        .with_registry(PatternRegistry::builtin())
+        .with_builtin_patterns()
+        .with_builtin_dictionaries()
         .build()?;
     let detection = RecognizerRegistry::new().with_recognizer(pattern);
 
diff --git a/crates/nvisy-toolkit/src/redaction/deanonymizer/mod.rs b/crates/nvisy-toolkit/src/redaction/deanonymizer/mod.rs
index 3ea2731b..733c0664 100644
--- a/crates/nvisy-toolkit/src/redaction/deanonymizer/mod.rs
+++ b/crates/nvisy-toolkit/src/redaction/deanonymizer/mod.rs
@@ -3,7 +3,8 @@
 //!
 //! Each operator recovers the original payload an [`Anonymizer<M>`]
 //! wrote. Two recovery shapes ship today (see [`Deanonymizer`]):
-//! audit-keyed (no impl yet) and self-contained ([`Decrypt`]).
+//! audit-keyed (no impl yet) and self-contained (e.g. `Decrypt`,
+//! gated behind the `encrypt` feature).
 //!
 //! [`Anonymizer<M>`]: crate::redaction::Anonymizer
 //! [`Deanonymizer<M>`]: crate::redaction::Deanonymizer
diff --git a/crates/nvisy-toolkit/src/redaction/mod.rs b/crates/nvisy-toolkit/src/redaction/mod.rs
index f5fbb9e3..d8ec4a2a 100644
--- a/crates/nvisy-toolkit/src/redaction/mod.rs
+++ b/crates/nvisy-toolkit/src/redaction/mod.rs
@@ -26,12 +26,10 @@
 //! built-in (constructed inline) or a `Custom(AnonymizerId<M>)`
 //! (looked up in the registry).
 //!
-//! [`Replace`]: anonymizer::Replace
-//! [`Mask`]: anonymizer::Mask
-//! [`Hash`]: anonymizer::Hash
-//! [`Redact`]: anonymizer::Redact
-//! [`Keep`]: anonymizer::Keep
-//! [`Encrypt`]: anonymizer::Encrypt
+//! [`Anonymizer<M>`]: Anonymizer
+//! [`Deanonymizer<M>`]: Deanonymizer
+//! [`AnonymizerId<M>`]: AnonymizerId
+//! [`RedactionRegistry<M>`]: RedactionRegistry
 
 mod id;
 mod registry;
diff --git a/crates/nvisy-toolkit/tests/fixtures/registries.rs b/crates/nvisy-toolkit/tests/fixtures/registries.rs
index 08657054..3c77d299 100644
--- a/crates/nvisy-toolkit/tests/fixtures/registries.rs
+++ b/crates/nvisy-toolkit/tests/fixtures/registries.rs
@@ -1,18 +1,21 @@
 //! Shared recognizer + redaction registry constructors and dedup
 //! params used by every codec E2E test.
 
+use nvisy_context::Boosting;
 use nvisy_core::entity::builtins;
 use nvisy_core::modality::Modality;
 use nvisy_core::primitive::ConfidenceThreshold;
-use nvisy_pattern::{PatternRecognizer, PatternRegistry};
+use nvisy_pattern::PatternRecognizer;
 use nvisy_toolkit::deduplication::LayerParams;
 use nvisy_toolkit::redaction::anonymizer::{Mask, Replace};
 use nvisy_toolkit::redaction::{Anonymizer, RedactionRegistry};
 
-/// Build the shipped pattern recognizer from every built-in pattern.
-pub fn shipped_recognizer() -> PatternRecognizer {
+/// Build the shipped pattern recognizer from every built-in
+/// pattern + dictionary, wrapped in its [`Boosting`] layer.
+pub fn shipped_recognizer() -> Boosting<PatternRecognizer> {
     PatternRecognizer::builder()
-        .with_registry(PatternRegistry::builtin())
+        .with_builtin_patterns()
+        .with_builtin_dictionaries()
         .build()
         .expect("shipped recognizer builds")
 }
diff --git a/crates/nvisy-toolkit/tests/recognition_registry.rs b/crates/nvisy-toolkit/tests/recognition_registry.rs
index 1556e2d8..80460d41 100644
--- a/crates/nvisy-toolkit/tests/recognition_registry.rs
+++ b/crates/nvisy-toolkit/tests/recognition_registry.rs
@@ -30,7 +30,7 @@ use nvisy_llm::provider::LlmProvider;
 use nvisy_llm::{DefaultPrompt, LlmRecognizer};
 use nvisy_ner::NerRecognizer;
 use nvisy_ner::backend::{BentoBackend, BentoParams};
-use nvisy_pattern::{PatternRecognizer, PatternRegistry};
+use nvisy_pattern::PatternRecognizer;
 use nvisy_toolkit::detection::RecognizerRegistry;
 
 /// Sample text that triggers all three recognizers:
@@ -46,9 +46,10 @@ fn env_or(key: &str, default: &str) -> String {
 
 fn build_registry() -> RecognizerRegistry {
     let pattern = PatternRecognizer::builder()
-        .with_registry(PatternRegistry::builtin())
+        .with_builtin_patterns()
+        .with_builtin_dictionaries()
         .build()
-        .expect("pattern recognizer builds from builtin registry");
+        .expect("pattern recognizer builds from builtin set");
 
     let bento_url = env_or("NVISY_BENTO_URL", "http://localhost:3000");
     let bento_backend = BentoBackend::new(BentoParams::new(bento_url)).expect("bento backend init");

From 628227cfc4c856ba77fcd1e8373628cb97d68971 Mon Sep 17 00:00:00 2001
From: Oleh Martsokha <o.martsokha@gmail.com>
Date: Sun, 14 Jun 2026 22:43:09 +0200
Subject: [PATCH 05/14] refactor(pattern): inline VariantBuilder, drop Terms,
 normalize all docs

- Variant: replace derive_builder with `new(regex)?` +
  `with_score` / `with_validator` chain (matches Term::new style).
- Drop the Terms newtype; Dictionary::terms is `Vec<Term>` and the
  parsers move to associated fns on Term:
  - Term::from_text(&str) -> Vec<Term>   (infallible)
  - Term::from_csv(&str) -> Result<Vec<Term>, Error>
  Signatures now match Regex::from_toml / Dictionary::from_toml.
- Rewrite every public-item docblock in nvisy-pattern for a
  consistent style: noun-phrase openers for types, imperative for
  constructors/setters, returns-form for predicates, reference-form
  doc-links at the bottom, `# Errors` only where fallible, code
  examples on top-level types.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
---
 crates/nvisy-context/src/enhancer.rs          |  16 +-
 crates/nvisy-context/src/matcher.rs           |   6 +-
 crates/nvisy-context/src/wrapper.rs           |   3 +-
 crates/nvisy-pattern/src/lib.rs               |   2 +-
 .../src/recognition/dictionary.rs             | 146 ++++++------
 crates/nvisy-pattern/src/recognition/mod.rs   |  21 +-
 .../src/recognition/recognizer.rs             | 104 +++++----
 crates/nvisy-pattern/src/recognition/regex.rs | 157 +++++++------
 crates/nvisy-pattern/src/recognition/term.rs  | 104 +++++++++
 crates/nvisy-pattern/src/recognition/terms.rs | 208 ------------------
 .../nvisy-pattern/src/shipped/dictionaries.rs |  17 +-
 crates/nvisy-pattern/src/shipped/mod.rs       |  13 +-
 crates/nvisy-pattern/src/validators/date.rs   |  20 +-
 crates/nvisy-pattern/src/validators/iban.rs   |  14 +-
 crates/nvisy-pattern/src/validators/luhn.rs   |  20 +-
 crates/nvisy-pattern/src/validators/mod.rs    |  92 ++++----
 crates/nvisy-pattern/src/validators/phone.rs  |  17 +-
 crates/nvisy-pattern/src/validators/ssn.rs    |  19 +-
 .../nvisy-pattern/tests/enhancer_roundtrip.rs |  10 +-
 crates/nvisy-pattern/tests/user_rules.rs      |   6 +-
 20 files changed, 445 insertions(+), 550 deletions(-)
 create mode 100644 crates/nvisy-pattern/src/recognition/term.rs
 delete mode 100644 crates/nvisy-pattern/src/recognition/terms.rs

diff --git a/crates/nvisy-context/src/enhancer.rs b/crates/nvisy-context/src/enhancer.rs
index f1eba2df..cd3103b1 100644
--- a/crates/nvisy-context/src/enhancer.rs
+++ b/crates/nvisy-context/src/enhancer.rs
@@ -475,13 +475,7 @@ mod tests {
         // keyword "social security" must NOT fire — unlike a
         // hypothetical caller that gave it the word-window path,
         // which would split on whitespace.
-        let enhancer = enhancer(vec![rule(
-            govid_label(),
-            &["social security"],
-            1,
-            0,
-            0.2,
-        )]);
+        let enhancer = enhancer(vec![rule(govid_label(), &["social security"], 1, 0, 0.2)]);
         let text = "social security: Your 123-45-6789";
         let entity_start = text.find("123").unwrap();
         let entity_end = entity_start + "123-45-6789".len();
@@ -504,13 +498,7 @@ mod tests {
     fn token_path_boosts_when_keyword_within_token_window() {
         // Same tokens, 2-word prefix: now the `social security`
         // token is reachable and the boost fires.
-        let enhancer = enhancer(vec![rule(
-            govid_label(),
-            &["social security"],
-            2,
-            0,
-            0.2,
-        )]);
+        let enhancer = enhancer(vec![rule(govid_label(), &["social security"], 2, 0, 0.2)]);
         let text = "social security: Your 123-45-6789";
         let entity_start = text.find("123").unwrap();
         let entity_end = entity_start + "123-45-6789".len();
diff --git a/crates/nvisy-context/src/matcher.rs b/crates/nvisy-context/src/matcher.rs
index a2cdb3c3..06beef22 100644
--- a/crates/nvisy-context/src/matcher.rs
+++ b/crates/nvisy-context/src/matcher.rs
@@ -99,7 +99,11 @@ mod tests {
     fn substring_matches_case_insensitively() {
         let m = SubstringMatcher;
         assert!(m.any_match("Your SSN: 123", &[], &kws(&["ssn"])));
-        assert!(m.any_match("the SOCIAL SECURITY number", &[], &kws(&["social security"])));
+        assert!(m.any_match(
+            "the SOCIAL SECURITY number",
+            &[],
+            &kws(&["social security"])
+        ));
         assert!(!m.any_match("nothing here", &[], &kws(&["ssn"])));
     }
 
diff --git a/crates/nvisy-context/src/wrapper.rs b/crates/nvisy-context/src/wrapper.rs
index 688f2838..87105b19 100644
--- a/crates/nvisy-context/src/wrapper.rs
+++ b/crates/nvisy-context/src/wrapper.rs
@@ -22,8 +22,7 @@ use nvisy_core::Result;
 use nvisy_core::modality::Text;
 use nvisy_core::recognition::{EntityRecognizer, RecognizerInput, RecognizerOutput};
 
-use super::Enhancer;
-use super::Tokens;
+use super::{Enhancer, Tokens};
 
 /// Wraps an [`EntityRecognizer<Text>`] with a post-recognition
 /// [`Enhancer`] pass. Implements [`EntityRecognizer<Text>`] so
diff --git a/crates/nvisy-pattern/src/lib.rs b/crates/nvisy-pattern/src/lib.rs
index ed069016..129b002f 100644
--- a/crates/nvisy-pattern/src/lib.rs
+++ b/crates/nvisy-pattern/src/lib.rs
@@ -8,6 +8,6 @@ pub mod validators;
 
 pub use self::recognition::{
     Dictionary, DictionaryBuilder, PatternRecognizer, PatternRecognizerBuilder, Regex,
-    RegexBuilder, Scoring, Term, Terms, Variant, VariantBuilder,
+    RegexBuilder, Scoring, Term, Variant,
 };
 pub use self::shipped::{dictionaries, patterns};
diff --git a/crates/nvisy-pattern/src/recognition/dictionary.rs b/crates/nvisy-pattern/src/recognition/dictionary.rs
index bf20a1e0..3285f7af 100644
--- a/crates/nvisy-pattern/src/recognition/dictionary.rs
+++ b/crates/nvisy-pattern/src/recognition/dictionary.rs
@@ -1,22 +1,4 @@
 //! [`Dictionary`]: literal-term detection rule.
-//!
-//! A dictionary scans for a fixed list of literal strings using an
-//! Aho-Corasick automaton. Compared with [`Regex`], a dictionary
-//! has no regex engine, no validator, and a single shared confidence
-//! score applied to every match.
-//!
-//! Construct via [`Dictionary::builder`] for the chainable style or
-//! [`Dictionary::from_toml`] for a self-contained TOML source.
-//!
-//! Term sources are first-class — see [`Terms`] for [`from_text`]
-//! and [`from_csv`] constructors. The builder's [`with_terms`]
-//! setter accepts anything convertible to [`Terms`].
-//!
-//! [`Regex`]: crate::Regex
-//! [`Terms`]: crate::Terms
-//! [`from_text`]: crate::Terms::from_text
-//! [`from_csv`]: crate::Terms::from_csv
-//! [`with_terms`]: DictionaryBuilder::with_terms
 
 use derive_builder::Builder;
 use nvisy_core::Error;
@@ -24,14 +6,14 @@ use nvisy_core::entity::EntityLabelRef;
 use nvisy_core::primitive::{Confidence, LanguageTag};
 use serde::Deserialize;
 
-use super::terms::Terms;
+use super::term::Term;
 
 /// Confidence policy for a [`Dictionary`]'s matches.
 ///
 /// Either every term gets the same score ([`Uniform`]), or scores
-/// are picked per CSV source column ([`PerColumn`]). The untagged
-/// serde representation accepts a bare number for the uniform
-/// case and an array for the per-column case:
+/// vary by CSV source column ([`PerColumn`]). The untagged serde
+/// representation accepts a bare number for the uniform case and
+/// an array for the per-column case:
 ///
 /// ```toml
 /// score = 0.9              # Uniform
@@ -43,24 +25,28 @@ use super::terms::Terms;
 #[derive(Debug, Clone, PartialEq, Deserialize)]
 #[serde(untagged)]
 pub enum Scoring {
-    /// Single confidence stamped on every match. The common case.
+    /// One confidence stamped on every match — the common case.
     Uniform(Confidence),
-    /// Per-column confidence vector. `[i]` is the confidence
-    /// stamped on every term whose source CSV column was `i`. A
-    /// term from a column past the end of this vec is a
-    /// recognizer-build error — define one score per column.
+    /// Per-column confidence vector. Entry `i` is the score for
+    /// terms loaded from CSV column `i`. A term from a column past
+    /// the end of this vector causes a recognizer-build error, so
+    /// callers must declare one score per source column.
     PerColumn(Vec<Confidence>),
 }
 
 impl Scoring {
-    /// Validate the policy's internal shape. A
-    /// `PerColumn(vec![])` can never resolve a score for any
-    /// column, so callers (the recognizer at build time) surface
-    /// it as a configuration error.
+    /// Return `Ok(())` when the policy can resolve a score for at
+    /// least one input.
+    ///
+    /// [`PerColumn`] with an empty vector can never resolve and is
+    /// rejected here; the recognizer surfaces the error at build
+    /// time.
     ///
     /// # Errors
     ///
-    /// Returns the human-readable reason the policy is invalid.
+    /// Returns a human-readable reason when the policy is invalid.
+    ///
+    /// [`PerColumn`]: Self::PerColumn
     pub fn validate(&self) -> Result<(), &'static str> {
         match self {
             Self::Uniform(_) => Ok(()),
@@ -71,12 +57,15 @@ impl Scoring {
         }
     }
 
-    /// Resolve a score for `column`. `Uniform` ignores the column
-    /// and always returns its score; `PerColumn` returns the entry
-    /// at `column`, or `None` when no column is supplied or the
-    /// index is past the end of the per-column vector. Callers
-    /// decide the fall-back policy (per-term override, hard
-    /// error, default constant, etc.).
+    /// Resolve a score for the given source `column`.
+    ///
+    /// [`Uniform`] ignores `column` and always returns its score;
+    /// [`PerColumn`] returns the entry at `column`, or `None` when
+    /// `column` is `None` or out of range. Callers decide the
+    /// fall-back policy (per-term override, hard error, …).
+    ///
+    /// [`Uniform`]: Self::Uniform
+    /// [`PerColumn`]: Self::PerColumn
     #[must_use]
     pub fn get(&self, column: Option<u16>) -> Option<Confidence> {
         match self {
@@ -94,17 +83,29 @@ impl Default for Scoring {
 
 /// Literal-term detection rule.
 ///
+/// Scans for a fixed list of literals using a shared Aho-Corasick
+/// automaton. Unlike [`Regex`], a dictionary has no regex engine,
+/// no validator, and a [`Scoring`] policy shared across its terms.
+///
+/// # Examples
+///
 /// ```
 /// use nvisy_core::entity::builtins;
-/// use nvisy_pattern::{Dictionary, Terms};
+/// use nvisy_pattern::{Dictionary, Term};
 ///
 /// let dictionary = Dictionary::builder()
 ///     .with_name("nationalities")
 ///     .with_label(builtins::NATIONALITY.label_ref())
-///     .with_terms(Terms::from(["German", "French", "Italian"]))
+///     .with_terms(vec![
+///         Term::new("German"),
+///         Term::new("French"),
+///         Term::new("Italian"),
+///     ])
 ///     .build()
 ///     .expect("nationalities dictionary builds");
 /// ```
+///
+/// [`Regex`]: crate::Regex
 #[derive(Debug, Clone, PartialEq, Builder, Deserialize)]
 #[builder(
     name = "DictionaryBuilder",
@@ -113,40 +114,39 @@ impl Default for Scoring {
     build_fn(error = "Error")
 )]
 pub struct Dictionary {
-    /// Human-readable identifier (e.g. `"nationalities"`).
+    /// Human-readable identifier surfaced in trail provenance
+    /// (e.g. `"nationalities"`).
     pub name: String,
     /// Entity label every match emits.
     pub label: EntityLabelRef,
-    /// Literal terms to scan for. The recognizer compiles these
-    /// into an Aho-Corasick automaton at build time.
-    pub terms: Terms,
-    /// Confidence policy: uniform across every term, or per CSV
-    /// source column. Defaults to [`Scoring::Uniform`] with
-    /// [`Confidence::MAX`].
+    /// Literal terms to scan for. Compiled into the shared
+    /// Aho-Corasick automaton at recognizer-build time.
+    pub terms: Vec<Term>,
+    /// Confidence policy resolved against each term at
+    /// recognizer-build time. Defaults to [`Scoring::Uniform`]
+    /// with [`Confidence::MAX`].
     #[builder(default)]
     #[serde(default, rename = "score")]
     pub scoring: Scoring,
     /// Context keywords that lift confidence when one of them
-    /// appears near a match. Harvested by the engine into a
-    /// per-label `BoostRule` in `nvisy-context`; the recognizer
-    /// itself never reads this field.
+    /// appears near a match.
     #[builder(default)]
     #[serde(default)]
     pub context: Vec<String>,
-    /// Languages the dictionary applies to (BCP-47 tags). An empty
-    /// list (the default) means the dictionary applies regardless
-    /// of language; otherwise the recognizer skips this dictionary
-    /// when the per-call language hint is set to a tag not in this
+    /// BCP-47 language tags the dictionary applies to. Empty means
+    /// "any language"; otherwise the recognizer skips the
+    /// dictionary when the per-call language hint is not in the
     /// list.
     #[builder(default)]
     #[serde(default)]
     pub languages: Vec<LanguageTag>,
-    /// Require word-boundary surroundings on every match. With the
-    /// default of `true`, a term `"am"` matches the word `"am"`
-    /// but not the `"am"` inside `"example"`. Word characters are
-    /// alphanumerics and `_` (Unicode-aware). Set to `false` for
-    /// dictionaries that genuinely want substring matching (e.g.
-    /// scanning for embedded credentials inside arbitrary tokens).
+    /// Require word-boundary surroundings on every match.
+    ///
+    /// With the default of `true`, the term `"am"` matches the
+    /// word `"am"` but not the `"am"` inside `"example"`. Word
+    /// characters are Unicode alphanumerics and `_`. Set to
+    /// `false` to allow substring matches (e.g. scanning for
+    /// embedded credentials).
     #[builder(default = "true")]
     #[serde(default = "default_word_boundary")]
     pub word_boundary: bool,
@@ -157,16 +157,18 @@ fn default_word_boundary() -> bool {
 }
 
 impl Dictionary {
-    /// Start a chainable builder. Required fields: `name`,
-    /// `label`, `terms`.
+    /// Start a chainable builder.
+    ///
+    /// Required fields: `name`, `label`, `terms`.
     #[must_use]
     pub fn builder() -> DictionaryBuilder {
         DictionaryBuilder::default()
     }
 
-    /// Parse a self-contained dictionary from a TOML string. The
-    /// TOML must include a `terms` field; for metadata-only TOML
-    /// paired with a separate term source, use
+    /// Parse a self-contained dictionary from a TOML source.
+    ///
+    /// The TOML must include a `terms` field; for metadata-only
+    /// TOML paired with a separate term source, use
     /// [`metadata_from_toml`] instead.
     ///
     /// # Errors
@@ -180,15 +182,11 @@ impl Dictionary {
             .map_err(|e| Error::validation(format!("dictionary TOML: {e}"), "nvisy-pattern"))
     }
 
-    /// Parse the metadata fields of a dictionary from TOML (no
-    /// `terms` required) and return a seeded builder. The caller is
-    /// expected to chain
-    /// [`with_terms`] before
-    /// [`build`].
+    /// Parse dictionary metadata from a sidecar TOML source.
     ///
-    /// Useful when shipped or user-supplied dictionaries split
-    /// metadata into a TOML sidecar and store the actual terms as
-    /// CSV / TXT.
+    /// The returned [`DictionaryBuilder`] is seeded with every
+    /// field except `terms`; callers chain [`with_terms`] (e.g.
+    /// loaded from a paired CSV/TXT) before [`build`].
     ///
     /// # Errors
     ///
@@ -217,8 +215,6 @@ impl Dictionary {
     }
 }
 
-/// Wire shape for the dictionary metadata sidecar TOML — every
-/// field [`Dictionary`] carries except `terms`.
 #[derive(Debug, Clone, Deserialize)]
 struct DictionaryMetadata {
     name: String,
diff --git a/crates/nvisy-pattern/src/recognition/mod.rs b/crates/nvisy-pattern/src/recognition/mod.rs
index d6d2d18e..e55bbb39 100644
--- a/crates/nvisy-pattern/src/recognition/mod.rs
+++ b/crates/nvisy-pattern/src/recognition/mod.rs
@@ -1,18 +1,19 @@
-//! Recognition primitives — the rule shapes ([`Regex`] + its
-//! [`Variant`]s, [`Dictionary`]), their building blocks ([`Terms`]),
-//! and the runtime [`PatternRecognizer`] that compiles them into
-//! pooled scanners. Per-rule and per-dictionary `context` keyword
-//! lists are harvested by the recognizer at build time into a
-//! wrapping `Boosting` layer that applies post-recognition keyword
-//! boosts.
+//! Recognition primitives.
+//!
+//! Holds the rule shapes ([`Regex`] + its [`Variant`]s, [`Dictionary`]),
+//! their building blocks ([`Term`]), and the runtime
+//! [`PatternRecognizer`] that compiles them into pooled scanners.
+//! Per-rule and per-dictionary `context` keyword lists are harvested
+//! by the recognizer at build time into a wrapping `Boosting` layer
+//! that lifts confidence on matches near a declared keyword.
 
 mod compiled;
 mod dictionary;
 mod recognizer;
 mod regex;
-mod terms;
+mod term;
 
 pub use self::dictionary::{Dictionary, DictionaryBuilder, Scoring};
 pub use self::recognizer::{PatternRecognizer, PatternRecognizerBuilder};
-pub use self::regex::{Regex, RegexBuilder, Variant, VariantBuilder};
-pub use self::terms::{Term, Terms};
+pub use self::regex::{Regex, RegexBuilder, Variant};
+pub use self::term::Term;
diff --git a/crates/nvisy-pattern/src/recognition/recognizer.rs b/crates/nvisy-pattern/src/recognition/recognizer.rs
index 01bc5533..fd133fdf 100644
--- a/crates/nvisy-pattern/src/recognition/recognizer.rs
+++ b/crates/nvisy-pattern/src/recognition/recognizer.rs
@@ -1,20 +1,4 @@
-//! [`PatternRecognizer`]: compiles patterns and dictionaries into
-//! pooled scanners and implements [`EntityRecognizer<Text>`].
-//!
-//! The internal split is intentional: regex patterns go into a
-//! single [`regex::RegexSet`] for a one-pass scan across every
-//! regex; dictionary terms go into a single
-//! [`aho_corasick::AhoCorasick`] automaton for a one-pass scan
-//! across every literal. Both passes share one walk over the input
-//! and emit entities in modality-local byte coordinates.
-//!
-//! Construction is builder-driven: [`PatternRecognizer::builder`]
-//! returns a [`PatternRecognizerBuilder`] that accumulates patterns,
-//! dictionaries, and (optionally) a custom validator registry, then
-//! compiles everything into the scanners on [`build`]. The shipped
-//! built-in pattern + dictionary set is [`PatternRecognizerBuilder::builtin`].
-//!
-//! [`build`]: PatternRecognizerBuilder::build
+//! [`PatternRecognizer`] and its builder.
 
 use aho_corasick::{AhoCorasick, MatchKind};
 use nvisy_context::{BoostRule, Boosting, Enhancer, SubstringMatcher};
@@ -30,8 +14,21 @@ use super::regex::Regex;
 use crate::shipped;
 use crate::validators::ValidatorRegistry;
 
-/// Runtime text recognizer composed of one regex pool and one
-/// Aho-Corasick automaton.
+/// Runtime text recognizer composed of a regex pool and an Aho-Corasick automaton.
+///
+/// Every registered [`Regex`] variant goes into one
+/// [`::regex::RegexSet`] for a single one-pass scan across every
+/// regex; every [`Dictionary`] term goes into one
+/// [`::aho_corasick::AhoCorasick`] automaton for a single one-pass
+/// scan across every literal. Both passes share one walk over the
+/// input and emit entities in modality-local byte coordinates.
+///
+/// Construct via [`PatternRecognizer::builder`]; the build wraps
+/// the recognizer in a [`Boosting`] layer that lifts confidence on
+/// matches whose neighbourhood contains a per-label context
+/// keyword harvested from the same rules.
+///
+/// # Examples
 ///
 /// ```
 /// use nvisy_pattern::PatternRecognizer;
@@ -42,6 +39,9 @@ use crate::validators::ValidatorRegistry;
 ///     .build()
 ///     .expect("built-in recognizer builds");
 /// ```
+///
+/// [`Regex`]: super::Regex
+/// [`Dictionary`]: super::Dictionary
 pub struct PatternRecognizer {
     patterns: Vec<CompiledPattern>,
     regex_set: Option<RegexSet>,
@@ -50,11 +50,10 @@ pub struct PatternRecognizer {
 }
 
 impl PatternRecognizer {
-    /// Start a builder. Required: at least one pattern or
-    /// dictionary; otherwise [`build`] succeeds with a recognizer
-    /// that always emits zero entities.
+    /// Start a chainable builder.
     ///
-    /// [`build`]: PatternRecognizerBuilder::build
+    /// A recognizer built with no patterns and no dictionaries is
+    /// valid — it emits zero entities on every call.
     #[must_use]
     pub fn builder() -> PatternRecognizerBuilder {
         PatternRecognizerBuilder::default()
@@ -67,9 +66,13 @@ impl PatternRecognizer {
     }
 }
 
-/// Accumulates patterns, dictionaries, and a validator registry,
-/// then compiles them into a [`PatternRecognizer`] wrapped in a
-/// [`Boosting`] layer.
+/// Accumulator of rules + validator registry for
+/// [`PatternRecognizer`].
+///
+/// Patterns and dictionaries are stored as authored — compilation
+/// into the pooled scanners happens in [`build`].
+///
+/// [`build`]: Self::build
 #[derive(Debug, Clone, Default)]
 pub struct PatternRecognizerBuilder {
     patterns: Vec<Regex>,
@@ -84,7 +87,9 @@ impl PatternRecognizerBuilder {
         Self::default()
     }
 
-    /// Pre-seed with the shipped built-in pattern + dictionary set.
+    /// Pre-seed with the shipped built-in patterns and
+    /// dictionaries.
+    ///
     /// Shorthand for
     /// `Self::new().with_builtin_patterns().with_builtin_dictionaries()`.
     #[must_use]
@@ -94,7 +99,7 @@ impl PatternRecognizerBuilder {
             .with_builtin_dictionaries()
     }
 
-    /// Register one pattern. Patterns accumulate in registration
+    /// Register one pattern; patterns accumulate in registration
     /// order.
     #[must_use]
     pub fn with_pattern(mut self, pattern: Regex) -> Self {
@@ -102,7 +107,7 @@ impl PatternRecognizerBuilder {
         self
     }
 
-    /// Register one dictionary. Dictionaries accumulate in
+    /// Register one dictionary; dictionaries accumulate in
     /// registration order.
     #[must_use]
     pub fn with_dictionary(mut self, dictionary: Dictionary) -> Self {
@@ -124,18 +129,22 @@ impl PatternRecognizerBuilder {
         self
     }
 
-    /// Override the validator registry. When unset, the built-in
-    /// registry ([`ValidatorRegistry::builtin`]) is used.
+    /// Override the validator registry used to resolve variant
+    /// validator names.
+    ///
+    /// Defaults to [`ValidatorRegistry::builtin`] when unset.
     #[must_use]
     pub fn with_validators(mut self, registry: ValidatorRegistry) -> Self {
         self.validators = Some(registry);
         self
     }
 
-    /// Drop every pattern and dictionary whose `label` is not
-    /// registered in `catalog`. Used to build a per-request
-    /// recognizer from a workspace-wide template — rules that
-    /// would emit labels no policy declared never run.
+    /// Drop every pattern and dictionary whose label is not
+    /// declared in `catalog`.
+    ///
+    /// The engine uses this to build a per-request recognizer from
+    /// a workspace-wide template — rules that would emit labels no
+    /// policy declared never run.
     #[must_use]
     pub fn filter_by_catalog(mut self, catalog: &EntityLabelCatalog) -> Self {
         self.patterns
@@ -145,10 +154,11 @@ impl PatternRecognizerBuilder {
         self
     }
 
-    /// `true` when the builder has no patterns and no
-    /// dictionaries. Engine code uses this to skip the
-    /// per-request recognizer entirely when the catalog filter
-    /// dropped every rule.
+    /// Return `true` when no patterns and no dictionaries are
+    /// registered.
+    ///
+    /// The engine uses this to skip the per-request recognizer
+    /// entirely after a catalog filter dropped every rule.
     #[must_use]
     pub fn is_empty(&self) -> bool {
         self.patterns.is_empty() && self.dictionaries.is_empty()
@@ -166,10 +176,12 @@ impl PatternRecognizerBuilder {
         &self.dictionaries
     }
 
-    /// Compile every registered pattern and dictionary into the
-    /// pooled scanners and wrap the recognizer in a [`Boosting`]
-    /// layer carrying per-label keyword boosts harvested from the
-    /// same set of rules.
+    /// Compile every rule into the pooled scanners and wrap the
+    /// recognizer in a [`Boosting`] layer.
+    ///
+    /// Context keywords from every pattern and dictionary are
+    /// harvested into per-label [`BoostRule`]s that lift confidence
+    /// on matches whose neighbourhood contains a declared keyword.
     ///
     /// # Errors
     ///
@@ -265,7 +277,7 @@ impl PatternRecognizerBuilder {
             }
             let term_start = all_terms.len();
             let mut term_scores = Vec::with_capacity(dict.terms.len());
-            for entry in dict.terms.entries() {
+            for entry in &dict.terms {
                 all_terms.push(entry.term.clone());
                 // Per-term `score` wins when set; otherwise ask
                 // the dictionary's `Scoring` to resolve against
@@ -407,13 +419,13 @@ mod tests {
 
     use super::*;
     use crate::Dictionary;
-    use crate::recognition::terms::Terms;
+    use crate::recognition::term::Term;
 
     fn dict(name: &str, terms: &[&str], word_boundary: bool) -> Dictionary {
         Dictionary::builder()
             .with_name(name.to_owned())
             .with_label(EntityLabelRef::from(builtins::LANGUAGE.name.clone()))
-            .with_terms(Terms::from(terms))
+            .with_terms(terms.iter().copied().map(Term::new).collect::<Vec<_>>())
             .with_word_boundary(word_boundary)
             .build()
             .expect("dictionary builds")
diff --git a/crates/nvisy-pattern/src/recognition/regex.rs b/crates/nvisy-pattern/src/recognition/regex.rs
index 0d762fcc..f084f4a0 100644
--- a/crates/nvisy-pattern/src/recognition/regex.rs
+++ b/crates/nvisy-pattern/src/recognition/regex.rs
@@ -1,12 +1,4 @@
-//! [`Regex`]: per-label regex-based detection rule.
-//!
-//! A `Regex` rule bundles one entity label, its context-keyword
-//! list, and one or more [`Variant`]s. Each variant carries its
-//! own regex source, emission score, and optional named
-//! validator. All variants under one rule emit the same label.
-//!
-//! Construct via [`Regex::builder`] for the chainable style or
-//! [`Regex::from_toml`] when loading a definition file.
+//! [`Regex`] rule and its [`Variant`]s.
 
 use derive_builder::Builder;
 use nvisy_core::Error;
@@ -14,74 +6,107 @@ use nvisy_core::entity::EntityLabelRef;
 use nvisy_core::primitive::{Confidence, LanguageTag};
 use serde::Deserialize;
 
-/// One regex variant inside a [`Regex`] rule. Carries the regex
-/// source, the emission confidence stamped on every match, and the
-/// optional validator name resolved at recognizer-build time.
-#[derive(Debug, Clone, PartialEq, Builder, Deserialize)]
-#[builder(
-    name = "VariantBuilder",
-    pattern = "owned",
-    setter(into, strip_option, prefix = "with"),
-    build_fn(error = "Error", validate = "VariantBuilder::validate")
-)]
+/// One regex strategy inside a [`Regex`] rule.
+///
+/// A variant pairs a regex source with the confidence stamped on
+/// every match it produces and, optionally, a validator name
+/// resolved against the [`ValidatorRegistry`] at recognizer-build
+/// time so structurally-suspect matches can be dropped.
+///
+/// [`ValidatorRegistry`]: crate::validators::ValidatorRegistry
+#[derive(Debug, Clone, PartialEq, Deserialize)]
 pub struct Variant {
     /// Regex source. Compiled to a [`::regex::Regex`] by
-    /// [`PatternRecognizer::build`]; shape errors there, not here.
+    /// [`PatternRecognizer::build`].
     ///
     /// [`PatternRecognizer::build`]: super::PatternRecognizer
     pub regex: String,
-    /// Confidence score stamped on every match this variant emits
-    /// before any post-recognition boost.
-    #[builder(default = "Confidence::MAX")]
+    /// Confidence stamped on every match, before any
+    /// post-recognition keyword boost.
+    #[serde(default = "default_score")]
     pub score: Confidence,
-    /// Optional validator name. Resolved at recognizer-build time
-    /// against the [`ValidatorRegistry`]; matches that fail
-    /// validation are dropped.
+    /// Validator name resolved against the [`ValidatorRegistry`].
+    /// Matches that fail validation are dropped.
     ///
     /// [`ValidatorRegistry`]: crate::validators::ValidatorRegistry
-    #[builder(default)]
     #[serde(default)]
     pub validator: Option<String>,
 }
 
 impl Variant {
-    /// Start a chainable builder. Required field: `regex`.
-    #[must_use]
-    pub fn builder() -> VariantBuilder {
-        VariantBuilder::default()
-    }
-}
-
-impl VariantBuilder {
-    fn validate(&self) -> Result<(), Error> {
-        if let Some(regex) = self.regex.as_ref()
-            && let Err(e) = ::regex::Regex::new(regex)
-        {
+    /// Construct a variant from a regex source.
+    ///
+    /// `score` defaults to [`Confidence::MAX`] and `validator` to
+    /// `None`; override with [`with_score`] / [`with_validator`].
+    ///
+    /// # Errors
+    ///
+    /// Returns a validation error when `regex` is not a valid
+    /// regular expression.
+    ///
+    /// [`with_score`]: Self::with_score
+    /// [`with_validator`]: Self::with_validator
+    pub fn new(regex: impl Into<String>) -> Result<Self, Error> {
+        let regex = regex.into();
+        if let Err(e) = ::regex::Regex::new(&regex) {
             return Err(Error::validation(
                 format!("invalid regex: {e}"),
                 "nvisy-pattern",
             ));
         }
-        Ok(())
+        Ok(Self {
+            regex,
+            score: Confidence::MAX,
+            validator: None,
+        })
+    }
+
+    /// Set the per-match confidence score.
+    #[must_use]
+    pub fn with_score(mut self, score: Confidence) -> Self {
+        self.score = score;
+        self
+    }
+
+    /// Set the validator name to run on every match.
+    ///
+    /// The name is resolved against the [`ValidatorRegistry`] when
+    /// the parent [`PatternRecognizer`] is built; unknown names
+    /// surface as a build-time error.
+    ///
+    /// [`ValidatorRegistry`]: crate::validators::ValidatorRegistry
+    /// [`PatternRecognizer`]: super::PatternRecognizer
+    #[must_use]
+    pub fn with_validator(mut self, name: impl Into<String>) -> Self {
+        self.validator = Some(name.into());
+        self
     }
 }
 
-/// Regex-based detection rule: one label, optional boost
-/// keywords, one or more [`Variant`]s. Matches the Presidio
-/// "pattern recognizer" shape — multiple regex strategies for one
-/// entity type, plus a shared context keyword list.
+fn default_score() -> Confidence {
+    Confidence::MAX
+}
+
+/// Regex detection rule: one label, optional keyword boosts, and
+/// one or more [`Variant`]s.
+///
+/// Mirrors the Presidio "pattern recognizer" shape — several regex
+/// strategies for one entity type, plus a shared context-keyword
+/// list. Every variant emits the same [`label`]; context keywords
+/// are harvested by [`PatternRecognizer`] into a wrapping boost
+/// layer and are never read by the rule itself.
+///
+/// # Examples
 ///
 /// ```
 /// use nvisy_core::entity::builtins;
 /// use nvisy_core::primitive::Confidence;
 /// use nvisy_pattern::{Regex, Variant};
 ///
-/// let variant = Variant::builder()
-///     .with_regex(r"\b\d{3}-\d{2}-\d{4}\b")
+/// let variant = Variant::new(r"\b\d{3}-\d{2}-\d{4}\b")
+///     .expect("ssn variant builds")
 ///     .with_score(Confidence::clamped(0.9))
-///     .with_validator("ssn")
-///     .build()
-///     .expect("ssn variant builds");
+///     .with_validator("ssn");
 ///
 /// let ssn = Regex::builder()
 ///     .with_name("ssn")
@@ -91,6 +116,9 @@ impl VariantBuilder {
 ///     .build()
 ///     .expect("ssn rule builds");
 /// ```
+///
+/// [`label`]: Regex::label
+/// [`PatternRecognizer`]: super::PatternRecognizer
 #[derive(Debug, Clone, PartialEq, Builder, Deserialize)]
 #[builder(
     name = "RegexBuilder",
@@ -99,44 +127,37 @@ impl VariantBuilder {
     build_fn(error = "Error")
 )]
 pub struct Regex {
-    /// Human-readable identifier (e.g. `"ssn"`, `"credit_card"`).
-    /// Surfaced in trail steps so downstream consumers can see
-    /// which rule matched.
+    /// Human-readable identifier surfaced in trail provenance (e.g.
+    /// `"ssn"`, `"credit_card"`).
     pub name: String,
     /// Entity label every variant emits.
     pub label: EntityLabelRef,
     /// Context keywords that lift confidence when one of them
-    /// appears near a match. Harvested by [`PatternRecognizer`]
-    /// into a per-label boost rule; rules themselves never read
-    /// this field.
-    ///
-    /// [`PatternRecognizer`]: super::PatternRecognizer
+    /// appears near a match.
     #[builder(default)]
     #[serde(default)]
     pub context: Vec<String>,
-    /// Regex variants. At least one is required for the rule to
-    /// produce any matches; the recognizer skips rules with no
-    /// variants.
+    /// Regex variants. At least one is required to produce matches;
+    /// the recognizer skips rules with an empty variant list.
     pub variants: Vec<Variant>,
-    /// Languages this rule applies to (BCP-47 tags). An empty
-    /// list (the default) means the rule applies regardless of
-    /// language; otherwise the recognizer skips this rule when
-    /// the per-call language hint is set to a tag not in this
-    /// list.
+    /// BCP-47 language tags the rule applies to. Empty means "any
+    /// language"; otherwise the recognizer skips the rule when the
+    /// per-call language hint is not in the list.
     #[builder(default)]
     #[serde(default)]
     pub languages: Vec<LanguageTag>,
 }
 
 impl Regex {
-    /// Start a chainable builder. Required fields: `name`,
-    /// `label`, `variants`.
+    /// Start a chainable builder.
+    ///
+    /// Required fields: `name`, `label`, `variants`.
     #[must_use]
     pub fn builder() -> RegexBuilder {
         RegexBuilder::default()
     }
 
-    /// Parse a regex rule from a TOML string.
+    /// Parse a rule from a TOML source.
     ///
     /// # Errors
     ///
diff --git a/crates/nvisy-pattern/src/recognition/term.rs b/crates/nvisy-pattern/src/recognition/term.rs
new file mode 100644
index 00000000..613dbe0e
--- /dev/null
+++ b/crates/nvisy-pattern/src/recognition/term.rs
@@ -0,0 +1,104 @@
+//! [`Term`]: one literal entry inside a [`Dictionary`].
+//!
+//! [`Dictionary`]: crate::Dictionary
+
+use nvisy_core::Error;
+use nvisy_core::primitive::Confidence;
+use serde::Deserialize;
+
+/// One literal scanned for by a [`Dictionary`].
+///
+/// The `column` field is `Some(i)` for CSV-loaded terms and `None`
+/// for plain-text or programmatic sources. The `score` field
+/// overrides the dictionary's [`Scoring`] for this single term
+/// when set — useful for one-off exceptions in an otherwise
+/// uniform list.
+///
+/// [`Dictionary`]: crate::Dictionary
+/// [`Scoring`]: crate::Scoring
+#[derive(Debug, Clone, PartialEq, Deserialize)]
+pub struct Term {
+    /// The literal scanned for.
+    pub term: String,
+    /// CSV source-column index when loaded via [`Term::from_csv`];
+    /// `None` otherwise.
+    #[serde(default)]
+    pub column: Option<u16>,
+    /// Per-term score override. When `Some`, the recognizer
+    /// stamps this score on every match; when `None`, falls back
+    /// to the dictionary's [`Scoring`] resolved against [`column`].
+    ///
+    /// [`Scoring`]: crate::Scoring
+    /// [`column`]: Self::column
+    #[serde(default)]
+    pub score: Option<Confidence>,
+}
+
+impl Term {
+    /// Construct a term with no column and no score override.
+    #[must_use]
+    pub fn new(term: impl Into<String>) -> Self {
+        Self {
+            term: term.into(),
+            column: None,
+            score: None,
+        }
+    }
+
+    /// Attach a CSV source-column index.
+    #[must_use]
+    pub fn with_column(mut self, column: u16) -> Self {
+        self.column = Some(column);
+        self
+    }
+
+    /// Set a per-term score override.
+    #[must_use]
+    pub fn with_score(mut self, score: Confidence) -> Self {
+        self.score = Some(score);
+        self
+    }
+
+    /// Parse a list of terms from plain text — one term per line.
+    ///
+    /// Lines are trimmed, then skipped when empty or starting with
+    /// `#`. Each term is built by [`Term::new`]: no column, no score.
+    pub fn from_text(raw: &str) -> Vec<Self> {
+        raw.lines()
+            .map(str::trim)
+            .filter(|line| !line.is_empty() && !line.starts_with('#'))
+            .map(Term::new)
+            .collect()
+    }
+
+    /// Parse a list of terms from CSV.
+    ///
+    /// Every non-empty trimmed cell becomes a term tagged with its
+    /// 0-based column index (capped at `u16::MAX`), which
+    /// [`Scoring::PerColumn`] uses to resolve per-column confidence.
+    ///
+    /// # Errors
+    ///
+    /// Returns a validation error when the CSV is malformed.
+    ///
+    /// [`Scoring::PerColumn`]: crate::Scoring::PerColumn
+    pub fn from_csv(raw: &str) -> Result<Vec<Self>, Error> {
+        let mut reader = csv::ReaderBuilder::new()
+            .has_headers(false) // the first row is data, not a header
+            .flexible(true) // rows may have differing cell counts
+            .from_reader(raw.as_bytes());
+        let mut entries = Vec::new();
+        for row in reader.records() {
+            let row =
+                row.map_err(|e| Error::validation(format!("terms CSV: {e}"), "nvisy-pattern"))?;
+            for (col_idx, cell) in row.iter().enumerate() {
+                let trimmed = cell.trim();
+                if !trimmed.is_empty() {
+                    let column = u16::try_from(col_idx).unwrap_or(u16::MAX);
+                    entries.push(Term::new(trimmed).with_column(column));
+                }
+            }
+        }
+        Ok(entries)
+    }
+}
diff --git a/crates/nvisy-pattern/src/recognition/terms.rs b/crates/nvisy-pattern/src/recognition/terms.rs
deleted file mode 100644
index d59ec141..00000000
--- a/crates/nvisy-pattern/src/recognition/terms.rs
+++ /dev/null
@@ -1,208 +0,0 @@
-//! [`Terms`]: a literal-string list, the term source for
-//! [`Dictionary`].
-//!
-//! [`Dictionary`]: crate::Dictionary
-//!
-//! A `Terms` value is the bag of literals the recognizer's
-//! Aho-Corasick automaton scans for. Construct it from any common
-//! shape:
-//!
-//! - [`Terms::from`] — `Vec<String>`, `&[&str]`, or `[&str; N]`
-//! - [`Terms::from_text`] — one term per line, trimmed, with
-//!   `#`-prefixed comments and blank lines skipped
-//! - [`Terms::from_csv`] — every non-empty cell across every row
-//!   becomes a term; each term remembers its source column index
-//!   so dictionaries can apply per-column confidence overrides
-
-use std::io::Cursor;
-
-use nvisy_core::Error;
-use nvisy_core::primitive::Confidence;
-use serde::Deserialize;
-
-/// Literal term list. Each [`Term`] carries an optional source
-/// column (set by [`Terms::from_csv`]) plus an optional per-term
-/// score override. The column index is the join key for
-/// [`Dictionary::scoring`] when it's [`Scoring::PerColumn`].
-///
-/// [`Dictionary::scoring`]: crate::Dictionary::scoring
-/// [`Scoring::PerColumn`]: crate::Scoring::PerColumn
-#[derive(Debug, Clone, PartialEq, Default, Deserialize)]
-#[serde(transparent)]
-pub struct Terms(Vec<Term>);
-
-/// One entry in a [`Terms`] list: the literal, the column it was
-/// loaded from (when applicable), and an optional explicit score
-/// that overrides the dictionary's [`Scoring`] policy for this
-/// term.
-///
-/// Per-term score is `None` for the common path — the dictionary's
-/// [`Scoring`] resolves the per-match score from the column.
-/// Set `score` only for one-off exceptions (e.g. a term known to
-/// be high-confidence even though its column is generally noisy).
-///
-/// Per-term column is `None` for non-CSV sources (plain text
-/// lists, the `From<Vec<String>>` / array impls). `Some(i)` flags
-/// a CSV cell from column `i`; the dictionary's
-/// [`Scoring::PerColumn`] uses it to pick the per-column score.
-///
-/// [`Scoring`]: crate::Scoring
-/// [`Scoring::PerColumn`]: crate::Scoring::PerColumn
-#[derive(Debug, Clone, PartialEq, Deserialize)]
-pub struct Term {
-    /// The literal scanned for.
-    pub term: String,
-    /// CSV column the term came from. `None` for non-CSV
-    /// sources; `Some(i)` for the cell at column `i` of a CSV.
-    #[serde(default)]
-    pub column: Option<u16>,
-    /// Optional per-term score override. When `Some`, the
-    /// recognizer stamps this score on every match of this term;
-    /// when `None`, falls back to the dictionary's [`Scoring`]
-    /// policy resolved against [`column`].
-    ///
-    /// [`Scoring`]: crate::Scoring
-    /// [`column`]: Self::column
-    #[serde(default)]
-    pub score: Option<Confidence>,
-}
-
-impl Term {
-    /// Construct a term with no column and no per-term score
-    /// override. The common path for plain-text sources and
-    /// programmatic `From<…>` constructions.
-    #[must_use]
-    pub fn new(term: impl Into<String>) -> Self {
-        Self {
-            term: term.into(),
-            column: None,
-            score: None,
-        }
-    }
-
-    /// Attach a CSV source-column index, used by the dictionary's
-    /// [`Scoring::PerColumn`] to pick a per-column score.
-    ///
-    /// [`Scoring::PerColumn`]: crate::Scoring::PerColumn
-    #[must_use]
-    pub fn with_column(mut self, column: u16) -> Self {
-        self.column = Some(column);
-        self
-    }
-
-    /// Set an explicit per-term score, overriding the dictionary's
-    /// column-resolved score for this term.
-    #[must_use]
-    pub fn with_score(mut self, score: Confidence) -> Self {
-        self.score = Some(score);
-        self
-    }
-}
-
-impl Terms {
-    /// Construct an empty term list.
-    #[must_use]
-    pub fn new() -> Self {
-        Self(Vec::new())
-    }
-
-    /// Borrow the inner entries.
-    #[must_use]
-    pub fn entries(&self) -> &[Term] {
-        &self.0
-    }
-
-    /// Number of terms.
-    #[must_use]
-    pub fn len(&self) -> usize {
-        self.0.len()
-    }
-
-    /// Whether this list contains no terms.
-    #[must_use]
-    pub fn is_empty(&self) -> bool {
-        self.0.is_empty()
-    }
-
-    /// Consume into the inner entries.
-    #[must_use]
-    pub fn into_inner(self) -> Vec<Term> {
-        self.0
-    }
-
-    /// Parse terms from plain-text bytes — one term per line.
-    /// Each line is trimmed; empty lines and lines starting with
-    /// `#` are skipped. Plain-text terms carry no column.
-    ///
-    /// # Errors
-    ///
-    /// Returns a validation error when the input is not valid
-    /// UTF-8.
-    pub fn from_text(bytes: &[u8]) -> Result<Self, Error> {
-        let text = std::str::from_utf8(bytes)
-            .map_err(|e| Error::validation(format!("terms text: {e}"), "nvisy-pattern"))?;
-        let entries: Vec<Term> = text
-            .lines()
-            .map(str::trim)
-            .filter(|line| !line.is_empty() && !line.starts_with('#'))
-            .map(Term::new)
-            .collect();
-        Ok(Self(entries))
-    }
-
-    /// Parse terms from CSV bytes. Every non-empty cell across
-    /// every row becomes a term, and each term remembers the
-    /// (0-based) column index it came from so a [`Dictionary`]
-    /// can apply per-column confidence overrides via
-    /// [`Scoring::PerColumn`].
-    ///
-    /// # Errors
-    ///
-    /// Returns a validation error when the CSV is malformed.
-    ///
-    /// [`Dictionary`]: crate::Dictionary
-    /// [`Scoring::PerColumn`]: crate::Scoring::PerColumn
-    pub fn from_csv(bytes: &[u8]) -> Result<Self, Error> {
-        let mut reader = csv::ReaderBuilder::new()
-            .has_headers(false)
-            .flexible(true)
-            .from_reader(Cursor::new(bytes));
-        let mut entries = Vec::new();
-        for row in reader.records() {
-            let row =
-                row.map_err(|e| Error::validation(format!("terms CSV: {e}"), "nvisy-pattern"))?;
-            for (col_idx, cell) in row.iter().enumerate() {
-                let trimmed = cell.trim();
-                if !trimmed.is_empty() {
-                    let column = u16::try_from(col_idx).unwrap_or(u16::MAX);
-                    entries.push(Term::new(trimmed).with_column(column));
-                }
-            }
-        }
-        Ok(Self(entries))
-    }
-}
-
-impl From<Vec<String>> for Terms {
-    fn from(terms: Vec<String>) -> Self {
-        Self(terms.into_iter().map(Term::new).collect())
-    }
-}
-
-impl From<&[&str]> for Terms {
-    fn from(terms: &[&str]) -> Self {
-        Self(terms.iter().copied().map(Term::new).collect())
-    }
-}
-
-impl<const N: usize> From<[&str; N]> for Terms {
-    fn from(terms: [&str; N]) -> Self {
-        Self(terms.iter().copied().map(Term::new).collect())
-    }
-}
-
-impl<const N: usize> From<[String; N]> for Terms {
-    fn from(terms: [String; N]) -> Self {
-        Self(terms.into_iter().map(Term::new).collect())
-    }
-}
diff --git a/crates/nvisy-pattern/src/shipped/dictionaries.rs b/crates/nvisy-pattern/src/shipped/dictionaries.rs
index 00c1e504..dac8baa8 100644
--- a/crates/nvisy-pattern/src/shipped/dictionaries.rs
+++ b/crates/nvisy-pattern/src/shipped/dictionaries.rs
@@ -1,21 +1,21 @@
 //! Built-in [`Dictionary`]s, embedded at compile time.
 //!
 //! Each accessor pairs a TOML metadata sidecar
-//! (`assets/dictionaries/**/*.toml`) with a term source
-//! (`*.csv` for multi-column term lists, `*.txt` for one-per-line),
-//! merging them via [`Dictionary::metadata_from_toml`] +
-//! [`Terms::from_csv`] / [`Terms::from_text`].
+//! (`assets/dictionaries/**/*.toml`) with a term source (`*.csv`
+//! for multi-column term lists, `*.txt` for one-per-line), merging
+//! them via [`Dictionary::metadata_from_toml`] + [`Term::from_csv`]
+//! / [`Term::from_text`].
 //!
 //! [`Dictionary`]: crate::Dictionary
 
-use crate::recognition::{Dictionary, Terms};
+use crate::recognition::{Dictionary, Term};
 
 macro_rules! shipped_dictionary {
     ($(#[$meta:meta])* fn $name:ident from $meta_path:literal with csv $terms:literal) => {
         $(#[$meta])*
         #[must_use]
         pub fn $name() -> Dictionary {
-            let terms = Terms::from_csv(include_bytes!(concat!(
+            let terms = Term::from_csv(include_str!(concat!(
                 "../../assets/dictionaries/",
                 $terms
             )))
@@ -34,11 +34,10 @@ macro_rules! shipped_dictionary {
         $(#[$meta])*
         #[must_use]
         pub fn $name() -> Dictionary {
-            let terms = Terms::from_text(include_bytes!(concat!(
+            let terms = Term::from_text(include_str!(concat!(
                 "../../assets/dictionaries/",
                 $terms
-            )))
-            .expect(concat!("shipped term source `", $terms, "` parses"));
+            )));
             Dictionary::metadata_from_toml(include_str!(concat!(
                 "../../assets/dictionaries/",
                 $meta_path
diff --git a/crates/nvisy-pattern/src/shipped/mod.rs b/crates/nvisy-pattern/src/shipped/mod.rs
index 062acea8..db9c7fc5 100644
--- a/crates/nvisy-pattern/src/shipped/mod.rs
+++ b/crates/nvisy-pattern/src/shipped/mod.rs
@@ -1,14 +1,11 @@
 //! Built-in [`Regex`] rules and [`Dictionary`]s shipped with this
 //! crate.
 //!
-//! Each accessor parses an asset file embedded via
-//! [`include_str!`] and returns a fresh [`Regex`] or
-//! [`Dictionary`]. Metadata for dictionaries (entity label, score,
-//! context) is split into a TOML sidecar paired with a CSV / TXT
-//! term source; regex rules are self-contained TOML.
-//!
-//! Use [`patterns::all`] and [`dictionaries::all`] to load the
-//! complete shipped set, or pick individual accessors.
+//! Each accessor parses an asset embedded via [`include_str!`] and
+//! returns a fresh value. Dictionaries split metadata into a TOML
+//! sidecar paired with a CSV/TXT term source; regex rules are
+//! self-contained TOML. Call [`patterns::all`] / [`dictionaries::all`]
+//! to load the full set, or pick individual accessors.
 //!
 //! [`Regex`]: crate::Regex
 //! [`Dictionary`]: crate::Dictionary
diff --git a/crates/nvisy-pattern/src/validators/date.rs b/crates/nvisy-pattern/src/validators/date.rs
index bcf30246..7d35d0d1 100644
--- a/crates/nvisy-pattern/src/validators/date.rs
+++ b/crates/nvisy-pattern/src/validators/date.rs
@@ -1,22 +1,18 @@
-//! Date structural validation.
-//!
-//! Validates that a regex-matched date string represents a real calendar
-//! date. Supports multiple common formats.
+//! Calendar-date structural validator.
 
 /// Return `true` if `value` is a real calendar date in one of the
 /// supported written formats.
 ///
-/// Supported: `MM/DD/YYYY`, `DD/MM/YYYY`, `YYYY-MM-DD`, `YYYY/MM/DD`
-/// (with `/` or `-` separators). Leap years are honoured and the
-/// year must fall in `1900..=2100`.
+/// Supported formats are `MM/DD/YYYY`, `DD/MM/YYYY`, `YYYY-MM-DD`,
+/// and `YYYY/MM/DD`, with `/` or `-` as separators. Leap years
+/// are honoured and the year must fall in `1900..=2100`.
 ///
 /// # Ambiguity
 ///
-/// When both interpretations are valid (e.g. `02/03/1999` could mean
-/// Feb 3 or 3 Mar), the validator prefers `MM/DD/YYYY` and only falls
-/// back to `DD/MM/YYYY` if the first part is not a valid month. This
-/// is a format-level structural check — locale disambiguation is out
-/// of scope.
+/// When both interpretations are valid (e.g. `02/03/1999` could
+/// mean Feb 3 or 3 Mar), the validator prefers `MM/DD/YYYY` and
+/// only falls back to `DD/MM/YYYY` when the first part is not a
+/// valid month. Locale disambiguation is out of scope.
 pub fn date(value: &str) -> bool {
     let parts: Vec<&str> = value.split(['/', '-']).collect();
     if parts.len() != 3 {
diff --git a/crates/nvisy-pattern/src/validators/iban.rs b/crates/nvisy-pattern/src/validators/iban.rs
index 0df4a542..15888d7f 100644
--- a/crates/nvisy-pattern/src/validators/iban.rs
+++ b/crates/nvisy-pattern/src/validators/iban.rs
@@ -1,12 +1,12 @@
-//! IBAN checksum validator (ISO 13616).
-//!
-//! Rearranges the IBAN so the country code and check digits move to the
-//! end, converts letters to numbers (A=10 … Z=35), and verifies that
-//! the resulting number mod 97 equals 1.
+//! ISO 13616 IBAN checksum validator.
 
-/// Return `true` if `value` passes the ISO 13616 mod-97 IBAN check.
+/// Return `true` if `value` passes the ISO 13616 mod-97 IBAN
+/// checksum.
 ///
-/// Whitespace and dashes are stripped before validation.
+/// Whitespace and dashes are stripped before validation. The
+/// country code and check digits are moved to the end, letters
+/// are converted to numbers (`A`=10 … `Z`=35), and the result is
+/// accepted when `mod 97 == 1`.
 pub fn iban(value: &str) -> bool {
     let cleaned: String = value
         .chars()
diff --git a/crates/nvisy-pattern/src/validators/luhn.rs b/crates/nvisy-pattern/src/validators/luhn.rs
index 40bb5bc0..878728ac 100644
--- a/crates/nvisy-pattern/src/validators/luhn.rs
+++ b/crates/nvisy-pattern/src/validators/luhn.rs
@@ -1,20 +1,16 @@
-//! Luhn checksum validator.
-//!
-//! Implements the [Luhn algorithm] used to validate credit/debit card
-//! numbers and other identification numbers. Only digits, spaces, and
-//! dashes are accepted as input: any other character causes the check
-//! to fail.
-//!
-//! [Luhn algorithm]: https://en.wikipedia.org/wiki/Luhn_algorithm
+//! Luhn checksum validator for credit-card and similar identifier
+//! numbers.
 
-/// Return `true` if `num` passes the Luhn checksum.
+/// Return `true` if `num` passes the [Luhn algorithm] checksum.
 ///
 /// Spaces and dashes are stripped before validation, so
 /// `"4539 1488 0343 6467"`, `"4539-1488-0343-6467"`, and
-/// `"4539148803436467"` are all equivalent.
+/// `"4539148803436467"` are equivalent inputs.
 ///
-/// Returns `false` if the input is empty or contains characters other
-/// than digits, spaces, and dashes.
+/// Returns `false` when the input is empty or contains any
+/// character other than digits, spaces, and dashes.
+///
+/// [Luhn algorithm]: https://en.wikipedia.org/wiki/Luhn_algorithm
 pub fn luhn(num: &str) -> bool {
     if num.is_empty() {
         return false;
diff --git a/crates/nvisy-pattern/src/validators/mod.rs b/crates/nvisy-pattern/src/validators/mod.rs
index f384b762..46395652 100644
--- a/crates/nvisy-pattern/src/validators/mod.rs
+++ b/crates/nvisy-pattern/src/validators/mod.rs
@@ -1,20 +1,19 @@
-//! Post-match validators for detected entity values.
+//! Post-match validators for regex-detected entity values.
 //!
-//! A [`Variant`] inside a [`Regex`] rule can reference a validator
-//! by name (e.g. `validator: Some("luhn")`) to reduce false
-//! positives. At [`PatternRecognizer::build`] time the name is
-//! resolved against a [`ValidatorRegistry`] to a concrete
-//! validation function.
+//! A [`Variant`] inside a [`Regex`] rule may name a validator
+//! (e.g. `validator: Some("luhn")`); the recognizer resolves the
+//! name against a [`ValidatorRegistry`] at build time and drops
+//! matches that fail the resolved check. Use validators to weed
+//! out structurally suspect false positives that a regex alone
+//! can't reject.
 //!
-//! The default [`ValidatorRegistry::builtin`] ships with five
-//! validators — `luhn`, `iban`, `ssn`, `phone`, `date`. Consumers
-//! can extend the registry with their own validators by calling
-//! [`ValidatorRegistry::with`] before handing it to the recognizer
-//! builder.
+//! [`ValidatorRegistry::builtin`] ships with [`luhn`], [`iban`],
+//! [`ssn`], [`phone`], and [`date`]. Each validator is also
+//! re-exported as a free function so consumers can compose a
+//! custom registry without taking the full set.
 //!
 //! [`Variant`]: crate::Variant
 //! [`Regex`]: crate::Regex
-//! [`PatternRecognizer::build`]: crate::PatternRecognizer
 
 mod date;
 mod iban;
@@ -22,25 +21,25 @@ mod luhn;
 mod phone;
 mod ssn;
 
+use std::borrow::Cow;
+use std::collections::HashMap;
+use std::sync::Arc;
+
 pub use self::date::date;
 pub use self::iban::iban;
 pub use self::luhn::luhn;
 pub use self::phone::phone;
 pub use self::ssn::ssn;
 
-use std::borrow::Cow;
-use std::collections::HashMap;
-use std::sync::Arc;
-
-/// Post-match validator: returns `true` when `matched` passes the
-/// validator's check.
+/// Post-match validator returning whether a matched string is
+/// structurally valid.
 ///
-/// Implemented by both built-in function-pointer validators (via the
-/// blanket impl) and any third-party validator types a consumer
-/// registers.
+/// Implemented by every `Fn(&str) -> bool + Send + Sync` via the
+/// blanket impl, so plain function pointers slot in without a
+/// wrapper type. Implement directly for types that need to carry
+/// state (e.g. a remote-lookup client).
 pub trait Validator: Send + Sync {
-    /// Validate the text the recognizer matched. Returns `true` to
-    /// keep the match, `false` to drop it.
+    /// Return `true` to keep the match, `false` to drop it.
     fn validate(&self, matched: &str) -> bool;
 }
 
@@ -53,33 +52,30 @@ where
     }
 }
 
-/// Resolves validator names referenced in [`Variant`] definitions
-/// to concrete [`Validator`] implementations.
-///
-/// Keys are [`Cow<'static, str>`] so the built-in registrations skip
-/// any allocation (`&'static str` literal → borrowed variant) while
-/// caller-supplied names that aren't `'static` (e.g. dynamically
-/// constructed at runtime) still flow through as owned `String`s.
+/// Name → validator resolver consulted at recognizer-build time.
 ///
-/// [`Variant`]: crate::Variant
+/// Keys are [`Cow<'static, str>`] so a `&'static str` literal stays
+/// borrowed while a runtime-built name flows through as an owned
+/// `String`.
 #[derive(Clone, Default)]
 pub struct ValidatorRegistry {
     table: HashMap<Cow<'static, str>, Arc<dyn Validator>>,
 }
 
 impl ValidatorRegistry {
-    /// Empty registry — no validators registered. Regex rules that
-    /// reference a validator name will fail to resolve at recognizer
-    /// build time.
+    /// Construct an empty registry.
+    ///
+    /// Any [`Variant`] referencing a validator name will fail to
+    /// resolve at recognizer-build time.
+    ///
+    /// [`Variant`]: crate::Variant
     #[must_use]
     pub fn empty() -> Self {
         Self::default()
     }
 
-    /// Registry pre-loaded with every built-in validator: [`luhn`],
-    /// [`iban`], [`ssn`], [`phone`], [`date`]. Each is also
-    /// re-exported individually from this module so consumers can
-    /// mix-and-match without taking all five.
+    /// Construct a registry pre-loaded with the built-in
+    /// validators: [`luhn`], [`iban`], [`ssn`], [`phone`], [`date`].
     #[must_use]
     pub fn builtin() -> Self {
         Self::empty()
@@ -90,16 +86,11 @@ impl ValidatorRegistry {
             .with("date", date)
     }
 
-    /// Register `validator` under `name`. Overwrites any previous
-    /// entry with the same name.
-    ///
-    /// Built-ins live under `"luhn"`, `"iban"`, `"ssn"`, `"phone"`,
-    /// and `"date"`; consumers can override them with their own
-    /// implementations by registering under the same name.
+    /// Register `validator` under `name`, overwriting any previous
+    /// entry with the same key.
     ///
-    /// `name` accepts anything convertible to [`Cow<'static, str>`]
-    /// — a `&'static str` literal stays borrowed (zero allocation),
-    /// an owned `String` becomes the owned variant.
+    /// Override a built-in by registering under the same name
+    /// (e.g. `"luhn"`).
     #[must_use]
     pub fn with<N, V>(mut self, name: N, validator: V) -> Self
     where
@@ -110,8 +101,11 @@ impl ValidatorRegistry {
         self
     }
 
-    /// Look up a validator by name, returning the registered
-    /// implementation or `None` when the name is unknown.
+    /// Look up a validator by name.
+    ///
+    /// Returns `None` when the name is unregistered; the
+    /// recognizer's build step surfaces that as a configuration
+    /// error.
     #[must_use]
     pub fn resolve(&self, name: &str) -> Option<Arc<dyn Validator>> {
         self.table.get(name).cloned()
diff --git a/crates/nvisy-pattern/src/validators/phone.rs b/crates/nvisy-pattern/src/validators/phone.rs
index d503ba1f..51d4cdd6 100644
--- a/crates/nvisy-pattern/src/validators/phone.rs
+++ b/crates/nvisy-pattern/src/validators/phone.rs
@@ -1,17 +1,14 @@
-//! Phone number structural validation.
-//!
-//! Validates that a regex-matched phone number has a plausible structure:
-//! correct digit count and no obviously invalid prefixes.
+//! Phone-number structural validator.
 
 /// Return `true` if `value` has a plausible phone-number structure.
 ///
-/// Strips all non-digit characters, then checks:
+/// Strips all non-digit characters, then checks:
 ///
-/// - 7 to 15 digits (ITU-T E.164 range)
-/// - When the original begins with `+` (explicit E.164), the digits
-///   must not start with 0 (no country code is `0…`). National formats
-///   such as UK `020 7946 0958` keep their trunk-prefix zero and remain
-///   valid.
+/// - 7 to 15 digits (the ITU-T E.164 range).
+/// - When the original begins with `+` (explicit E.164), the
+///   digits must not start with `0` — no country code is `0…`.
+///   National formats such as UK `020 7946 0958` keep their
+///   trunk-prefix zero and remain valid.
 pub fn phone(value: &str) -> bool {
     let digits: String = value.chars().filter(|c| c.is_ascii_digit()).collect();
     let len = digits.len();
diff --git a/crates/nvisy-pattern/src/validators/ssn.rs b/crates/nvisy-pattern/src/validators/ssn.rs
index 46258064..223408eb 100644
--- a/crates/nvisy-pattern/src/validators/ssn.rs
+++ b/crates/nvisy-pattern/src/validators/ssn.rs
@@ -1,15 +1,16 @@
 //! US Social Security Number format validator.
-//!
-//! Validates the `AAA-GG-SSSS` format where:
-//!
-//! - **Area** (AAA): 001–899, excluding 666.
-//! - **Group** (GG): 01–99.
-//! - **Serial** (SSSS): 0001–9999.
 
-/// Return `true` if `value` is a structurally valid US SSN in `AAA-GG-SSSS`
-/// format.
+/// Return `true` if `value` is a structurally valid US SSN in
+/// `AAA-GG-SSSS` format.
 ///
-/// This is a format check, not a verification against SSA records.
+/// Validates the three parts as:
+///
+/// - **Area** (`AAA`): 001–899, excluding 666.
+/// - **Group** (`GG`): 01–99.
+/// - **Serial** (`SSSS`): 0001–9999.
+///
+/// This is a format check only — not a verification against SSA
+/// records.
 pub fn ssn(value: &str) -> bool {
     let parts: Vec<&str> = value.split('-').collect();
     if parts.len() != 3 {
diff --git a/crates/nvisy-pattern/tests/enhancer_roundtrip.rs b/crates/nvisy-pattern/tests/enhancer_roundtrip.rs
index 6c1fca87..12d3c7f2 100644
--- a/crates/nvisy-pattern/tests/enhancer_roundtrip.rs
+++ b/crates/nvisy-pattern/tests/enhancer_roundtrip.rs
@@ -10,15 +10,13 @@ use nvisy_core::entity::{TrailStepKind, builtins};
 use nvisy_core::modality::TextData;
 use nvisy_core::primitive::Confidence;
 use nvisy_core::recognition::{EntityRecognizer, RecognizerInput};
-use nvisy_pattern::{Regex, PatternRecognizer, Variant};
+use nvisy_pattern::{PatternRecognizer, Regex, Variant};
 
 #[tokio::test]
 async fn enhancer_boosts_matches_near_keyword_only() {
-    let variant = Variant::builder()
-        .with_regex(r"\b\d{3}-\d{2}-\d{4}\b")
-        .with_score(Confidence::clamped(0.6))
-        .build()
-        .expect("ssn variant builds");
+    let variant = Variant::new(r"\b\d{3}-\d{2}-\d{4}\b")
+        .expect("ssn variant builds")
+        .with_score(Confidence::clamped(0.6));
     let regex = Regex::builder()
         .with_name("ssn")
         .with_label(builtins::GOVERNMENT_ID.label_ref())
diff --git a/crates/nvisy-pattern/tests/user_rules.rs b/crates/nvisy-pattern/tests/user_rules.rs
index 38dcee10..589e6464 100644
--- a/crates/nvisy-pattern/tests/user_rules.rs
+++ b/crates/nvisy-pattern/tests/user_rules.rs
@@ -2,14 +2,14 @@
 //! shape (`testdata/patterns/*.toml`,
 //! `testdata/dictionaries/*.{toml,csv}`) through
 //! [`Regex::from_toml`], [`Dictionary::metadata_from_toml`], and
-//! [`Terms::from_csv`], mix them with shipped patterns, and
+//! [`Term::from_csv`], mix them with shipped patterns, and
 //! confirm a real internal-handoff document yields the custom
 //! entities.
 
 use nvisy_core::entity::builtins;
 use nvisy_core::modality::TextData;
 use nvisy_core::recognition::{EntityRecognizer, RecognizerInput};
-use nvisy_pattern::{Dictionary, Regex, PatternRecognizer, Terms};
+use nvisy_pattern::{Dictionary, PatternRecognizer, Regex, Term};
 
 #[tokio::test]
 async fn user_toml_rules_load_and_detect() {
@@ -19,7 +19,7 @@ async fn user_toml_rules_load_and_detect() {
         Regex::from_toml(include_str!("../testdata/patterns/product_codes.toml"))
             .expect("product_codes.toml parses");
 
-    let terms = Terms::from_csv(include_bytes!("../testdata/dictionaries/product_codes.csv"))
+    let terms = Term::from_csv(include_str!("../testdata/dictionaries/product_codes.csv"))
         .expect("product_codes.csv parses");
     let product_code_dict =
         Dictionary::metadata_from_toml(include_str!("../testdata/dictionaries/product_codes.toml"))

From ae12d261c9265a3b0b9373e1421aa46f51ce8d48 Mon Sep 17 00:00:00 2001
From: Oleh Martsokha <o.martsokha@gmail.com>
Date: Sun, 14 Jun 2026 23:40:16 +0200
Subject: [PATCH 06/14] =?UTF-8?q?refactor(pattern,context):=20rename=20Boo?=
 =?UTF-8?q?sting=E2=86=92ContextEnhanced,=20split=20build()?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- nvisy-context: `Boosting<R>` → `ContextEnhanced<R>` (more
  self-descriptive; reads as "an R that's been context-enhanced").
- nvisy-pattern: `PatternRecognizerBuilder::build()` now returns
  the bare `PatternRecognizer`; the wrapped form moves to
  `build_context_enhanced() -> ContextEnhanced<PatternRecognizer>`.
  Callers opt into the keyword-boost layer explicitly.
- Engine config, shipped-detection / user-rules / enhancer
  roundtrip tests, toolkit fixtures + example flipped to
  `build_context_enhanced()` to preserve prior behavior.
- README + module/struct docs rewritten to describe both methods
  without historical framing.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
---
 crates/nvisy-context/src/lib.rs               |  2 +-
 crates/nvisy-context/src/wrapper.rs           | 10 ++--
 .../nvisy-engine/src/detection/config/mod.rs  |  2 +-
 crates/nvisy-pattern/README.md                |  9 ++--
 .../nvisy-pattern/src/recognition/compiled.rs |  2 +-
 crates/nvisy-pattern/src/recognition/mod.rs   | 13 ++---
 .../src/recognition/recognizer.rs             | 53 +++++++++++++------
 .../nvisy-pattern/tests/enhancer_roundtrip.rs |  8 +--
 .../nvisy-pattern/tests/shipped_detection.rs  |  2 +-
 crates/nvisy-pattern/tests/user_rules.rs      |  2 +-
 crates/nvisy-toolkit/Cargo.toml               |  2 +-
 crates/nvisy-toolkit/examples/pipeline.rs     |  2 +-
 .../tests/fixtures/registries.rs              |  8 +--
 .../tests/recognition_registry.rs             |  2 +-
 14 files changed, 71 insertions(+), 46 deletions(-)

diff --git a/crates/nvisy-context/src/lib.rs b/crates/nvisy-context/src/lib.rs
index 192796f2..244e113c 100644
--- a/crates/nvisy-context/src/lib.rs
+++ b/crates/nvisy-context/src/lib.rs
@@ -12,4 +12,4 @@ pub use self::enhancer::Enhancer;
 pub use self::matcher::{KeywordMatcher, LemmaMatcher, SubstringMatcher};
 pub use self::rule::{BoostRule, DEFAULT_BOOST, DEFAULT_PREFIX_WORDS, DEFAULT_SUFFIX_WORDS};
 pub use self::tokens::{Token, Tokens};
-pub use self::wrapper::Boosting;
+pub use self::wrapper::ContextEnhanced;
diff --git a/crates/nvisy-context/src/wrapper.rs b/crates/nvisy-context/src/wrapper.rs
index 87105b19..c4128f0f 100644
--- a/crates/nvisy-context/src/wrapper.rs
+++ b/crates/nvisy-context/src/wrapper.rs
@@ -1,4 +1,4 @@
-//! [`Boosting`]: post-recognition keyword-boost wrapper for any
+//! [`ContextEnhanced`]: post-recognition keyword-boost wrapper for any
 //! [`EntityRecognizer<Text>`].
 //!
 //! Composes an inner recognizer with an [`Enhancer`]: the wrapper
@@ -12,7 +12,7 @@
 //! ```ignore
 //! let inner = MyRecognizer::new(...);
 //! let enhancer = Enhancer::new(rules, Box::new(SubstringMatcher));
-//! let recognizer = Boosting::new(inner, enhancer);
+//! let recognizer = ContextEnhanced::new(inner, enhancer);
 //! ```
 //!
 //! The wrapper implements [`EntityRecognizer<Text>`] so the engine
@@ -34,12 +34,12 @@ use super::{Enhancer, Tokens};
 /// same `&str` for the keyword-window walk; a recognizer that
 /// emitted entities relative to a different coordinate space
 /// would surface stale or panic-on-slice offsets.
-pub struct Boosting<R> {
+pub struct ContextEnhanced<R> {
     inner: R,
     enhancer: Enhancer,
 }
 
-impl<R> Boosting<R> {
+impl<R> ContextEnhanced<R> {
     /// Wrap `inner` with `enhancer`. After `recognize` produces
     /// entities, `enhancer` runs over them in place.
     pub fn new(inner: R, enhancer: Enhancer) -> Self {
@@ -59,7 +59,7 @@ impl<R> Boosting<R> {
 }
 
 #[async_trait::async_trait]
-impl<R> EntityRecognizer<Text> for Boosting<R>
+impl<R> EntityRecognizer<Text> for ContextEnhanced<R>
 where
     R: EntityRecognizer<Text> + 'static,
 {
diff --git a/crates/nvisy-engine/src/detection/config/mod.rs b/crates/nvisy-engine/src/detection/config/mod.rs
index 3f74c9ac..ec937436 100644
--- a/crates/nvisy-engine/src/detection/config/mod.rs
+++ b/crates/nvisy-engine/src/detection/config/mod.rs
@@ -86,7 +86,7 @@ impl DetectionConfig {
                 .with_builtin_dictionaries()
                 .filter_by_catalog(catalog);
             if !builder.is_empty() {
-                reg = reg.with_recognizer::<Text>(builder.build()?);
+                reg = reg.with_recognizer::<Text>(builder.build_context_enhanced()?);
             }
         }
 
diff --git a/crates/nvisy-pattern/README.md b/crates/nvisy-pattern/README.md
index 7d299119..f39a99e2 100644
--- a/crates/nvisy-pattern/README.md
+++ b/crates/nvisy-pattern/README.md
@@ -15,10 +15,11 @@ shared `regex::RegexSet` for the regex side and one shared
 single walk over the input runs both scanners and emits
 `Entity<Text>` values in modality-local byte coordinates.
 
-Each rule may declare per-label context keywords; the recognizer
-wraps itself in a `nvisy_context::Boosting` layer at build time
-that lifts confidence on matches whose neighbourhood contains a
-declared keyword.
+Rules may declare per-label context keywords. Calling
+`build_context_enhanced()` wraps the recognizer in a
+`nvisy_context::ContextEnhanced` layer that lifts confidence on
+matches whose neighbourhood contains a declared keyword;
+`build()` returns the bare recognizer.
 
 The built-in pattern + dictionary set lives as TOML under
 `assets/` and is embedded at compile time. The recognizer's
diff --git a/crates/nvisy-pattern/src/recognition/compiled.rs b/crates/nvisy-pattern/src/recognition/compiled.rs
index 1283025f..d1a61556 100644
--- a/crates/nvisy-pattern/src/recognition/compiled.rs
+++ b/crates/nvisy-pattern/src/recognition/compiled.rs
@@ -32,7 +32,7 @@ use crate::validators::Validator;
 /// indirection.
 ///
 /// `context` is intentionally not stored on compiled state — the
-/// recognizer's wrapping `Boosting` layer harvests keywords from
+/// recognizer's wrapping `ContextEnhanced` layer harvests keywords from
 /// the source patterns at build time.
 pub(super) struct CompiledPattern {
     /// Pattern name (e.g. `"ssn"`). Surfaced in trail provenance.
diff --git a/crates/nvisy-pattern/src/recognition/mod.rs b/crates/nvisy-pattern/src/recognition/mod.rs
index e55bbb39..e9be1ed0 100644
--- a/crates/nvisy-pattern/src/recognition/mod.rs
+++ b/crates/nvisy-pattern/src/recognition/mod.rs
@@ -1,11 +1,12 @@
 //! Recognition primitives.
 //!
-//! Holds the rule shapes ([`Regex`] + its [`Variant`]s, [`Dictionary`]),
-//! their building blocks ([`Terms`]), and the runtime
-//! [`PatternRecognizer`] that compiles them into pooled scanners.
-//! Per-rule and per-dictionary `context` keyword lists are harvested
-//! by the recognizer at build time into a wrapping `Boosting` layer
-//! that lifts confidence on matches near a declared keyword.
+//! Holds the rule shapes ([`Regex`] + its [`Variant`]s,
+//! [`Dictionary`]), their building blocks ([`Term`]), and the
+//! runtime [`PatternRecognizer`] that compiles them into pooled
+//! scanners. Per-rule and per-dictionary `context` keyword lists
+//! are harvested by [`PatternRecognizerBuilder::build_context_enhanced`]
+//! into a wrapping `ContextEnhanced` layer that lifts confidence
+//! on matches near a declared keyword.
 
 mod compiled;
 mod dictionary;
diff --git a/crates/nvisy-pattern/src/recognition/recognizer.rs b/crates/nvisy-pattern/src/recognition/recognizer.rs
index fd133fdf..f4ad3db9 100644
--- a/crates/nvisy-pattern/src/recognition/recognizer.rs
+++ b/crates/nvisy-pattern/src/recognition/recognizer.rs
@@ -1,7 +1,7 @@
 //! [`PatternRecognizer`] and its builder.
 
 use aho_corasick::{AhoCorasick, MatchKind};
-use nvisy_context::{BoostRule, Boosting, Enhancer, SubstringMatcher};
+use nvisy_context::{BoostRule, ContextEnhanced, Enhancer, SubstringMatcher};
 use nvisy_core::entity::{Entity, EntityLabelCatalog, EntityLabelRef};
 use nvisy_core::modality::Text;
 use nvisy_core::recognition::{EntityRecognizer, RecognizerInput, RecognizerOutput};
@@ -14,7 +14,8 @@ use super::regex::Regex;
 use crate::shipped;
 use crate::validators::ValidatorRegistry;
 
-/// Runtime text recognizer composed of a regex pool and an Aho-Corasick automaton.
+/// Runtime text recognizer composed of a regex pool and an
+/// Aho-Corasick automaton.
 ///
 /// Every registered [`Regex`] variant goes into one
 /// [`::regex::RegexSet`] for a single one-pass scan across every
@@ -23,10 +24,11 @@ use crate::validators::ValidatorRegistry;
 /// scan across every literal. Both passes share one walk over the
 /// input and emit entities in modality-local byte coordinates.
 ///
-/// Construct via [`PatternRecognizer::builder`]; the build wraps
-/// the recognizer in a [`Boosting`] layer that lifts confidence on
+/// Construct via [`PatternRecognizer::builder`]. [`build`]
+/// returns the bare recognizer; [`build_context_enhanced`] wraps
+/// it in a [`ContextEnhanced`] layer that lifts confidence on
 /// matches whose neighbourhood contains a per-label context
-/// keyword harvested from the same rules.
+/// keyword.
 ///
 /// # Examples
 ///
@@ -42,6 +44,8 @@ use crate::validators::ValidatorRegistry;
 ///
 /// [`Regex`]: super::Regex
 /// [`Dictionary`]: super::Dictionary
+/// [`build`]: PatternRecognizerBuilder::build
+/// [`build_context_enhanced`]: PatternRecognizerBuilder::build_context_enhanced
 pub struct PatternRecognizer {
     patterns: Vec<CompiledPattern>,
     regex_set: Option<RegexSet>,
@@ -176,12 +180,14 @@ impl PatternRecognizerBuilder {
         &self.dictionaries
     }
 
-    /// Compile every rule into the pooled scanners and wrap the
-    /// recognizer in a [`Boosting`] layer.
+    /// Compile every rule into the pooled scanners and return the
+    /// bare recognizer.
     ///
-    /// Context keywords from every pattern and dictionary are
-    /// harvested into per-label [`BoostRule`]s that lift confidence
-    /// on matches whose neighbourhood contains a declared keyword.
+    /// Per-rule `context` keywords are ignored on the emission
+    /// path; the recognizer emits raw confidence as authored by
+    /// each rule. Call [`build_context_enhanced`] instead (or wrap
+    /// the result in [`ContextEnhanced`] manually) to lift
+    /// confidence on matches near a declared keyword.
     ///
     /// # Errors
     ///
@@ -190,23 +196,40 @@ impl PatternRecognizerBuilder {
     /// validator name, when a dictionary's `scoring` is invalid
     /// or under-declared for some term's source column, or when
     /// the shared automata cannot be constructed.
-    pub fn build(self) -> Result<Boosting<PatternRecognizer>> {
+    ///
+    /// [`build_context_enhanced`]: Self::build_context_enhanced
+    pub fn build(self) -> Result<PatternRecognizer> {
         let validators = self
             .validators
             .clone()
             .unwrap_or_else(ValidatorRegistry::builtin);
         let (compiled_patterns, regex_set) = self.compile_patterns(&validators)?;
         let (compiled_dicts, aho) = self.compile_dictionaries()?;
-        let enhancer = self.build_enhancer();
 
-        let recognizer = PatternRecognizer {
+        Ok(PatternRecognizer {
             patterns: compiled_patterns,
             regex_set,
             dictionaries: compiled_dicts,
             aho,
-        };
+        })
+    }
 
-        Ok(Boosting::new(recognizer, enhancer))
+    /// Compile every rule and wrap the recognizer in a
+    /// [`ContextEnhanced`] layer.
+    ///
+    /// Context keywords from every pattern and dictionary are
+    /// harvested into per-label [`BoostRule`]s that lift confidence
+    /// on matches whose neighbourhood contains a declared keyword.
+    ///
+    /// # Errors
+    ///
+    /// See [`build`].
+    ///
+    /// [`build`]: Self::build
+    pub fn build_context_enhanced(self) -> Result<ContextEnhanced<PatternRecognizer>> {
+        let enhancer = self.build_enhancer();
+        let recognizer = self.build()?;
+        Ok(ContextEnhanced::new(recognizer, enhancer))
     }
 
     /// Compile every `(pattern, variant)` pair into a
diff --git a/crates/nvisy-pattern/tests/enhancer_roundtrip.rs b/crates/nvisy-pattern/tests/enhancer_roundtrip.rs
index 12d3c7f2..057a5708 100644
--- a/crates/nvisy-pattern/tests/enhancer_roundtrip.rs
+++ b/crates/nvisy-pattern/tests/enhancer_roundtrip.rs
@@ -1,10 +1,10 @@
 //! End-to-end: feed real input through a [`Regex`] →
-//! [`PatternRecognizer`] (wrapped in [`Boosting`]) and verify
+//! [`PatternRecognizer`] (wrapped in [`ContextEnhanced`]) and verify
 //! that confidence is boosted, and a [`Refinement`] step is
 //! appended only for matches that had a nearby keyword.
 //!
 //! [`Refinement`]: nvisy_core::entity::TrailStepKind::Refinement
-//! [`Boosting`]: nvisy_context::Boosting
+//! [`ContextEnhanced`]: nvisy_context::ContextEnhanced
 
 use nvisy_core::entity::{TrailStepKind, builtins};
 use nvisy_core::modality::TextData;
@@ -27,7 +27,7 @@ async fn enhancer_boosts_matches_near_keyword_only() {
 
     let recognizer = PatternRecognizer::builder()
         .with_pattern(regex)
-        .build()
+        .build_context_enhanced()
         .expect("recognizer builds");
 
     // Two SSN-shaped numbers: one near the keyword, one not.
@@ -41,7 +41,7 @@ async fn enhancer_boosts_matches_near_keyword_only() {
     assert_eq!(entities.len(), 2, "two SSN matches expected");
 
     // First match has `SSN:` within the default 5-word prefix/suffix
-    // window and gets boosted by the Boosting<PatternRecognizer> wrapper.
+    // window and gets boosted by the ContextEnhanced<PatternRecognizer> wrapper.
     let near = entities
         .iter()
         .find(|e| &text[e.location.start..e.location.end] == "123-45-6789")
diff --git a/crates/nvisy-pattern/tests/shipped_detection.rs b/crates/nvisy-pattern/tests/shipped_detection.rs
index 1f6a30f9..32d0ac13 100644
--- a/crates/nvisy-pattern/tests/shipped_detection.rs
+++ b/crates/nvisy-pattern/tests/shipped_detection.rs
@@ -16,7 +16,7 @@ async fn scan(text: &str) -> (String, Vec<Entity<Text>>) {
     let recognizer = PatternRecognizer::builder()
         .with_builtin_patterns()
         .with_builtin_dictionaries()
-        .build()
+        .build_context_enhanced()
         .expect("shipped recognizer builds");
     let input = RecognizerInput::new(TextData::new(text.to_owned()));
     let entities = recognizer
diff --git a/crates/nvisy-pattern/tests/user_rules.rs b/crates/nvisy-pattern/tests/user_rules.rs
index 589e6464..a3b477fd 100644
--- a/crates/nvisy-pattern/tests/user_rules.rs
+++ b/crates/nvisy-pattern/tests/user_rules.rs
@@ -37,7 +37,7 @@ async fn user_toml_rules_load_and_detect() {
         .with_pattern(product_code_pattern)
         .with_dictionary(product_code_dict)
         .with_builtin_patterns()
-        .build()
+        .build_context_enhanced()
         .expect("recognizer builds");
 
     let text = include_str!("../testdata/inputs/internal.txt");
diff --git a/crates/nvisy-toolkit/Cargo.toml b/crates/nvisy-toolkit/Cargo.toml
index 3a485c61..9b2a3e63 100644
--- a/crates/nvisy-toolkit/Cargo.toml
+++ b/crates/nvisy-toolkit/Cargo.toml
@@ -80,7 +80,7 @@ unicode-normalization = { workspace = true, features = [] }
 [dev-dependencies]
 # Internal test utilities (Entity::test_builder, …).
 nvisy-core = { workspace = true, features = ["test-utils"] }
-# Boosting<R> wrapper type returned by PatternRecognizer::build().
+# ContextEnhanced<R> wrapper type returned by PatternRecognizerBuilder::build_context_enhanced().
 nvisy-context = { workspace = true, features = [] }
 # Codec front door for E2E pipeline tests. Production builds pull
 # nvisy-codec via the toolkit's per-modality features; the dev
diff --git a/crates/nvisy-toolkit/examples/pipeline.rs b/crates/nvisy-toolkit/examples/pipeline.rs
index d4703f4f..01554730 100644
--- a/crates/nvisy-toolkit/examples/pipeline.rs
+++ b/crates/nvisy-toolkit/examples/pipeline.rs
@@ -56,7 +56,7 @@ async fn main() -> Result<()> {
     let pattern = PatternRecognizer::builder()
         .with_builtin_patterns()
         .with_builtin_dictionaries()
-        .build()?;
+        .build_context_enhanced()?;
     let detection = RecognizerRegistry::new().with_recognizer(pattern);
 
     let input = RecognizerInput::new(TextData::new(SAMPLE.to_owned()));
diff --git a/crates/nvisy-toolkit/tests/fixtures/registries.rs b/crates/nvisy-toolkit/tests/fixtures/registries.rs
index 3c77d299..30098801 100644
--- a/crates/nvisy-toolkit/tests/fixtures/registries.rs
+++ b/crates/nvisy-toolkit/tests/fixtures/registries.rs
@@ -1,7 +1,7 @@
 //! Shared recognizer + redaction registry constructors and dedup
 //! params used by every codec E2E test.
 
-use nvisy_context::Boosting;
+use nvisy_context::ContextEnhanced;
 use nvisy_core::entity::builtins;
 use nvisy_core::modality::Modality;
 use nvisy_core::primitive::ConfidenceThreshold;
@@ -11,12 +11,12 @@ use nvisy_toolkit::redaction::anonymizer::{Mask, Replace};
 use nvisy_toolkit::redaction::{Anonymizer, RedactionRegistry};
 
 /// Build the shipped pattern recognizer from every built-in
-/// pattern + dictionary, wrapped in its [`Boosting`] layer.
-pub fn shipped_recognizer() -> Boosting<PatternRecognizer> {
+/// pattern + dictionary, wrapped in its [`ContextEnhanced`] layer.
+pub fn shipped_recognizer() -> ContextEnhanced<PatternRecognizer> {
     PatternRecognizer::builder()
         .with_builtin_patterns()
         .with_builtin_dictionaries()
-        .build()
+        .build_context_enhanced()
         .expect("shipped recognizer builds")
 }
 
diff --git a/crates/nvisy-toolkit/tests/recognition_registry.rs b/crates/nvisy-toolkit/tests/recognition_registry.rs
index 80460d41..640afb74 100644
--- a/crates/nvisy-toolkit/tests/recognition_registry.rs
+++ b/crates/nvisy-toolkit/tests/recognition_registry.rs
@@ -48,7 +48,7 @@ fn build_registry() -> RecognizerRegistry {
     let pattern = PatternRecognizer::builder()
         .with_builtin_patterns()
         .with_builtin_dictionaries()
-        .build()
+        .build_context_enhanced()
         .expect("pattern recognizer builds from builtin set");
 
     let bento_url = env_or("NVISY_BENTO_URL", "http://localhost:3000");

From 756a9fa0e47b15635796239f5d1ccb50bd18afa7 Mon Sep 17 00:00:00 2001
From: Oleh Martsokha <o.martsokha@gmail.com>
Date: Mon, 15 Jun 2026 02:42:04 +0200
Subject: [PATCH 07/14] feat(pattern,context): per-language context keywords +
 primary-subtag matching
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- nvisy-pattern: new `Context` enum (Global | PerLanguage) replaces
  `Vec<String>` on Regex and Dictionary. Untagged serde keeps the
  flat TOML form working unchanged; new form is `context.en = [...]`.
  Shipped phone, credit_card, date_of_birth, datetime patterns now
  carry EN/ES/DE/FR keyword sets.
- nvisy-context: BoostRule gains `language: Option<LanguageTag>`.
  Enhancer storage flips to `HashMap<Label, Vec<BoostRule>>` —
  one bucket per label, distinct language scopes inside.
  Enhancer::enhance takes a language hint; ContextEnhanced
  threads input.language through.
- nvisy-core: LanguageTag::new returns nvisy_core::Error;
  LanguageTag::matches compares primary subtags case-insensitively
  so `en` matches `en-US` / `en-GB`. BoostRule::applies_to_language
  and RecognizerInput::applies_to_language switch from `==` to
  `matches()` so language-scoped rules fire under regional variants.
- Tests: TOML round-trip both forms; per-language boost fires for
  matching language; no boost for non-matching language; no-hint
  unions all per-language keywords; regional variants
  (`en-US`) trigger `en`-scoped rules.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
---
 Cargo.lock                                    |   1 +
 crates/nvisy-context/src/enhancer.rs          | 105 ++++++---
 crates/nvisy-context/src/rule.rs              |  56 ++++-
 crates/nvisy-context/src/wrapper.rs           |   4 +-
 .../nvisy-core/src/primitive/language/tag.rs  |  75 ++++++
 crates/nvisy-core/src/recognition/input.rs    |   6 +-
 crates/nvisy-pattern/Cargo.toml               |   1 +
 .../assets/patterns/contact/phone.toml        |   7 +-
 .../assets/patterns/finance/credit_card.toml  |   7 +-
 .../patterns/personal/date_of_birth.toml      |   7 +-
 .../assets/patterns/personal/datetime.toml    |   7 +-
 crates/nvisy-pattern/src/lib.rs               |   2 +-
 .../nvisy-pattern/src/recognition/context.rs  | 163 +++++++++++++
 .../src/recognition/dictionary.rs             |   8 +-
 crates/nvisy-pattern/src/recognition/mod.rs   |   2 +
 .../src/recognition/recognizer.rs             | 220 +++++++++++++++++-
 crates/nvisy-pattern/src/recognition/regex.rs |   7 +-
 17 files changed, 621 insertions(+), 57 deletions(-)
 create mode 100644 crates/nvisy-pattern/src/recognition/context.rs

diff --git a/Cargo.lock b/Cargo.lock
index 5d31d114..5028cb9d 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -3034,6 +3034,7 @@ dependencies = [
  "async-trait",
  "csv",
  "derive_builder",
+ "derive_more",
  "nvisy-context",
  "nvisy-core",
  "regex",
diff --git a/crates/nvisy-context/src/enhancer.rs b/crates/nvisy-context/src/enhancer.rs
index cd3103b1..176efcc9 100644
--- a/crates/nvisy-context/src/enhancer.rs
+++ b/crates/nvisy-context/src/enhancer.rs
@@ -5,6 +5,7 @@ use std::collections::HashMap;
 
 use nvisy_core::entity::{Entity, EntityLabelRef, TrailStep};
 use nvisy_core::modality::Text;
+use nvisy_core::primitive::LanguageTag;
 use unicode_segmentation::UnicodeSegmentation;
 
 use super::matcher::KeywordMatcher;
@@ -33,28 +34,37 @@ const TRAIL_SOURCE: &str = "context";
 /// [`SubstringMatcher`]: super::SubstringMatcher
 /// [`LemmaMatcher`]: super::LemmaMatcher
 pub struct Enhancer {
-    rules: HashMap<EntityLabelRef, BoostRule>,
+    /// Rules bucketed by label. Within one bucket, each entry is
+    /// a distinct `(language)` scope; rules sharing the same
+    /// `(label, language)` are pre-merged via [`BoostRule::merge`]
+    /// at construction. Per-entity application looks up the
+    /// bucket once by label, then walks the small inner vec
+    /// filtering on the per-call language hint.
+    rules: HashMap<EntityLabelRef, Vec<BoostRule>>,
     matcher: Box<dyn KeywordMatcher>,
 }
 
 impl Enhancer {
     /// Construct from a rule iterator and matcher. Rules sharing
-    /// the same label are merged via [`BoostRule::merge`].
+    /// the same `(label, language)` are merged via
+    /// [`BoostRule::merge`]; rules with the same label but
+    /// distinct languages live as separate entries inside the
+    /// label's bucket.
     pub fn new(
         rules: impl IntoIterator<Item = BoostRule>,
         matcher: Box<dyn KeywordMatcher>,
     ) -> Self {
-        let mut map: HashMap<EntityLabelRef, BoostRule> = HashMap::new();
+        let mut buckets: HashMap<EntityLabelRef, Vec<BoostRule>> = HashMap::new();
         for rule in rules {
-            match map.get_mut(&rule.label) {
-                Some(existing) => existing.merge(rule),
-                None => {
-                    map.insert(rule.label.clone(), rule);
-                }
+            let bucket = buckets.entry(rule.label.clone()).or_default();
+            if let Some(existing) = bucket.iter_mut().find(|r| r.language == rule.language) {
+                existing.merge(rule);
+            } else {
+                bucket.push(rule);
             }
         }
         Self {
-            rules: map,
+            rules: buckets,
             matcher,
         }
     }
@@ -75,7 +85,8 @@ impl Enhancer {
     }
 
     /// Apply boost rules to `entities` in place. For each entity:
-    /// look up the rule for its label, walk a window of
+    /// visit every rule registered for its label whose language
+    /// scope applies under `language` and, for each, walk a window of
     /// `prefix_words` words before and `suffix_words` words after
     /// the entity's location, ask the matcher whether any keyword
     /// fires, and on a hit lift confidence by the rule's `boost`
@@ -87,25 +98,55 @@ impl Enhancer {
     /// against the token stream; when absent, words are derived
     /// from the source text via Unicode word segmentation.
     ///
+    /// `language` is the per-call language hint. `None` means
+    /// "unknown" — every per-language rule applies as a
+    /// permissive fallback.
+    ///
     /// [`Confidence`]: nvisy_core::primitive::Confidence
     /// [`Refinement`]: nvisy_core::entity::TrailStepKind::Refinement
-    pub fn enhance(&self, entities: &mut [Entity<Text>], text: &str, tokens: Option<&[Token]>) {
+    pub fn enhance(
+        &self,
+        entities: &mut [Entity<Text>],
+        text: &str,
+        tokens: Option<&[Token]>,
+        language: Option<&LanguageTag>,
+    ) {
         if self.rules.is_empty() {
             return;
         }
         for entity in entities {
-            self.enhance_one(entity, text, tokens);
+            self.enhance_one(entity, text, tokens, language);
         }
     }
 
-    fn enhance_one(&self, entity: &mut Entity<Text>, text: &str, tokens: Option<&[Token]>) {
-        let Some(rule) = self.rules.get(&entity.label) else {
+    fn enhance_one(
+        &self,
+        entity: &mut Entity<Text>,
+        text: &str,
+        tokens: Option<&[Token]>,
+        language: Option<&LanguageTag>,
+    ) {
+        let Some(bucket) = self.rules.get(&entity.label) else {
             return;
         };
-        if rule.keywords.is_empty() {
-            return;
+        for rule in bucket {
+            if !rule.applies_to_language(language) {
+                continue;
+            }
+            if rule.keywords.is_empty() {
+                continue;
+            }
+            self.apply_rule(entity, rule, text, tokens);
         }
+    }
 
+    fn apply_rule(
+        &self,
+        entity: &mut Entity<Text>,
+        rule: &BoostRule,
+        text: &str,
+        tokens: Option<&[Token]>,
+    ) {
         let start = entity.location.start;
         let end = entity.location.end;
 
@@ -311,7 +352,7 @@ mod tests {
         )]);
         let text = "Your SSN: 123-45-6789";
         let mut entities = vec![entity(govid_label(), 10, 21, 0.6)];
-        enhancer.enhance(&mut entities, text, None);
+        enhancer.enhance(&mut entities, text, None, None);
         assert!(entities[0].confidence.get() > 0.6);
         assert!(
             entities[0]
@@ -326,7 +367,7 @@ mod tests {
         let enhancer = enhancer(vec![rule(govid_label(), &["social"], 0, 5, 0.2)]);
         let text = "123-45-6789 (social security number)";
         let mut entities = vec![entity(govid_label(), 0, 11, 0.6)];
-        enhancer.enhance(&mut entities, text, None);
+        enhancer.enhance(&mut entities, text, None, None);
         assert!(
             entities[0].confidence.get() > 0.6,
             "trailing keyword within suffix window should boost",
@@ -340,7 +381,7 @@ mod tests {
         let text = "123-45-6789 (social security number)";
         let mut entities = vec![entity(govid_label(), 0, 11, 0.6)];
         let before = entities[0].confidence.get();
-        enhancer.enhance(&mut entities, text, None);
+        enhancer.enhance(&mut entities, text, None, None);
         assert_eq!(entities[0].confidence.get(), before);
     }
 
@@ -350,7 +391,7 @@ mod tests {
         let text = "Mr. Smith is named in the report.";
         let mut entities = vec![entity(person_label(), 4, 9, 0.5)];
         let before = entities[0].confidence.get();
-        enhancer.enhance(&mut entities, text, None);
+        enhancer.enhance(&mut entities, text, None, None);
         assert_eq!(entities[0].confidence.get(), before);
     }
 
@@ -364,7 +405,7 @@ mod tests {
         let xyz_end = xyz_start + "XYZ".len();
         let mut entities = vec![entity(govid_label(), xyz_start, xyz_end, 0.6)];
         let before = entities[0].confidence.get();
-        enhancer.enhance(&mut entities, text, None);
+        enhancer.enhance(&mut entities, text, None, None);
         assert_eq!(entities[0].confidence.get(), before);
     }
 
@@ -373,7 +414,7 @@ mod tests {
         let enhancer = enhancer(vec![rule(govid_label(), &["here"], 5, 5, 0.9)]);
         let text = "the value is right here in plain sight";
         let mut entities = vec![entity(govid_label(), 16, 21, 0.95)];
-        enhancer.enhance(&mut entities, text, None);
+        enhancer.enhance(&mut entities, text, None, None);
         assert!((entities[0].confidence.get() - 1.0).abs() < f64::EPSILON);
     }
 
@@ -397,7 +438,7 @@ mod tests {
         let ssn_entity_start = ssn_only.find("123").unwrap();
         let ssn_entity_end = ssn_entity_start + "123-45-6789".len();
         let mut from_first = vec![entity(govid_label(), ssn_entity_start, ssn_entity_end, 0.6)];
-        make_enhancer().enhance(&mut from_first, ssn_only, None);
+        make_enhancer().enhance(&mut from_first, ssn_only, None, None);
         assert!(
             from_first[0].confidence.get() > 0.6,
             "keyword `ssn` from the first rule must still boost after merge",
@@ -408,7 +449,7 @@ mod tests {
         let tax_entity_start = taxid_only.find("987").unwrap();
         let tax_entity_end = tax_entity_start + "987-65-4329".len();
         let mut from_second = vec![entity(govid_label(), tax_entity_start, tax_entity_end, 0.6)];
-        make_enhancer().enhance(&mut from_second, taxid_only, None);
+        make_enhancer().enhance(&mut from_second, taxid_only, None, None);
         assert!(
             from_second[0].confidence.get() > 0.6,
             "keyword `tax id` from the second rule must still boost after merge",
@@ -423,7 +464,7 @@ mod tests {
         let entity_start = text.find("123").unwrap();
         let entity_end = entity_start + "123-45-6789".len();
         let mut entities = vec![entity(govid_label(), entity_start, entity_end, 0.6)];
-        enhancer.enhance(&mut entities, text, None);
+        enhancer.enhance(&mut entities, text, None, None);
         assert!(
             entities[0].confidence.get() > 0.6,
             "unicode word should be reachable within 3-word prefix",
@@ -439,7 +480,7 @@ mod tests {
         let entity_end = entity_start + "123-45-6789".len();
         let mut entities = vec![entity(govid_label(), entity_start, entity_end, 0.6)];
         let before = entities[0].confidence.get();
-        enhancer.enhance(&mut entities, text, None);
+        enhancer.enhance(&mut entities, text, None, None);
         assert_eq!(entities[0].confidence.get(), before);
     }
 
@@ -453,8 +494,8 @@ mod tests {
         let text = "Your SSN: 123-45-6789";
         let mut from_none = vec![entity(govid_label(), 10, 21, 0.6)];
         let mut from_empty = vec![entity(govid_label(), 10, 21, 0.6)];
-        enhancer.enhance(&mut from_none, text, None);
-        enhancer.enhance(&mut from_empty, text, Some(&[]));
+        enhancer.enhance(&mut from_none, text, None, None);
+        enhancer.enhance(&mut from_empty, text, Some(&[]), None);
         assert_eq!(
             from_none[0].confidence.get(),
             from_empty[0].confidence.get(),
@@ -486,7 +527,7 @@ mod tests {
         ];
         let mut entities = vec![entity(govid_label(), entity_start, entity_end, 0.6)];
         let before = entities[0].confidence.get();
-        enhancer.enhance(&mut entities, text, Some(&tokens));
+        enhancer.enhance(&mut entities, text, Some(&tokens), None);
         assert_eq!(
             entities[0].confidence.get(),
             before,
@@ -508,7 +549,7 @@ mod tests {
             Token::from_text("123-45-6789", 22..33),
         ];
         let mut entities = vec![entity(govid_label(), entity_start, entity_end, 0.6)];
-        enhancer.enhance(&mut entities, text, Some(&tokens));
+        enhancer.enhance(&mut entities, text, Some(&tokens), None);
         assert!(
             entities[0].confidence.get() > 0.6,
             "2-word prefix should reach the `social security` token",
@@ -537,7 +578,7 @@ mod tests {
             Token::from_text("system", 41..47),
         ];
         let mut entities = vec![entity(govid_label(), entity_start, entity_end, 0.6)];
-        enhancer.enhance(&mut entities, text, Some(&tokens));
+        enhancer.enhance(&mut entities, text, Some(&tokens), None);
         assert!(
             entities[0].confidence.get() > 0.6,
             "lemma matcher should match `run` against the `running` token's lemma",
@@ -570,7 +611,7 @@ mod tests {
             Token::from_text("document", 18..26),
         ];
         let mut entities = vec![entity(govid_label(), entity_start, entity_end, 0.6)];
-        enhancer.enhance(&mut entities, text, Some(&tokens));
+        enhancer.enhance(&mut entities, text, Some(&tokens), None);
         assert!(
             entities[0].confidence.get() > 0.6,
             "tokens that don't overlap the entity must fall back to the word window",
diff --git a/crates/nvisy-context/src/rule.rs b/crates/nvisy-context/src/rule.rs
index 7f88cf78..f45c423a 100644
--- a/crates/nvisy-context/src/rule.rs
+++ b/crates/nvisy-context/src/rule.rs
@@ -20,7 +20,7 @@ use std::collections::HashSet;
 
 use hipstr::HipStr;
 use nvisy_core::entity::EntityLabelRef;
-use nvisy_core::primitive::Confidence;
+use nvisy_core::primitive::{Confidence, LanguageTag};
 
 /// Default window radius in words *before* an entity match.
 /// Mirrors Presidio's `context_prefix_count = 5`.
@@ -49,6 +49,11 @@ pub struct BoostRule {
     ///
     /// [`label`]: nvisy_core::entity::Entity::label
     pub label: EntityLabelRef,
+    /// Language scope. `None` means the rule applies regardless
+    /// of the per-call language hint; `Some(lang)` means the rule
+    /// only fires when the caller's language matches, or when no
+    /// hint is set (permissive fallback).
+    pub language: Option<LanguageTag>,
     /// Keywords whose presence near a match lifts the entity's
     /// confidence. Stored as [`HipStr`] for cheap clones across
     /// per-pass rule sets.
@@ -71,8 +76,12 @@ pub struct BoostRule {
 
 impl BoostRule {
     /// Construct a rule for `label` with explicit window radii
-    /// and `boost`. Most callers want [`BoostRule::for_label`]
-    /// instead — it bakes in the default window / boost values.
+    /// and `boost`. The rule is language-agnostic; use
+    /// [`with_language`] to scope it. Most callers want
+    /// [`BoostRule::for_label`] instead — it bakes in the default
+    /// window / boost values.
+    ///
+    /// [`with_language`]: Self::with_language
     #[must_use]
     pub fn new(
         label: EntityLabelRef,
@@ -83,6 +92,7 @@ impl BoostRule {
     ) -> Self {
         Self {
             label,
+            language: None,
             keywords: keywords.into_iter().map(Into::into).collect(),
             prefix_words,
             suffix_words,
@@ -113,6 +123,36 @@ impl BoostRule {
         )
     }
 
+    /// Scope this rule to a single language.
+    ///
+    /// At apply time the rule fires only when the caller's
+    /// language hint matches `language`, or when no hint is set
+    /// (permissive fallback).
+    #[must_use]
+    pub fn with_language(mut self, language: LanguageTag) -> Self {
+        self.language = Some(language);
+        self
+    }
+
+    /// Return `true` when this rule applies under the per-call
+    /// language hint.
+    ///
+    /// - Language-agnostic rules (`self.language == None`)
+    ///   always apply.
+    /// - Language-scoped rules apply when the hint shares a
+    ///   primary subtag with the scope (so a rule scoped to
+    ///   `"en"` fires for `"en-US"` and `"en-GB"` hints), or
+    ///   when no hint is set (permissive fallback so callers
+    ///   who don't pass a language still get boosts).
+    #[must_use]
+    pub fn applies_to_language(&self, hint: Option<&LanguageTag>) -> bool {
+        match (&self.language, hint) {
+            (None, _) => true,
+            (Some(_), None) => true,
+            (Some(scope), Some(hint)) => scope.matches(hint),
+        }
+    }
+
     /// Merge `other` into this rule by extending the keyword set
     /// with any keywords not already present. Window radii and
     /// `boost` are kept from `self` — callers that need different
@@ -121,14 +161,18 @@ impl BoostRule {
     ///
     /// # Panics
     ///
-    /// Debug-asserts when the labels differ. Merging across labels
-    /// is a caller bug — rules are keyed by label and the engine
-    /// looks them up by label.
+    /// Debug-asserts when the labels or languages differ. Merging
+    /// across keys is a caller bug — rules are keyed by
+    /// `(label, language)` and the engine looks them up by both.
     pub fn merge(&mut self, other: BoostRule) {
         debug_assert_eq!(
             self.label, other.label,
             "BoostRule::merge requires matching labels",
         );
+        debug_assert_eq!(
+            self.language, other.language,
+            "BoostRule::merge requires matching languages",
+        );
         let existing: HashSet<&str> = self.keywords.iter().map(HipStr::as_str).collect();
         let additions: Vec<HipStr<'static>> = other
             .keywords
diff --git a/crates/nvisy-context/src/wrapper.rs b/crates/nvisy-context/src/wrapper.rs
index c4128f0f..c9d415ec 100644
--- a/crates/nvisy-context/src/wrapper.rs
+++ b/crates/nvisy-context/src/wrapper.rs
@@ -70,7 +70,9 @@ where
         }
         let text = input.data.text.as_str();
         let tokens = input.artifacts.get::<Tokens>().map(Tokens::as_slice);
-        self.enhancer.enhance(&mut output.entities, text, tokens);
+        let language = input.language.as_ref();
+        self.enhancer
+            .enhance(&mut output.entities, text, tokens, language);
         Ok(output)
     }
 }
diff --git a/crates/nvisy-core/src/primitive/language/tag.rs b/crates/nvisy-core/src/primitive/language/tag.rs
index e0da8fac..412dc65b 100644
--- a/crates/nvisy-core/src/primitive/language/tag.rs
+++ b/crates/nvisy-core/src/primitive/language/tag.rs
@@ -3,6 +3,8 @@
 use derive_more::{Display, FromStr};
 use serde::{Deserialize, Serialize};
 
+use crate::Error;
+
 /// A validated [BCP-47] language tag.
 ///
 /// Wraps [`LanguageTag`] with serde support. Use `#[schemars(with =
@@ -26,6 +28,24 @@ use serde::{Deserialize, Serialize};
 pub struct LanguageTag(oxilangtag::LanguageTag<String>);
 
 impl LanguageTag {
+    /// Parse a BCP-47 language tag from a string.
+    ///
+    /// Convenience over the `FromStr` impl that maps the parse
+    /// failure into this crate's validation [`Error`].
+    ///
+    /// # Errors
+    ///
+    /// Returns a validation error when `tag` is not a valid
+    /// BCP-47 tag.
+    pub fn new(tag: &str) -> Result<Self, Error> {
+        tag.parse().map_err(|e| {
+            Error::validation(
+                format!("invalid BCP-47 language tag `{tag}`: {e}"),
+                "nvisy-core",
+            )
+        })
+    }
+
     /// Returns the tag as a string slice.
     pub fn as_str(&self) -> &str {
         self.0.as_str()
@@ -35,4 +55,59 @@ impl LanguageTag {
     pub fn primary_language(&self) -> &str {
         self.0.primary_language()
     }
+
+    /// Return `true` when `self` and `other` share the same
+    /// primary language subtag.
+    ///
+    /// Compares only the primary subtag, so `"en"`, `"en-US"`, and
+    /// `"en-GB"` all match each other; `"en"` does not match
+    /// `"de"`. ASCII case-insensitive (BCP-47 tags are
+    /// case-insensitive by spec).
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use nvisy_core::primitive::LanguageTag;
+    ///
+    /// let en = LanguageTag::new("en").unwrap();
+    /// let en_us = LanguageTag::new("en-US").unwrap();
+    /// let de = LanguageTag::new("de").unwrap();
+    ///
+    /// assert!(en.matches(&en_us));
+    /// assert!(en_us.matches(&en));
+    /// assert!(!en.matches(&de));
+    /// ```
+    #[must_use]
+    pub fn matches(&self, other: &Self) -> bool {
+        self.primary_language()
+            .eq_ignore_ascii_case(other.primary_language())
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    fn tag(s: &str) -> LanguageTag {
+        LanguageTag::new(s).expect("valid BCP-47 tag")
+    }
+
+    #[test]
+    fn matches_same_primary_subtag() {
+        assert!(tag("en").matches(&tag("en-US")));
+        assert!(tag("en-US").matches(&tag("en")));
+        assert!(tag("en-US").matches(&tag("en-GB")));
+        assert!(tag("en").matches(&tag("en")));
+    }
+
+    #[test]
+    fn matches_rejects_distinct_primary_subtags() {
+        assert!(!tag("en").matches(&tag("de")));
+        assert!(!tag("en-US").matches(&tag("de-DE")));
+    }
+
+    #[test]
+    fn matches_is_case_insensitive() {
+        assert!(tag("EN").matches(&tag("en-us")));
+    }
 }
diff --git a/crates/nvisy-core/src/recognition/input.rs b/crates/nvisy-core/src/recognition/input.rs
index 0594b5a4..0192878c 100644
--- a/crates/nvisy-core/src/recognition/input.rs
+++ b/crates/nvisy-core/src/recognition/input.rs
@@ -127,7 +127,9 @@ impl<M: Modality> RecognizerInput<M> {
     /// - An empty `allowed` list means the rule is language-agnostic
     ///   and always runs.
     /// - When `allowed` is non-empty and [`language`] is `Some(_)`,
-    ///   the rule runs only if the hint is in the list.
+    ///   the rule runs when the hint shares a primary subtag with
+    ///   any entry in `allowed` (so an `["en"]` rule fires for
+    ///   `"en-US"` and `"en-GB"` hints).
     /// - When [`language`] is `None`, the rule still runs — we can't
     ///   disprove applicability without a hint.
     ///
@@ -138,7 +140,7 @@ impl<M: Modality> RecognizerInput<M> {
             return true;
         }
         match self.language.as_ref() {
-            Some(l) => allowed.iter().any(|a| a == l),
+            Some(hint) => allowed.iter().any(|a| a.matches(hint)),
             None => true,
         }
     }
diff --git a/crates/nvisy-pattern/Cargo.toml b/crates/nvisy-pattern/Cargo.toml
index d89d43fc..08fe0410 100644
--- a/crates/nvisy-pattern/Cargo.toml
+++ b/crates/nvisy-pattern/Cargo.toml
@@ -33,6 +33,7 @@ toml = { workspace = true, features = ["parse"] }
 
 # Derive macros and error handling
 derive_builder = { workspace = true, features = [] }
+derive_more = { workspace = true, features = ["from"] }
 
 # Async runtime and parallelism
 async-trait = { workspace = true, features = [] }
diff --git a/crates/nvisy-pattern/assets/patterns/contact/phone.toml b/crates/nvisy-pattern/assets/patterns/contact/phone.toml
index 01df2224..ce5fe9dd 100644
--- a/crates/nvisy-pattern/assets/patterns/contact/phone.toml
+++ b/crates/nvisy-pattern/assets/patterns/contact/phone.toml
@@ -1,6 +1,11 @@
 name = "phone"
 label = "phone_number"
-context = ["phone", "call", "mobile", "tel", "fax", "contact"]
+
+[context]
+en = ["phone", "call", "mobile", "tel", "fax", "contact"]
+es = ["teléfono", "telefono", "llamar", "móvil", "movil", "celular", "tel", "fax", "contacto"]
+de = ["telefon", "anruf", "mobil", "handy", "tel", "fax", "kontakt"]
+fr = ["téléphone", "telephone", "appel", "mobile", "portable", "tel", "fax", "contact"]
 
 [[variants]]
 regex = "(?:\\+\\d{1,3}[\\s.\\-]?)?\\(?\\d{2,4}\\)?[\\s.\\-]?\\d{3,4}[\\s.\\-]?\\d{4}\\b"
diff --git a/crates/nvisy-pattern/assets/patterns/finance/credit_card.toml b/crates/nvisy-pattern/assets/patterns/finance/credit_card.toml
index 78b3325a..cfe4ab2b 100644
--- a/crates/nvisy-pattern/assets/patterns/finance/credit_card.toml
+++ b/crates/nvisy-pattern/assets/patterns/finance/credit_card.toml
@@ -1,6 +1,11 @@
 name = "credit-card"
 label = "payment_card"
-context = ["card", "credit", "debit", "payment", "visa", "mastercard", "amex"]
+
+[context]
+en = ["card", "credit", "debit", "payment", "visa", "mastercard", "amex"]
+es = ["tarjeta", "crédito", "credito", "débito", "debito", "pago", "visa", "mastercard", "amex"]
+de = ["karte", "kredit", "kreditkarte", "debit", "zahlung", "visa", "mastercard", "amex"]
+fr = ["carte", "crédit", "credit", "débit", "debit", "paiement", "visa", "mastercard", "amex"]
 
 [[variants]]
 regex = "\\b(?:\\d[ \\-]*?){13,19}\\b"
diff --git a/crates/nvisy-pattern/assets/patterns/personal/date_of_birth.toml b/crates/nvisy-pattern/assets/patterns/personal/date_of_birth.toml
index c88f21b9..a178c41d 100644
--- a/crates/nvisy-pattern/assets/patterns/personal/date_of_birth.toml
+++ b/crates/nvisy-pattern/assets/patterns/personal/date_of_birth.toml
@@ -1,6 +1,11 @@
 name = "date_of_birth"
 label = "date_of_birth"
-context = ["birth", "born", "dob", "birthday"]
+
+[context]
+en = ["birth", "born", "dob", "birthday", "date of birth"]
+es = ["nacimiento", "nacido", "nacida", "fecha de nacimiento", "cumpleaños", "cumpleanos"]
+de = ["geburt", "geboren", "geburtsdatum", "geburtstag"]
+fr = ["naissance", "né", "nee", "née", "date de naissance", "anniversaire"]
 
 [[variants]]
 regex = "\\b(?:(?:0[1-9]|1[0-2]|[1-9])[/\\-](?:0[1-9]|[12]\\d|3[01]|[1-9])[/\\-](?:19|20)\\d{2}|(?:19|20)\\d{2}[/\\-](?:0[1-9]|1[0-2])[/\\-](?:0[1-9]|[12]\\d|3[01]))\\b"
diff --git a/crates/nvisy-pattern/assets/patterns/personal/datetime.toml b/crates/nvisy-pattern/assets/patterns/personal/datetime.toml
index c1e00f7a..e37edf92 100644
--- a/crates/nvisy-pattern/assets/patterns/personal/datetime.toml
+++ b/crates/nvisy-pattern/assets/patterns/personal/datetime.toml
@@ -1,6 +1,11 @@
 name = "datetime"
 label = "date_time"
-context = ["timestamp", "created", "modified", "logged", "at", "time"]
+
+[context]
+en = ["timestamp", "created", "modified", "logged", "at", "time"]
+es = ["marca de tiempo", "creado", "creada", "modificado", "modificada", "registrado", "a las", "hora", "fecha"]
+de = ["zeitstempel", "erstellt", "geändert", "geandert", "protokolliert", "um", "uhrzeit", "zeit"]
+fr = ["horodatage", "créé", "cree", "créée", "creee", "modifié", "modifie", "à", "heure", "date"]
 
 [[variants]]
 regex = "\\b(?:19|20)\\d{2}[/\\-](?:0[1-9]|1[0-2])[/\\-](?:0[1-9]|[12]\\d|3[01])[T ](?:[01]\\d|2[0-3]):[0-5]\\d(?::[0-5]\\d)?(?:Z|[+\\-]\\d{2}:?\\d{2})?\\b"
diff --git a/crates/nvisy-pattern/src/lib.rs b/crates/nvisy-pattern/src/lib.rs
index 129b002f..ca00f5f9 100644
--- a/crates/nvisy-pattern/src/lib.rs
+++ b/crates/nvisy-pattern/src/lib.rs
@@ -7,7 +7,7 @@ mod shipped;
 pub mod validators;
 
 pub use self::recognition::{
-    Dictionary, DictionaryBuilder, PatternRecognizer, PatternRecognizerBuilder, Regex,
+    Context, Dictionary, DictionaryBuilder, PatternRecognizer, PatternRecognizerBuilder, Regex,
     RegexBuilder, Scoring, Term, Variant,
 };
 pub use self::shipped::{dictionaries, patterns};
diff --git a/crates/nvisy-pattern/src/recognition/context.rs b/crates/nvisy-pattern/src/recognition/context.rs
new file mode 100644
index 00000000..90f37504
--- /dev/null
+++ b/crates/nvisy-pattern/src/recognition/context.rs
@@ -0,0 +1,163 @@
+//! [`Context`]: per-rule keyword set used by the post-recognition
+//! [`ContextEnhanced`] layer.
+//!
+//! Two shapes:
+//!
+//! - [`Global`] — one flat keyword list applied regardless of the
+//!   per-call language hint.
+//! - [`PerLanguage`] — keyword lists keyed by [`LanguageTag`]; the
+//!   enhancer applies entries sharing a primary subtag with
+//!   `RecognizerInput.language` (`en` fires for `en-US`). With no hint,
+//!   every per-language list applies (the crate's "missing language =
+//!   any" theme of [`Regex::languages`] / [`Dictionary::languages`]).
+//!
+//! [`Global`]: Context::Global
+//! [`PerLanguage`]: Context::PerLanguage
+//! [`ContextEnhanced`]: nvisy_context::ContextEnhanced
+//! [`Regex::languages`]: super::Regex::languages
+//! [`Dictionary::languages`]: super::Dictionary::languages
+
+use std::collections::HashMap;
+use std::collections::hash_map::Iter;
+
+use derive_more::From;
+use nvisy_core::primitive::LanguageTag;
+use serde::Deserialize;
+
+/// Per-rule context keyword set.
+///
+/// Either a single flat list ([`Global`]) or a map keyed by
+/// language ([`PerLanguage`]).
+///
+/// [`Global`]: Self::Global
+/// [`PerLanguage`]: Self::PerLanguage
+#[derive(Debug, Clone, PartialEq, Eq, Deserialize, From)]
+#[serde(untagged)]
+pub enum Context {
+    /// One flat keyword list applied regardless of the per-call
+    /// language hint.
+    Global(Vec<String>),
+    /// Per-language keyword lists. The enhancer picks the entry
+    /// matching `RecognizerInput.language`, or unions every list
+    /// when no hint is set.
+    PerLanguage(HashMap<LanguageTag, Vec<String>>),
+}
+
+impl Context {
+    /// Return `true` when no keywords are declared in any scope.
+    #[must_use]
+    pub fn is_empty(&self) -> bool {
+        match self {
+            Self::Global(kws) => kws.is_empty(),
+            Self::PerLanguage(map) => map.values().all(Vec::is_empty),
+        }
+    }
+
+    /// Iterate over `(language, keywords)` pairs.
+    ///
+    /// [`Global`] yields one entry with `language = None`;
+    /// [`PerLanguage`] yields one entry per language.
+    ///
+    /// [`Global`]: Self::Global
+    /// [`PerLanguage`]: Self::PerLanguage
+    pub fn iter(&self) -> ContextIter<'_> {
+        match self {
+            Self::Global(kws) => ContextIter::Global(Some(kws.as_slice())),
+            Self::PerLanguage(map) => ContextIter::PerLanguage(map.iter()),
+        }
+    }
+}
+
+impl Default for Context {
+    fn default() -> Self {
+        Self::Global(Vec::new())
+    }
+}
+
+/// Iterator over `(language, keywords)` pairs returned by [`Context::iter`].
+pub enum ContextIter<'a> {
+    Global(Option<&'a [String]>),
+    PerLanguage(Iter<'a, LanguageTag, Vec<String>>),
+}
+
+impl<'a> Iterator for ContextIter<'a> {
+    type Item = (Option<&'a LanguageTag>, &'a [String]);
+
+    fn next(&mut self) -> Option<Self::Item> {
+        match self {
+            Self::Global(slot) => slot.take().map(|kws| (None, kws)),
+            Self::PerLanguage(it) => it.next().map(|(lang, kws)| (Some(lang), kws.as_slice())),
+        }
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[derive(Deserialize)]
+    struct Wrap {
+        context: Context,
+    }
+
+    #[test]
+    fn parses_flat_array_as_global() {
+        let toml = r#"context = ["a", "b"]"#;
+        let w: Wrap = toml::from_str(toml).unwrap();
+        assert_eq!(w.context, Context::Global(vec!["a".into(), "b".into()]));
+    }
+
+    #[test]
+    fn parses_table_as_per_language() {
+        let toml = r#"
+            [context]
+            en = ["card"]
+            es = ["tarjeta"]
+        "#;
+        let w: Wrap = toml::from_str(toml).unwrap();
+        let map = match w.context {
+            Context::PerLanguage(m) => m,
+            _ => panic!("expected PerLanguage"),
+        };
+        assert_eq!(map.len(), 2);
+        assert_eq!(
+            map.get(&LanguageTag::new("en").unwrap()).unwrap(),
+            &vec!["card".to_owned()]
+        );
+        assert_eq!(
+            map.get(&LanguageTag::new("es").unwrap()).unwrap(),
+            &vec!["tarjeta".to_owned()]
+        );
+    }
+
+    #[test]
+    fn iter_global_yields_one_none_entry() {
+        let ctx = Context::Global(vec!["a".into(), "b".into()]);
+        let collected: Vec<_> = ctx
+            .iter()
+            .map(|(lang, kws)| (lang.cloned(), kws.to_vec()))
+            .collect();
+        assert_eq!(collected.len(), 1);
+        assert!(collected[0].0.is_none());
+        assert_eq!(collected[0].1, vec!["a".to_owned(), "b".to_owned()]);
+    }
+
+    #[test]
+    fn iter_per_language_yields_one_entry_per_language() {
+        let mut map = HashMap::new();
+        map.insert(LanguageTag::new("en").unwrap(), vec!["card".into()]);
+        map.insert(LanguageTag::new("es").unwrap(), vec!["tarjeta".into()]);
+        let ctx = Context::PerLanguage(map);
+        let collected: Vec<_> = ctx
+            .iter()
+            .map(|(lang, kws)| (lang.unwrap().to_string(), kws.to_vec()))
+            .collect();
+        assert_eq!(collected.len(), 2);
+    }
+
+    #[test]
+    fn default_is_empty_global() {
+        let ctx = Context::default();
+        assert!(ctx.is_empty());
+    }
+}
diff --git a/crates/nvisy-pattern/src/recognition/dictionary.rs b/crates/nvisy-pattern/src/recognition/dictionary.rs
index 3285f7af..3be046c6 100644
--- a/crates/nvisy-pattern/src/recognition/dictionary.rs
+++ b/crates/nvisy-pattern/src/recognition/dictionary.rs
@@ -6,6 +6,7 @@ use nvisy_core::entity::EntityLabelRef;
 use nvisy_core::primitive::{Confidence, LanguageTag};
 use serde::Deserialize;
 
+use super::context::Context;
 use super::term::Term;
 
 /// Confidence policy for a [`Dictionary`]'s matches.
@@ -129,10 +130,11 @@ pub struct Dictionary {
     #[serde(default, rename = "score")]
     pub scoring: Scoring,
     /// Context keywords that lift confidence when one of them
-    /// appears near a match.
+    /// appears near a match. Either a flat list applied
+    /// regardless of language, or a per-language map.
     #[builder(default)]
     #[serde(default)]
-    pub context: Vec<String>,
+    pub context: Context,
     /// BCP-47 language tags the dictionary applies to. Empty means
     /// "any language"; otherwise the recognizer skips the
     /// dictionary when the per-call language hint is not in the
@@ -222,7 +224,7 @@ struct DictionaryMetadata {
     #[serde(default)]
     score: Option<Scoring>,
     #[serde(default)]
-    context: Option<Vec<String>>,
+    context: Option<Context>,
     #[serde(default)]
     word_boundary: Option<bool>,
 }
diff --git a/crates/nvisy-pattern/src/recognition/mod.rs b/crates/nvisy-pattern/src/recognition/mod.rs
index e9be1ed0..7dff66a5 100644
--- a/crates/nvisy-pattern/src/recognition/mod.rs
+++ b/crates/nvisy-pattern/src/recognition/mod.rs
@@ -9,11 +9,13 @@
 //! on matches near a declared keyword.
 
 mod compiled;
+mod context;
 mod dictionary;
 mod recognizer;
 mod regex;
 mod term;
 
+pub use self::context::Context;
 pub use self::dictionary::{Dictionary, DictionaryBuilder, Scoring};
 pub use self::recognizer::{PatternRecognizer, PatternRecognizerBuilder};
 pub use self::regex::{Regex, RegexBuilder, Variant};
diff --git a/crates/nvisy-pattern/src/recognition/recognizer.rs b/crates/nvisy-pattern/src/recognition/recognizer.rs
index f4ad3db9..3423e80b 100644
--- a/crates/nvisy-pattern/src/recognition/recognizer.rs
+++ b/crates/nvisy-pattern/src/recognition/recognizer.rs
@@ -4,6 +4,7 @@ use aho_corasick::{AhoCorasick, MatchKind};
 use nvisy_context::{BoostRule, ContextEnhanced, Enhancer, SubstringMatcher};
 use nvisy_core::entity::{Entity, EntityLabelCatalog, EntityLabelRef};
 use nvisy_core::modality::Text;
+use nvisy_core::primitive::LanguageTag;
 use nvisy_core::recognition::{EntityRecognizer, RecognizerInput, RecognizerOutput};
 use nvisy_core::{Error, Result};
 use regex::RegexSet;
@@ -365,27 +366,53 @@ impl PatternRecognizerBuilder {
 
     /// Build the wrapping [`Enhancer`] from per-pattern and
     /// per-dictionary context keywords.
+    ///
+    /// Each rule's [`Context`] yields one [`BoostRule`] per language
+    /// scope: a global keyword list carries `language = None`, and
+    /// each per-language list carries its language tag. The
+    /// enhancer keys these by label and filters them against the
+    /// per-call language hint at apply time.
+    ///
+    /// [`Context`]: super::Context
     fn build_enhancer(&self) -> Enhancer {
         let boost_rules: Vec<BoostRule> = self
             .context_keywords()
-            .map(|(label, keywords)| BoostRule::for_label(label.clone(), keywords.iter().cloned()))
+            .map(|(label, language, keywords)| {
+                let rule = BoostRule::for_label(label.clone(), keywords.iter().cloned());
+                match language {
+                    Some(lang) => rule.with_language(lang.clone()),
+                    None => rule,
+                }
+            })
             .collect();
         Enhancer::new(boost_rules, Box::new(SubstringMatcher))
     }
 
-    /// Yield `(label, keywords)` for every pattern and dictionary
-    /// that declares a non-empty context.
-    fn context_keywords(&self) -> impl Iterator<Item = (&EntityLabelRef, &[String])> {
+    /// Yield `(label, language, keywords)` for every pattern and
+    /// dictionary that declares a non-empty context. Global
+    /// keywords carry `language = None`; per-language keywords
+    /// carry `Some(tag)`.
+    fn context_keywords(
+        &self,
+    ) -> impl Iterator<Item = (&EntityLabelRef, Option<&LanguageTag>, &[String])> {
         let pattern_keywords = self
             .patterns
             .iter()
             .filter(|p| !p.context.is_empty())
-            .map(|p| (&p.label, p.context.as_slice()));
+            .flat_map(|p| {
+                p.context
+                    .iter()
+                    .map(move |(lang, kws)| (&p.label, lang, kws))
+            });
         let dict_keywords = self
             .dictionaries
             .iter()
             .filter(|d| !d.context.is_empty())
-            .map(|d| (&d.label, d.context.as_slice()));
+            .flat_map(|d| {
+                d.context
+                    .iter()
+                    .map(move |(lang, kws)| (&d.label, lang, kws))
+            });
         pattern_keywords.chain(dict_keywords)
     }
 }
@@ -436,8 +463,11 @@ impl EntityRecognizer<Text> for PatternRecognizer {
 
 #[cfg(test)]
 mod tests {
+    use std::collections::HashMap;
+
     use nvisy_core::entity::{Entity, EntityLabelRef, builtins};
     use nvisy_core::modality::{Text, TextData};
+    use nvisy_core::primitive::Confidence;
     use nvisy_core::recognition::RecognizerInput;
 
     use super::*;
@@ -492,4 +522,182 @@ mod tests {
         let entities = run(&recognizer, "example").await;
         assert_eq!(entities.len(), 1, "substring match must be kept");
     }
+
+    #[test]
+    fn regex_parses_flat_context_as_global() {
+        let toml = r#"
+            name = "x"
+            label = "government_id"
+            context = ["ssn", "social security"]
+            [[variants]]
+            regex = "\\d+"
+        "#;
+        let regex = crate::Regex::from_toml(toml).expect("flat-context TOML parses");
+        assert!(matches!(regex.context, crate::Context::Global(_)));
+    }
+
+    #[test]
+    fn regex_parses_table_context_as_per_language() {
+        let toml = r#"
+            name = "x"
+            label = "payment_card"
+            [context]
+            en = ["card", "credit"]
+            es = ["tarjeta", "crédito"]
+            [[variants]]
+            regex = "\\d+"
+        "#;
+        let regex = crate::Regex::from_toml(toml).expect("table-context TOML parses");
+        let map = match regex.context {
+            crate::Context::PerLanguage(m) => m,
+            _ => panic!("expected PerLanguage"),
+        };
+        assert_eq!(map.len(), 2);
+    }
+
+    async fn run_with_language(
+        recognizer: &impl EntityRecognizer<Text>,
+        text: &str,
+        language: Option<&str>,
+    ) -> Vec<Entity<Text>> {
+        let mut input = RecognizerInput::new(TextData::new(text.to_owned()));
+        if let Some(lang) = language {
+            input = input.with_language(LanguageTag::new(lang).expect("language tag parses"));
+        }
+        recognizer
+            .recognize(&input)
+            .await
+            .expect("recognize succeeds")
+            .entities
+    }
+
+    fn per_language_credit_card_regex() -> crate::Regex {
+        let variant = crate::Variant::new(r"\b\d{16}\b")
+            .expect("variant builds")
+            .with_score(Confidence::clamped(0.5));
+        let mut context = HashMap::new();
+        context.insert(
+            LanguageTag::new("en").unwrap(),
+            vec!["credit".to_owned(), "card".to_owned()],
+        );
+        context.insert(
+            LanguageTag::new("es").unwrap(),
+            vec!["tarjeta".to_owned(), "crédito".to_owned()],
+        );
+        crate::Regex::builder()
+            .with_name("credit_card")
+            .with_label(builtins::PAYMENT_CARD.label_ref())
+            .with_context(crate::Context::PerLanguage(context))
+            .with_variants(vec![variant])
+            .build()
+            .expect("regex builds")
+    }
+
+    #[tokio::test]
+    async fn per_language_boost_fires_for_matching_language() {
+        let recognizer = PatternRecognizer::builder()
+            .with_pattern(per_language_credit_card_regex())
+            .build_context_enhanced()
+            .expect("recognizer builds");
+
+        let text = "Pay with your credit card 4111111111111111 today";
+        let entities = run_with_language(&recognizer, text, Some("en")).await;
+        let card = entities
+            .iter()
+            .find(|e| &text[e.location.start..e.location.end] == "4111111111111111")
+            .expect("card match present");
+        assert!(
+            card.confidence.get() > 0.5,
+            "English keyword `credit` should boost under en hint",
+        );
+    }
+
+    #[tokio::test]
+    async fn per_language_boost_fires_for_regional_variant() {
+        // Pattern is scoped `en`; hint is `en-US`. Primary subtag
+        // matches, so the boost must fire.
+        let recognizer = PatternRecognizer::builder()
+            .with_pattern(per_language_credit_card_regex())
+            .build_context_enhanced()
+            .expect("recognizer builds");
+
+        let text = "Pay with your credit card 4111111111111111 today";
+        let entities = run_with_language(&recognizer, text, Some("en-US")).await;
+        let card = entities
+            .iter()
+            .find(|e| &text[e.location.start..e.location.end] == "4111111111111111")
+            .expect("card match present");
+        assert!(
+            card.confidence.get() > 0.5,
+            "`en-US` hint should fire the `en`-scoped boost",
+        );
+    }
+
+    #[tokio::test]
+    async fn rule_language_filter_accepts_regional_variant() {
+        // Pattern is scoped `languages = ["en"]`; the per-call
+        // hint is `en-US`. The rule must still run.
+        let variant = crate::Variant::new(r"\b\d{3}-\d{2}-\d{4}\b")
+            .expect("variant builds")
+            .with_score(Confidence::clamped(0.5));
+        let regex = crate::Regex::builder()
+            .with_name("ssn")
+            .with_label(builtins::GOVERNMENT_ID.label_ref())
+            .with_variants(vec![variant])
+            .with_languages(vec![LanguageTag::new("en").unwrap()])
+            .build()
+            .expect("regex builds");
+
+        let recognizer = PatternRecognizer::builder()
+            .with_pattern(regex)
+            .build()
+            .expect("recognizer builds");
+
+        let entities = run_with_language(&recognizer, "SSN: 123-45-6789", Some("en-US")).await;
+        assert_eq!(
+            entities.len(),
+            1,
+            "`en`-scoped rule must run for `en-US` input",
+        );
+    }
+
+    #[tokio::test]
+    async fn per_language_boost_skipped_for_non_matching_language() {
+        let recognizer = PatternRecognizer::builder()
+            .with_pattern(per_language_credit_card_regex())
+            .build_context_enhanced()
+            .expect("recognizer builds");
+
+        // English keywords near the match, but caller asserted Spanish.
+        let text = "Pay with your credit card 4111111111111111 today";
+        let entities = run_with_language(&recognizer, text, Some("es")).await;
+        let card = entities
+            .iter()
+            .find(|e| &text[e.location.start..e.location.end] == "4111111111111111")
+            .expect("card match present");
+        assert!(
+            (card.confidence.get() - 0.5).abs() < f64::EPSILON,
+            "English keywords must not boost under es hint",
+        );
+    }
+
+    #[tokio::test]
+    async fn no_language_hint_unions_per_language_keywords() {
+        let recognizer = PatternRecognizer::builder()
+            .with_pattern(per_language_credit_card_regex())
+            .build_context_enhanced()
+            .expect("recognizer builds");
+
+        // English keyword near the match, no language hint set.
+        let text = "Pay with your credit card 4111111111111111 today";
+        let entities = run_with_language(&recognizer, text, None).await;
+        let card = entities
+            .iter()
+            .find(|e| &text[e.location.start..e.location.end] == "4111111111111111")
+            .expect("card match present");
+        assert!(
+            card.confidence.get() > 0.5,
+            "missing language hint should permit any per-language keyword to boost",
+        );
+    }
 }
diff --git a/crates/nvisy-pattern/src/recognition/regex.rs b/crates/nvisy-pattern/src/recognition/regex.rs
index f084f4a0..6c926ff4 100644
--- a/crates/nvisy-pattern/src/recognition/regex.rs
+++ b/crates/nvisy-pattern/src/recognition/regex.rs
@@ -6,6 +6,8 @@ use nvisy_core::entity::EntityLabelRef;
 use nvisy_core::primitive::{Confidence, LanguageTag};
 use serde::Deserialize;
 
+use super::context::Context;
+
 /// One regex strategy inside a [`Regex`] rule.
 ///
 /// A variant pairs a regex source with the confidence stamped on
@@ -133,10 +135,11 @@ pub struct Regex {
     /// Entity label every variant emits.
     pub label: EntityLabelRef,
     /// Context keywords that lift confidence when one of them
-    /// appears near a match.
+    /// appears near a match. Either a flat list applied
+    /// regardless of language, or a per-language map.
     #[builder(default)]
     #[serde(default)]
-    pub context: Vec<String>,
+    pub context: Context,
     /// Regex variants. At least one is required to produce matches;
     /// the recognizer skips rules with an empty variant list.
     pub variants: Vec<Variant>,

From 40d49750fbbc2d4bd776f64139319c03707670ba Mon Sep 17 00:00:00 2001
From: Oleh Martsokha <o.martsokha@gmail.com>
Date: Mon, 15 Jun 2026 06:51:38 +0200
Subject: [PATCH 08/14] feat(toolkit): SuppressionLayer with allow-list
 false-positive filter
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- New `nvisy-toolkit::deduplication::suppress::{SuppressionLayer,
  SuppressionParams}`. Three independent allow-list shapes apply
  by union:
  - `allow_values` — exact, ASCII case-insensitive
  - `allow_values_substring` — entity text contains the value
  - `allow_values_regex` — regex matched against entity text
- Operates on the entity's resolved text via `TextAt::text_at`.
  Fail-open when the resolver returns `None`: keep the entity
  rather than silently drop something we can't verify.
- Empty entries are filtered at construction (otherwise an empty
  substring would drop every entity via `str::contains("")`).
- `LayerParams` gains a nested `suppression: SuppressionParams`
  field; `LayerPipeline::from_params` becomes fallible and inserts
  the new layer between fuse and resolve, growing the canonical
  recipe to: calibrate → filter → fuse → suppress → resolve.
- Six `from_params` call sites updated (engine pipeline, engine
  tests, toolkit fixtures, toolkit example).
- 12 unit tests in `suppress::tests` (exact / substring / regex
  modes, case insensitivity, partial-overlap semantics, empty-
  entry filtering, union across modes, unresolved-location
  fail-open, invalid-regex error). Plus a pipeline-order test in
  `pipeline::tests` that pins the architectural intent that fuse
  collapses before suppress sees, suppress drops before resolve
  adjudicates.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
---
 Cargo.lock                                    |   1 +
 .../src/detection/phases/deduplication.rs     |   2 +-
 crates/nvisy-engine/tests/deduplication.rs    |   6 +-
 crates/nvisy-toolkit/Cargo.toml               |   1 +
 crates/nvisy-toolkit/examples/pipeline.rs     |   2 +-
 crates/nvisy-toolkit/src/deduplication/mod.rs |   5 +-
 .../nvisy-toolkit/src/deduplication/params.rs |  17 +-
 .../src/deduplication/pipeline.rs             |  93 ++++-
 .../src/deduplication/suppress/mod.rs         | 366 ++++++++++++++++++
 .../src/deduplication/suppress/params.rs      |  82 ++++
 .../nvisy-toolkit/tests/fixtures/pipeline.rs  |   2 +
 11 files changed, 562 insertions(+), 15 deletions(-)
 create mode 100644 crates/nvisy-toolkit/src/deduplication/suppress/mod.rs
 create mode 100644 crates/nvisy-toolkit/src/deduplication/suppress/params.rs

diff --git a/Cargo.lock b/Cargo.lock
index 5028cb9d..d2403490 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -3091,6 +3091,7 @@ dependencies = [
  "nvisy-ocr",
  "nvisy-pattern",
  "nvisy-stt",
+ "regex",
  "schemars",
  "serde",
  "serde_json",
diff --git a/crates/nvisy-engine/src/detection/phases/deduplication.rs b/crates/nvisy-engine/src/detection/phases/deduplication.rs
index 8c3dd742..1a4019f4 100644
--- a/crates/nvisy-engine/src/detection/phases/deduplication.rs
+++ b/crates/nvisy-engine/src/detection/phases/deduplication.rs
@@ -120,7 +120,7 @@ where
     // rewrap without losing audit state.
     let records = mem::take(&mut tree.root.audit.records);
     let entities: Vec<Entity<M>> = records.into_iter().map(|r| r.entity).collect();
-    let pipeline: LayerPipeline<M, _> = LayerPipeline::from_params(dedup);
+    let pipeline: LayerPipeline<M, _> = LayerPipeline::from_params(dedup)?;
     let ctx = LayerContext::new(&*tree).with_correlation_id(run_id);
     let deduped = pipeline.run(entities, &ctx).await;
     tree.root.audit.records = deduped.into_iter().map(EntityRecord::new).collect();
diff --git a/crates/nvisy-engine/tests/deduplication.rs b/crates/nvisy-engine/tests/deduplication.rs
index 49b497ce..b041f35a 100644
--- a/crates/nvisy-engine/tests/deduplication.rs
+++ b/crates/nvisy-engine/tests/deduplication.rs
@@ -83,7 +83,8 @@ async fn confidence_threshold_filters() {
             .with_confidence(conf(0.5))
             .test_build(),
     ];
-    let pipeline: LayerPipeline<Text, _> = LayerPipeline::from_params(&params);
+    let pipeline: LayerPipeline<Text, _> =
+        LayerPipeline::from_params(&params).expect("pipeline builds");
     let ctx = LayerContext::new(&tree).with_correlation_id(Uuid::nil());
     let result = pipeline.run(entities, &ctx).await;
     assert_eq!(result.len(), 1);
@@ -111,7 +112,8 @@ async fn full_pipeline() {
             .with_confidence(conf(0.85))
             .test_build(),
     ];
-    let pipeline: LayerPipeline<Text, _> = LayerPipeline::from_params(&LayerParams::default());
+    let pipeline: LayerPipeline<Text, _> =
+        LayerPipeline::from_params(&LayerParams::default()).expect("pipeline builds");
     let ctx = LayerContext::new(&tree).with_correlation_id(Uuid::nil());
     let result = pipeline.run(entities, &ctx).await;
     assert_eq!(result.len(), 1);
diff --git a/crates/nvisy-toolkit/Cargo.toml b/crates/nvisy-toolkit/Cargo.toml
index 9b2a3e63..f16707d0 100644
--- a/crates/nvisy-toolkit/Cargo.toml
+++ b/crates/nvisy-toolkit/Cargo.toml
@@ -76,6 +76,7 @@ tracing = { workspace = true, features = [] }
 
 # Text processing (unicode-aware folding for leak detection)
 unicode-normalization = { workspace = true, features = [] }
+regex = { workspace = true, features = [] }
 
 [dev-dependencies]
 # Internal test utilities (Entity::test_builder, …).
diff --git a/crates/nvisy-toolkit/examples/pipeline.rs b/crates/nvisy-toolkit/examples/pipeline.rs
index 01554730..66c6c908 100644
--- a/crates/nvisy-toolkit/examples/pipeline.rs
+++ b/crates/nvisy-toolkit/examples/pipeline.rs
@@ -93,7 +93,7 @@ async fn main() -> Result<()> {
         ..LayerParams::default()
     };
     let ctx = LayerContext::<Text, DocumentHandle<Text>>::new(&source);
-    let dedup = LayerPipeline::<Text, DocumentHandle<Text>>::from_params(&params);
+    let dedup = LayerPipeline::<Text, DocumentHandle<Text>>::from_params(&params)?;
 
     let before = entities.len();
     let entities = dedup.run(entities, &ctx).await;
diff --git a/crates/nvisy-toolkit/src/deduplication/mod.rs b/crates/nvisy-toolkit/src/deduplication/mod.rs
index 94852ada..c5cb05ab 100644
--- a/crates/nvisy-toolkit/src/deduplication/mod.rs
+++ b/crates/nvisy-toolkit/src/deduplication/mod.rs
@@ -30,7 +30,9 @@
 //! 1. **Calibrate** raw confidence scores per-recognizer.
 //! 2. **Filter** by allowed kinds + confidence floor.
 //! 3. **Fuse** co-referent entities into one (group + combine).
-//! 4. **Resolve conflicts** between different kinds on the same span.
+//! 4. **Suppress** entities whose matched text is on a
+//!    caller-supplied allow list.
+//! 5. **Resolve conflicts** between different kinds on the same span.
 //!
 //! Operators can swap steps, drop steps, or insert their own custom
 //! [`Layer`] impls by building the pipeline manually with
@@ -40,6 +42,7 @@ pub mod calibrate;
 pub mod filter;
 pub mod fuse;
 pub mod resolve;
+pub mod suppress;
 
 mod layer;
 mod params;
diff --git a/crates/nvisy-toolkit/src/deduplication/params.rs b/crates/nvisy-toolkit/src/deduplication/params.rs
index cc8ca7f8..8f73b6ae 100644
--- a/crates/nvisy-toolkit/src/deduplication/params.rs
+++ b/crates/nvisy-toolkit/src/deduplication/params.rs
@@ -1,17 +1,18 @@
 //! [`LayerParams`]: the per-call knob bag that drives the
 //! canonical deduplication recipe.
 //!
-//! Bundles every per-layer setting the four-step recipe needs
+//! Bundles every per-layer setting the five-step recipe needs
 //! ([`CalibrationMap`], filtering thresholds + allowed kinds,
 //! [`DeduplicationStrategy`], [`GroupingCriteria`],
-//! [`ConflictResolution`]) into a single deserialisable shape
-//! callers set once per request.
+//! [`SuppressionParams`], [`ConflictResolution`]) into a single
+//! deserialisable shape callers set once per request.
 //! [`LayerPipeline::from_params`] reads it and assembles the
-//! four-step pipeline.
+//! five-step pipeline.
 //!
 //! [`CalibrationMap`]: super::calibrate::CalibrationMap
 //! [`DeduplicationStrategy`]: super::fuse::DeduplicationStrategy
 //! [`GroupingCriteria`]: super::fuse::GroupingCriteria
+//! [`SuppressionParams`]: super::suppress::SuppressionParams
 //! [`ConflictResolution`]: super::resolve::ConflictResolution
 //! [`LayerPipeline::from_params`]: super::pipeline::LayerPipeline::from_params
 
@@ -23,8 +24,9 @@ use serde::{Deserialize, Serialize};
 use super::calibrate::CalibrationMap;
 use super::fuse::{DeduplicationStrategy, GroupingCriteria};
 use super::resolve::ConflictResolution;
+use super::suppress::SuppressionParams;
 
-/// Configuration for the deduplication pipeline's four-step recipe.
+/// Configuration for the deduplication pipeline's five-step recipe.
 ///
 /// Owns the sole confidence threshold in the pipeline: detection
 /// layers and recognizers do not filter on confidence themselves —
@@ -56,6 +58,11 @@ pub struct LayerParams {
     /// group.
     #[serde(default)]
     pub strategy: DeduplicationStrategy,
+    /// Allow-list inputs consumed by [`SuppressionLayer`].
+    ///
+    /// [`SuppressionLayer`]: super::suppress::SuppressionLayer
+    #[serde(default, skip_serializing_if = "SuppressionParams::is_empty")]
+    pub suppression: SuppressionParams,
     /// How to resolve conflicts when different entity kinds overlap
     /// the same span.
     #[serde(default)]
diff --git a/crates/nvisy-toolkit/src/deduplication/pipeline.rs b/crates/nvisy-toolkit/src/deduplication/pipeline.rs
index b431f9d3..35bc7ea5 100644
--- a/crates/nvisy-toolkit/src/deduplication/pipeline.rs
+++ b/crates/nvisy-toolkit/src/deduplication/pipeline.rs
@@ -7,6 +7,7 @@
 
 use std::marker::PhantomData;
 
+use nvisy_core::Error;
 use nvisy_core::entity::Entity;
 use nvisy_core::extraction::TextAt;
 use nvisy_core::modality::{Modality, Overlap};
@@ -18,6 +19,7 @@ use super::layer::{Layer, LayerContext};
 use super::params::LayerParams;
 use super::resolve::ResolveConflictsLayer;
 use super::span_size::SpanSize;
+use super::suppress::SuppressionLayer;
 
 const TARGET: &str = "nvisy_toolkit::deduplication";
 
@@ -92,17 +94,25 @@ where
     M::Location: Overlap + SpanSize,
     R: TextAt<M> + ?Sized,
 {
-    /// Build the canonical four-layer recipe: calibrate → filter →
-    /// fuse → resolve. Every layer's config is read from `params`.
-    pub fn from_params(params: &LayerParams) -> Self {
+    /// Build the canonical five-layer recipe: calibrate → filter →
+    /// fuse → suppress → resolve. Every layer's config is read
+    /// from `params`.
+    ///
+    /// # Errors
+    ///
+    /// Returns a validation error when any
+    /// `params.suppression.allow_values_regex` entry fails to compile.
+    pub fn from_params(params: &LayerParams) -> Result<Self, Error> {
         let filter = FilterLayer::new()
             .with_allowed_labels(params.allowed_labels.clone())
             .with_confidence_threshold(params.confidence_threshold);
-        Self::new()
+        let suppress = SuppressionLayer::from_params(&params.suppression)?;
+        Ok(Self::new()
             .with_layer(CalibrateLayer::new(params.calibration.clone()))
             .with_layer(filter)
             .with_layer(FuseLayer::new(params.strategy.clone(), params.grouping))
-            .with_layer(ResolveConflictsLayer::new(params.conflict_resolution))
+            .with_layer(suppress)
+            .with_layer(ResolveConflictsLayer::new(params.conflict_resolution)))
     }
 }
 
@@ -111,3 +121,76 @@ impl<M: Modality, R: TextAt<M> + ?Sized> Default for LayerPipeline<M, R> {
         Self::new()
     }
 }
+
+#[cfg(test)]
+mod tests {
+    use std::sync::Arc;
+
+    use async_trait::async_trait;
+    use nvisy_core::entity::{Entity, builtins};
+    use nvisy_core::modality::{Text, TextLocation};
+
+    use super::*;
+    use crate::deduplication::suppress::SuppressionParams;
+
+    struct TextSliceResolver(Arc<String>);
+
+    #[async_trait]
+    impl TextAt<Text> for TextSliceResolver {
+        async fn text_at(&self, location: &TextLocation) -> Option<String> {
+            self.0.get(location.start..location.end).map(String::from)
+        }
+    }
+
+    fn email(start: usize, end: usize) -> Entity<Text> {
+        Entity::test_builder(start, end)
+            .with_label(builtins::EMAIL_ADDRESS.label_ref())
+            .test_build()
+    }
+
+    fn url(start: usize, end: usize) -> Entity<Text> {
+        Entity::test_builder(start, end)
+            .with_label(builtins::URL.label_ref())
+            .test_build()
+    }
+
+    /// Pipeline-order contract: fuse collapses same-kind duplicates
+    /// before suppress sees them; suppress drops allowlisted
+    /// entities before resolve adjudicates cross-kind conflicts.
+    ///
+    /// Inputs: two EMAIL_ADDRESS hits at the same span (duplicates
+    /// of an allowlisted email value) plus one URL hit at an
+    /// overlapping span. After the pipeline, only the URL should
+    /// survive.
+    ///
+    /// Running fuse first means suppress inspects one collapsed
+    /// entity rather than each duplicate separately. Without
+    /// `suppress → resolve` ordering, the resolve step would
+    /// pick a winner between EMAIL and URL — possibly the EMAIL —
+    /// before the allow-list could remove it.
+    #[tokio::test]
+    async fn fuse_then_suppress_then_resolve() {
+        let source = "noreply@foo.com /docs";
+        let resolver = TextSliceResolver(Arc::new(source.to_owned()));
+
+        let params = LayerParams {
+            suppression: SuppressionParams::new()
+                .with_allow_values(vec!["noreply@foo.com".to_owned()]),
+            ..Default::default()
+        };
+
+        let pipeline: LayerPipeline<Text, _> =
+            LayerPipeline::from_params(&params).expect("pipeline builds");
+        let ctx = LayerContext::new(&resolver);
+
+        // Two EMAIL hits at [0, 15) — same kind, same span, fuse
+        // collapses to one. The collapsed entity matches the
+        // allow-list; suppress drops it. A URL at [0, 21) remains
+        // for resolve to leave untouched.
+        let entities = vec![email(0, 15), email(0, 15), url(0, 21)];
+
+        let survivors = pipeline.run(entities, &ctx).await;
+        assert_eq!(survivors.len(), 1, "only the URL should survive");
+        assert_eq!(survivors[0].label, builtins::URL.label_ref());
+    }
+}
diff --git a/crates/nvisy-toolkit/src/deduplication/suppress/mod.rs b/crates/nvisy-toolkit/src/deduplication/suppress/mod.rs
new file mode 100644
index 00000000..71b46a26
--- /dev/null
+++ b/crates/nvisy-toolkit/src/deduplication/suppress/mod.rs
@@ -0,0 +1,366 @@
+//! [`SuppressionLayer`]: drop entities whose matched text is on a
+//! caller-supplied allow list.
+//!
+//! See [`SuppressionParams`] for the three allow-list shapes.
+//!
+//! All three operate on the **entity's resolved text** (sliced from
+//! the source via [`TextAt::text_at`]), not the surrounding
+//! document. When the resolver returns `None` (e.g. malformed
+//! location), the entity is kept — better to surface a false
+//! positive than silently drop something we can't verify.
+//!
+//! Returns dropped entities from [`Layer::apply`] so the pipeline
+//! can attribute them in its drop-reason roll-up.
+//!
+//! [`Layer::apply`]: super::layer::Layer::apply
+//! [`TextAt::text_at`]: nvisy_core::extraction::TextAt::text_at
+
+mod params;
+
+use nvisy_core::Error;
+use nvisy_core::entity::Entity;
+use nvisy_core::extraction::TextAt;
+use nvisy_core::modality::Modality;
+use regex::Regex;
+
+pub use self::params::SuppressionParams;
+use super::layer::{Layer, LayerContext};
+
+const TARGET: &str = "nvisy_toolkit::deduplication::suppress";
+
+/// [`Layer`] that drops entities whose resolved text is on a
+/// caller-supplied allow list.
+///
+/// Construct via [`SuppressionLayer::new`] (empty, fast no-op) or
+/// [`SuppressionLayer::from_params`] (pre-validates the regex
+/// inputs). An empty layer short-circuits in
+/// [`Layer::apply`] without touching the resolver.
+#[derive(Debug, Clone, Default)]
+pub struct SuppressionLayer {
+    /// Pre-lowercased exact-match values.
+    allow_values: Vec<String>,
+    /// Pre-lowercased substring values.
+    allow_values_substring: Vec<String>,
+    /// Pre-compiled regex patterns.
+    allow_values_regex: Vec<Regex>,
+}
+
+impl SuppressionLayer {
+    /// Empty layer: passes every entity through unchanged.
+    #[must_use]
+    pub fn new() -> Self {
+        Self::default()
+    }
+
+    /// Construct from a [`SuppressionParams`]. Each regex source
+    /// is compiled once here.
+    ///
+    /// Empty strings are silently dropped at construction from
+    /// all three lists: an empty substring or regex would match
+    /// every entity, and an empty exact value is never what the
+    /// author meant. Treating them as configuration mistakes and
+    /// ignoring them is safer than wiping every result.
+    ///
+    /// # Errors
+    ///
+    /// Returns a validation error when any non-empty entry in
+    /// [`SuppressionParams::allow_values_regex`] is not a valid
+    /// regular expression.
+    pub fn from_params(params: &SuppressionParams) -> Result<Self, Error> {
+        let allow_values = params
+            .allow_values
+            .iter()
+            .filter(|v| !v.is_empty())
+            .map(|v| v.to_ascii_lowercase())
+            .collect();
+        let allow_values_substring = params
+            .allow_values_substring
+            .iter()
+            .filter(|v| !v.is_empty())
+            .map(|v| v.to_ascii_lowercase())
+            .collect();
+        let allow_values_regex = params
+            .allow_values_regex
+            .iter()
+            .filter(|src| !src.is_empty())
+            .map(|src| {
+                Regex::new(src).map_err(|e| {
+                    Error::validation(
+                        format!("invalid allow_values_regex `{src}`: {e}"),
+                        "nvisy-toolkit",
+                    )
+                })
+            })
+            .collect::<Result<Vec<_>, _>>()?;
+        Ok(Self {
+            allow_values,
+            allow_values_substring,
+            allow_values_regex,
+        })
+    }
+
+    /// Return `true` when no allow-list values are configured.
+    #[must_use]
+    pub fn is_empty(&self) -> bool {
+        self.allow_values.is_empty()
+            && self.allow_values_substring.is_empty()
+            && self.allow_values_regex.is_empty()
+    }
+
+    /// Return `true` when `text` matches any configured allow-list
+    /// entry under exact / substring / regex semantics.
+    #[must_use]
+    pub fn suppresses(&self, text: &str) -> bool {
+        let lowered = text.to_ascii_lowercase();
+        if self.allow_values.iter().any(|v| v == &lowered) {
+            return true;
+        }
+        if self
+            .allow_values_substring
+            .iter()
+            .any(|v| lowered.contains(v.as_str()))
+        {
+            return true;
+        }
+        if self.allow_values_regex.iter().any(|r| r.is_match(text)) {
+            return true;
+        }
+        false
+    }
+}
+
+#[async_trait::async_trait]
+impl<M, R> Layer<M, R> for SuppressionLayer
+where
+    M: Modality,
+    R: TextAt<M> + ?Sized,
+{
+    async fn apply(
+        &self,
+        entities: &mut Vec<Entity<M>>,
+        ctx: &LayerContext<'_, M, R>,
+    ) -> Vec<Entity<M>> {
+        if self.is_empty() || entities.is_empty() {
+            return Vec::new();
+        }
+
+        let mut suppressed_flags = Vec::with_capacity(entities.len());
+        for entity in entities.iter() {
+            let suppress = match ctx.resolver.text_at(&entity.location).await {
+                Some(text) => self.suppresses(&text),
+                None => false,
+            };
+            suppressed_flags.push(suppress);
+        }
+
+        let mut suppressed_count = 0usize;
+        let mut dropped = Vec::new();
+        let mut idx = 0usize;
+        entities.retain(|entity| {
+            let drop = suppressed_flags[idx];
+            idx += 1;
+            if drop {
+                suppressed_count += 1;
+                dropped.push(entity.clone());
+            }
+            !drop
+        });
+
+        if suppressed_count > 0 {
+            tracing::debug!(
+                target: TARGET,
+                suppressed = suppressed_count,
+                "entities suppressed by allow list",
+            );
+        }
+        dropped
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use std::sync::Arc;
+
+    use nvisy_core::entity::{Entity, builtins};
+    use nvisy_core::modality::{Text, TextLocation};
+
+    use super::*;
+
+    /// Test resolver that resolves locations to a slice of a known
+    /// string. The Noop test_resolver in the parent module returns
+    /// `None`, which is fine for layers that don't touch text but
+    /// useless here.
+    struct TextSliceResolver {
+        text: Arc<String>,
+    }
+
+    #[async_trait::async_trait]
+    impl TextAt<Text> for TextSliceResolver {
+        async fn text_at(&self, location: &TextLocation) -> Option<String> {
+            self.text
+                .get(location.start..location.end)
+                .map(String::from)
+        }
+    }
+
+    fn entity(start: usize, end: usize) -> Entity<Text> {
+        Entity::test_builder(start, end)
+            .with_label(builtins::EMAIL_ADDRESS.label_ref())
+            .test_build()
+    }
+
+    fn params(values: &[&str], substrings: &[&str], regexes: &[&str]) -> SuppressionParams {
+        SuppressionParams {
+            allow_values: values.iter().map(|s| (*s).to_owned()).collect(),
+            allow_values_substring: substrings.iter().map(|s| (*s).to_owned()).collect(),
+            allow_values_regex: regexes.iter().map(|s| (*s).to_owned()).collect(),
+        }
+    }
+
+    async fn apply_to(
+        layer: &SuppressionLayer,
+        source: &str,
+        mut entities: Vec<Entity<Text>>,
+    ) -> (Vec<Entity<Text>>, Vec<Entity<Text>>) {
+        let resolver = TextSliceResolver {
+            text: Arc::new(source.to_owned()),
+        };
+        let ctx = LayerContext::new(&resolver);
+        let dropped = layer.apply(&mut entities, &ctx).await;
+        (entities, dropped)
+    }
+
+    #[tokio::test]
+    async fn empty_layer_is_noop() {
+        let layer = SuppressionLayer::new();
+        let source = "noreply@foo.com matters";
+        let (kept, dropped) = apply_to(&layer, source, vec![entity(0, 15)]).await;
+        assert_eq!(kept.len(), 1);
+        assert!(dropped.is_empty());
+    }
+
+    #[tokio::test]
+    async fn exact_match_drops_entity() {
+        let layer = SuppressionLayer::from_params(&params(&["noreply@foo.com"], &[], &[]))
+            .expect("layer builds");
+        let source = "noreply@foo.com matters";
+        let (kept, dropped) = apply_to(&layer, source, vec![entity(0, 15)]).await;
+        assert!(kept.is_empty());
+        assert_eq!(dropped.len(), 1);
+    }
+
+    #[tokio::test]
+    async fn exact_match_is_case_insensitive() {
+        let layer = SuppressionLayer::from_params(&params(&["NoReply@Foo.com"], &[], &[]))
+            .expect("layer builds");
+        let source = "noreply@foo.com matters";
+        let (kept, _) = apply_to(&layer, source, vec![entity(0, 15)]).await;
+        assert!(kept.is_empty(), "case-insensitive allow-list should drop");
+    }
+
+    #[tokio::test]
+    async fn exact_match_does_not_drop_partial_overlap() {
+        // Allow value is a substring of the entity, but not an
+        // exact equal — exact mode keeps it.
+        let layer = SuppressionLayer::from_params(&params(&["noreply@foo.com"], &[], &[]))
+            .expect("layer builds");
+        let source = "noreply@foo.com support team";
+        let (kept, _) = apply_to(&layer, source, vec![entity(0, 28)]).await;
+        assert_eq!(kept.len(), 1, "exact mode must not drop on partial overlap");
+    }
+
+    #[tokio::test]
+    async fn substring_match_drops_partial_overlap() {
+        let layer = SuppressionLayer::from_params(&params(&[], &["noreply@foo.com"], &[]))
+            .expect("layer builds");
+        let source = "noreply@foo.com support team";
+        let (kept, dropped) = apply_to(&layer, source, vec![entity(0, 28)]).await;
+        assert!(kept.is_empty());
+        assert_eq!(dropped.len(), 1);
+    }
+
+    #[tokio::test]
+    async fn regex_match_drops_entity() {
+        let layer = SuppressionLayer::from_params(&params(&[], &[], &[r"^test-.*@foo\.com$"]))
+            .expect("layer builds");
+        let source = "test-1234@foo.com";
+        let (kept, dropped) = apply_to(&layer, source, vec![entity(0, source.len())]).await;
+        assert!(kept.is_empty());
+        assert_eq!(dropped.len(), 1);
+    }
+
+    #[tokio::test]
+    async fn invalid_regex_at_construction_errors() {
+        let result = SuppressionLayer::from_params(&params(&[], &[], &["["]));
+        assert!(result.is_err(), "invalid regex must error at construction");
+    }
+
+    #[tokio::test]
+    async fn unresolved_text_keeps_entity() {
+        // Pass an entity with a location outside the source text.
+        // text_at returns None, the layer falls open and keeps the
+        // entity rather than silently dropping it.
+        let layer = SuppressionLayer::from_params(&params(&["noreply@foo.com"], &[], &[]))
+            .expect("layer builds");
+        let source = "short";
+        let (kept, dropped) = apply_to(&layer, source, vec![entity(100, 200)]).await;
+        assert_eq!(kept.len(), 1);
+        assert!(dropped.is_empty());
+    }
+
+    #[tokio::test]
+    async fn empty_substring_entry_does_not_suppress_everything() {
+        // `str::contains("")` is always true; without the
+        // construction-time filter, an empty entry would wipe
+        // every match. Confirm the filter holds.
+        let layer = SuppressionLayer::from_params(&params(&[], &[""], &[])).expect("layer builds");
+        let source = "noreply@foo.com matters";
+        let (kept, dropped) = apply_to(&layer, source, vec![entity(0, 15)]).await;
+        assert_eq!(kept.len(), 1, "empty substring must not drop");
+        assert!(dropped.is_empty());
+    }
+
+    #[tokio::test]
+    async fn empty_exact_entry_is_ignored() {
+        // An empty exact entry could only match an empty entity,
+        // which recognizers don't emit. Filtering it costs
+        // nothing and keeps the lookup short.
+        let layer = SuppressionLayer::from_params(&params(&[""], &[], &[])).expect("layer builds");
+        let source = "noreply@foo.com matters";
+        let (kept, dropped) = apply_to(&layer, source, vec![entity(0, 15)]).await;
+        assert_eq!(kept.len(), 1);
+        assert!(dropped.is_empty());
+    }
+
+    #[tokio::test]
+    async fn empty_regex_entry_is_ignored() {
+        // An empty regex matches at every position. Same
+        // catastrophe as empty substring; filter at construction.
+        let layer = SuppressionLayer::from_params(&params(&[], &[], &[""])).expect("layer builds");
+        let source = "noreply@foo.com matters";
+        let (kept, dropped) = apply_to(&layer, source, vec![entity(0, 15)]).await;
+        assert_eq!(kept.len(), 1, "empty regex must not drop");
+        assert!(dropped.is_empty());
+    }
+
+    #[tokio::test]
+    async fn union_across_modes() {
+        // Three allow-list shapes, three entities. Each entity is
+        // suppressed by exactly one mode; all three drop.
+        let layer = SuppressionLayer::from_params(&params(
+            &["alpha@x.com"],
+            &["bravo"],
+            &[r"^charlie-\d+$"],
+        ))
+        .expect("layer builds");
+        let source = "alpha@x.com bravo-team-12 charlie-99";
+        let entities = vec![
+            entity(0, 11),  // exact
+            entity(12, 25), // substring
+            entity(26, 36), // regex
+        ];
+        let (kept, dropped) = apply_to(&layer, source, entities).await;
+        assert!(kept.is_empty(), "all three should be suppressed");
+        assert_eq!(dropped.len(), 3);
+    }
+}
diff --git a/crates/nvisy-toolkit/src/deduplication/suppress/params.rs b/crates/nvisy-toolkit/src/deduplication/suppress/params.rs
new file mode 100644
index 00000000..d229572d
--- /dev/null
+++ b/crates/nvisy-toolkit/src/deduplication/suppress/params.rs
@@ -0,0 +1,82 @@
+//! [`SuppressionParams`]: caller-supplied allow lists consumed by
+//! [`SuppressionLayer`].
+//!
+//! Three independent allow-list shapes apply by union — an entity
+//! is dropped when **any** of them fires:
+//!
+//! - exact ASCII case-insensitive equality
+//! - ASCII case-insensitive substring containment
+//! - regex match
+//!
+//! All three operate on the entity's resolved text (sliced from
+//! the source via [`TextAt::text_at`]), not the surrounding
+//! document.
+//!
+//! [`SuppressionLayer`]: super::SuppressionLayer
+//! [`TextAt::text_at`]: nvisy_core::extraction::TextAt::text_at
+
+use schemars::JsonSchema;
+use serde::{Deserialize, Serialize};
+
+/// Caller-supplied allow lists consumed by [`SuppressionLayer`].
+///
+/// All three lists default to empty; the layer short-circuits as
+/// a fast no-op when every list is empty.
+///
+/// [`SuppressionLayer`]: super::SuppressionLayer
+#[derive(Debug, Clone, Default, PartialEq, Eq)]
+#[derive(Serialize, Deserialize, JsonSchema)]
+pub struct SuppressionParams {
+    /// Drop entities whose matched text equals one of these values
+    /// (ASCII case-insensitive). Use for known false-positive
+    /// values like `noreply@yourcompany.com`.
+    #[serde(default, skip_serializing_if = "Vec::is_empty")]
+    pub allow_values: Vec<String>,
+    /// Drop entities whose matched text contains one of these
+    /// values as a substring (ASCII case-insensitive). Use when an
+    /// over-matching recognizer surrounds a known false-positive
+    /// value with extra text.
+    #[serde(default, skip_serializing_if = "Vec::is_empty")]
+    pub allow_values_substring: Vec<String>,
+    /// Drop entities whose matched text matches one of these
+    /// regular expressions. Compiled once at layer construction.
+    #[serde(default, skip_serializing_if = "Vec::is_empty")]
+    pub allow_values_regex: Vec<String>,
+}
+
+impl SuppressionParams {
+    /// Empty params: every allow list defaults to empty.
+    #[must_use]
+    pub fn new() -> Self {
+        Self::default()
+    }
+
+    /// Set the exact-match list.
+    #[must_use]
+    pub fn with_allow_values(mut self, values: Vec<String>) -> Self {
+        self.allow_values = values;
+        self
+    }
+
+    /// Set the substring-match list.
+    #[must_use]
+    pub fn with_allow_values_substring(mut self, values: Vec<String>) -> Self {
+        self.allow_values_substring = values;
+        self
+    }
+
+    /// Set the regex-match list.
+    #[must_use]
+    pub fn with_allow_values_regex(mut self, values: Vec<String>) -> Self {
+        self.allow_values_regex = values;
+        self
+    }
+
+    /// Return `true` when no allow-list values are configured.
+    #[must_use]
+    pub fn is_empty(&self) -> bool {
+        self.allow_values.is_empty()
+            && self.allow_values_substring.is_empty()
+            && self.allow_values_regex.is_empty()
+    }
+}
diff --git a/crates/nvisy-toolkit/tests/fixtures/pipeline.rs b/crates/nvisy-toolkit/tests/fixtures/pipeline.rs
index c3ba3f0e..9963227e 100644
--- a/crates/nvisy-toolkit/tests/fixtures/pipeline.rs
+++ b/crates/nvisy-toolkit/tests/fixtures/pipeline.rs
@@ -64,6 +64,7 @@ impl Fixture {
 
         let ctx = LayerContext::<Text, _>::new(&buffer);
         let entities = LayerPipeline::<Text, _>::from_params(&dedup_params())
+            .expect("pipeline builds")
             .run(detected, &ctx)
             .await;
 
@@ -103,6 +104,7 @@ impl Fixture {
 
         let ctx = LayerContext::<Tabular, _>::new(&buffer);
         let entities = LayerPipeline::<Tabular, _>::from_params(&dedup_params())
+            .expect("pipeline builds")
             .run(detected, &ctx)
             .await;
 

From 85940d9ffb2f3a58c27e3059e8d4d0a633140d4c Mon Sep 17 00:00:00 2001
From: Oleh Martsokha <o.martsokha@gmail.com>
Date: Tue, 16 Jun 2026 01:37:39 +0200
Subject: [PATCH 09/14] feat(pattern,core): country scope + Presidio-aligned
 shipped pattern set
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- nvisy-core: new `CountryCode` (ISO 3166-1 alpha-2, validated via
  `celes`). `RecognizerInput.country: Option<CountryCode>` +
  `applies_to_country` mirror the existing language scoping.
- nvisy-pattern: `Regex.countries` / `Dictionary.countries`
  (Vec<CountryCode>, empty = world). `PatternRecognizer::recognize`
  honours per-call jurisdiction hints alongside language hints.
- Asset tree reorganized into `world/`, `us/`, `uk/` subtrees;
  shipped accessors split into per-region modules
  (`shipped::patterns::{world,us,uk}`, dictionaries::world).
  Macro helpers exported as `__shipped_pattern` /
  `__shipped_dictionary` so sub-modules resolve their own
  include_str! paths.
- Pattern count grows 23 → 34: world unchanged at 18; us 5 → 10
  (+itin, npi, mbi, bank_account, medical_license); uk added at 6
  (nhs, nino, driving_licence, postcode, vehicle_registration,
  passport).
- Validators split into per-country sub-modules with dotted names
  (`us.ssn`, `us.aba_routing`, `us.npi`, `us.dea_number`,
  `uk.nhs`, `uk.nino`). Shared `luhn`, `iban`, `phone`, `date`
  stay flat. World pattern set extended (brand-aware credit_card,
  RFC5322-loose email, Cisco-form MAC, IPv4 CIDR, comprehensive
  IPv6 alternation set).
- Pattern scores normalized to a single conservative-baseline
  scheme (most regex-only matches land at 0.1–0.5 before context
  boost). Confidence threshold in the toolkit test fixture
  lowered to 0.35 to match.
- `assets/NOTICE.md` documents third-party regex provenance for
  the shipped pattern assets.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
---
 Cargo.lock                                    |  11 ++
 Cargo.toml                                    |   1 +
 crates/nvisy-context/README.md                |   4 +-
 crates/nvisy-context/src/rule.rs              |   9 +-
 crates/nvisy-core/Cargo.toml                  |   1 +
 crates/nvisy-core/src/primitive/country.rs    | 179 ++++++++++++++++++
 crates/nvisy-core/src/primitive/mod.rs        |   6 +-
 crates/nvisy-core/src/recognition/input.rs    |  40 +++-
 crates/nvisy-core/src/recognition/mod.rs      |   2 +-
 .../nvisy-llm/src/recognition/file_prompt.rs  |   8 +-
 .../nvisy-ner/src/recognition/aggregation.rs  |  12 +-
 crates/nvisy-ner/src/recognition/config.rs    |   6 +-
 crates/nvisy-pattern/README.md                |  10 +-
 crates/nvisy-pattern/assets/NOTICE.md         |  38 ++++
 .../{ => world}/finance/cryptocurrencies.csv  |   0
 .../{ => world}/finance/cryptocurrencies.toml |   0
 .../{ => world}/finance/currencies.csv        |   0
 .../{ => world}/finance/currencies.toml       |   0
 .../{general => world/personal}/languages.csv |   0
 .../personal}/languages.toml                  |   0
 .../personal}/nationalities.toml              |   0
 .../personal}/nationalities.txt               |   0
 .../personal}/religions.toml                  |   0
 .../{general => world/personal}/religions.txt |   0
 .../assets/patterns/contact/email.toml        |   6 -
 .../patterns/finance/us_bank_routing.toml     |   6 -
 .../assets/patterns/identity/ssn.toml         |   8 -
 .../patterns/identity/us_drivers_license.toml |   6 -
 .../assets/patterns/network/ipv4.toml         |   6 -
 .../assets/patterns/network/ipv6.toml         |   6 -
 .../assets/patterns/network/mac_address.toml  |   6 -
 .../assets/patterns/uk/contact/postcode.toml  |  23 +++
 .../patterns/uk/identity/driving_licence.toml |  24 +++
 .../assets/patterns/uk/identity/nhs.toml      |  17 ++
 .../assets/patterns/uk/identity/nino.toml     |  16 ++
 .../assets/patterns/uk/identity/passport.toml |  26 +++
 .../patterns/uk/vehicle/registration.toml     |  39 ++++
 .../patterns/us/finance/bank_account.toml     |  26 +++
 .../patterns/us/finance/bank_routing.toml     |  32 ++++
 .../assets/patterns/us/health/mbi.toml        |  27 +++
 .../patterns/us/health/medical_license.toml   |  36 ++++
 .../assets/patterns/us/health/npi.toml        |  28 +++
 .../patterns/us/identity/drivers_license.toml |  26 +++
 .../assets/patterns/us/identity/itin.toml     |  28 +++
 .../identity/passport.toml}                   |   3 +-
 .../identity/postal_code.toml}                |   1 +
 .../assets/patterns/us/identity/ssn.toml      |  14 ++
 .../assets/patterns/world/contact/email.toml  |  12 ++
 .../patterns/{ => world}/contact/phone.toml   |   2 +-
 .../patterns/{ => world}/contact/url.toml     |   2 +-
 .../{ => world}/credentials/aws_key.toml      |   0
 .../credentials/generic_api_key.toml          |   2 +-
 .../{ => world}/credentials/github_token.toml |   0
 .../{ => world}/credentials/private_key.toml  |   0
 .../{ => world}/credentials/stripe_key.toml   |   0
 .../{ => world}/finance/bitcoin_address.toml  |   2 +-
 .../{ => world}/finance/credit_card.toml      |  10 +-
 .../{ => world}/finance/ethereum_address.toml |   2 +-
 .../patterns/{ => world}/finance/iban.toml    |   2 +-
 .../{ => world}/finance/swift_code.toml       |   2 +-
 .../assets/patterns/world/network/ipv4.toml   |   8 +
 .../assets/patterns/world/network/ipv6.toml   |  25 +++
 .../patterns/world/network/mac_address.toml   |  16 ++
 .../{ => world}/personal/date_of_birth.toml   |   2 +-
 .../{ => world}/personal/datetime.toml        |   2 +-
 .../nvisy-pattern/src/recognition/compiled.rs |   8 +-
 .../src/recognition/dictionary.rs             |  18 +-
 .../src/recognition/recognizer.rs             | 150 ++++++++++++++-
 crates/nvisy-pattern/src/recognition/regex.rs |  20 +-
 .../nvisy-pattern/src/shipped/dictionaries.rs | 102 ----------
 .../src/shipped/dictionaries/mod.rs           |  77 ++++++++
 .../src/shipped/dictionaries/world.rs         |  50 +++++
 crates/nvisy-pattern/src/shipped/patterns.rs  | 162 ----------------
 .../nvisy-pattern/src/shipped/patterns/mod.rs | 108 +++++++++++
 .../nvisy-pattern/src/shipped/patterns/uk.rs  |  46 +++++
 .../nvisy-pattern/src/shipped/patterns/us.rs  |  64 +++++++
 .../src/shipped/patterns/world.rs             | 106 +++++++++++
 crates/nvisy-pattern/src/validators/mod.rs    |  34 +++-
 crates/nvisy-pattern/src/validators/uk/mod.rs |  12 ++
 crates/nvisy-pattern/src/validators/uk/nhs.rs |  67 +++++++
 .../nvisy-pattern/src/validators/uk/nino.rs   |  57 ++++++
 .../src/validators/us/aba_routing.rs          |  64 +++++++
 .../src/validators/us/dea_number.rs           |  83 ++++++++
 crates/nvisy-pattern/src/validators/us/mod.rs |  16 ++
 crates/nvisy-pattern/src/validators/us/npi.rs |  66 +++++++
 .../src/validators/{ => us}/ssn.rs            |   8 +-
 .../testdata/inputs/identity.txt              |   3 +
 crates/nvisy-pattern/testdata/inputs/uk.txt   |  12 ++
 .../nvisy-pattern/tests/shipped_detection.rs  |  53 ++++++
 .../tests/fixtures/registries.rs              |  11 +-
 90 files changed, 1848 insertions(+), 375 deletions(-)
 create mode 100644 crates/nvisy-core/src/primitive/country.rs
 create mode 100644 crates/nvisy-pattern/assets/NOTICE.md
 rename crates/nvisy-pattern/assets/dictionaries/{ => world}/finance/cryptocurrencies.csv (100%)
 rename crates/nvisy-pattern/assets/dictionaries/{ => world}/finance/cryptocurrencies.toml (100%)
 rename crates/nvisy-pattern/assets/dictionaries/{ => world}/finance/currencies.csv (100%)
 rename crates/nvisy-pattern/assets/dictionaries/{ => world}/finance/currencies.toml (100%)
 rename crates/nvisy-pattern/assets/dictionaries/{general => world/personal}/languages.csv (100%)
 rename crates/nvisy-pattern/assets/dictionaries/{general => world/personal}/languages.toml (100%)
 rename crates/nvisy-pattern/assets/dictionaries/{general => world/personal}/nationalities.toml (100%)
 rename crates/nvisy-pattern/assets/dictionaries/{general => world/personal}/nationalities.txt (100%)
 rename crates/nvisy-pattern/assets/dictionaries/{general => world/personal}/religions.toml (100%)
 rename crates/nvisy-pattern/assets/dictionaries/{general => world/personal}/religions.txt (100%)
 delete mode 100644 crates/nvisy-pattern/assets/patterns/contact/email.toml
 delete mode 100644 crates/nvisy-pattern/assets/patterns/finance/us_bank_routing.toml
 delete mode 100644 crates/nvisy-pattern/assets/patterns/identity/ssn.toml
 delete mode 100644 crates/nvisy-pattern/assets/patterns/identity/us_drivers_license.toml
 delete mode 100644 crates/nvisy-pattern/assets/patterns/network/ipv4.toml
 delete mode 100644 crates/nvisy-pattern/assets/patterns/network/ipv6.toml
 delete mode 100644 crates/nvisy-pattern/assets/patterns/network/mac_address.toml
 create mode 100644 crates/nvisy-pattern/assets/patterns/uk/contact/postcode.toml
 create mode 100644 crates/nvisy-pattern/assets/patterns/uk/identity/driving_licence.toml
 create mode 100644 crates/nvisy-pattern/assets/patterns/uk/identity/nhs.toml
 create mode 100644 crates/nvisy-pattern/assets/patterns/uk/identity/nino.toml
 create mode 100644 crates/nvisy-pattern/assets/patterns/uk/identity/passport.toml
 create mode 100644 crates/nvisy-pattern/assets/patterns/uk/vehicle/registration.toml
 create mode 100644 crates/nvisy-pattern/assets/patterns/us/finance/bank_account.toml
 create mode 100644 crates/nvisy-pattern/assets/patterns/us/finance/bank_routing.toml
 create mode 100644 crates/nvisy-pattern/assets/patterns/us/health/mbi.toml
 create mode 100644 crates/nvisy-pattern/assets/patterns/us/health/medical_license.toml
 create mode 100644 crates/nvisy-pattern/assets/patterns/us/health/npi.toml
 create mode 100644 crates/nvisy-pattern/assets/patterns/us/identity/drivers_license.toml
 create mode 100644 crates/nvisy-pattern/assets/patterns/us/identity/itin.toml
 rename crates/nvisy-pattern/assets/patterns/{identity/us_passport.toml => us/identity/passport.toml} (74%)
 rename crates/nvisy-pattern/assets/patterns/{identity/us_postal_code.toml => us/identity/postal_code.toml} (84%)
 create mode 100644 crates/nvisy-pattern/assets/patterns/us/identity/ssn.toml
 create mode 100644 crates/nvisy-pattern/assets/patterns/world/contact/email.toml
 rename crates/nvisy-pattern/assets/patterns/{ => world}/contact/phone.toml (97%)
 rename crates/nvisy-pattern/assets/patterns/{ => world}/contact/url.toml (87%)
 rename crates/nvisy-pattern/assets/patterns/{ => world}/credentials/aws_key.toml (100%)
 rename crates/nvisy-pattern/assets/patterns/{ => world}/credentials/generic_api_key.toml (94%)
 rename crates/nvisy-pattern/assets/patterns/{ => world}/credentials/github_token.toml (100%)
 rename crates/nvisy-pattern/assets/patterns/{ => world}/credentials/private_key.toml (100%)
 rename crates/nvisy-pattern/assets/patterns/{ => world}/credentials/stripe_key.toml (100%)
 rename crates/nvisy-pattern/assets/patterns/{ => world}/finance/bitcoin_address.toml (91%)
 rename crates/nvisy-pattern/assets/patterns/{ => world}/finance/credit_card.toml (55%)
 rename crates/nvisy-pattern/assets/patterns/{ => world}/finance/ethereum_address.toml (88%)
 rename crates/nvisy-pattern/assets/patterns/{ => world}/finance/iban.toml (93%)
 rename crates/nvisy-pattern/assets/patterns/{ => world}/finance/swift_code.toml (90%)
 create mode 100644 crates/nvisy-pattern/assets/patterns/world/network/ipv4.toml
 create mode 100644 crates/nvisy-pattern/assets/patterns/world/network/ipv6.toml
 create mode 100644 crates/nvisy-pattern/assets/patterns/world/network/mac_address.toml
 rename crates/nvisy-pattern/assets/patterns/{ => world}/personal/date_of_birth.toml (97%)
 rename crates/nvisy-pattern/assets/patterns/{ => world}/personal/datetime.toml (98%)
 delete mode 100644 crates/nvisy-pattern/src/shipped/dictionaries.rs
 create mode 100644 crates/nvisy-pattern/src/shipped/dictionaries/mod.rs
 create mode 100644 crates/nvisy-pattern/src/shipped/dictionaries/world.rs
 delete mode 100644 crates/nvisy-pattern/src/shipped/patterns.rs
 create mode 100644 crates/nvisy-pattern/src/shipped/patterns/mod.rs
 create mode 100644 crates/nvisy-pattern/src/shipped/patterns/uk.rs
 create mode 100644 crates/nvisy-pattern/src/shipped/patterns/us.rs
 create mode 100644 crates/nvisy-pattern/src/shipped/patterns/world.rs
 create mode 100644 crates/nvisy-pattern/src/validators/uk/mod.rs
 create mode 100644 crates/nvisy-pattern/src/validators/uk/nhs.rs
 create mode 100644 crates/nvisy-pattern/src/validators/uk/nino.rs
 create mode 100644 crates/nvisy-pattern/src/validators/us/aba_routing.rs
 create mode 100644 crates/nvisy-pattern/src/validators/us/dea_number.rs
 create mode 100644 crates/nvisy-pattern/src/validators/us/mod.rs
 create mode 100644 crates/nvisy-pattern/src/validators/us/npi.rs
 rename crates/nvisy-pattern/src/validators/{ => us}/ssn.rs (89%)
 create mode 100644 crates/nvisy-pattern/testdata/inputs/uk.txt

diff --git a/Cargo.lock b/Cargo.lock
index d2403490..f987251d 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -590,6 +590,16 @@ dependencies = [
  "shlex",
 ]
 
+[[package]]
+name = "celes"
+version = "2.8.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "55028d5b1eebb35237512a3838ce5583211434a233c8bb179551a7197ffb7bd4"
+dependencies = [
+ "phf",
+ "serde",
+]
+
 [[package]]
 name = "cfb"
 version = "0.7.3"
@@ -2906,6 +2916,7 @@ version = "0.1.0"
 dependencies = [
  "async-trait",
  "bytes",
+ "celes",
  "derive_builder",
  "derive_more",
  "hipstr",
diff --git a/Cargo.toml b/Cargo.toml
index 0e9387ec..77a0822a 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -71,6 +71,7 @@ hipstr = { version = "0.8", features = ["serde"] }
 jiff = { version = "0.2", features = ["serde"] }
 semver = { version = "1.0", features = ["serde"] }
 oxilangtag = { version = "0.1", features = ["serde"] }
+celes = { version = "2.8", features = [] }
 humantime = { version = "2.1", features = [] }
 humantime-serde = { version = "1.1", features = [] }
 type-map = { version = "0.5", features = [] }
diff --git a/crates/nvisy-context/README.md b/crates/nvisy-context/README.md
index e8653b7a..cf59218a 100644
--- a/crates/nvisy-context/README.md
+++ b/crates/nvisy-context/README.md
@@ -6,8 +6,8 @@ Post-recognition keyword-boost enhancer for the Nvisy runtime.
 
 ## Overview
 
-Mirrors Presidio's `ContextAwareEnhancer` pattern. Every recognizer
-that wants score boosting declares a `Context` (a list of keywords
+Context-aware confidence boosting. Every recognizer that wants
+score boosting declares a `Context` (a list of keywords
 plus optional window / boost overrides), registered against the
 recognizer's name. After recognition, `ContextEnhancer` walks each
 detected `Entity<Text>`, looks the recognizer name up in the
diff --git a/crates/nvisy-context/src/rule.rs b/crates/nvisy-context/src/rule.rs
index f45c423a..da3c809d 100644
--- a/crates/nvisy-context/src/rule.rs
+++ b/crates/nvisy-context/src/rule.rs
@@ -23,19 +23,16 @@ use nvisy_core::entity::EntityLabelRef;
 use nvisy_core::primitive::{Confidence, LanguageTag};
 
 /// Default window radius in words *before* an entity match.
-/// Mirrors Presidio's `context_prefix_count = 5`.
 pub const DEFAULT_PREFIX_WORDS: usize = 5;
 
 /// Default window radius in words *after* an entity match. Set
 /// equal to [`DEFAULT_PREFIX_WORDS`] so trailing context like
 /// "123-45-6789 (social security)" boosts the same as leading
-/// context. Presidio defaults `context_suffix_count` to `0`; we
-/// pick symmetric defaults because operators rarely realize the
-/// asymmetry exists, and one-sided windows surprise people.
+/// context. Asymmetric windows surprise operators who rarely
+/// realize the asymmetry exists, so we pick symmetric defaults.
 pub const DEFAULT_SUFFIX_WORDS: usize = 5;
 
-/// Default additive boost applied when a keyword fires. Matches
-/// Presidio's `context_similarity_factor = 0.35`.
+/// Default additive boost applied when a keyword fires.
 pub const DEFAULT_BOOST: f64 = 0.35;
 
 /// Per-label boost rule the [`Enhancer`] applies at runtime.
diff --git a/crates/nvisy-core/Cargo.toml b/crates/nvisy-core/Cargo.toml
index e815c4ca..3d9e4c03 100644
--- a/crates/nvisy-core/Cargo.toml
+++ b/crates/nvisy-core/Cargo.toml
@@ -45,6 +45,7 @@ uuid = { workspace = true, features = [] }
 bytes = { workspace = true, features = [] }
 hipstr = { workspace = true, features = [] }
 oxilangtag = { workspace = true, features = [] }
+celes = { workspace = true, features = [] }
 type-map = { workspace = true, features = [] }
 
 # Async runtime and parallelism
diff --git a/crates/nvisy-core/src/primitive/country.rs b/crates/nvisy-core/src/primitive/country.rs
new file mode 100644
index 00000000..6fc9fca0
--- /dev/null
+++ b/crates/nvisy-core/src/primitive/country.rs
@@ -0,0 +1,179 @@
+//! [ISO 3166-1 alpha-2] country code type.
+//!
+//! Thin wrapper around [`celes::Country`] that exposes only the
+//! alpha-2 surface — alpha-3, numeric, and long-name forms are
+//! reachable via [`CountryCode::into_inner`] for the rare consumer
+//! that needs them.
+//!
+//! [ISO 3166-1 alpha-2]: https://en.wikipedia.org/wiki/ISO_3166-1_alpha-2
+
+use std::fmt;
+use std::str::FromStr;
+
+use serde::{Deserialize, Serialize};
+
+use crate::Error;
+
+/// A validated [ISO 3166-1 alpha-2] country code.
+///
+/// Two-letter uppercase code identifying a country or region.
+/// Construction accepts any case (`"us"`, `"US"`, `"uS"`) and
+/// rejects anything that isn't a known ISO 3166-1 alpha-2 code.
+///
+/// # Examples
+///
+/// ```
+/// use nvisy_core::primitive::CountryCode;
+///
+/// let us = CountryCode::new("us").unwrap();
+/// assert_eq!(us.as_str(), "US");
+///
+/// assert!(CountryCode::new("USA").is_err());
+/// assert!(CountryCode::new("XZ").is_err());
+/// ```
+///
+/// [ISO 3166-1 alpha-2]: https://en.wikipedia.org/wiki/ISO_3166-1_alpha-2
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
+#[derive(Serialize, Deserialize)]
+#[serde(try_from = "String", into = "String")]
+pub struct CountryCode(celes::Country);
+
+impl CountryCode {
+    /// Parse and validate a country code.
+    ///
+    /// Input is case-insensitive; the canonical form returned by
+    /// [`as_str`] is uppercase.
+    ///
+    /// # Errors
+    ///
+    /// Returns a validation error when `code` is not a known
+    /// ISO 3166-1 alpha-2 code.
+    ///
+    /// [`as_str`]: Self::as_str
+    pub fn new(code: &str) -> Result<Self, Error> {
+        celes::Country::from_alpha2(code).map(Self).map_err(|_| {
+            Error::validation(
+                format!("country code `{code}` is not a known ISO 3166-1 alpha-2 code"),
+                "nvisy-core",
+            )
+        })
+    }
+
+    /// Return the canonical (uppercase) alpha-2 representation.
+    #[must_use]
+    pub fn as_str(&self) -> &'static str {
+        self.0.alpha2
+    }
+
+    /// Return the underlying [`celes::Country`] by value for callers
+    /// that need alpha-3, numeric, or the country's long name.
+    #[must_use]
+    pub fn into_inner(self) -> celes::Country {
+        self.0
+    }
+}
+
+impl fmt::Display for CountryCode {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        f.write_str(self.as_str())
+    }
+}
+
+impl FromStr for CountryCode {
+    type Err = Error;
+
+    fn from_str(s: &str) -> Result<Self, Self::Err> {
+        Self::new(s)
+    }
+}
+
+impl TryFrom<String> for CountryCode {
+    type Error = Error;
+
+    fn try_from(value: String) -> Result<Self, Self::Error> {
+        Self::new(&value)
+    }
+}
+
+impl From<CountryCode> for String {
+    fn from(code: CountryCode) -> Self {
+        code.as_str().to_owned()
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn accepts_uppercase_alpha2() {
+        let us = CountryCode::new("US").unwrap();
+        assert_eq!(us.as_str(), "US");
+    }
+
+    #[test]
+    fn accepts_lowercase_alpha2_and_normalises() {
+        let us = CountryCode::new("us").unwrap();
+        assert_eq!(us.as_str(), "US");
+    }
+
+    #[test]
+    fn accepts_mixed_case() {
+        let gb = CountryCode::new("Gb").unwrap();
+        assert_eq!(gb.as_str(), "GB");
+    }
+
+    #[test]
+    fn rejects_alpha3_code() {
+        assert!(CountryCode::new("USA").is_err());
+        assert!(CountryCode::new("GBR").is_err());
+    }
+
+    #[test]
+    fn rejects_unassigned_two_letter_code() {
+        assert!(CountryCode::new("XZ").is_err());
+        assert!(CountryCode::new("ZZ").is_err());
+    }
+
+    #[test]
+    fn rejects_wrong_length() {
+        assert!(CountryCode::new("U").is_err());
+        assert!(CountryCode::new("").is_err());
+        assert!(CountryCode::new("USAB").is_err());
+    }
+
+    #[test]
+    fn rejects_non_alpha() {
+        assert!(CountryCode::new("U1").is_err());
+        assert!(CountryCode::new("00").is_err());
+    }
+
+    #[test]
+    fn equality_is_canonical() {
+        // `us` and `US` should compare equal once parsed.
+        let lower = CountryCode::new("us").unwrap();
+        let upper = CountryCode::new("US").unwrap();
+        assert_eq!(lower, upper);
+    }
+
+    #[test]
+    fn serde_roundtrip_uppercase() {
+        let us = CountryCode::new("us").unwrap();
+        let json = serde_json::to_string(&us).unwrap();
+        assert_eq!(json, "\"US\"");
+        let back: CountryCode = serde_json::from_str(&json).unwrap();
+        assert_eq!(back, us);
+    }
+
+    #[test]
+    fn from_str_parses_alpha2() {
+        let de: CountryCode = "DE".parse().unwrap();
+        assert_eq!(de.as_str(), "DE");
+    }
+
+    #[test]
+    fn display_writes_alpha2() {
+        let fr = CountryCode::new("FR").unwrap();
+        assert_eq!(format!("{fr}"), "FR");
+    }
+}
diff --git a/crates/nvisy-core/src/primitive/mod.rs b/crates/nvisy-core/src/primitive/mod.rs
index e2fb3927..1744a0b7 100644
--- a/crates/nvisy-core/src/primitive/mod.rs
+++ b/crates/nvisy-core/src/primitive/mod.rs
@@ -8,16 +8,18 @@
 //!   ([`LanguageTag`], [`LanguageDetection`]).
 //! - `rendering` — output-side knobs ([`Color`], [`Dpi`]).
 //!
-//! [`TimeSpan`] is the single root-level primitive — temporal
-//! intervals don't have any companion types worth grouping yet.
+//! Root-level primitives: [`CountryCode`] (ISO 3166-1 alpha-2),
+//! [`TimeSpan`] (temporal intervals).
 
 mod confidence;
+mod country;
 mod geometry;
 mod language;
 mod rendering;
 mod time_span;
 
 pub use self::confidence::{Confidence, ConfidenceThreshold};
+pub use self::country::CountryCode;
 pub use self::geometry::{
     BoundingBox, Dimensions, IBoundingBox, NormalizedBoundingBox, Polygon, Vertex,
 };
diff --git a/crates/nvisy-core/src/recognition/input.rs b/crates/nvisy-core/src/recognition/input.rs
index 0192878c..2bbc57ba 100644
--- a/crates/nvisy-core/src/recognition/input.rs
+++ b/crates/nvisy-core/src/recognition/input.rs
@@ -20,7 +20,7 @@ use uuid::Uuid;
 use super::Hint;
 use crate::extraction::Artifacts;
 use crate::modality::Modality;
-use crate::primitive::LanguageTag;
+use crate::primitive::{CountryCode, LanguageTag};
 
 /// Per-call input for an [`EntityRecognizer<M>`].
 ///
@@ -48,6 +48,12 @@ pub struct RecognizerInput<M: Modality> {
     ///
     /// [`language`]: Self::language
     pub candidate_languages: Vec<LanguageTag>,
+    /// Caller-asserted jurisdiction. When `Some`, recognizers
+    /// that carry per-rule `countries` scopes skip rules that
+    /// don't match. `None` means "any" — rules that declare
+    /// countries still run as a permissive fallback so callers
+    /// who don't pass a hint don't lose detections.
+    pub country: Option<CountryCode>,
     /// Uploader-supplied hint regions in modality-native coordinates.
     /// Recognizers that support hint adjudication (LLM-based NER, VLM)
     /// read this; recognizers that don't (pattern, dictionary) ignore
@@ -73,6 +79,7 @@ impl<M: Modality> RecognizerInput<M> {
             artifacts: Artifacts::new(),
             language: None,
             candidate_languages: Vec::new(),
+            country: None,
             hints: Vec::new(),
             labels: Vec::new(),
             correlation_id: None,
@@ -100,6 +107,13 @@ impl<M: Modality> RecognizerInput<M> {
         self
     }
 
+    /// Set the asserted jurisdiction.
+    #[must_use]
+    pub fn with_country(mut self, country: CountryCode) -> Self {
+        self.country = Some(country);
+        self
+    }
+
     /// Attach uploader-supplied hint regions.
     #[must_use]
     pub fn with_hints(mut self, hints: Vec<Hint<M>>) -> Self {
@@ -144,4 +158,28 @@ impl<M: Modality> RecognizerInput<M> {
             None => true,
         }
     }
+
+    /// Whether a recognizer rule scoped to `allowed` countries
+    /// should run for this call.
+    ///
+    /// - An empty `allowed` list means the rule is jurisdiction-
+    ///   agnostic and always runs.
+    /// - When `allowed` is non-empty and [`country`] is `Some(_)`,
+    ///   the rule runs only when the hint is in `allowed`.
+    /// - When [`country`] is `None`, the rule still runs — we
+    ///   can't disprove applicability without a hint, and
+    ///   silently dropping detections would surprise callers
+    ///   who simply forgot to set the field.
+    ///
+    /// [`country`]: Self::country
+    #[must_use]
+    pub fn applies_to_country(&self, allowed: &[CountryCode]) -> bool {
+        if allowed.is_empty() {
+            return true;
+        }
+        match self.country.as_ref() {
+            Some(hint) => allowed.iter().any(|a| a == hint),
+            None => true,
+        }
+    }
 }
diff --git a/crates/nvisy-core/src/recognition/mod.rs b/crates/nvisy-core/src/recognition/mod.rs
index cd111f0a..25786aaf 100644
--- a/crates/nvisy-core/src/recognition/mod.rs
+++ b/crates/nvisy-core/src/recognition/mod.rs
@@ -1,4 +1,4 @@
-//! [`EntityRecognizer<M>`]: the Presidio-style entity-detection trait.
+//! [`EntityRecognizer<M>`]: the entity-detection trait.
 //!
 //! Every detector that emits [`Entity<M>`] for some modality `M`
 //! implements this trait — pattern recognizers, NER bento clients,
diff --git a/crates/nvisy-llm/src/recognition/file_prompt.rs b/crates/nvisy-llm/src/recognition/file_prompt.rs
index 510694da..ff153840 100644
--- a/crates/nvisy-llm/src/recognition/file_prompt.rs
+++ b/crates/nvisy-llm/src/recognition/file_prompt.rs
@@ -1,9 +1,9 @@
 //! [`FilePrompt`]: load a [`Prompt`] from a TOML file.
 //!
-//! Mirrors Presidio's prompt-as-data model: the user-prompt template
-//! plus the label map plus the labels-to-ignore set all live in a
-//! single TOML file. Users swap behaviour by editing the file, not
-//! by writing Rust. Templates use Jinja2 syntax via `minijinja`.
+//! Prompt-as-data shape: the user-prompt template plus the label
+//! map plus the labels-to-ignore set all live in a single TOML
+//! file. Users swap behaviour by editing the file, not by writing
+//! Rust. Templates use Jinja2 syntax via `minijinja`.
 //!
 //! # TOML schema
 //!
diff --git a/crates/nvisy-ner/src/recognition/aggregation.rs b/crates/nvisy-ner/src/recognition/aggregation.rs
index e79fb791..7e5f5993 100644
--- a/crates/nvisy-ner/src/recognition/aggregation.rs
+++ b/crates/nvisy-ner/src/recognition/aggregation.rs
@@ -1,13 +1,11 @@
 //! [`AggregationStrategy`] and [`AlignmentMode`]: policies for
 //! collapsing per-token NER predictions into entity spans.
 //!
-//! Mirror the equivalent knobs on Presidio's
-//! `NerModelConfiguration`. The producer engine may apply them
-//! server-side (the Bento `inference-gliner` already returns
-//! aggregated spans), in which case the consumer-side knobs are
-//! advisory; or the producer may emit unaggregated
-//! token-classification output, in which case [`NerRecognizer`]
-//! applies them itself.
+//! The producer engine may apply them server-side (the Bento
+//! `inference-gliner` already returns aggregated spans), in which
+//! case the consumer-side knobs are advisory; or the producer may
+//! emit unaggregated token-classification output, in which case
+//! [`NerRecognizer`] applies them itself.
 //!
 //! [`NerRecognizer`]: super::NerRecognizer
 
diff --git a/crates/nvisy-ner/src/recognition/config.rs b/crates/nvisy-ner/src/recognition/config.rs
index c8af8c87..e8216d00 100644
--- a/crates/nvisy-ner/src/recognition/config.rs
+++ b/crates/nvisy-ner/src/recognition/config.rs
@@ -1,8 +1,8 @@
 //! [`NerModel`]: client-side NER tuning knobs.
 //!
-//! Mirrors Presidio's `NerModelConfiguration`. Applied inside
-//! [`NerRecognizer`] before entities are emitted, so backends stay
-//! dumb and label normalization is uniform across them.
+//! Applied inside [`NerRecognizer`] before entities are emitted,
+//! so backends stay dumb and label normalization is uniform
+//! across them.
 //!
 //! Construct via [`NerModel::default`] for the canonical defaults
 //! (canonical label map, no ignored labels, score = 0.85, no
diff --git a/crates/nvisy-pattern/README.md b/crates/nvisy-pattern/README.md
index f39a99e2..fa8acd75 100644
--- a/crates/nvisy-pattern/README.md
+++ b/crates/nvisy-pattern/README.md
@@ -8,11 +8,11 @@ Nvisy runtime.
 ## Overview
 
 `PatternRecognizer` compiles a set of `Regex` rules (each holding
-one or more regex `Variant`s, a Presidio-shaped multi-strategy
-group) and `Dictionary` term lists into pooled scanners — one
-shared `regex::RegexSet` for the regex side and one shared
-`aho_corasick::AhoCorasick` automaton for the literal side. A
-single walk over the input runs both scanners and emits
+one or more regex `Variant`s grouped as a multi-strategy detector
+for one entity type) and `Dictionary` term lists into pooled
+scanners — one shared `regex::RegexSet` for the regex side and
+one shared `aho_corasick::AhoCorasick` automaton for the literal
+side. A single walk over the input runs both scanners and emits
 `Entity<Text>` values in modality-local byte coordinates.
 
 Rules may declare per-label context keywords. Calling
diff --git a/crates/nvisy-pattern/assets/NOTICE.md b/crates/nvisy-pattern/assets/NOTICE.md
new file mode 100644
index 00000000..161792e5
--- /dev/null
+++ b/crates/nvisy-pattern/assets/NOTICE.md
@@ -0,0 +1,38 @@
+# Third-party attribution: shipped pattern assets
+
+Several shipped pattern TOMLs under this directory carry regular
+expressions adapted from [Microsoft Presidio][presidio]
+(`microsoft/presidio`, MIT licensed), specifically the
+`presidio-analyzer/presidio_analyzer/predefined_recognizers/`
+classes referenced inline in each TOML's leading comment.
+
+The Presidio MIT license text is reproduced below, as required by its
+condition that the copyright and permission notices accompany all copies.
+
+[presidio]: https://github.com/microsoft/presidio
+
+---
+
+```
+MIT License
+
+Copyright (c) Microsoft Corporation.
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+```
diff --git a/crates/nvisy-pattern/assets/dictionaries/finance/cryptocurrencies.csv b/crates/nvisy-pattern/assets/dictionaries/world/finance/cryptocurrencies.csv
similarity index 100%
rename from crates/nvisy-pattern/assets/dictionaries/finance/cryptocurrencies.csv
rename to crates/nvisy-pattern/assets/dictionaries/world/finance/cryptocurrencies.csv
diff --git a/crates/nvisy-pattern/assets/dictionaries/finance/cryptocurrencies.toml b/crates/nvisy-pattern/assets/dictionaries/world/finance/cryptocurrencies.toml
similarity index 100%
rename from crates/nvisy-pattern/assets/dictionaries/finance/cryptocurrencies.toml
rename to crates/nvisy-pattern/assets/dictionaries/world/finance/cryptocurrencies.toml
diff --git a/crates/nvisy-pattern/assets/dictionaries/finance/currencies.csv b/crates/nvisy-pattern/assets/dictionaries/world/finance/currencies.csv
similarity index 100%
rename from crates/nvisy-pattern/assets/dictionaries/finance/currencies.csv
rename to crates/nvisy-pattern/assets/dictionaries/world/finance/currencies.csv
diff --git a/crates/nvisy-pattern/assets/dictionaries/finance/currencies.toml b/crates/nvisy-pattern/assets/dictionaries/world/finance/currencies.toml
similarity index 100%
rename from crates/nvisy-pattern/assets/dictionaries/finance/currencies.toml
rename to crates/nvisy-pattern/assets/dictionaries/world/finance/currencies.toml
diff --git a/crates/nvisy-pattern/assets/dictionaries/general/languages.csv b/crates/nvisy-pattern/assets/dictionaries/world/personal/languages.csv
similarity index 100%
rename from crates/nvisy-pattern/assets/dictionaries/general/languages.csv
rename to crates/nvisy-pattern/assets/dictionaries/world/personal/languages.csv
diff --git a/crates/nvisy-pattern/assets/dictionaries/general/languages.toml b/crates/nvisy-pattern/assets/dictionaries/world/personal/languages.toml
similarity index 100%
rename from crates/nvisy-pattern/assets/dictionaries/general/languages.toml
rename to crates/nvisy-pattern/assets/dictionaries/world/personal/languages.toml
diff --git a/crates/nvisy-pattern/assets/dictionaries/general/nationalities.toml b/crates/nvisy-pattern/assets/dictionaries/world/personal/nationalities.toml
similarity index 100%
rename from crates/nvisy-pattern/assets/dictionaries/general/nationalities.toml
rename to crates/nvisy-pattern/assets/dictionaries/world/personal/nationalities.toml
diff --git a/crates/nvisy-pattern/assets/dictionaries/general/nationalities.txt b/crates/nvisy-pattern/assets/dictionaries/world/personal/nationalities.txt
similarity index 100%
rename from crates/nvisy-pattern/assets/dictionaries/general/nationalities.txt
rename to crates/nvisy-pattern/assets/dictionaries/world/personal/nationalities.txt
diff --git a/crates/nvisy-pattern/assets/dictionaries/general/religions.toml b/crates/nvisy-pattern/assets/dictionaries/world/personal/religions.toml
similarity index 100%
rename from crates/nvisy-pattern/assets/dictionaries/general/religions.toml
rename to crates/nvisy-pattern/assets/dictionaries/world/personal/religions.toml
diff --git a/crates/nvisy-pattern/assets/dictionaries/general/religions.txt b/crates/nvisy-pattern/assets/dictionaries/world/personal/religions.txt
similarity index 100%
rename from crates/nvisy-pattern/assets/dictionaries/general/religions.txt
rename to crates/nvisy-pattern/assets/dictionaries/world/personal/religions.txt
diff --git a/crates/nvisy-pattern/assets/patterns/contact/email.toml b/crates/nvisy-pattern/assets/patterns/contact/email.toml
deleted file mode 100644
index fb37ff45..00000000
--- a/crates/nvisy-pattern/assets/patterns/contact/email.toml
+++ /dev/null
@@ -1,6 +0,0 @@
-name = "email"
-label = "email_address"
-
-[[variants]]
-regex = "\\b[a-zA-Z0-9._%+\\-]+@[a-zA-Z0-9.\\-]+\\.[a-zA-Z]{2,}\\b"
-score = 0.95
diff --git a/crates/nvisy-pattern/assets/patterns/finance/us_bank_routing.toml b/crates/nvisy-pattern/assets/patterns/finance/us_bank_routing.toml
deleted file mode 100644
index 12010716..00000000
--- a/crates/nvisy-pattern/assets/patterns/finance/us_bank_routing.toml
+++ /dev/null
@@ -1,6 +0,0 @@
-name = "us-bank-routing"
-label = "bank_routing"
-
-[[variants]]
-regex = "\\b(?:0[1-9]|[12]\\d|3[0-2])\\d{7}\\b"
-score = 0.5
diff --git a/crates/nvisy-pattern/assets/patterns/identity/ssn.toml b/crates/nvisy-pattern/assets/patterns/identity/ssn.toml
deleted file mode 100644
index f2076b26..00000000
--- a/crates/nvisy-pattern/assets/patterns/identity/ssn.toml
+++ /dev/null
@@ -1,8 +0,0 @@
-name = "ssn"
-label = "government_id"
-context = ["social security", "ssn", "tax id", "taxpayer identification"]
-
-[[variants]]
-regex = "\\b(\\d{3})-(\\d{2})-(\\d{4})\\b"
-score = 0.9
-validator = "ssn"
diff --git a/crates/nvisy-pattern/assets/patterns/identity/us_drivers_license.toml b/crates/nvisy-pattern/assets/patterns/identity/us_drivers_license.toml
deleted file mode 100644
index 873af318..00000000
--- a/crates/nvisy-pattern/assets/patterns/identity/us_drivers_license.toml
+++ /dev/null
@@ -1,6 +0,0 @@
-name = "us-drivers-license"
-label = "drivers_license"
-
-[[variants]]
-regex = "\\b[A-Z]\\d{3}-\\d{4}-\\d{4}\\b"
-score = 0.4
diff --git a/crates/nvisy-pattern/assets/patterns/network/ipv4.toml b/crates/nvisy-pattern/assets/patterns/network/ipv4.toml
deleted file mode 100644
index d64403dd..00000000
--- a/crates/nvisy-pattern/assets/patterns/network/ipv4.toml
+++ /dev/null
@@ -1,6 +0,0 @@
-name = "ipv4"
-label = "ip_address"
-
-[[variants]]
-regex = "\\b(?:(?:25[0-5]|2[0-4]\\d|[01]?\\d\\d?)\\.){3}(?:25[0-5]|2[0-4]\\d|[01]?\\d\\d?)\\b"
-score = 0.75
diff --git a/crates/nvisy-pattern/assets/patterns/network/ipv6.toml b/crates/nvisy-pattern/assets/patterns/network/ipv6.toml
deleted file mode 100644
index dfc12ecd..00000000
--- a/crates/nvisy-pattern/assets/patterns/network/ipv6.toml
+++ /dev/null
@@ -1,6 +0,0 @@
-name = "ipv6"
-label = "ip_address"
-
-[[variants]]
-regex = "\\b(?:[0-9a-fA-F]{1,4}:){7}[0-9a-fA-F]{1,4}\\b|(?:[0-9a-fA-F]{1,4}:){1,7}:|::(?:[0-9a-fA-F]{1,4}:){0,5}[0-9a-fA-F]{1,4}\\b"
-score = 0.75
diff --git a/crates/nvisy-pattern/assets/patterns/network/mac_address.toml b/crates/nvisy-pattern/assets/patterns/network/mac_address.toml
deleted file mode 100644
index fcca5944..00000000
--- a/crates/nvisy-pattern/assets/patterns/network/mac_address.toml
+++ /dev/null
@@ -1,6 +0,0 @@
-name = "mac-address"
-label = "mac_address"
-
-[[variants]]
-regex = "\\b(?:[0-9A-Fa-f]{2}[:\\-]){5}[0-9A-Fa-f]{2}\\b"
-score = 0.85
diff --git a/crates/nvisy-pattern/assets/patterns/uk/contact/postcode.toml b/crates/nvisy-pattern/assets/patterns/uk/contact/postcode.toml
new file mode 100644
index 00000000..8c0db110
--- /dev/null
+++ b/crates/nvisy-pattern/assets/patterns/uk/contact/postcode.toml
@@ -0,0 +1,23 @@
+# UK Postcode.
+#
+# Reference: https://en.wikipedia.org/wiki/Postcodes_in_the_United_Kingdom
+
+name = "uk-postcode"
+label = "postal_code"
+countries = ["GB"]
+languages = ["en"]
+context = [
+  "postcode",
+  "post code",
+  "postal code",
+  "zip",
+  "address",
+  "delivery",
+  "mailing",
+  "shipping",
+  "correspondence",
+]
+
+[[variants]]
+regex = '\b(GIR\s?0AA|[A-PR-UWYZ][0-9][ABCDEFGHJKPSTUW]?\s?[0-9][ABD-HJLNP-UW-Z]{2}|[A-PR-UWYZ][0-9]{2}\s?[0-9][ABD-HJLNP-UW-Z]{2}|[A-PR-UWYZ][A-HK-Y][0-9][ABEHMNPRVWXY]?\s?[0-9][ABD-HJLNP-UW-Z]{2}|[A-PR-UWYZ][A-HK-Y][0-9]{2}\s?[0-9][ABD-HJLNP-UW-Z]{2})\b'
+score = 0.1
diff --git a/crates/nvisy-pattern/assets/patterns/uk/identity/driving_licence.toml b/crates/nvisy-pattern/assets/patterns/uk/identity/driving_licence.toml
new file mode 100644
index 00000000..47ec45b8
--- /dev/null
+++ b/crates/nvisy-pattern/assets/patterns/uk/identity/driving_licence.toml
@@ -0,0 +1,24 @@
+# UK Driving Licence Number (DVLA).
+#
+# Format: 16-char alphanumeric — 5-char surname (letters padded
+# with 9s) + decade digit + month (01-12 or 51-62 for female) +
+# day + year digit + 2 initial chars (or 9) + check + 2 letters.
+
+name = "uk-driving-licence"
+label = "drivers_license"
+countries = ["GB"]
+languages = ["en"]
+context = [
+  "driving licence",
+  "driving license",
+  "driver's licence",
+  "driver's license",
+  "dvla",
+  "dl number",
+  "licence number",
+  "license number",
+]
+
+[[variants]]
+regex = '\b[A-Z9]{5}[0-9](?:0[1-9]|1[0-2]|5[1-9]|6[0-2])(?:0[1-9]|[12][0-9]|3[01])[0-9][A-Z9]{2}[A-Z0-9][A-Z]{2}\b'
+score = 0.5
diff --git a/crates/nvisy-pattern/assets/patterns/uk/identity/nhs.toml b/crates/nvisy-pattern/assets/patterns/uk/identity/nhs.toml
new file mode 100644
index 00000000..8c39ee2e
--- /dev/null
+++ b/crates/nvisy-pattern/assets/patterns/uk/identity/nhs.toml
@@ -0,0 +1,17 @@
+# UK National Health Service number.
+
+name = "uk-nhs"
+label = "medical_id"
+countries = ["GB"]
+languages = ["en"]
+context = [
+  "nhs",
+  "national health service",
+  "health services authority",
+  "health authority",
+]
+
+[[variants]]
+regex = '\b([0-9]{3})[- ]?([0-9]{3})[- ]?([0-9]{4})\b'
+score = 0.5
+validator = "uk.nhs"
diff --git a/crates/nvisy-pattern/assets/patterns/uk/identity/nino.toml b/crates/nvisy-pattern/assets/patterns/uk/identity/nino.toml
new file mode 100644
index 00000000..8de7d04e
--- /dev/null
+++ b/crates/nvisy-pattern/assets/patterns/uk/identity/nino.toml
@@ -0,0 +1,16 @@
+# UK National Insurance Number.
+#
+# Rust's `regex` crate doesn't support look-around, so the
+# reserved-prefix exclusions live in the `uk.nino` validator
+# rather than the regex.
+
+name = "uk-nino"
+label = "national_insurance_number"
+countries = ["GB"]
+languages = ["en"]
+context = ["national insurance", "ni number", "nino"]
+
+[[variants]]
+regex = '\b([a-ceghj-pr-tw-zA-CEGHJ-PR-TW-Z][a-ceghj-npr-tw-zA-CEGHJ-NPR-TW-Z]) ?([0-9]{2}) ?([0-9]{2}) ?([0-9]{2}) ?([a-dA-D])\b'
+score = 0.5
+validator = "uk.nino"
diff --git a/crates/nvisy-pattern/assets/patterns/uk/identity/passport.toml b/crates/nvisy-pattern/assets/patterns/uk/identity/passport.toml
new file mode 100644
index 00000000..03b8b9a4
--- /dev/null
+++ b/crates/nvisy-pattern/assets/patterns/uk/identity/passport.toml
@@ -0,0 +1,26 @@
+# UK Passport Number (post-2015 format).
+#
+# 2 letters + 7 digits. Same shape as several other countries'
+# passports — without the context-keyword boost the false-positive
+# rate is high, so the default score is low and callers rely on
+# context to lift legitimate matches.
+
+name = "uk-passport"
+label = "passport_number"
+countries = ["GB"]
+languages = ["en"]
+context = [
+  "passport",
+  "passport number",
+  "travel document",
+  "uk passport",
+  "british passport",
+  "her majesty",
+  "his majesty",
+  "hm passport",
+  "hmpo",
+]
+
+[[variants]]
+regex = '\b[A-Z]{2}\d{7}\b'
+score = 0.1
diff --git a/crates/nvisy-pattern/assets/patterns/uk/vehicle/registration.toml b/crates/nvisy-pattern/assets/patterns/uk/vehicle/registration.toml
new file mode 100644
index 00000000..0d585be7
--- /dev/null
+++ b/crates/nvisy-pattern/assets/patterns/uk/vehicle/registration.toml
@@ -0,0 +1,39 @@
+# UK Vehicle Registration Number.
+#
+# Covers three eras still seen on the road:
+# - Current (2001+): 2 area letters + 2-digit age + 3 random
+#   letters, e.g. "AB51 ABC"
+# - Prefix (1983-2001): year letter + 1-3 digits + 3 letters
+# - Suffix (1963-1983): 3 letters + 1-3 digits + year letter
+
+name = "uk-vehicle-registration"
+label = "license_plate"
+countries = ["GB"]
+languages = ["en"]
+context = [
+  "vehicle",
+  "registration",
+  "number plate",
+  "licence plate",
+  "license plate",
+  "reg",
+  "vrn",
+  "dvla",
+  "v5c",
+  "logbook",
+  "mot",
+  "car",
+  "insured vehicle",
+]
+
+[[variants]]
+regex = '\b[A-HJ-PR-Y][A-HJ-PR-Y](?:0[1-9]|[1-7][0-9])[- ]?[A-HJ-PR-Z]{3}\b'
+score = 0.3
+
+[[variants]]
+regex = '\b[A-HJ-NPR-TV-Y]\d{1,3}[- ]?[A-HJ-PR-Y][A-HJ-PR-Z]{2}\b'
+score = 0.2
+
+[[variants]]
+regex = '\b[A-HJ-PR-Z]{3}[- ]?\d{1,3}[- ]?[A-HJ-NPR-TV-Y]\b'
+score = 0.15
diff --git a/crates/nvisy-pattern/assets/patterns/us/finance/bank_account.toml b/crates/nvisy-pattern/assets/patterns/us/finance/bank_account.toml
new file mode 100644
index 00000000..03dd30ef
--- /dev/null
+++ b/crates/nvisy-pattern/assets/patterns/us/finance/bank_account.toml
@@ -0,0 +1,26 @@
+# US Bank Account Number.
+#
+# 8-17 digit run with no checksum — usable only with strong
+# surrounding context. The score is deliberately very weak; the
+# context-keyword boost is expected to lift legitimate matches
+# above downstream confidence thresholds.
+
+name = "us-bank-account"
+label = "bank_account"
+countries = ["US"]
+languages = ["en"]
+context = [
+  "check",
+  "checking account",
+  "account",
+  "account#",
+  "acct",
+  "bank",
+  "save",
+  "savings",
+  "debit",
+]
+
+[[variants]]
+regex = '\b\d{8,17}\b'
+score = 0.05
diff --git a/crates/nvisy-pattern/assets/patterns/us/finance/bank_routing.toml b/crates/nvisy-pattern/assets/patterns/us/finance/bank_routing.toml
new file mode 100644
index 00000000..687512ae
--- /dev/null
+++ b/crates/nvisy-pattern/assets/patterns/us/finance/bank_routing.toml
@@ -0,0 +1,32 @@
+# US ABA Routing Number.
+#
+# Two variants:
+# - 9-digit unbroken form; the regex admits prefixes 01-32 (a superset of
+#   the Fed ranges 01-12 and 21-32), checksum-validated by `us.aba_routing`.
+# - Dashed form `\b[0123678]\d{3}-\d{4}-\d\b` with the same
+#   checksum.
+
+name = "us-bank-routing"
+label = "bank_routing"
+countries = ["US"]
+languages = ["en"]
+context = [
+  "routing",
+  "aba",
+  "rtn",
+  "transit",
+  "bank",
+  "deposit",
+  "wire",
+  "ach",
+]
+
+[[variants]]
+regex = '\b(?:0[1-9]|[12]\d|3[0-2])\d{7}\b'
+score = 0.5
+validator = "us.aba_routing"
+
+[[variants]]
+regex = '\b[0123678]\d{3}-\d{4}-\d\b'
+score = 0.4
+validator = "us.aba_routing"
diff --git a/crates/nvisy-pattern/assets/patterns/us/health/mbi.toml b/crates/nvisy-pattern/assets/patterns/us/health/mbi.toml
new file mode 100644
index 00000000..1359e09d
--- /dev/null
+++ b/crates/nvisy-pattern/assets/patterns/us/health/mbi.toml
@@ -0,0 +1,27 @@
+# US Medicare Beneficiary Identifier (MBI).
+#
+# 11-character CMS identifier with position-specific
+# character-class rules and letters restricted to
+# `ACDEFGHJKMNPQRTUVWXY` (excludes S, L, O, I, B, Z).
+
+name = "us-mbi"
+label = "medical_id"
+countries = ["US"]
+languages = ["en"]
+context = [
+  "mbi",
+  "medicare beneficiary",
+  "medicare",
+  "beneficiary",
+  "cms",
+]
+
+# 11-character form (no dashes), positions: N A AN N A AN N A A N N.
+[[variants]]
+regex = '\b[0-9][ACDEFGHJKMNPQRTUVWXY][0-9ACDEFGHJKMNPQRTUVWXY][0-9][ACDEFGHJKMNPQRTUVWXY][0-9ACDEFGHJKMNPQRTUVWXY][0-9][ACDEFGHJKMNPQRTUVWXY][ACDEFGHJKMNPQRTUVWXY][0-9][0-9]\b'
+score = 0.3
+
+# Dashed display form `XXXX-XXX-XXXX`.
+[[variants]]
+regex = '\b[0-9][ACDEFGHJKMNPQRTUVWXY][0-9ACDEFGHJKMNPQRTUVWXY][0-9]-[ACDEFGHJKMNPQRTUVWXY][0-9ACDEFGHJKMNPQRTUVWXY][0-9]-[ACDEFGHJKMNPQRTUVWXY][ACDEFGHJKMNPQRTUVWXY][0-9][0-9]\b'
+score = 0.5
diff --git a/crates/nvisy-pattern/assets/patterns/us/health/medical_license.toml b/crates/nvisy-pattern/assets/patterns/us/health/medical_license.toml
new file mode 100644
index 00000000..80e80390
--- /dev/null
+++ b/crates/nvisy-pattern/assets/patterns/us/health/medical_license.toml
@@ -0,0 +1,36 @@
+# US DEA (Drug Enforcement Administration) registration number,
+# used as a medical license identifier.
+#
+# 9-character format: two letters (DEA registration type plus
+# surname initial) followed by 7 digits, the last of which is a
+# checksum. Without context the pattern has a high false-positive
+# rate (matches arbitrary letter-pair plus digit-run strings), so
+# the regex score is low and the boost layer is expected to lift
+# legitimate matches.
+
+name = "us-medical-license"
+label = "medical_id"
+countries = ["US"]
+languages = ["en"]
+context = [
+  "medical",
+  "license",
+  "licence",
+  "certificate",
+  "dea",
+  "controlled substance",
+  "prescription",
+  "prescriber",
+]
+
+# DEA registration type letters: A, B, C, D, E, F, G, H, J, K, L,
+# M, P, R, S, T, U, X (plus mid-2000s practitioner-9 series).
+[[variants]]
+regex = '[abcdefghjklmprstuxABCDEFGHJKLMPRSTUX][a-zA-Z]\d{7}'
+score = 0.4
+validator = "us.dea_number"
+
+[[variants]]
+regex = '[abcdefghjklmprstuxABCDEFGHJKLMPRSTUX]9\d{7}'
+score = 0.4
+validator = "us.dea_number"
diff --git a/crates/nvisy-pattern/assets/patterns/us/health/npi.toml b/crates/nvisy-pattern/assets/patterns/us/health/npi.toml
new file mode 100644
index 00000000..0c657209
--- /dev/null
+++ b/crates/nvisy-pattern/assets/patterns/us/health/npi.toml
@@ -0,0 +1,28 @@
+# US National Provider Identifier (NPI).
+#
+# 10-digit number assigned by CMS; validated via Luhn checksum on
+# `80840` + the 10 digits.
+
+name = "us-npi"
+label = "medical_id"
+countries = ["US"]
+languages = ["en"]
+context = [
+  "npi",
+  "national provider",
+  "provider",
+  "npi number",
+  "provider id",
+  "provider identifier",
+  "taxonomy",
+]
+
+[[variants]]
+regex = '\b[12]\d{9}\b'
+score = 0.1
+validator = "us.npi"
+
+[[variants]]
+regex = '\b[12]\d{3}[ -]\d{3}[ -]\d{3}\b'
+score = 0.4
+validator = "us.npi"
diff --git a/crates/nvisy-pattern/assets/patterns/us/identity/drivers_license.toml b/crates/nvisy-pattern/assets/patterns/us/identity/drivers_license.toml
new file mode 100644
index 00000000..a72501ee
--- /dev/null
+++ b/crates/nvisy-pattern/assets/patterns/us/identity/drivers_license.toml
@@ -0,0 +1,26 @@
+# US Driver's License (state-shape union).
+#
+# Omits a bare-digit weak variant (`\b[0-9]{6,14}\b`) since it
+# matches phone numbers, dates, etc. and is unusable without
+# strong context.
+
+name = "us-drivers-license"
+label = "drivers_license"
+countries = ["US"]
+languages = ["en"]
+context = [
+  "driver",
+  "drivers",
+  "driver's",
+  "license",
+  "licence",
+  "dl",
+  "dl#",
+  "dlnum",
+]
+
+# Alphanumeric shapes — most US state license formats start with
+# letters, often with a hyphenated structure (e.g. MI: D123-4567-8901).
+[[variants]]
+regex = '\b(?:[A-Z]\d{3}-\d{4}-\d{4}|[A-Z]\d{3,6}|[A-Z]\d{5,9}|[A-Z]\d{6,8}|[A-Z]\d{4,8}|[A-Z]\d{9,11}|[A-Z]{1,2}\d{5,6}|H\d{8}|V\d{6}|X\d{8}|[A-Z]{2}\d{2,5}|[A-Z]{2}\d{3,7}|\d{2}[A-Z]{3}\d{5,6}|[A-Z]\d{13,14}|[A-Z]\d{18}|[A-Z]\d{6}R|[A-Z]\d{9}|[A-Z]\d{1,12}|\d{9}[A-Z]|[A-Z]{2}\d{6}[A-Z]|\d{8}[A-Z]{2}|\d{3}[A-Z]{2}\d{4}|[A-Z]\d[A-Z]\d[A-Z]|\d{7,8}[A-Z])\b'
+score = 0.4
diff --git a/crates/nvisy-pattern/assets/patterns/us/identity/itin.toml b/crates/nvisy-pattern/assets/patterns/us/identity/itin.toml
new file mode 100644
index 00000000..726861f5
--- /dev/null
+++ b/crates/nvisy-pattern/assets/patterns/us/identity/itin.toml
@@ -0,0 +1,28 @@
+# US Individual Taxpayer Identification Number (ITIN).
+#
+# Format: 9NN-MM-NNNN where the middle group restricts to the
+# IRS-published ranges. No checksum.
+
+name = "us-itin"
+label = "tax_id"
+countries = ["US"]
+languages = ["en"]
+context = [
+  "individual",
+  "taxpayer",
+  "itin",
+  "tax",
+  "payer",
+  "taxid",
+  "tin",
+]
+
+# Medium: dashes / spaces around the IRS-published middle range.
+[[variants]]
+regex = '\b9\d{2}[- ](?:5\d|6[0-5]|7\d|8[0-8]|9(?:[0-2]|[4-9]))[- ]\d{4}\b'
+score = 0.5
+
+# Weak: unbroken 9-digit form.
+[[variants]]
+regex = '\b9\d{2}(?:5\d|6[0-5]|7\d|8[0-8]|9(?:[0-2]|[4-9]))\d{4}\b'
+score = 0.3
diff --git a/crates/nvisy-pattern/assets/patterns/identity/us_passport.toml b/crates/nvisy-pattern/assets/patterns/us/identity/passport.toml
similarity index 74%
rename from crates/nvisy-pattern/assets/patterns/identity/us_passport.toml
rename to crates/nvisy-pattern/assets/patterns/us/identity/passport.toml
index d7087d83..439529a6 100644
--- a/crates/nvisy-pattern/assets/patterns/identity/us_passport.toml
+++ b/crates/nvisy-pattern/assets/patterns/us/identity/passport.toml
@@ -1,6 +1,7 @@
 name = "us-passport"
 label = "passport_number"
+countries = ["US"]
 
 [[variants]]
 regex = "\\b[A-Z]\\d{8}\\b"
-score = 0.5
+score = 0.1
diff --git a/crates/nvisy-pattern/assets/patterns/identity/us_postal_code.toml b/crates/nvisy-pattern/assets/patterns/us/identity/postal_code.toml
similarity index 84%
rename from crates/nvisy-pattern/assets/patterns/identity/us_postal_code.toml
rename to crates/nvisy-pattern/assets/patterns/us/identity/postal_code.toml
index 737b391f..53ee38c9 100644
--- a/crates/nvisy-pattern/assets/patterns/identity/us_postal_code.toml
+++ b/crates/nvisy-pattern/assets/patterns/us/identity/postal_code.toml
@@ -1,5 +1,6 @@
 name = "us-postal-code"
 label = "postal_code"
+countries = ["US"]
 
 [[variants]]
 regex = "\\b\\d{5}(?:-\\d{4})?\\b"
diff --git a/crates/nvisy-pattern/assets/patterns/us/identity/ssn.toml b/crates/nvisy-pattern/assets/patterns/us/identity/ssn.toml
new file mode 100644
index 00000000..18b11034
--- /dev/null
+++ b/crates/nvisy-pattern/assets/patterns/us/identity/ssn.toml
@@ -0,0 +1,14 @@
+# US Social Security Number.
+#
+# Omits very-weak variants (bare 9 digits, off-position splits).
+# Context-keyword boost is expected to lift legitimate matches.
+
+name = "ssn"
+label = "government_id"
+countries = ["US"]
+context = ["social security", "ssn", "tax id", "taxpayer identification"]
+
+[[variants]]
+regex = '\b(\d{3})[- .](\d{2})[- .](\d{4})\b'
+score = 0.5
+validator = "us.ssn"
diff --git a/crates/nvisy-pattern/assets/patterns/world/contact/email.toml b/crates/nvisy-pattern/assets/patterns/world/contact/email.toml
new file mode 100644
index 00000000..68114a4c
--- /dev/null
+++ b/crates/nvisy-pattern/assets/patterns/world/contact/email.toml
@@ -0,0 +1,12 @@
+# Email address (RFC 5322-loose).
+#
+# Accepts the RFC 5322 atom characters in the local part
+# (`!#$%&'*+\-/=?^_`{|}~`) so plus-addressing, dotted locals, and
+# similar are caught.
+
+name = "email"
+label = "email_address"
+
+[[variants]]
+regex = '''\b(?:[!#$%&'*+\-/=?^_`{|}~\w]|[!#$%&'*+\-/=?^_`{|}~\w][!#$%&'*+\-/=?^_`{|}~.\w]{0,}[!#$%&'*+\-/=?^_`{|}~\w])@\w+(?:[-.]\w+)*\.\w+(?:[-.]\w+)*\b'''
+score = 0.5
diff --git a/crates/nvisy-pattern/assets/patterns/contact/phone.toml b/crates/nvisy-pattern/assets/patterns/world/contact/phone.toml
similarity index 97%
rename from crates/nvisy-pattern/assets/patterns/contact/phone.toml
rename to crates/nvisy-pattern/assets/patterns/world/contact/phone.toml
index ce5fe9dd..83e0bc96 100644
--- a/crates/nvisy-pattern/assets/patterns/contact/phone.toml
+++ b/crates/nvisy-pattern/assets/patterns/world/contact/phone.toml
@@ -9,5 +9,5 @@ fr = ["téléphone", "telephone", "appel", "mobile", "portable", "tel", "fax", "
 
 [[variants]]
 regex = "(?:\\+\\d{1,3}[\\s.\\-]?)?\\(?\\d{2,4}\\)?[\\s.\\-]?\\d{3,4}[\\s.\\-]?\\d{4}\\b"
-score = 0.8
+score = 0.4
 validator = "phone"
diff --git a/crates/nvisy-pattern/assets/patterns/contact/url.toml b/crates/nvisy-pattern/assets/patterns/world/contact/url.toml
similarity index 87%
rename from crates/nvisy-pattern/assets/patterns/contact/url.toml
rename to crates/nvisy-pattern/assets/patterns/world/contact/url.toml
index ec11fcee..7096570a 100644
--- a/crates/nvisy-pattern/assets/patterns/contact/url.toml
+++ b/crates/nvisy-pattern/assets/patterns/world/contact/url.toml
@@ -3,4 +3,4 @@ label = "url"
 
 [[variants]]
 regex = "\\bhttps?://[^\\s/$.?#][^\\s]*\\b"
-score = 0.9
+score = 0.5
diff --git a/crates/nvisy-pattern/assets/patterns/credentials/aws_key.toml b/crates/nvisy-pattern/assets/patterns/world/credentials/aws_key.toml
similarity index 100%
rename from crates/nvisy-pattern/assets/patterns/credentials/aws_key.toml
rename to crates/nvisy-pattern/assets/patterns/world/credentials/aws_key.toml
diff --git a/crates/nvisy-pattern/assets/patterns/credentials/generic_api_key.toml b/crates/nvisy-pattern/assets/patterns/world/credentials/generic_api_key.toml
similarity index 94%
rename from crates/nvisy-pattern/assets/patterns/credentials/generic_api_key.toml
rename to crates/nvisy-pattern/assets/patterns/world/credentials/generic_api_key.toml
index be69abc5..bcd59d63 100644
--- a/crates/nvisy-pattern/assets/patterns/credentials/generic_api_key.toml
+++ b/crates/nvisy-pattern/assets/patterns/world/credentials/generic_api_key.toml
@@ -3,4 +3,4 @@ label = "api_key"
 
 [[variants]]
 regex = "(?i)(?:api[_\\-]?key|api[_\\-]?secret|access[_\\-]?token|secret[_\\-]?key|bearer)\\s*[:=]\\s*[\"']?([a-zA-Z0-9_\\-]{20,})[\"']?"
-score = 0.7
+score = 0.4
diff --git a/crates/nvisy-pattern/assets/patterns/credentials/github_token.toml b/crates/nvisy-pattern/assets/patterns/world/credentials/github_token.toml
similarity index 100%
rename from crates/nvisy-pattern/assets/patterns/credentials/github_token.toml
rename to crates/nvisy-pattern/assets/patterns/world/credentials/github_token.toml
diff --git a/crates/nvisy-pattern/assets/patterns/credentials/private_key.toml b/crates/nvisy-pattern/assets/patterns/world/credentials/private_key.toml
similarity index 100%
rename from crates/nvisy-pattern/assets/patterns/credentials/private_key.toml
rename to crates/nvisy-pattern/assets/patterns/world/credentials/private_key.toml
diff --git a/crates/nvisy-pattern/assets/patterns/credentials/stripe_key.toml b/crates/nvisy-pattern/assets/patterns/world/credentials/stripe_key.toml
similarity index 100%
rename from crates/nvisy-pattern/assets/patterns/credentials/stripe_key.toml
rename to crates/nvisy-pattern/assets/patterns/world/credentials/stripe_key.toml
diff --git a/crates/nvisy-pattern/assets/patterns/finance/bitcoin_address.toml b/crates/nvisy-pattern/assets/patterns/world/finance/bitcoin_address.toml
similarity index 91%
rename from crates/nvisy-pattern/assets/patterns/finance/bitcoin_address.toml
rename to crates/nvisy-pattern/assets/patterns/world/finance/bitcoin_address.toml
index a68c435a..31f6fad6 100644
--- a/crates/nvisy-pattern/assets/patterns/finance/bitcoin_address.toml
+++ b/crates/nvisy-pattern/assets/patterns/world/finance/bitcoin_address.toml
@@ -3,4 +3,4 @@ label = "crypto_address"
 
 [[variants]]
 regex = "\\b(?:bc1[a-z0-9]{25,39}|[13][a-km-zA-HJ-NP-Z1-9]{25,34})\\b"
-score = 0.85
+score = 0.5
diff --git a/crates/nvisy-pattern/assets/patterns/finance/credit_card.toml b/crates/nvisy-pattern/assets/patterns/world/finance/credit_card.toml
similarity index 55%
rename from crates/nvisy-pattern/assets/patterns/finance/credit_card.toml
rename to crates/nvisy-pattern/assets/patterns/world/finance/credit_card.toml
index cfe4ab2b..d3412b4f 100644
--- a/crates/nvisy-pattern/assets/patterns/finance/credit_card.toml
+++ b/crates/nvisy-pattern/assets/patterns/world/finance/credit_card.toml
@@ -1,3 +1,9 @@
+# Credit / debit card number.
+#
+# Brand-prefix variants matching Visa, Mastercard, Discover, etc.
+# Luhn-validated. The Luhn validator drops false positives that
+# a length-based fallback regex would otherwise pick up.
+
 name = "credit-card"
 label = "payment_card"
 
@@ -8,6 +14,6 @@ de = ["karte", "kredit", "kreditkarte", "debit", "zahlung", "visa", "mastercard"
 fr = ["carte", "crédit", "credit", "débit", "debit", "paiement", "visa", "mastercard", "amex"]
 
 [[variants]]
-regex = "\\b(?:\\d[ \\-]*?){13,19}\\b"
-score = 0.85
+regex = '\b(?:(?:4\d{3})|(?:5[0-5]\d{2})|(?:6\d{3})|(?:1\d{3})|(?:3\d{3}))[- ]?(?:\d{3,4})[- ]?(?:\d{3,4})[- ]?(?:\d{3,5})\b'
+score = 0.5
 validator = "luhn"
diff --git a/crates/nvisy-pattern/assets/patterns/finance/ethereum_address.toml b/crates/nvisy-pattern/assets/patterns/world/finance/ethereum_address.toml
similarity index 88%
rename from crates/nvisy-pattern/assets/patterns/finance/ethereum_address.toml
rename to crates/nvisy-pattern/assets/patterns/world/finance/ethereum_address.toml
index 2860d8a4..939ed818 100644
--- a/crates/nvisy-pattern/assets/patterns/finance/ethereum_address.toml
+++ b/crates/nvisy-pattern/assets/patterns/world/finance/ethereum_address.toml
@@ -3,4 +3,4 @@ label = "crypto_address"
 
 [[variants]]
 regex = "\\b0x[0-9a-fA-F]{40}\\b"
-score = 0.85
+score = 0.5
diff --git a/crates/nvisy-pattern/assets/patterns/finance/iban.toml b/crates/nvisy-pattern/assets/patterns/world/finance/iban.toml
similarity index 93%
rename from crates/nvisy-pattern/assets/patterns/finance/iban.toml
rename to crates/nvisy-pattern/assets/patterns/world/finance/iban.toml
index 7256b240..3680ee9e 100644
--- a/crates/nvisy-pattern/assets/patterns/finance/iban.toml
+++ b/crates/nvisy-pattern/assets/patterns/world/finance/iban.toml
@@ -4,5 +4,5 @@ context = ["iban", "bank", "account", "transfer", "swift"]
 
 [[variants]]
 regex = "\\b[A-Z]{2}\\d{2}\\s?[A-Z0-9]{4}\\s?(?:\\d{4}\\s?){2,7}\\d{1,4}\\b"
-score = 0.85
+score = 0.5
 validator = "iban"
diff --git a/crates/nvisy-pattern/assets/patterns/finance/swift_code.toml b/crates/nvisy-pattern/assets/patterns/world/finance/swift_code.toml
similarity index 90%
rename from crates/nvisy-pattern/assets/patterns/finance/swift_code.toml
rename to crates/nvisy-pattern/assets/patterns/world/finance/swift_code.toml
index 7147b65c..e1f1a60c 100644
--- a/crates/nvisy-pattern/assets/patterns/finance/swift_code.toml
+++ b/crates/nvisy-pattern/assets/patterns/world/finance/swift_code.toml
@@ -3,4 +3,4 @@ label = "swift_code"
 
 [[variants]]
 regex = "\\b[A-Z]{4}[A-Z]{2}[A-Z0-9]{2}(?:[A-Z0-9]{3})?\\b"
-score = 0.7
+score = 0.5
diff --git a/crates/nvisy-pattern/assets/patterns/world/network/ipv4.toml b/crates/nvisy-pattern/assets/patterns/world/network/ipv4.toml
new file mode 100644
index 00000000..926d8890
--- /dev/null
+++ b/crates/nvisy-pattern/assets/patterns/world/network/ipv4.toml
@@ -0,0 +1,8 @@
+# IPv4 address with optional CIDR suffix.
+
+name = "ipv4"
+label = "ip_address"
+
+[[variants]]
+regex = '\b(?:25[0-5]|2[0-4]\d|[01]?\d\d?)\.(?:25[0-5]|2[0-4]\d|[01]?\d\d?)\.(?:25[0-5]|2[0-4]\d|[01]?\d\d?)\.(?:25[0-5]|2[0-4]\d|[01]?\d\d?)(?:/(?:[0-2]?\d|3[0-2]))?\b'
+score = 0.6
diff --git a/crates/nvisy-pattern/assets/patterns/world/network/ipv6.toml b/crates/nvisy-pattern/assets/patterns/world/network/ipv6.toml
new file mode 100644
index 00000000..5ff02317
--- /dev/null
+++ b/crates/nvisy-pattern/assets/patterns/world/network/ipv6.toml
@@ -0,0 +1,25 @@
+# IPv6 address (comprehensive form set).
+#
+# Rust's `regex` crate doesn't support look-around, so the
+# variants use `\b` boundaries to limit match overlap inside
+# larger IPv6-shaped strings. Over-matching at sub-token
+# boundaries is rare in practice and gets collapsed by the entity
+# dedup pipeline downstream.
+
+name = "ipv6"
+label = "ip_address"
+
+# IPv4-mapped IPv6 (`::ffff:1.2.3.4`, `::ffff:0:1.2.3.4`, or bare `::1.2.3.4`).
+[[variants]]
+regex = '\b::(?:ffff(?::0{1,4})?:)?(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)(?:/(?:12[0-8]|1[01]\d|[1-9]?\d))?\b'
+score = 0.6
+
+# IPv4-embedded IPv6 (X:X:…:1.2.3.4).
+[[variants]]
+regex = '\b(?:(?:[0-9A-Fa-f]{1,4}:){1,5}:(?:[0-9A-Fa-f]{1,4}:){0,4}|(?:[0-9A-Fa-f]{1,4}:){6})(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)(?:/(?:12[0-8]|1[01]\d|[1-9]?\d))?\b'
+score = 0.6
+
+# Standard IPv6 with all compressed forms.
+[[variants]]
+regex = '\b(?:(?:[0-9A-Fa-f]{1,4}:){7}[0-9A-Fa-f]{1,4}|(?:[0-9A-Fa-f]{1,4}:){1,7}:|:(?::[0-9A-Fa-f]{1,4}){1,7}|(?:[0-9A-Fa-f]{1,4}:){1,6}:[0-9A-Fa-f]{1,4}|(?:[0-9A-Fa-f]{1,4}:){1,5}(?::[0-9A-Fa-f]{1,4}){1,2}|(?:[0-9A-Fa-f]{1,4}:){1,4}(?::[0-9A-Fa-f]{1,4}){1,3}|(?:[0-9A-Fa-f]{1,4}:){1,3}(?::[0-9A-Fa-f]{1,4}){1,4}|(?:[0-9A-Fa-f]{1,4}:){1,2}(?::[0-9A-Fa-f]{1,4}){1,5}|[0-9A-Fa-f]{1,4}:(?::[0-9A-Fa-f]{1,4}){1,6}|:(?::[0-9A-Fa-f]{1,4}){1,6})(?:%[0-9a-zA-Z]+)?(?:/(?:12[0-8]|1[01]\d|[1-9]?\d))?\b'
+score = 0.6
diff --git a/crates/nvisy-pattern/assets/patterns/world/network/mac_address.toml b/crates/nvisy-pattern/assets/patterns/world/network/mac_address.toml
new file mode 100644
index 00000000..c7425050
--- /dev/null
+++ b/crates/nvisy-pattern/assets/patterns/world/network/mac_address.toml
@@ -0,0 +1,16 @@
+# MAC (Ethernet) address.
+#
+# Two variants:
+# - Colon/hyphen form: `00:1A:2B:3C:4D:5E` or `00-1A-2B-3C-4D-5E`.
+# - Cisco dot-form: `0000.0000.0000` (three 4-hex-digit groups).
+
+name = "mac-address"
+label = "mac_address"
+
+[[variants]]
+regex = "\\b(?:[0-9A-Fa-f]{2}[:\\-]){5}[0-9A-Fa-f]{2}\\b"
+score = 0.5
+
+[[variants]]
+regex = "\\b[0-9A-Fa-f]{4}\\.[0-9A-Fa-f]{4}\\.[0-9A-Fa-f]{4}\\b"
+score = 0.5
diff --git a/crates/nvisy-pattern/assets/patterns/personal/date_of_birth.toml b/crates/nvisy-pattern/assets/patterns/world/personal/date_of_birth.toml
similarity index 97%
rename from crates/nvisy-pattern/assets/patterns/personal/date_of_birth.toml
rename to crates/nvisy-pattern/assets/patterns/world/personal/date_of_birth.toml
index a178c41d..0045927b 100644
--- a/crates/nvisy-pattern/assets/patterns/personal/date_of_birth.toml
+++ b/crates/nvisy-pattern/assets/patterns/world/personal/date_of_birth.toml
@@ -9,5 +9,5 @@ fr = ["naissance", "né", "nee", "née", "date de naissance", "anniversaire"]
 
 [[variants]]
 regex = "\\b(?:(?:0[1-9]|1[0-2]|[1-9])[/\\-](?:0[1-9]|[12]\\d|3[01]|[1-9])[/\\-](?:19|20)\\d{2}|(?:19|20)\\d{2}[/\\-](?:0[1-9]|1[0-2])[/\\-](?:0[1-9]|[12]\\d|3[01]))\\b"
-score = 0.6
+score = 0.1
 validator = "date"
diff --git a/crates/nvisy-pattern/assets/patterns/personal/datetime.toml b/crates/nvisy-pattern/assets/patterns/world/personal/datetime.toml
similarity index 98%
rename from crates/nvisy-pattern/assets/patterns/personal/datetime.toml
rename to crates/nvisy-pattern/assets/patterns/world/personal/datetime.toml
index e37edf92..76d0e1df 100644
--- a/crates/nvisy-pattern/assets/patterns/personal/datetime.toml
+++ b/crates/nvisy-pattern/assets/patterns/world/personal/datetime.toml
@@ -9,4 +9,4 @@ fr = ["horodatage", "créé", "cree", "créée", "creee", "modifié", "modifie",
 
 [[variants]]
 regex = "\\b(?:19|20)\\d{2}[/\\-](?:0[1-9]|1[0-2])[/\\-](?:0[1-9]|[12]\\d|3[01])[T ](?:[01]\\d|2[0-3]):[0-5]\\d(?::[0-5]\\d)?(?:Z|[+\\-]\\d{2}:?\\d{2})?\\b"
-score = 0.7
+score = 0.1
diff --git a/crates/nvisy-pattern/src/recognition/compiled.rs b/crates/nvisy-pattern/src/recognition/compiled.rs
index d1a61556..c9fe8d6e 100644
--- a/crates/nvisy-pattern/src/recognition/compiled.rs
+++ b/crates/nvisy-pattern/src/recognition/compiled.rs
@@ -19,7 +19,7 @@ use std::sync::Arc;
 
 use nvisy_core::entity::{Entity, EntityLabelRef, PatternProvenance, TrailProvenance, TrailStep};
 use nvisy_core::modality::{Text, TextLocation};
-use nvisy_core::primitive::{Confidence, LanguageTag};
+use nvisy_core::primitive::{Confidence, CountryCode, LanguageTag};
 use regex::Regex;
 
 use crate::validators::Validator;
@@ -44,6 +44,9 @@ pub(super) struct CompiledPattern {
     /// Languages the parent pattern applies to.
     /// Empty means "any language".
     pub languages: Vec<LanguageTag>,
+    /// Countries the parent pattern applies to.
+    /// Empty means "any country".
+    pub countries: Vec<CountryCode>,
 }
 
 impl CompiledPattern {
@@ -93,6 +96,9 @@ pub(super) struct CompiledDictionary {
     /// Languages this dictionary applies to. Empty means "any
     /// language".
     pub languages: Vec<LanguageTag>,
+    /// Countries this dictionary applies to. Empty means "any
+    /// country".
+    pub countries: Vec<CountryCode>,
     /// Reject matches whose immediate neighbours are word
     /// characters (alphanumeric or `_`). Mirrors regex `\b`.
     pub word_boundary: bool,
diff --git a/crates/nvisy-pattern/src/recognition/dictionary.rs b/crates/nvisy-pattern/src/recognition/dictionary.rs
index 3be046c6..e5acdce5 100644
--- a/crates/nvisy-pattern/src/recognition/dictionary.rs
+++ b/crates/nvisy-pattern/src/recognition/dictionary.rs
@@ -3,7 +3,7 @@
 use derive_builder::Builder;
 use nvisy_core::Error;
 use nvisy_core::entity::EntityLabelRef;
-use nvisy_core::primitive::{Confidence, LanguageTag};
+use nvisy_core::primitive::{Confidence, CountryCode, LanguageTag};
 use serde::Deserialize;
 
 use super::context::Context;
@@ -142,6 +142,12 @@ pub struct Dictionary {
     #[builder(default)]
     #[serde(default)]
     pub languages: Vec<LanguageTag>,
+    /// ISO 3166-1 alpha-2 country codes the dictionary applies
+    /// to. Empty means "any country" — the dictionary fires
+    /// regardless of the per-call jurisdiction hint.
+    #[builder(default)]
+    #[serde(default)]
+    pub countries: Vec<CountryCode>,
     /// Require word-boundary surroundings on every match.
     ///
     /// With the default of `true`, the term `"am"` matches the
@@ -213,6 +219,12 @@ impl Dictionary {
         if let Some(wb) = metadata.word_boundary {
             builder = builder.with_word_boundary(wb);
         }
+        if let Some(languages) = metadata.languages {
+            builder = builder.with_languages(languages);
+        }
+        if let Some(countries) = metadata.countries {
+            builder = builder.with_countries(countries);
+        }
         Ok(builder)
     }
 }
@@ -227,4 +239,8 @@ struct DictionaryMetadata {
     context: Option<Context>,
     #[serde(default)]
     word_boundary: Option<bool>,
+    #[serde(default)]
+    languages: Option<Vec<LanguageTag>>,
+    #[serde(default)]
+    countries: Option<Vec<CountryCode>>,
 }
diff --git a/crates/nvisy-pattern/src/recognition/recognizer.rs b/crates/nvisy-pattern/src/recognition/recognizer.rs
index 3423e80b..9f6bbb3c 100644
--- a/crates/nvisy-pattern/src/recognition/recognizer.rs
+++ b/crates/nvisy-pattern/src/recognition/recognizer.rs
@@ -269,6 +269,7 @@ impl PatternRecognizerBuilder {
                     score: variant.score,
                     validator,
                     languages: pattern.languages.clone(),
+                    countries: pattern.countries.clone(),
                 });
             }
         }
@@ -335,6 +336,7 @@ impl PatternRecognizerBuilder {
                 term_end,
                 term_scores,
                 languages: dict.languages.clone(),
+                countries: dict.countries.clone(),
                 word_boundary: dict.word_boundary,
             });
         }
@@ -429,6 +431,9 @@ impl EntityRecognizer<Text> for PatternRecognizer {
                 if !input.applies_to_language(&pat.languages) {
                     continue;
                 }
+                if !input.applies_to_country(&pat.countries) {
+                    continue;
+                }
                 for m in pat.regex.find_iter(text) {
                     if let Some(validator) = pat.validator.as_ref()
                         && !validator.validate(m.as_str())
@@ -449,6 +454,9 @@ impl EntityRecognizer<Text> for PatternRecognizer {
                 if !input.applies_to_language(&dict.languages) {
                     continue;
                 }
+                if !input.applies_to_country(&dict.countries) {
+                    continue;
+                }
                 if dict.word_boundary && !has_word_boundaries(text, mat.start(), mat.end()) {
                     continue;
                 }
@@ -467,7 +475,7 @@ mod tests {
 
     use nvisy_core::entity::{Entity, EntityLabelRef, builtins};
     use nvisy_core::modality::{Text, TextData};
-    use nvisy_core::primitive::Confidence;
+    use nvisy_core::primitive::{Confidence, CountryCode};
     use nvisy_core::recognition::RecognizerInput;
 
     use super::*;
@@ -555,6 +563,77 @@ mod tests {
         assert_eq!(map.len(), 2);
     }
 
+    #[test]
+    fn regex_omits_countries_by_default() {
+        let toml = r#"
+            name = "x"
+            label = "government_id"
+            [[variants]]
+            regex = "\\d+"
+        "#;
+        let regex = crate::Regex::from_toml(toml).expect("TOML parses");
+        assert!(regex.countries.is_empty(), "default countries must be empty");
+    }
+
+    #[test]
+    fn regex_parses_countries_field() {
+        let toml = r#"
+            name = "ssn"
+            label = "government_id"
+            countries = ["US"]
+            [[variants]]
+            regex = "\\d+"
+        "#;
+        let regex = crate::Regex::from_toml(toml).expect("TOML parses");
+        assert_eq!(regex.countries.len(), 1);
+        assert_eq!(regex.countries[0].as_str(), "US");
+    }
+
+    #[test]
+    fn regex_parses_multiple_countries() {
+        let toml = r#"
+            name = "eu-vat"
+            label = "tax_id"
+            countries = ["de", "FR", "iT"]
+            [[variants]]
+            regex = "\\d+"
+        "#;
+        let regex = crate::Regex::from_toml(toml).expect("TOML parses");
+        assert_eq!(regex.countries.len(), 3);
+        // Construction normalises to uppercase.
+        let codes: Vec<&str> = regex.countries.iter().map(CountryCode::as_str).collect();
+        assert_eq!(codes, vec!["DE", "FR", "IT"]);
+    }
+
+    #[test]
+    fn regex_rejects_invalid_country() {
+        let toml = r#"
+            name = "x"
+            label = "government_id"
+            countries = ["XZ"]
+            [[variants]]
+            regex = "\\d+"
+        "#;
+        assert!(
+            crate::Regex::from_toml(toml).is_err(),
+            "unassigned country code must error",
+        );
+    }
+
+    #[test]
+    fn regex_builder_accepts_countries() {
+        let variant = crate::Variant::new(r"\d{3}-\d{2}-\d{4}").unwrap();
+        let regex = crate::Regex::builder()
+            .with_name("ssn")
+            .with_label(builtins::GOVERNMENT_ID.label_ref())
+            .with_variants(vec![variant])
+            .with_countries(vec![CountryCode::new("US").unwrap()])
+            .build()
+            .expect("regex builds");
+        assert_eq!(regex.countries.len(), 1);
+        assert_eq!(regex.countries[0].as_str(), "US");
+    }
+
     async fn run_with_language(
         recognizer: &impl EntityRecognizer<Text>,
         text: &str,
@@ -571,6 +650,75 @@ mod tests {
             .entities
     }
 
+    async fn run_with_country(
+        recognizer: &impl EntityRecognizer<Text>,
+        text: &str,
+        country: Option<&str>,
+    ) -> Vec<Entity<Text>> {
+        let mut input = RecognizerInput::new(TextData::new(text.to_owned()));
+        if let Some(c) = country {
+            input = input.with_country(CountryCode::new(c).expect("country code parses"));
+        }
+        recognizer
+            .recognize(&input)
+            .await
+            .expect("recognize succeeds")
+            .entities
+    }
+
+    fn us_ssn_regex() -> crate::Regex {
+        let variant = crate::Variant::new(r"\b\d{3}-\d{2}-\d{4}\b")
+            .expect("variant builds")
+            .with_score(Confidence::clamped(0.5));
+        crate::Regex::builder()
+            .with_name("ssn")
+            .with_label(builtins::GOVERNMENT_ID.label_ref())
+            .with_variants(vec![variant])
+            .with_countries(vec![CountryCode::new("US").unwrap()])
+            .build()
+            .expect("regex builds")
+    }
+
+    #[tokio::test]
+    async fn country_scoped_rule_fires_under_matching_hint() {
+        let recognizer = PatternRecognizer::builder()
+            .with_pattern(us_ssn_regex())
+            .build()
+            .expect("recognizer builds");
+        let entities = run_with_country(&recognizer, "SSN: 123-45-6789", Some("US")).await;
+        assert_eq!(entities.len(), 1, "US-scoped rule must fire under US hint");
+    }
+
+    #[tokio::test]
+    async fn country_scoped_rule_skipped_under_non_matching_hint() {
+        let recognizer = PatternRecognizer::builder()
+            .with_pattern(us_ssn_regex())
+            .build()
+            .expect("recognizer builds");
+        let entities = run_with_country(&recognizer, "SSN: 123-45-6789", Some("GB")).await;
+        assert!(
+            entities.is_empty(),
+            "US-scoped rule must not fire under GB hint",
+        );
+    }
+
+    #[tokio::test]
+    async fn country_scoped_rule_fires_without_hint() {
+        // Permissive fallback: missing hint shouldn't drop the
+        // detection. Matches the existing `applies_to_language`
+        // semantic.
+        let recognizer = PatternRecognizer::builder()
+            .with_pattern(us_ssn_regex())
+            .build()
+            .expect("recognizer builds");
+        let entities = run_with_country(&recognizer, "SSN: 123-45-6789", None).await;
+        assert_eq!(
+            entities.len(),
+            1,
+            "missing country hint must permit US-scoped rule to run",
+        );
+    }
+
     fn per_language_credit_card_regex() -> crate::Regex {
         let variant = crate::Variant::new(r"\b\d{16}\b")
             .expect("variant builds")
diff --git a/crates/nvisy-pattern/src/recognition/regex.rs b/crates/nvisy-pattern/src/recognition/regex.rs
index 6c926ff4..0690ce34 100644
--- a/crates/nvisy-pattern/src/recognition/regex.rs
+++ b/crates/nvisy-pattern/src/recognition/regex.rs
@@ -3,7 +3,7 @@
 use derive_builder::Builder;
 use nvisy_core::Error;
 use nvisy_core::entity::EntityLabelRef;
-use nvisy_core::primitive::{Confidence, LanguageTag};
+use nvisy_core::primitive::{Confidence, CountryCode, LanguageTag};
 use serde::Deserialize;
 
 use super::context::Context;
@@ -92,11 +92,11 @@ fn default_score() -> Confidence {
 /// Regex detection rule: one label, optional keyword boosts, and
 /// one or more [`Variant`]s.
 ///
-/// Mirrors the Presidio "pattern recognizer" shape — several regex
-/// strategies for one entity type, plus a shared context-keyword
-/// list. Every variant emits the same [`label`]; context keywords
-/// are harvested by [`PatternRecognizer`] into a wrapping boost
-/// layer and are never read by the rule itself.
+/// A rule groups several regex strategies under a single entity
+/// type plus a shared context-keyword list. Every variant emits
+/// the same [`label`]; context keywords are harvested by
+/// [`PatternRecognizer`] into a wrapping boost layer and are
+/// never read by the rule itself.
 ///
 /// # Examples
 ///
@@ -149,6 +149,14 @@ pub struct Regex {
     #[builder(default)]
     #[serde(default)]
     pub languages: Vec<LanguageTag>,
+    /// ISO 3166-1 alpha-2 country codes the rule applies to.
+    /// Empty means "any country" — the rule fires regardless of
+    /// the per-call jurisdiction hint. Use this to scope a
+    /// pattern to specific national formats (e.g. `["US"]` for
+    /// the SSN regex).
+    #[builder(default)]
+    #[serde(default)]
+    pub countries: Vec<CountryCode>,
 }
 
 impl Regex {
diff --git a/crates/nvisy-pattern/src/shipped/dictionaries.rs b/crates/nvisy-pattern/src/shipped/dictionaries.rs
deleted file mode 100644
index dac8baa8..00000000
--- a/crates/nvisy-pattern/src/shipped/dictionaries.rs
+++ /dev/null
@@ -1,102 +0,0 @@
-//! Built-in [`Dictionary`]s, embedded at compile time.
-//!
-//! Each accessor pairs a TOML metadata sidecar
-//! (`assets/dictionaries/**/*.toml`) with a term source (`*.csv`
-//! for multi-column term lists, `*.txt` for one-per-line), merging
-//! them via [`Dictionary::metadata_from_toml`] + [`Term::from_csv`]
-//! / [`Term::from_text`].
-//!
-//! [`Dictionary`]: crate::Dictionary
-
-use crate::recognition::{Dictionary, Term};
-
-macro_rules! shipped_dictionary {
-    ($(#[$meta:meta])* fn $name:ident from $meta_path:literal with csv $terms:literal) => {
-        $(#[$meta])*
-        #[must_use]
-        pub fn $name() -> Dictionary {
-            let terms = Term::from_csv(include_str!(concat!(
-                "../../assets/dictionaries/",
-                $terms
-            )))
-            .expect(concat!("shipped term source `", $terms, "` parses"));
-            Dictionary::metadata_from_toml(include_str!(concat!(
-                "../../assets/dictionaries/",
-                $meta_path
-            )))
-            .expect(concat!("shipped metadata `", $meta_path, "` is well-formed"))
-            .with_terms(terms)
-            .build()
-            .expect(concat!("shipped dictionary `", $meta_path, "` builds"))
-        }
-    };
-    ($(#[$meta:meta])* fn $name:ident from $meta_path:literal with text $terms:literal) => {
-        $(#[$meta])*
-        #[must_use]
-        pub fn $name() -> Dictionary {
-            let terms = Term::from_text(include_str!(concat!(
-                "../../assets/dictionaries/",
-                $terms
-            )));
-            Dictionary::metadata_from_toml(include_str!(concat!(
-                "../../assets/dictionaries/",
-                $meta_path
-            )))
-            .expect(concat!("shipped metadata `", $meta_path, "` is well-formed"))
-            .with_terms(terms)
-            .build()
-            .expect(concat!("shipped dictionary `", $meta_path, "` builds"))
-        }
-    };
-}
-
-shipped_dictionary!(
-    /// Cryptocurrency names and ticker symbols (BTC, Bitcoin, ETH,
-    /// Ethereum, …).
-    fn cryptocurrencies from "finance/cryptocurrencies.toml" with csv "finance/cryptocurrencies.csv"
-);
-shipped_dictionary!(
-    /// Fiat currency names and ISO 4217 codes (USD, US Dollar, EUR,
-    /// Euro, …).
-    fn currencies from "finance/currencies.toml" with csv "finance/currencies.csv"
-);
-shipped_dictionary!(
-    /// Human-language names and ISO 639 codes (English, en,
-    /// French, fr, …).
-    fn languages from "general/languages.toml" with csv "general/languages.csv"
-);
-shipped_dictionary!(
-    /// Demonyms and nationality terms (American, French, …).
-    fn nationalities from "general/nationalities.toml" with text "general/nationalities.txt"
-);
-shipped_dictionary!(
-    /// Religious affiliations (Christianity, Islam, …).
-    fn religions from "general/religions.toml" with text "general/religions.txt"
-);
-
-/// Every built-in dictionary shipped by this crate, in arbitrary
-/// stable order.
-#[must_use]
-pub fn all() -> Vec<Dictionary> {
-    vec![
-        cryptocurrencies(),
-        currencies(),
-        languages(),
-        nationalities(),
-        religions(),
-    ]
-}
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-
-    #[test]
-    fn every_shipped_dictionary_parses() {
-        let dicts = all();
-        assert_eq!(dicts.len(), 5);
-        for dict in &dicts {
-            assert!(!dict.terms.is_empty(), "{} has no terms", dict.name);
-        }
-    }
-}
diff --git a/crates/nvisy-pattern/src/shipped/dictionaries/mod.rs b/crates/nvisy-pattern/src/shipped/dictionaries/mod.rs
new file mode 100644
index 00000000..2190100e
--- /dev/null
+++ b/crates/nvisy-pattern/src/shipped/dictionaries/mod.rs
@@ -0,0 +1,77 @@
+//! Built-in [`Dictionary`]s, embedded at compile time.
+//!
+//! Accessors are grouped by region — `world::*` for universal
+//! dictionaries; future country-specific dictionaries land in
+//! `<country>::*` sub-modules. Each pairs a TOML metadata sidecar
+//! (`assets/dictionaries/<region>/<domain>/*.toml`) with a term
+//! source (`*.csv` for multi-column lists, `*.txt` for one-per-line),
+//! merging them via [`Dictionary::metadata_from_toml`] +
+//! [`crate::Term::from_csv`] / [`crate::Term::from_text`].
+//!
+//! [`Dictionary`]: crate::Dictionary
+
+pub mod world;
+
+use crate::Dictionary;
+
+/// Helper used by every per-region sub-module to define a shipped
+/// dictionary accessor.
+///
+/// Paths are resolved with `include_str!` against the path of the
+/// file that *expands* the macro, so callers in sub-modules pass
+/// paths relative to themselves.
+#[doc(hidden)]
+#[macro_export]
+macro_rules! __shipped_dictionary {
+    ($(#[$meta:meta])* fn $name:ident from $meta_path:literal with csv $terms:literal) => {
+        $(#[$meta])*
+        #[must_use]
+        pub fn $name() -> $crate::Dictionary {
+            let terms = $crate::Term::from_csv(include_str!($terms))
+                .expect(concat!("shipped term source `", $terms, "` parses"));
+            $crate::Dictionary::metadata_from_toml(include_str!($meta_path))
+                .expect(concat!("shipped metadata `", $meta_path, "` is well-formed"))
+                .with_terms(terms)
+                .build()
+                .expect(concat!("shipped dictionary `", $meta_path, "` builds"))
+        }
+    };
+    ($(#[$meta:meta])* fn $name:ident from $meta_path:literal with text $terms:literal) => {
+        $(#[$meta])*
+        #[must_use]
+        pub fn $name() -> $crate::Dictionary {
+            let terms = $crate::Term::from_text(include_str!($terms));
+            $crate::Dictionary::metadata_from_toml(include_str!($meta_path))
+                .expect(concat!("shipped metadata `", $meta_path, "` is well-formed"))
+                .with_terms(terms)
+                .build()
+                .expect(concat!("shipped dictionary `", $meta_path, "` builds"))
+        }
+    };
+}
+
+/// Every built-in dictionary shipped by this crate, regardless of
+/// region.
+#[must_use]
+pub fn all() -> Vec<Dictionary> {
+    world::all()
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn every_shipped_dictionary_parses() {
+        let dicts = all();
+        assert_eq!(dicts.len(), 5);
+        for dict in &dicts {
+            assert!(!dict.terms.is_empty(), "{} has no terms", dict.name);
+        }
+    }
+
+    #[test]
+    fn world_set_has_5_dictionaries() {
+        assert_eq!(world::all().len(), 5);
+    }
+}
diff --git a/crates/nvisy-pattern/src/shipped/dictionaries/world.rs b/crates/nvisy-pattern/src/shipped/dictionaries/world.rs
new file mode 100644
index 00000000..9941d11b
--- /dev/null
+++ b/crates/nvisy-pattern/src/shipped/dictionaries/world.rs
@@ -0,0 +1,50 @@
+//! Universal dictionaries — apply regardless of jurisdiction.
+
+use crate::Dictionary;
+use crate::__shipped_dictionary as shipped_dictionary;
+
+shipped_dictionary!(
+    /// Cryptocurrency names and ticker symbols (BTC, Bitcoin, ETH,
+    /// Ethereum, …).
+    fn cryptocurrencies
+        from "../../../assets/dictionaries/world/finance/cryptocurrencies.toml"
+        with csv "../../../assets/dictionaries/world/finance/cryptocurrencies.csv"
+);
+shipped_dictionary!(
+    /// Fiat currency names and ISO 4217 codes (USD, US Dollar,
+    /// EUR, Euro, …).
+    fn currencies
+        from "../../../assets/dictionaries/world/finance/currencies.toml"
+        with csv "../../../assets/dictionaries/world/finance/currencies.csv"
+);
+shipped_dictionary!(
+    /// Human-language names and ISO 639 codes (English, en,
+    /// French, fr, …).
+    fn languages
+        from "../../../assets/dictionaries/world/personal/languages.toml"
+        with csv "../../../assets/dictionaries/world/personal/languages.csv"
+);
+shipped_dictionary!(
+    /// Demonyms and nationality terms (American, French, …).
+    fn nationalities
+        from "../../../assets/dictionaries/world/personal/nationalities.toml"
+        with text "../../../assets/dictionaries/world/personal/nationalities.txt"
+);
+shipped_dictionary!(
+    /// Religious affiliations (Christianity, Islam, …).
+    fn religions
+        from "../../../assets/dictionaries/world/personal/religions.toml"
+        with text "../../../assets/dictionaries/world/personal/religions.txt"
+);
+
+/// Every world-scoped built-in dictionary.
+#[must_use]
+pub fn all() -> Vec<Dictionary> {
+    vec![
+        cryptocurrencies(),
+        currencies(),
+        languages(),
+        nationalities(),
+        religions(),
+    ]
+}
diff --git a/crates/nvisy-pattern/src/shipped/patterns.rs b/crates/nvisy-pattern/src/shipped/patterns.rs
deleted file mode 100644
index f9fc4140..00000000
--- a/crates/nvisy-pattern/src/shipped/patterns.rs
+++ /dev/null
@@ -1,162 +0,0 @@
-//! Built-in [`Regex`] rules, embedded at compile time.
-//!
-//! Each accessor returns a fresh [`Regex`] parsed from a TOML
-//! definition file under `assets/patterns/`. The parse happens on
-//! every call — rules are cheap to construct since
-//! [`PatternRecognizer::build`] does the heavy compilation.
-//!
-//! [`Regex`]: crate::Regex
-//! [`PatternRecognizer::build`]: crate::PatternRecognizer
-
-use crate::Regex;
-
-macro_rules! shipped_pattern {
-    ($(#[$meta:meta])* fn $name:ident from $path:literal) => {
-        $(#[$meta])*
-        #[must_use]
-        pub fn $name() -> Regex {
-            Regex::from_toml(include_str!(concat!("../../assets/patterns/", $path)))
-                .expect(concat!("shipped pattern `", $path, "` is well-formed"))
-        }
-    };
-}
-
-shipped_pattern!(
-    /// Email address (RFC-loose).
-    fn email from "contact/email.toml"
-);
-shipped_pattern!(
-    /// International phone numbers.
-    fn phone from "contact/phone.toml"
-);
-shipped_pattern!(
-    /// URLs (HTTP/HTTPS/FTP).
-    fn url from "contact/url.toml"
-);
-
-shipped_pattern!(
-    /// AWS access key IDs.
-    fn aws_key from "credentials/aws_key.toml"
-);
-shipped_pattern!(
-    /// Heuristic generic API key.
-    fn generic_api_key from "credentials/generic_api_key.toml"
-);
-shipped_pattern!(
-    /// GitHub personal access tokens.
-    fn github_token from "credentials/github_token.toml"
-);
-shipped_pattern!(
-    /// PEM-formatted private keys.
-    fn private_key from "credentials/private_key.toml"
-);
-shipped_pattern!(
-    /// Stripe live/test secret keys.
-    fn stripe_key from "credentials/stripe_key.toml"
-);
-
-shipped_pattern!(
-    /// Bitcoin (legacy + bech32) addresses.
-    fn bitcoin_address from "finance/bitcoin_address.toml"
-);
-shipped_pattern!(
-    /// Credit-card numbers, Luhn-validated.
-    fn credit_card from "finance/credit_card.toml"
-);
-shipped_pattern!(
-    /// Ethereum addresses.
-    fn ethereum_address from "finance/ethereum_address.toml"
-);
-shipped_pattern!(
-    /// International Bank Account Numbers.
-    fn iban from "finance/iban.toml"
-);
-shipped_pattern!(
-    /// SWIFT / BIC codes.
-    fn swift_code from "finance/swift_code.toml"
-);
-shipped_pattern!(
-    /// US bank routing numbers (ABA RTN).
-    fn us_bank_routing from "finance/us_bank_routing.toml"
-);
-
-shipped_pattern!(
-    /// US Social Security numbers (AAA-GG-SSSS).
-    fn ssn from "identity/ssn.toml"
-);
-shipped_pattern!(
-    /// US driver's license numbers.
-    fn us_drivers_license from "identity/us_drivers_license.toml"
-);
-shipped_pattern!(
-    /// US passport numbers.
-    fn us_passport from "identity/us_passport.toml"
-);
-shipped_pattern!(
-    /// US ZIP and ZIP+4 postal codes.
-    fn us_postal_code from "identity/us_postal_code.toml"
-);
-
-shipped_pattern!(
-    /// IPv4 addresses.
-    fn ipv4 from "network/ipv4.toml"
-);
-shipped_pattern!(
-    /// IPv6 addresses.
-    fn ipv6 from "network/ipv6.toml"
-);
-shipped_pattern!(
-    /// MAC (Ethernet) addresses.
-    fn mac_address from "network/mac_address.toml"
-);
-
-shipped_pattern!(
-    /// Date of birth in common written formats.
-    fn date_of_birth from "personal/date_of_birth.toml"
-);
-shipped_pattern!(
-    /// Date + time stamps in ISO-like formats.
-    fn datetime from "personal/datetime.toml"
-);
-
-/// Every built-in pattern shipped by this crate, in arbitrary
-/// stable order.
-#[must_use]
-pub fn all() -> Vec<Regex> {
-    vec![
-        email(),
-        phone(),
-        url(),
-        aws_key(),
-        generic_api_key(),
-        github_token(),
-        private_key(),
-        stripe_key(),
-        bitcoin_address(),
-        credit_card(),
-        ethereum_address(),
-        iban(),
-        swift_code(),
-        us_bank_routing(),
-        ssn(),
-        us_drivers_license(),
-        us_passport(),
-        us_postal_code(),
-        ipv4(),
-        ipv6(),
-        mac_address(),
-        date_of_birth(),
-        datetime(),
-    ]
-}
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-
-    #[test]
-    fn every_shipped_pattern_parses() {
-        let patterns = all();
-        assert_eq!(patterns.len(), 23);
-    }
-}
diff --git a/crates/nvisy-pattern/src/shipped/patterns/mod.rs b/crates/nvisy-pattern/src/shipped/patterns/mod.rs
new file mode 100644
index 00000000..4ee09223
--- /dev/null
+++ b/crates/nvisy-pattern/src/shipped/patterns/mod.rs
@@ -0,0 +1,108 @@
+//! Built-in [`Regex`] rules, embedded at compile time.
+//!
+//! Accessors are grouped by region — `world::*` for universal
+//! patterns, `<country>::*` (e.g. `us::*`, `uk::*`) for
+//! country-specific ones. Each returns a fresh [`Regex`] parsed
+//! from a TOML definition under
+//! `assets/patterns/<region>/<domain>/`. The parse happens on
+//! every call — rules are cheap to construct since
+//! [`PatternRecognizer::build`] does the heavy compilation.
+//!
+//! [`Regex`]: crate::Regex
+//! [`PatternRecognizer::build`]: crate::PatternRecognizer
+
+pub mod uk;
+pub mod us;
+pub mod world;
+
+use crate::Regex;
+
+/// Helper used by every per-region sub-module to define a shipped
+/// pattern accessor.
+///
+/// The `$path` is resolved with `include_str!` against the path
+/// of the file that *expands* the macro, so callers in sub-modules
+/// (e.g. `world.rs`) pass paths relative to themselves.
+#[doc(hidden)]
+#[macro_export]
+macro_rules! __shipped_pattern {
+    ($(#[$meta:meta])* fn $name:ident from $path:literal) => {
+        $(#[$meta])*
+        #[must_use]
+        pub fn $name() -> $crate::Regex {
+            $crate::Regex::from_toml(include_str!($path))
+                .expect(concat!("shipped pattern `", $path, "` is well-formed"))
+        }
+    };
+}
+
+/// Every built-in pattern shipped by this crate, regardless of
+/// region.
+#[must_use]
+pub fn all() -> Vec<Regex> {
+    let mut out = world::all();
+    out.extend(us::all());
+    out.extend(uk::all());
+    out
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn every_shipped_pattern_parses() {
+        let patterns = all();
+        assert_eq!(patterns.len(), 34);
+    }
+
+    #[test]
+    fn world_set_has_18_patterns() {
+        assert_eq!(world::all().len(), 18);
+    }
+
+    #[test]
+    fn us_set_has_10_patterns() {
+        assert_eq!(us::all().len(), 10);
+    }
+
+    #[test]
+    fn uk_set_has_6_patterns() {
+        assert_eq!(uk::all().len(), 6);
+    }
+
+    #[test]
+    fn world_patterns_have_no_country_scope() {
+        for pattern in world::all() {
+            assert!(
+                pattern.countries.is_empty(),
+                "world-scoped pattern `{}` must not declare countries",
+                pattern.name,
+            );
+        }
+    }
+
+    #[test]
+    fn us_patterns_are_country_scoped_to_us() {
+        for pattern in us::all() {
+            assert_eq!(
+                pattern.countries.iter().map(|c| c.as_str()).collect::<Vec<_>>(),
+                vec!["US"],
+                "US-scoped pattern `{}` must declare countries = [US]",
+                pattern.name,
+            );
+        }
+    }
+
+    #[test]
+    fn uk_patterns_are_country_scoped_to_gb() {
+        for pattern in uk::all() {
+            assert_eq!(
+                pattern.countries.iter().map(|c| c.as_str()).collect::<Vec<_>>(),
+                vec!["GB"],
+                "UK-scoped pattern `{}` must declare countries = [GB]",
+                pattern.name,
+            );
+        }
+    }
+}
diff --git a/crates/nvisy-pattern/src/shipped/patterns/uk.rs b/crates/nvisy-pattern/src/shipped/patterns/uk.rs
new file mode 100644
index 00000000..d10dcd01
--- /dev/null
+++ b/crates/nvisy-pattern/src/shipped/patterns/uk.rs
@@ -0,0 +1,46 @@
+//! United Kingdom — patterns scoped to UK jurisdictional formats.
+//!
+//! See `assets/NOTICE.md` for third-party attribution.
+
+use crate::Regex;
+use crate::__shipped_pattern as shipped_pattern;
+
+shipped_pattern!(
+    /// UK NHS numbers (10-digit, mod-11 validated).
+    fn nhs from "../../../assets/patterns/uk/identity/nhs.toml"
+);
+shipped_pattern!(
+    /// UK National Insurance numbers (NINO).
+    fn nino from "../../../assets/patterns/uk/identity/nino.toml"
+);
+shipped_pattern!(
+    /// UK driving licence numbers (DVLA 16-character).
+    fn driving_licence from "../../../assets/patterns/uk/identity/driving_licence.toml"
+);
+shipped_pattern!(
+    /// UK postcodes (BS7666 format, plus GIR 0AA).
+    fn postcode from "../../../assets/patterns/uk/contact/postcode.toml"
+);
+shipped_pattern!(
+    /// UK vehicle registration numbers (current, prefix, and
+    /// suffix eras).
+    fn vehicle_registration from "../../../assets/patterns/uk/vehicle/registration.toml"
+);
+shipped_pattern!(
+    /// UK passport numbers (post-2015 format). Weak score; relies
+    /// on the context-keyword boost.
+    fn passport from "../../../assets/patterns/uk/identity/passport.toml"
+);
+
+/// Every UK-scoped built-in pattern, in the order declared above.
+#[must_use]
+pub fn all() -> Vec<Regex> {
+    vec![
+        nhs(),
+        nino(),
+        driving_licence(),
+        postcode(),
+        vehicle_registration(),
+        passport(),
+    ]
+}
diff --git a/crates/nvisy-pattern/src/shipped/patterns/us.rs b/crates/nvisy-pattern/src/shipped/patterns/us.rs
new file mode 100644
index 00000000..7f3215a8
--- /dev/null
+++ b/crates/nvisy-pattern/src/shipped/patterns/us.rs
@@ -0,0 +1,64 @@
+//! United States — patterns scoped to US jurisdictional formats.
+
+use crate::Regex;
+use crate::__shipped_pattern as shipped_pattern;
+
+shipped_pattern!(
+    /// US bank routing numbers (ABA RTN, mod-10 validated).
+    fn bank_routing from "../../../assets/patterns/us/finance/bank_routing.toml"
+);
+shipped_pattern!(
+    /// US Social Security numbers (AAA-GG-SSSS).
+    fn ssn from "../../../assets/patterns/us/identity/ssn.toml"
+);
+shipped_pattern!(
+    /// US driver's license numbers (state-shape union).
+    fn drivers_license from "../../../assets/patterns/us/identity/drivers_license.toml"
+);
+shipped_pattern!(
+    /// US passport numbers.
+    fn passport from "../../../assets/patterns/us/identity/passport.toml"
+);
+shipped_pattern!(
+    /// US ZIP and ZIP+4 postal codes.
+    fn postal_code from "../../../assets/patterns/us/identity/postal_code.toml"
+);
+shipped_pattern!(
+    /// US Individual Taxpayer Identification Number (ITIN).
+    fn itin from "../../../assets/patterns/us/identity/itin.toml"
+);
+shipped_pattern!(
+    /// US National Provider Identifier (NPI, Luhn-on-80840 validated).
+    fn npi from "../../../assets/patterns/us/health/npi.toml"
+);
+shipped_pattern!(
+    /// US Medicare Beneficiary Identifier (MBI).
+    fn mbi from "../../../assets/patterns/us/health/mbi.toml"
+);
+shipped_pattern!(
+    /// US generic bank account number (8-17 digits, no checksum).
+    /// Very weak score; relies on the context-keyword boost.
+    fn bank_account from "../../../assets/patterns/us/finance/bank_account.toml"
+);
+shipped_pattern!(
+    /// US DEA registration number (medical license,
+    /// checksum-validated).
+    fn medical_license from "../../../assets/patterns/us/health/medical_license.toml"
+);
+
+/// Every US-scoped built-in pattern, in the order declared above.
+#[must_use]
+pub fn all() -> Vec<Regex> {
+    vec![
+        bank_routing(),
+        ssn(),
+        drivers_license(),
+        passport(),
+        postal_code(),
+        itin(),
+        npi(),
+        mbi(),
+        bank_account(),
+        medical_license(),
+    ]
+}
diff --git a/crates/nvisy-pattern/src/shipped/patterns/world.rs b/crates/nvisy-pattern/src/shipped/patterns/world.rs
new file mode 100644
index 00000000..d4f928a2
--- /dev/null
+++ b/crates/nvisy-pattern/src/shipped/patterns/world.rs
@@ -0,0 +1,106 @@
+//! Universal patterns — apply regardless of jurisdiction.
+
+use crate::Regex;
+use crate::__shipped_pattern as shipped_pattern;
+
+shipped_pattern!(
+    /// Email address (RFC-loose).
+    fn email from "../../../assets/patterns/world/contact/email.toml"
+);
+shipped_pattern!(
+    /// International phone numbers.
+    fn phone from "../../../assets/patterns/world/contact/phone.toml"
+);
+shipped_pattern!(
+    /// URLs (HTTP/HTTPS/FTP).
+    fn url from "../../../assets/patterns/world/contact/url.toml"
+);
+
+shipped_pattern!(
+    /// AWS access key IDs.
+    fn aws_key from "../../../assets/patterns/world/credentials/aws_key.toml"
+);
+shipped_pattern!(
+    /// Heuristic generic API key.
+    fn generic_api_key from "../../../assets/patterns/world/credentials/generic_api_key.toml"
+);
+shipped_pattern!(
+    /// GitHub personal access tokens.
+    fn github_token from "../../../assets/patterns/world/credentials/github_token.toml"
+);
+shipped_pattern!(
+    /// PEM-formatted private keys.
+    fn private_key from "../../../assets/patterns/world/credentials/private_key.toml"
+);
+shipped_pattern!(
+    /// Stripe live/test secret keys.
+    fn stripe_key from "../../../assets/patterns/world/credentials/stripe_key.toml"
+);
+
+shipped_pattern!(
+    /// Bitcoin (legacy + bech32) addresses.
+    fn bitcoin_address from "../../../assets/patterns/world/finance/bitcoin_address.toml"
+);
+shipped_pattern!(
+    /// Credit-card numbers, Luhn-validated.
+    fn credit_card from "../../../assets/patterns/world/finance/credit_card.toml"
+);
+shipped_pattern!(
+    /// Ethereum addresses.
+    fn ethereum_address from "../../../assets/patterns/world/finance/ethereum_address.toml"
+);
+shipped_pattern!(
+    /// International Bank Account Numbers.
+    fn iban from "../../../assets/patterns/world/finance/iban.toml"
+);
+shipped_pattern!(
+    /// SWIFT / BIC codes.
+    fn swift_code from "../../../assets/patterns/world/finance/swift_code.toml"
+);
+
+shipped_pattern!(
+    /// IPv4 addresses.
+    fn ipv4 from "../../../assets/patterns/world/network/ipv4.toml"
+);
+shipped_pattern!(
+    /// IPv6 addresses.
+    fn ipv6 from "../../../assets/patterns/world/network/ipv6.toml"
+);
+shipped_pattern!(
+    /// MAC (Ethernet) addresses.
+    fn mac_address from "../../../assets/patterns/world/network/mac_address.toml"
+);
+
+shipped_pattern!(
+    /// Date of birth in common written formats.
+    fn date_of_birth from "../../../assets/patterns/world/personal/date_of_birth.toml"
+);
+shipped_pattern!(
+    /// Date + time stamps in ISO-like formats.
+    fn datetime from "../../../assets/patterns/world/personal/datetime.toml"
+);
+
+/// Every world-scoped built-in pattern, in the order declared above.
+#[must_use]
+pub fn all() -> Vec<Regex> {
+    vec![
+        email(),
+        phone(),
+        url(),
+        aws_key(),
+        generic_api_key(),
+        github_token(),
+        private_key(),
+        stripe_key(),
+        bitcoin_address(),
+        credit_card(),
+        ethereum_address(),
+        iban(),
+        swift_code(),
+        ipv4(),
+        ipv6(),
+        mac_address(),
+        date_of_birth(),
+        datetime(),
+    ]
+}
diff --git a/crates/nvisy-pattern/src/validators/mod.rs b/crates/nvisy-pattern/src/validators/mod.rs
index 46395652..4eb8d853 100644
--- a/crates/nvisy-pattern/src/validators/mod.rs
+++ b/crates/nvisy-pattern/src/validators/mod.rs
@@ -7,10 +7,13 @@
 //! out structurally-suspect false positives that a regex alone
 //! can't.
 //!
-//! [`ValidatorRegistry::builtin`] ships with [`luhn`], [`iban`],
-//! [`ssn`], [`phone`], and [`date`]. Each validator is also
-//! re-exported as a free function so consumers can compose a
-//! custom registry without taking the full set.
+//! [`ValidatorRegistry::builtin`] ships universal validators
+//! ([`luhn`], [`iban`], [`phone`], [`date`]) plus jurisdiction-
+//! scoped sets re-exported from [`us`] (`"us.ssn"`,
+//! `"us.aba_routing"`, `"us.npi"`, `"us.dea_number"`) and [`uk`]
+//! (`"uk.nhs"`, `"uk.nino"`). Each validator is also re-exported
+//! as a free function so consumers can compose a custom registry
+//! without taking the full set.
 //!
 //! [`Variant`]: crate::Variant
 //! [`Regex`]: crate::Regex
@@ -19,7 +22,9 @@ mod date;
 mod iban;
 mod luhn;
 mod phone;
-mod ssn;
+
+pub mod uk;
+pub mod us;
 
 use std::borrow::Cow;
 use std::collections::HashMap;
@@ -29,7 +34,6 @@ pub use self::date::date;
 pub use self::iban::iban;
 pub use self::luhn::luhn;
 pub use self::phone::phone;
-pub use self::ssn::ssn;
 
 /// Post-match validator returning whether a matched string is
 /// structurally valid.
@@ -74,16 +78,28 @@ impl ValidatorRegistry {
         Self::default()
     }
 
-    /// Construct a registry pre-loaded with the built-in
-    /// validators: [`luhn`], [`iban`], [`ssn`], [`phone`], [`date`].
+    /// Construct a registry pre-loaded with the shipped built-in
+    /// validators.
+    ///
+    /// Universal keys: `"luhn"`, `"iban"`, `"phone"`, `"date"`.
+    ///
+    /// US-scoped: `"us.ssn"`, `"us.aba_routing"`, `"us.npi"`,
+    /// `"us.dea_number"`.
+    ///
+    /// UK-scoped: `"uk.nhs"`, `"uk.nino"`.
     #[must_use]
     pub fn builtin() -> Self {
         Self::empty()
             .with("luhn", luhn)
             .with("iban", iban)
-            .with("ssn", ssn)
             .with("phone", phone)
             .with("date", date)
+            .with("us.ssn", us::ssn)
+            .with("us.aba_routing", us::aba_routing)
+            .with("us.npi", us::npi)
+            .with("us.dea_number", us::dea_number)
+            .with("uk.nhs", uk::nhs)
+            .with("uk.nino", uk::nino)
     }
 
     /// Register `validator` under `name`, overwriting any previous
diff --git a/crates/nvisy-pattern/src/validators/uk/mod.rs b/crates/nvisy-pattern/src/validators/uk/mod.rs
new file mode 100644
index 00000000..37eadd66
--- /dev/null
+++ b/crates/nvisy-pattern/src/validators/uk/mod.rs
@@ -0,0 +1,12 @@
+//! UK-specific post-match validators.
+//!
+//! Registered under the [`ValidatorRegistry::builtin`] set with
+//! dotted names — `"uk.nhs"`, `"uk.nino"`.
+//!
+//! [`ValidatorRegistry::builtin`]: super::ValidatorRegistry::builtin
+
+mod nhs;
+mod nino;
+
+pub use self::nhs::nhs;
+pub use self::nino::nino;
diff --git a/crates/nvisy-pattern/src/validators/uk/nhs.rs b/crates/nvisy-pattern/src/validators/uk/nhs.rs
new file mode 100644
index 00000000..582683e4
--- /dev/null
+++ b/crates/nvisy-pattern/src/validators/uk/nhs.rs
@@ -0,0 +1,67 @@
+//! UK NHS number checksum validator.
+//!
+//! See `assets/NOTICE.md` for third-party attribution.
+
+/// Check the mod-11 checksum of a 10-digit UK NHS number.
+///
+/// Each digit is weighted `10, 9, …, 1` from left to right and
+/// the value is accepted when the weighted sum is divisible by
+/// 11 — so a number whose check digit would have to be 10 can
+/// never validate.
+///
+/// ASCII whitespace and `-` are ignored, so `"943 476 5919"`,
+/// `"943-476-5919"`, and `"9434765919"` are equivalent inputs.
+pub fn nhs(value: &str) -> bool {
+    let mut weighted_sum = 0u32;
+    let mut seen = 0u32;
+    for ch in value.chars() {
+        if ch.is_ascii_whitespace() || ch == '-' {
+            continue;
+        }
+        // A non-digit or an eleventh digit disqualifies the value.
+        let Some(digit) = ch.to_digit(10) else {
+            return false;
+        };
+        if seen == 10 {
+            return false;
+        }
+        weighted_sum += digit * (10 - seen);
+        seen += 1;
+    }
+    seen == 10 && weighted_sum.is_multiple_of(11)
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn valid_known_numbers() {
+        // Test number commonly used in NHS sandboxes.
+        assert!(nhs("9434765919"));
+        // Spaces and dashes are stripped.
+        assert!(nhs("943 476 5919"));
+        assert!(nhs("943-476-5919"));
+    }
+
+    #[test]
+    fn invalid_check_digit() {
+        // Wrong final digit fails the mod-11 check.
+        assert!(!nhs("9434765918"));
+        assert!(!nhs("9434765910"));
+    }
+
+    #[test]
+    fn rejects_non_digit_payload() {
+        // Embedded letters can never become a 10-digit checksum
+        // input.
+        assert!(!nhs("ABC4765919"));
+    }
+
+    #[test]
+    fn rejects_wrong_length() {
+        assert!(!nhs("123"));
+        assert!(!nhs("12345678901"));
+        assert!(!nhs(""));
+    }
+}
diff --git a/crates/nvisy-pattern/src/validators/uk/nino.rs b/crates/nvisy-pattern/src/validators/uk/nino.rs
new file mode 100644
index 00000000..0ff86604
--- /dev/null
+++ b/crates/nvisy-pattern/src/validators/uk/nino.rs
@@ -0,0 +1,57 @@
+//! UK National Insurance Number prefix validator.
+//!
+//! Reserved-prefix exclusion lives here in the validator because
+//! Rust's `regex` crate does not support look-around.
+
+/// Structural prefix check for a UK National Insurance number.
+///
+/// Looks only at the first two non-whitespace characters of
+/// `value`: both must be ASCII letters and, compared
+/// case-insensitively, must not form one of the reserved
+/// prefixes `BG`, `GB`, `NK`, `KN`, `NT`, `TN`, or `ZZ`.
+///
+/// Nothing after the prefix is inspected — neither the digits,
+/// the suffix letter, nor any HMRC issuance state.
+pub fn nino(value: &str) -> bool {
+    const RESERVED: [&str; 7] = ["BG", "GB", "NK", "KN", "NT", "TN", "ZZ"];
+    let mut leading = value.chars().filter(|c| !c.is_ascii_whitespace());
+    let (Some(first), Some(second)) = (leading.next(), leading.next()) else {
+        return false;
+    };
+    if !(first.is_ascii_alphabetic() && second.is_ascii_alphabetic()) {
+        return false;
+    }
+    let prefix = [first.to_ascii_uppercase(), second.to_ascii_uppercase()];
+    !RESERVED.iter().any(|reserved| reserved.chars().eq(prefix))
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn accepts_valid_prefix() {
+        assert!(nino("AB123456C"));
+        assert!(nino("JK 12 34 56 A"));
+    }
+
+    #[test]
+    fn rejects_reserved_prefixes() {
+        for reserved in ["BG", "GB", "NK", "KN", "NT", "TN", "ZZ"] {
+            let value = format!("{reserved}123456A");
+            assert!(!nino(&value), "{reserved} must be rejected");
+        }
+    }
+
+    #[test]
+    fn rejection_is_case_insensitive() {
+        assert!(!nino("bg123456A"));
+        assert!(!nino("Zz123456A"));
+    }
+
+    #[test]
+    fn rejects_non_alpha_prefix() {
+        assert!(!nino("12345678A"));
+        assert!(!nino(""));
+    }
+}
diff --git a/crates/nvisy-pattern/src/validators/us/aba_routing.rs b/crates/nvisy-pattern/src/validators/us/aba_routing.rs
new file mode 100644
index 00000000..9ea3042d
--- /dev/null
+++ b/crates/nvisy-pattern/src/validators/us/aba_routing.rs
@@ -0,0 +1,64 @@
+//! US ABA routing number checksum validator.
+
+/// Check the mod-10 checksum of a 9-digit ABA routing number.
+///
+/// The digits are weighted with the repeating cycle `3, 7, 1`
+/// and the value is accepted when the weighted sum is divisible
+/// by 10. ASCII whitespace and `-` are ignored, so `"121000358"`
+/// and `"121 000 358"` are equivalent inputs.
+pub fn aba_routing(value: &str) -> bool {
+    const WEIGHTS: [u32; 3] = [3, 7, 1];
+    // Ten characters are enough to tell a 9-digit payload from a longer one.
+    let payload = value
+        .chars()
+        .filter(|c| !(c.is_ascii_whitespace() || *c == '-'))
+        .take(10);
+    let (mut count, mut checksum) = (0usize, 0u32);
+    for ch in payload {
+        let Some(digit) = ch.to_digit(10) else {
+            return false;
+        };
+        checksum += digit * WEIGHTS[count % 3];
+        count += 1;
+    }
+    count == 9 && checksum.is_multiple_of(10)
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn valid_known_numbers() {
+        // Wells Fargo SF (verified test vector).
+        assert!(aba_routing("121000358"));
+        // JPMorgan Chase NY.
+        assert!(aba_routing("021000021"));
+        // Citibank NY.
+        assert!(aba_routing("021000089"));
+    }
+
+    #[test]
+    fn strips_separators() {
+        assert!(aba_routing("121-000-358"));
+        assert!(aba_routing("121 000 358"));
+    }
+
+    #[test]
+    fn rejects_wrong_checksum() {
+        assert!(!aba_routing("121000359"));
+        assert!(!aba_routing("000000001"));
+    }
+
+    #[test]
+    fn rejects_wrong_length() {
+        assert!(!aba_routing("12100035"));
+        assert!(!aba_routing("1210003580"));
+        assert!(!aba_routing(""));
+    }
+
+    #[test]
+    fn rejects_non_digit_payload() {
+        assert!(!aba_routing("12100035A"));
+    }
+}
diff --git a/crates/nvisy-pattern/src/validators/us/dea_number.rs b/crates/nvisy-pattern/src/validators/us/dea_number.rs
new file mode 100644
index 00000000..ecb8a68d
--- /dev/null
+++ b/crates/nvisy-pattern/src/validators/us/dea_number.rs
@@ -0,0 +1,83 @@
+//! US DEA (Drug Enforcement Administration) registration number
+//! checksum validator.
+//!
+//! See `assets/NOTICE.md` for third-party attribution.
+
+/// Check the shape and checksum of a US DEA registration number.
+///
+/// After ASCII whitespace and `-` separators are dropped, the
+/// value must be exactly nine characters: two ASCII letters
+/// (the registration type and the surname initial) followed by
+/// seven digits `d1..d7`.
+///
+/// The last digit is the checksum: with `odd = d1 + d3 + d5` and
+/// `even = d2 + d4 + d6`, the value is accepted when
+/// `(odd + 2 * even) % 10 == d7`.
+pub fn dea_number(value: &str) -> bool {
+    // Working on bytes is safe: every byte of a multi-byte
+    // character is >= 0x80, so it fails the ASCII checks below.
+    let cleaned: Vec<u8> = value
+        .bytes()
+        .filter(|b| !(b.is_ascii_whitespace() || *b == b'-'))
+        .collect();
+    let [kind, initial, body @ .., check] = cleaned.as_slice() else {
+        return false;
+    };
+    if body.len() != 6 || !(kind.is_ascii_alphabetic() && initial.is_ascii_alphabetic()) {
+        return false;
+    }
+    if !(check.is_ascii_digit() && body.iter().all(u8::is_ascii_digit)) {
+        return false;
+    }
+    // Odd positions (d1, d3, d5) count once, even ones (d2, d4, d6) twice.
+    let weighted: u32 = body
+        .iter()
+        .zip([1u32, 2].iter().cycle())
+        .map(|(b, w)| u32::from(b - b'0') * w)
+        .sum();
+    weighted % 10 == u32::from(check - b'0')
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn valid_known_dea_numbers() {
+        // AB1234563: odd = 1+3+5 = 9, even = 2+4+6 = 12,
+        // (9 + 24) % 10 = 3 → matches d7 = 3.
+        assert!(dea_number("AB1234563"));
+        // BC9876561: odd = 9+7+5 = 21, even = 8+6+6 = 20,
+        // (21 + 40) % 10 = 1 → matches d7 = 1.
+        assert!(dea_number("BC9876561"));
+        // AF3456781: odd = 15, even = 18, (15 + 36) % 10 = 1.
+        assert!(dea_number("AF3456781"));
+        // BB0000000: all-zero body, so the expected d7 is 0.
+        assert!(dea_number("BB0000000"));
+    }
+
+    #[test]
+    fn strips_separators() {
+        assert!(dea_number("AB-12-34563"));
+        assert!(dea_number("AB 12 34563"));
+    }
+
+    #[test]
+    fn rejects_wrong_check_digit() {
+        assert!(!dea_number("AB1234560"));
+        assert!(!dea_number("AB1234565"));
+    }
+
+    #[test]
+    fn rejects_wrong_length() {
+        assert!(!dea_number("AB123"));
+        assert!(!dea_number("AB12345630"));
+        assert!(!dea_number(""));
+    }
+
+    #[test]
+    fn rejects_non_letter_prefix() {
+        assert!(!dea_number("123456789"));
+        assert!(!dea_number("A21234563"));
+    }
+}
diff --git a/crates/nvisy-pattern/src/validators/us/mod.rs b/crates/nvisy-pattern/src/validators/us/mod.rs
new file mode 100644
index 00000000..60b1d300
--- /dev/null
+++ b/crates/nvisy-pattern/src/validators/us/mod.rs
@@ -0,0 +1,16 @@
+//! US-specific post-match validators.
+//!
+//! Registered under the [`ValidatorRegistry::builtin`] set with
+//! dotted names — `"us.ssn"`, `"us.aba_routing"`, etc.
+//!
+//! [`ValidatorRegistry::builtin`]: super::ValidatorRegistry::builtin
+
+mod aba_routing;
+mod dea_number;
+mod npi;
+mod ssn;
+
+pub use self::aba_routing::aba_routing;
+pub use self::dea_number::dea_number;
+pub use self::npi::npi;
+pub use self::ssn::ssn;
diff --git a/crates/nvisy-pattern/src/validators/us/npi.rs b/crates/nvisy-pattern/src/validators/us/npi.rs
new file mode 100644
index 00000000..533a6608
--- /dev/null
+++ b/crates/nvisy-pattern/src/validators/us/npi.rs
@@ -0,0 +1,66 @@
+//! US National Provider Identifier (NPI) checksum validator.
+//!
+//! See `assets/NOTICE.md` for third-party attribution.
+
+use super::super::luhn::luhn;
+
+/// Return `true` if `value` is a valid 10-digit US NPI.
+///
+/// The CMS algorithm prepends the constant `"80840"` to the
+/// 10-digit identifier and runs the standard Luhn checksum on
+/// the resulting 15-digit string. Identifiers whose first nine
+/// digits are all the same are rejected outright. Whitespace
+/// and `-` separators are stripped before validation.
+pub fn npi(value: &str) -> bool {
+    let digits: String = value
+        .chars()
+        .filter(|c| !c.is_ascii_whitespace() && *c != '-')
+        .collect();
+    if digits.len() != 10 || !digits.bytes().all(|b| b.is_ascii_digit()) {
+        return false;
+    }
+    // Reject bodies made of one repeated digit: paired with the
+    // right check digit (e.g. `1111111112`) they pass Luhn, but
+    // they are not real provider numbers.
+    let body = &digits.as_bytes()[..9];
+    if body.iter().all(|b| *b == body[0]) {
+        return false;
+    }
+    luhn(&format!("80840{digits}"))
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn valid_known_npi_number() {
+        // Test vector validated against the CMS Luhn-on-80840 algorithm.
+        assert!(npi("1234567893"));
+    }
+
+    #[test]
+    fn strips_separators() {
+        assert!(npi("1234-567-893"));
+        assert!(npi("1234 567 893"));
+    }
+
+    #[test]
+    fn rejects_wrong_check_digit() {
+        assert!(!npi("1234567890"));
+        assert!(!npi("1234567899"));
+    }
+
+    #[test]
+    fn rejects_degenerate_all_same_digits() {
+        assert!(!npi("1111111112")); // Luhn-valid; only the body guard rejects it
+        assert!(!npi("2222222228")); // Luhn-valid; only the body guard rejects it
+    }
+
+    #[test]
+    fn rejects_wrong_length() {
+        assert!(!npi("123456789"));
+        assert!(!npi("12345678901"));
+        assert!(!npi(""));
+    }
+}
diff --git a/crates/nvisy-pattern/src/validators/ssn.rs b/crates/nvisy-pattern/src/validators/us/ssn.rs
similarity index 89%
rename from crates/nvisy-pattern/src/validators/ssn.rs
rename to crates/nvisy-pattern/src/validators/us/ssn.rs
index 223408eb..d31cc66e 100644
--- a/crates/nvisy-pattern/src/validators/ssn.rs
+++ b/crates/nvisy-pattern/src/validators/us/ssn.rs
@@ -12,7 +12,7 @@
 /// This is a format check only — not a verification against SSA
 /// records.
 pub fn ssn(value: &str) -> bool {
-    let parts: Vec<&str> = value.split('-').collect();
+    let parts: Vec<&str> = value.split(['-', ' ', '.']).collect();
     if parts.len() != 3 {
         return false;
     }
@@ -42,6 +42,12 @@ mod tests {
         assert!(ssn("899-99-9999"));
     }
 
+    #[test]
+    fn accepts_space_and_dot_separators() {
+        assert!(ssn("123 45 6789"));
+        assert!(ssn("123.45.6789"));
+    }
+
     #[test]
     fn invalid_area_zero() {
         assert!(!ssn("000-45-6789"));
diff --git a/crates/nvisy-pattern/testdata/inputs/identity.txt b/crates/nvisy-pattern/testdata/inputs/identity.txt
index 045a6238..cdcbd923 100644
--- a/crates/nvisy-pattern/testdata/inputs/identity.txt
+++ b/crates/nvisy-pattern/testdata/inputs/identity.txt
@@ -3,9 +3,12 @@ PATIENT INTAKE FORM
 Full name:        Jane Smith
 Date of birth:    1985-03-14
 SSN:              123-45-6789
+ITIN:             912-71-1234
 Driver license:   D123-4567-8901
 Passport (US):    A12345678
 Mailing address:  742 Evergreen Terrace
                   Springfield, OR 97477-1234
+Provider NPI:     1234567893
+Medicare MBI:     1EG4-TE5-MK73
 
 Insurance card number on file (see attached).
diff --git a/crates/nvisy-pattern/testdata/inputs/uk.txt b/crates/nvisy-pattern/testdata/inputs/uk.txt
new file mode 100644
index 00000000..05dc0b42
--- /dev/null
+++ b/crates/nvisy-pattern/testdata/inputs/uk.txt
@@ -0,0 +1,12 @@
+Patient handover for Mrs A. Patel.
+
+Personal details:
+  - NHS number: 943 476 5919
+  - NINO: AB123456C
+  - Driving licence (DVLA): MORGA753116SM9IJ
+  - Address: 10 Downing Street, London SW1A 2AA
+
+Vehicle: BMW 3 Series, registration AB51 ABC, V5C on file.
+
+Please update the patient record (national health service form 4)
+and bill the National Insurance reference shown above.
diff --git a/crates/nvisy-pattern/tests/shipped_detection.rs b/crates/nvisy-pattern/tests/shipped_detection.rs
index 32d0ac13..b6b82cc6 100644
--- a/crates/nvisy-pattern/tests/shipped_detection.rs
+++ b/crates/nvisy-pattern/tests/shipped_detection.rs
@@ -79,6 +79,24 @@ async fn identity_inputs_yield_expected_entities() {
         builtins::DATE_OF_BIRTH.label_ref(),
         "1985-03-14",
     );
+    assert_match(
+        &text,
+        &entities,
+        builtins::TAX_ID.label_ref(),
+        "912-71-1234",
+    );
+    assert_match(
+        &text,
+        &entities,
+        builtins::MEDICAL_ID.label_ref(),
+        "1234567893",
+    );
+    assert_match(
+        &text,
+        &entities,
+        builtins::MEDICAL_ID.label_ref(),
+        "1EG4-TE5-MK73",
+    );
 }
 
 #[tokio::test]
@@ -195,3 +213,38 @@ async fn personal_inputs_yield_expected_entities() {
         "expected at least one Language"
     );
 }
+
+#[tokio::test]
+async fn uk_inputs_yield_expected_entities() {
+    let (text, entities) = scan(include_str!("../testdata/inputs/uk.txt")).await;
+    assert_match(
+        &text,
+        &entities,
+        builtins::MEDICAL_ID.label_ref(),
+        "943 476 5919",
+    );
+    assert_match(
+        &text,
+        &entities,
+        builtins::NATIONAL_INSURANCE_NUMBER.label_ref(),
+        "AB123456C",
+    );
+    assert_match(
+        &text,
+        &entities,
+        builtins::DRIVERS_LICENSE.label_ref(),
+        "MORGA753116SM9IJ",
+    );
+    assert_match(
+        &text,
+        &entities,
+        builtins::POSTAL_CODE.label_ref(),
+        "SW1A 2AA",
+    );
+    assert_match(
+        &text,
+        &entities,
+        builtins::LICENSE_PLATE.label_ref(),
+        "AB51 ABC",
+    );
+}
diff --git a/crates/nvisy-toolkit/tests/fixtures/registries.rs b/crates/nvisy-toolkit/tests/fixtures/registries.rs
index 30098801..2056a1d0 100644
--- a/crates/nvisy-toolkit/tests/fixtures/registries.rs
+++ b/crates/nvisy-toolkit/tests/fixtures/registries.rs
@@ -50,13 +50,14 @@ where
         .insert_label(builtins::PAYMENT_CARD.label_ref(), Mask::stars())
 }
 
-/// Standard dedup params: a `0.5` confidence threshold drops the
-/// low-confidence ISO-639 short-code matches from the languages
-/// dictionary (see `assets/dictionaries/general/languages.toml`'s
-/// `column_scores`).
+/// Standard dedup params: a `0.35` confidence threshold sized
+/// for our shipped patterns' baseline (most regex-only matches
+/// land in 0.1–0.5 before context boost); a tighter threshold
+/// would drop legitimate weak-pattern matches the context layer
+/// is expected to lift.
 pub fn dedup_params() -> LayerParams {
     LayerParams {
-        confidence_threshold: Some(ConfidenceThreshold::new(0.5).unwrap()),
+        confidence_threshold: Some(ConfidenceThreshold::new(0.35).unwrap()),
         ..LayerParams::default()
     }
 }

From ae6a6409734623da2093d5c215f26b26c2af4d48 Mon Sep 17 00:00:00 2001
From: Oleh Martsokha <o.martsokha@gmail.com>
Date: Tue, 16 Jun 2026 03:25:50 +0200
Subject: [PATCH 10/14] test(pattern): split shipped_detection into per-region
 e2e binaries

Restructure pattern crate end-to-end tests:

- Replace single tests/shipped_detection.rs with three per-region
  binaries: tests/builtin.rs (5 world tests), tests/builtin_us.rs
  (3 US tests), tests/builtin_uk.rs (3 UK tests). 11 tests total.
- Move shared scan + assert_match + assert_label_present helpers to
  tests/fixtures/mod.rs, declared via mod fixtures; from each binary.
  Both helpers carry #[track_caller] for better failure attribution.
- Reshape testdata/inputs/ to mirror the asset tree:
  - world fixtures move from monolithic domain files into
    inputs/{contact,credentials,finance,network,personal}.txt
  - inputs/us/{identity,finance,health}.txt
  - inputs/uk/{identity,contact,vehicle}.txt (split from old uk.txt)
- Each test scans one fixture, asserting substring + label matches
  against a recognizer loaded with every shipped pattern and
  dictionary via build_context_enhanced.
- builtin_uk_identity asserts NATIONALITY (world dictionary firing
  on "British") to keep assert_label_present reachable across all
  three binaries.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
---
 .../uk.txt => builtin/uk/identity.txt}        |   0
 .../nvisy-pattern/testdata/inputs/contact.txt |   9 +-
 .../testdata/inputs/credentials.txt           |  10 +-
 .../nvisy-pattern/testdata/inputs/finance.txt |   9 +-
 .../testdata/inputs/uk/contact.txt            |   7 +
 .../testdata/inputs/uk/identity.txt           |   8 +
 .../testdata/inputs/uk/vehicle.txt            |   8 +
 .../testdata/inputs/us/finance.txt            |  10 +
 .../testdata/inputs/us/health.txt             |  15 ++
 .../testdata/inputs/{ => us}/identity.txt     |  10 +-
 crates/nvisy-pattern/tests/builtin.rs         | 143 ++++++++++
 crates/nvisy-pattern/tests/builtin_uk.rs      |  65 +++++
 crates/nvisy-pattern/tests/builtin_us.rs      |  85 ++++++
 crates/nvisy-pattern/tests/fixtures/mod.rs    |  60 +++++
 .../nvisy-pattern/tests/shipped_detection.rs  | 250 ------------------
 15 files changed, 420 insertions(+), 269 deletions(-)
 rename crates/nvisy-pattern/testdata/{inputs/uk.txt => builtin/uk/identity.txt} (100%)
 create mode 100644 crates/nvisy-pattern/testdata/inputs/uk/contact.txt
 create mode 100644 crates/nvisy-pattern/testdata/inputs/uk/identity.txt
 create mode 100644 crates/nvisy-pattern/testdata/inputs/uk/vehicle.txt
 create mode 100644 crates/nvisy-pattern/testdata/inputs/us/finance.txt
 create mode 100644 crates/nvisy-pattern/testdata/inputs/us/health.txt
 rename crates/nvisy-pattern/testdata/inputs/{ => us}/identity.txt (50%)
 create mode 100644 crates/nvisy-pattern/tests/builtin.rs
 create mode 100644 crates/nvisy-pattern/tests/builtin_uk.rs
 create mode 100644 crates/nvisy-pattern/tests/builtin_us.rs
 create mode 100644 crates/nvisy-pattern/tests/fixtures/mod.rs
 delete mode 100644 crates/nvisy-pattern/tests/shipped_detection.rs

diff --git a/crates/nvisy-pattern/testdata/inputs/uk.txt b/crates/nvisy-pattern/testdata/builtin/uk/identity.txt
similarity index 100%
rename from crates/nvisy-pattern/testdata/inputs/uk.txt
rename to crates/nvisy-pattern/testdata/builtin/uk/identity.txt
diff --git a/crates/nvisy-pattern/testdata/inputs/contact.txt b/crates/nvisy-pattern/testdata/inputs/contact.txt
index 975ccea5..fb6af992 100644
--- a/crates/nvisy-pattern/testdata/inputs/contact.txt
+++ b/crates/nvisy-pattern/testdata/inputs/contact.txt
@@ -1,9 +1,10 @@
 Hi team,
 
-Please reach out to alice.johnson@example.com if you have questions about
-the proposal. For urgent matters, call me at +1 (415) 555-0142 or my office
-line 415.555.0188. Background materials live at https://docs.example.com/proposal
-and the secondary mirror is http://backup.example.org/proposal-v2.
+Please reach out to alice.johnson@example.com if you have
+questions about the proposal. For urgent matters, call me at
++1 (415) 555-0142 or my office line 415.555.0188. Background
+materials live at https://docs.example.com/proposal and the
+secondary mirror is http://backup.example.org/proposal-v2.
 
 Best,
 Bob
diff --git a/crates/nvisy-pattern/testdata/inputs/credentials.txt b/crates/nvisy-pattern/testdata/inputs/credentials.txt
index 100d9eb8..94b8c803 100644
--- a/crates/nvisy-pattern/testdata/inputs/credentials.txt
+++ b/crates/nvisy-pattern/testdata/inputs/credentials.txt
@@ -1,8 +1,10 @@
 # config.env (DO NOT COMMIT)
-# Note: all credentials below are obvious-fake placeholders chosen to
-# exercise the credentials patterns without tripping push-protection
-# scanners. AWS uses AWS's own documented example key; Stripe uses
-# `sk_test_` (test-mode prefix); GitHub uses an EXAMPLE-suffixed token.
+#
+# Note: all credentials below are obvious-fake placeholders chosen
+# to exercise the credential patterns without tripping
+# push-protection scanners. AWS uses AWS's own documented example
+# key; Stripe uses `sk_test_` (test-mode prefix); GitHub uses an
+# EXAMPLE-suffixed token.
 
 AWS_ACCESS_KEY_ID=AKIAIOSFODNN7EXAMPLE
 GITHUB_TOKEN=ghp_EXAMPLE00000abcdefghijklmnopqrstuvwx
diff --git a/crates/nvisy-pattern/testdata/inputs/finance.txt b/crates/nvisy-pattern/testdata/inputs/finance.txt
index 367d0dd7..17d2d32d 100644
--- a/crates/nvisy-pattern/testdata/inputs/finance.txt
+++ b/crates/nvisy-pattern/testdata/inputs/finance.txt
@@ -1,12 +1,11 @@
 Wire transfer authorization
 ---------------------------
 
-Beneficiary: Acme Industries Ltd.
-IBAN: GB29 NWBK 6016 1331 9268 19
-SWIFT/BIC: NWBKGB2L
-Routing (US correspondent): 021000021
+Beneficiary:    Acme Industries Ltd.
+IBAN:           GB29 NWBK 6016 1331 9268 19
+SWIFT/BIC:      NWBKGB2L
 Charge card on file (backup): 4539 1488 0343 6467
-Settlement: in US Dollar, optionally EUR.
+Settlement:     in US Dollar, optionally EUR.
 
 Crypto reimbursement options:
 - Bitcoin: 1A1zP1eP5QGefi2DMPTfTL5SLmv7DivfNa
diff --git a/crates/nvisy-pattern/testdata/inputs/uk/contact.txt b/crates/nvisy-pattern/testdata/inputs/uk/contact.txt
new file mode 100644
index 00000000..9ce5dff9
--- /dev/null
+++ b/crates/nvisy-pattern/testdata/inputs/uk/contact.txt
@@ -0,0 +1,7 @@
+Correspondence address for Royal Mail delivery:
+
+  10 Downing Street
+  London SW1A 2AA
+  United Kingdom
+
+Use the postcode above when filing the mailing label.
diff --git a/crates/nvisy-pattern/testdata/inputs/uk/identity.txt b/crates/nvisy-pattern/testdata/inputs/uk/identity.txt
new file mode 100644
index 00000000..52b36edb
--- /dev/null
+++ b/crates/nvisy-pattern/testdata/inputs/uk/identity.txt
@@ -0,0 +1,8 @@
+Patient handover for Mrs A. Patel — refer to the NHS patient
+file, the National Insurance reference, the DVLA driving licence,
+and her HM Passport Office passport.
+
+NHS number:             943 476 5919
+NI Number (NINO):       AB123456C
+Driving licence (DVLA): MORGA753116SM9IJ
+British passport:       AB1234567
diff --git a/crates/nvisy-pattern/testdata/inputs/uk/vehicle.txt b/crates/nvisy-pattern/testdata/inputs/uk/vehicle.txt
new file mode 100644
index 00000000..b6d248c3
--- /dev/null
+++ b/crates/nvisy-pattern/testdata/inputs/uk/vehicle.txt
@@ -0,0 +1,8 @@
+Vehicle on file:
+
+  Make / model:           BMW 3 Series
+  Registration plate:     AB51 ABC
+  DVLA V5C log book:      yes
+  MOT current:            yes
+
+Insurance to be billed against the registered vehicle above.
diff --git a/crates/nvisy-pattern/testdata/inputs/us/finance.txt b/crates/nvisy-pattern/testdata/inputs/us/finance.txt
new file mode 100644
index 00000000..aa0619b9
--- /dev/null
+++ b/crates/nvisy-pattern/testdata/inputs/us/finance.txt
@@ -0,0 +1,10 @@
+ACH transfer instruction (US correspondent)
+-------------------------------------------
+
+Beneficiary bank:    Wells Fargo Bank, N.A.
+Routing (ABA RTN):   121000358
+Checking account:    0123456789012
+Wire memo:           consultancy invoice 0042
+
+Use the checking account number above for the ACH debit and the
+routing number above for the bank deposit instruction.
diff --git a/crates/nvisy-pattern/testdata/inputs/us/health.txt b/crates/nvisy-pattern/testdata/inputs/us/health.txt
new file mode 100644
index 00000000..00ac7cb2
--- /dev/null
+++ b/crates/nvisy-pattern/testdata/inputs/us/health.txt
@@ -0,0 +1,15 @@
+Provider claim — Medicare crossover
+-----------------------------------
+
+Rendering provider:
+  Name:           Dr. Robert Hayes, MD
+  NPI:            1234567893
+  DEA license:    BB0000000
+  Taxonomy code:  207R00000X
+
+Beneficiary:
+  Name:           Henry Davies
+  Medicare MBI:   1EG4-TE5-MK73
+
+Claim submitted for evaluation under Medicare Part B; the DEA
+registration above authorizes prescribing of controlled substances.
diff --git a/crates/nvisy-pattern/testdata/inputs/identity.txt b/crates/nvisy-pattern/testdata/inputs/us/identity.txt
similarity index 50%
rename from crates/nvisy-pattern/testdata/inputs/identity.txt
rename to crates/nvisy-pattern/testdata/inputs/us/identity.txt
index cdcbd923..ea9e0c77 100644
--- a/crates/nvisy-pattern/testdata/inputs/identity.txt
+++ b/crates/nvisy-pattern/testdata/inputs/us/identity.txt
@@ -2,13 +2,11 @@ PATIENT INTAKE FORM
 
 Full name:        Jane Smith
 Date of birth:    1985-03-14
-SSN:              123-45-6789
-ITIN:             912-71-1234
+Social security:  123-45-6789
+ITIN (taxpayer):  912-71-1234
 Driver license:   D123-4567-8901
-Passport (US):    A12345678
+US passport:      A12345678
 Mailing address:  742 Evergreen Terrace
                   Springfield, OR 97477-1234
-Provider NPI:     1234567893
-Medicare MBI:     1EG4-TE5-MK73
 
-Insurance card number on file (see attached).
+Confirm insurance card on file (see attached).
diff --git a/crates/nvisy-pattern/tests/builtin.rs b/crates/nvisy-pattern/tests/builtin.rs
new file mode 100644
index 00000000..2025384d
--- /dev/null
+++ b/crates/nvisy-pattern/tests/builtin.rs
@@ -0,0 +1,143 @@
+//! End-to-end: shipped patterns + dictionaries against the
+//! cross-jurisdiction (`world`) fixtures.
+//!
+//! Each test scans one `testdata/inputs/<domain>.txt` fixture
+//! through a recognizer wired with every shipped pattern and
+//! dictionary, then asserts the entities a real document of that
+//! domain is expected to surface (substring + label, not
+//! byte-offset, so fixtures and regexes can evolve without
+//! brittle churn).
+
+mod fixtures;
+
+use fixtures::{assert_label_present, assert_match, scan};
+use nvisy_core::entity::builtins;
+
+#[tokio::test]
+async fn builtin_contact() {
+    let (text, entities) = scan(include_str!("../testdata/inputs/contact.txt")).await;
+    assert_match(
+        &text,
+        &entities,
+        builtins::EMAIL_ADDRESS.label_ref(),
+        "alice.johnson@example.com",
+    );
+    assert_match(
+        &text,
+        &entities,
+        builtins::URL.label_ref(),
+        "https://docs.example.com/proposal",
+    );
+    assert_match(
+        &text,
+        &entities,
+        builtins::URL.label_ref(),
+        "http://backup.example.org/proposal-v2",
+    );
+    assert_label_present(&entities, builtins::PHONE_NUMBER.label_ref());
+}
+
+#[tokio::test]
+async fn builtin_credentials() {
+    let (_, entities) = scan(include_str!("../testdata/inputs/credentials.txt")).await;
+    assert_label_present(&entities, builtins::API_KEY.label_ref());
+    assert_label_present(&entities, builtins::PRIVATE_KEY.label_ref());
+    assert_label_present(&entities, builtins::AUTH_TOKEN.label_ref());
+}
+
+#[tokio::test]
+async fn builtin_finance() {
+    let (text, entities) = scan(include_str!("../testdata/inputs/finance.txt")).await;
+    assert_match(
+        &text,
+        &entities,
+        builtins::IBAN.label_ref(),
+        "GB29 NWBK 6016 1331 9268 19",
+    );
+    assert_match(
+        &text,
+        &entities,
+        builtins::SWIFT_CODE.label_ref(),
+        "NWBKGB2L",
+    );
+    assert_match(
+        &text,
+        &entities,
+        builtins::PAYMENT_CARD.label_ref(),
+        "4539 1488 0343 6467",
+    );
+    assert_match(
+        &text,
+        &entities,
+        builtins::CRYPTO_ADDRESS.label_ref(),
+        "1A1zP1eP5QGefi2DMPTfTL5SLmv7DivfNa",
+    );
+    assert_match(
+        &text,
+        &entities,
+        builtins::CRYPTO_ADDRESS.label_ref(),
+        "0x742d35Cc6634C0532925a3b844Bc9e7595f6E842",
+    );
+    // Currency dictionaries pick up `USD`, `EUR`, `Tether`, `USDC`.
+    assert_label_present(&entities, builtins::CURRENCY.label_ref());
+}
+
+#[tokio::test]
+async fn builtin_network() {
+    let (text, entities) = scan(include_str!("../testdata/inputs/network.txt")).await;
+    assert_match(
+        &text,
+        &entities,
+        builtins::IP_ADDRESS.label_ref(),
+        "192.168.1.42",
+    );
+    assert_match(
+        &text,
+        &entities,
+        builtins::IP_ADDRESS.label_ref(),
+        "10.0.0.7",
+    );
+    assert_match(
+        &text,
+        &entities,
+        builtins::IP_ADDRESS.label_ref(),
+        "203.0.113.55",
+    );
+    assert_match(
+        &text,
+        &entities,
+        builtins::IP_ADDRESS.label_ref(),
+        "2001:0db8:85a3:0000:0000:8a2e:0370:7334",
+    );
+    assert_match(
+        &text,
+        &entities,
+        builtins::MAC_ADDRESS.label_ref(),
+        "00:1A:2B:3C:4D:5E",
+    );
+    assert_match(
+        &text,
+        &entities,
+        builtins::MAC_ADDRESS.label_ref(),
+        "3C-22-FB-A1-B2-C3",
+    );
+}
+
+#[tokio::test]
+async fn builtin_personal() {
+    let (text, entities) = scan(include_str!("../testdata/inputs/personal.txt")).await;
+    assert_match(
+        &text,
+        &entities,
+        builtins::DATE_OF_BIRTH.label_ref(),
+        "04/22/1979",
+    );
+    assert_match(
+        &text,
+        &entities,
+        builtins::DATE_TIME.label_ref(),
+        "2024-06-15T09:30:00Z",
+    );
+    assert_label_present(&entities, builtins::NATIONALITY.label_ref());
+    assert_label_present(&entities, builtins::LANGUAGE.label_ref());
+}
diff --git a/crates/nvisy-pattern/tests/builtin_uk.rs b/crates/nvisy-pattern/tests/builtin_uk.rs
new file mode 100644
index 00000000..e166c73a
--- /dev/null
+++ b/crates/nvisy-pattern/tests/builtin_uk.rs
@@ -0,0 +1,65 @@
+//! End-to-end: shipped patterns + dictionaries against the
+//! UK-jurisdiction fixtures (`testdata/inputs/uk/<domain>.txt`).
+//!
+//! Each test scans one UK fixture through a recognizer wired
+//! with every shipped pattern and dictionary, then asserts the
+//! entities a real UK document of that domain is expected to
+//! surface (substring + label).
+
+mod fixtures;
+
+use fixtures::{assert_label_present, assert_match, scan};
+use nvisy_core::entity::builtins;
+
+#[tokio::test]
+async fn builtin_identity() {
+    let (text, entities) = scan(include_str!("../testdata/inputs/uk/identity.txt")).await;
+    assert_match(
+        &text,
+        &entities,
+        builtins::MEDICAL_ID.label_ref(),
+        "943 476 5919",
+    );
+    assert_match(
+        &text,
+        &entities,
+        builtins::NATIONAL_INSURANCE_NUMBER.label_ref(),
+        "AB123456C",
+    );
+    assert_match(
+        &text,
+        &entities,
+        builtins::DRIVERS_LICENSE.label_ref(),
+        "MORGA753116SM9IJ",
+    );
+    assert_match(
+        &text,
+        &entities,
+        builtins::PASSPORT_NUMBER.label_ref(),
+        "AB1234567",
+    );
+    // World nationality dictionary activates on UK text ("British").
+    assert_label_present(&entities, builtins::NATIONALITY.label_ref());
+}
+
+#[tokio::test]
+async fn builtin_contact() {
+    let (text, entities) = scan(include_str!("../testdata/inputs/uk/contact.txt")).await;
+    assert_match(
+        &text,
+        &entities,
+        builtins::POSTAL_CODE.label_ref(),
+        "SW1A 2AA",
+    );
+}
+
+#[tokio::test]
+async fn builtin_vehicle() {
+    let (text, entities) = scan(include_str!("../testdata/inputs/uk/vehicle.txt")).await;
+    assert_match(
+        &text,
+        &entities,
+        builtins::LICENSE_PLATE.label_ref(),
+        "AB51 ABC",
+    );
+}
diff --git a/crates/nvisy-pattern/tests/builtin_us.rs b/crates/nvisy-pattern/tests/builtin_us.rs
new file mode 100644
index 00000000..24b714cb
--- /dev/null
+++ b/crates/nvisy-pattern/tests/builtin_us.rs
@@ -0,0 +1,85 @@
+//! End-to-end: shipped patterns + dictionaries against the
+//! US-jurisdiction fixtures (`testdata/inputs/us/<domain>.txt`).
+//!
+//! Each test scans one US fixture through a recognizer wired
+//! with every shipped pattern and dictionary, then asserts the
+//! entities a real US document of that domain is expected to
+//! surface (substring + label).
+
+mod fixtures;
+
+use fixtures::{assert_label_present, assert_match, scan};
+use nvisy_core::entity::builtins;
+
+#[tokio::test]
+async fn builtin_identity() {
+    let (text, entities) = scan(include_str!("../testdata/inputs/us/identity.txt")).await;
+    assert_match(
+        &text,
+        &entities,
+        builtins::GOVERNMENT_ID.label_ref(),
+        "123-45-6789",
+    );
+    assert_match(
+        &text,
+        &entities,
+        builtins::TAX_ID.label_ref(),
+        "912-71-1234",
+    );
+    assert_match(
+        &text,
+        &entities,
+        builtins::DRIVERS_LICENSE.label_ref(),
+        "D123-4567-8901",
+    );
+    assert_match(
+        &text,
+        &entities,
+        builtins::PASSPORT_NUMBER.label_ref(),
+        "A12345678",
+    );
+    assert_match(
+        &text,
+        &entities,
+        builtins::POSTAL_CODE.label_ref(),
+        "97477-1234",
+    );
+}
+
+#[tokio::test]
+async fn builtin_finance() {
+    let (text, entities) = scan(include_str!("../testdata/inputs/us/finance.txt")).await;
+    assert_match(
+        &text,
+        &entities,
+        builtins::BANK_ROUTING.label_ref(),
+        "121000358",
+    );
+    // bank_account is `\b\d{8,17}\b` with score 0.05 — it requires
+    // a context-keyword boost (e.g. `account`) to clear the
+    // confidence threshold. The fixture provides one.
+    assert_label_present(&entities, builtins::BANK_ACCOUNT.label_ref());
+}
+
+#[tokio::test]
+async fn builtin_health() {
+    let (text, entities) = scan(include_str!("../testdata/inputs/us/health.txt")).await;
+    assert_match(
+        &text,
+        &entities,
+        builtins::MEDICAL_ID.label_ref(),
+        "1234567893",
+    );
+    assert_match(
+        &text,
+        &entities,
+        builtins::MEDICAL_ID.label_ref(),
+        "1EG4-TE5-MK73",
+    );
+    assert_match(
+        &text,
+        &entities,
+        builtins::MEDICAL_ID.label_ref(),
+        "BB0000000",
+    );
+}
diff --git a/crates/nvisy-pattern/tests/fixtures/mod.rs b/crates/nvisy-pattern/tests/fixtures/mod.rs
new file mode 100644
index 00000000..1b27792e
--- /dev/null
+++ b/crates/nvisy-pattern/tests/fixtures/mod.rs
@@ -0,0 +1,60 @@
+//! Shared helpers for the `builtin_*` end-to-end test suites.
+//!
+//! Each per-region test file (`tests/builtin.rs`,
+//! `tests/builtin_us.rs`, `tests/builtin_uk.rs`) declares this
+//! module via `mod fixtures;` and calls [`scan`] + the
+//! `assert_*` helpers to express expectations against a single
+//! shared [`PatternRecognizer`] built from every shipped pattern
+//! and dictionary.
+
+use nvisy_core::entity::{Entity, EntityLabelRef};
+use nvisy_core::modality::{Text, TextData};
+use nvisy_core::recognition::{EntityRecognizer, RecognizerInput};
+use nvisy_pattern::PatternRecognizer;
+
+pub async fn scan(text: &str) -> (String, Vec<Entity<Text>>) {
+    let recognizer = PatternRecognizer::builder()
+        .with_builtin_patterns()
+        .with_builtin_dictionaries()
+        .build_context_enhanced()
+        .expect("shipped recognizer builds");
+    let input = RecognizerInput::new(TextData::new(text.to_owned()));
+    let entities = recognizer
+        .recognize(&input)
+        .await
+        .expect("shipped recognize")
+        .entities;
+    (text.to_owned(), entities)
+}
+
+#[track_caller]
+pub fn assert_match(
+    text: &str,
+    entities: &[Entity<Text>],
+    label: EntityLabelRef,
+    needle: &str,
+) {
+    let hit = entities
+        .iter()
+        .any(|e| e.label == label && &text[e.location.start..e.location.end] == needle);
+    assert!(
+        hit,
+        "expected `{needle}` as {label:?}; got: {:?}",
+        entities
+            .iter()
+            .map(|e| (e.label.clone(), &text[e.location.start..e.location.end]))
+            .collect::<Vec<_>>()
+    );
+}
+
+#[track_caller]
+pub fn assert_label_present(entities: &[Entity<Text>], label: EntityLabelRef) {
+    assert!(
+        entities.iter().any(|e| e.label == label),
+        "expected at least one {label:?} entity; got labels: {:?}",
+        entities
+            .iter()
+            .map(|e| e.label.clone())
+            .collect::<Vec<_>>()
+    );
+}
diff --git a/crates/nvisy-pattern/tests/shipped_detection.rs b/crates/nvisy-pattern/tests/shipped_detection.rs
deleted file mode 100644
index b6b82cc6..00000000
--- a/crates/nvisy-pattern/tests/shipped_detection.rs
+++ /dev/null
@@ -1,250 +0,0 @@
-//! End-to-end: load every shipped pattern + dictionary into one
-//! [`PatternRecognizer`], scan each `testdata/inputs/*.txt`, and
-//! assert the entities a real document of that category is expected
-//! to surface (by substring + kind).
-//!
-//! These are intentionally substring-based rather than offset-based
-//! so the fixtures and shipped regexes can both evolve without
-//! brittle byte-position churn.
-
-use nvisy_core::entity::{Entity, EntityLabelRef, builtins};
-use nvisy_core::modality::{Text, TextData};
-use nvisy_core::recognition::{EntityRecognizer, RecognizerInput};
-use nvisy_pattern::PatternRecognizer;
-
-async fn scan(text: &str) -> (String, Vec<Entity<Text>>) {
-    let recognizer = PatternRecognizer::builder()
-        .with_builtin_patterns()
-        .with_builtin_dictionaries()
-        .build_context_enhanced()
-        .expect("shipped recognizer builds");
-    let input = RecognizerInput::new(TextData::new(text.to_owned()));
-    let entities = recognizer
-        .recognize(&input)
-        .await
-        .expect("shipped recognize")
-        .entities;
-    (text.to_owned(), entities)
-}
-
-fn assert_match(text: &str, entities: &[Entity<Text>], label: EntityLabelRef, needle: &str) {
-    let hit = entities
-        .iter()
-        .any(|e| e.label == label && &text[e.location.start..e.location.end] == needle);
-    assert!(
-        hit,
-        "expected `{needle}` as {label:?}; got: {:?}",
-        entities
-            .iter()
-            .map(|e| (e.label.clone(), &text[e.location.start..e.location.end]))
-            .collect::<Vec<_>>()
-    );
-}
-
-#[tokio::test]
-async fn contact_inputs_yield_expected_entities() {
-    let (text, entities) = scan(include_str!("../testdata/inputs/contact.txt")).await;
-    assert_match(
-        &text,
-        &entities,
-        builtins::EMAIL_ADDRESS.label_ref(),
-        "alice.johnson@example.com",
-    );
-    assert_match(
-        &text,
-        &entities,
-        builtins::URL.label_ref(),
-        "https://docs.example.com/proposal",
-    );
-    assert_match(
-        &text,
-        &entities,
-        builtins::URL.label_ref(),
-        "http://backup.example.org/proposal-v2",
-    );
-}
-
-#[tokio::test]
-async fn identity_inputs_yield_expected_entities() {
-    let (text, entities) = scan(include_str!("../testdata/inputs/identity.txt")).await;
-    assert_match(
-        &text,
-        &entities,
-        builtins::GOVERNMENT_ID.label_ref(),
-        "123-45-6789",
-    );
-    assert_match(
-        &text,
-        &entities,
-        builtins::DATE_OF_BIRTH.label_ref(),
-        "1985-03-14",
-    );
-    assert_match(
-        &text,
-        &entities,
-        builtins::TAX_ID.label_ref(),
-        "912-71-1234",
-    );
-    assert_match(
-        &text,
-        &entities,
-        builtins::MEDICAL_ID.label_ref(),
-        "1234567893",
-    );
-    assert_match(
-        &text,
-        &entities,
-        builtins::MEDICAL_ID.label_ref(),
-        "1EG4-TE5-MK73",
-    );
-}
-
-#[tokio::test]
-async fn finance_inputs_yield_expected_entities() {
-    let (text, entities) = scan(include_str!("../testdata/inputs/finance.txt")).await;
-    assert_match(
-        &text,
-        &entities,
-        builtins::PAYMENT_CARD.label_ref(),
-        "4539 1488 0343 6467",
-    );
-    assert_match(
-        &text,
-        &entities,
-        builtins::CRYPTO_ADDRESS.label_ref(),
-        "1A1zP1eP5QGefi2DMPTfTL5SLmv7DivfNa",
-    );
-    assert_match(
-        &text,
-        &entities,
-        builtins::CRYPTO_ADDRESS.label_ref(),
-        "0x742d35Cc6634C0532925a3b844Bc9e7595f6E842",
-    );
-    // Currency and cryptocurrency dictionaries emit `Currency`;
-    // pick up `USD`, `EUR`, `Tether`, `USDC`, …
-    assert!(
-        entities
-            .iter()
-            .any(|e| e.label == builtins::CURRENCY.label_ref()),
-        "expected at least one currency/crypto dictionary hit"
-    );
-}
-
-#[tokio::test]
-async fn credentials_inputs_yield_expected_entities() {
-    let (text, entities) = scan(include_str!("../testdata/inputs/credentials.txt")).await;
-    assert_match(
-        &text,
-        &entities,
-        builtins::API_KEY.label_ref(),
-        "AKIAIOSFODNN7EXAMPLE",
-    );
-    // Private-key pattern matches the BEGIN header.
-    assert!(
-        entities
-            .iter()
-            .any(|e| e.label == builtins::PRIVATE_KEY.label_ref()),
-        "expected at least one PrivateKey entity"
-    );
-}
-
-#[tokio::test]
-async fn network_inputs_yield_expected_entities() {
-    let (text, entities) = scan(include_str!("../testdata/inputs/network.txt")).await;
-    assert_match(
-        &text,
-        &entities,
-        builtins::IP_ADDRESS.label_ref(),
-        "192.168.1.42",
-    );
-    assert_match(
-        &text,
-        &entities,
-        builtins::IP_ADDRESS.label_ref(),
-        "10.0.0.7",
-    );
-    assert_match(
-        &text,
-        &entities,
-        builtins::IP_ADDRESS.label_ref(),
-        "203.0.113.55",
-    );
-    assert_match(
-        &text,
-        &entities,
-        builtins::IP_ADDRESS.label_ref(),
-        "2001:0db8:85a3:0000:0000:8a2e:0370:7334",
-    );
-    assert_match(
-        &text,
-        &entities,
-        builtins::MAC_ADDRESS.label_ref(),
-        "00:1A:2B:3C:4D:5E",
-    );
-}
-
-#[tokio::test]
-async fn personal_inputs_yield_expected_entities() {
-    let (text, entities) = scan(include_str!("../testdata/inputs/personal.txt")).await;
-    assert_match(
-        &text,
-        &entities,
-        builtins::DATE_OF_BIRTH.label_ref(),
-        "04/22/1979",
-    );
-    assert_match(
-        &text,
-        &entities,
-        builtins::DATE_TIME.label_ref(),
-        "2024-06-15T09:30:00Z",
-    );
-    // Nationality and language dictionaries pick up `Italian`,
-    // `Canadian`, `English`, `Spanish`.
-    assert!(
-        entities
-            .iter()
-            .any(|e| e.label == builtins::NATIONALITY.label_ref()),
-        "expected at least one Nationality"
-    );
-    assert!(
-        entities
-            .iter()
-            .any(|e| e.label == builtins::LANGUAGE.label_ref()),
-        "expected at least one Language"
-    );
-}
-
-#[tokio::test]
-async fn uk_inputs_yield_expected_entities() {
-    let (text, entities) = scan(include_str!("../testdata/inputs/uk.txt")).await;
-    assert_match(
-        &text,
-        &entities,
-        builtins::MEDICAL_ID.label_ref(),
-        "943 476 5919",
-    );
-    assert_match(
-        &text,
-        &entities,
-        builtins::NATIONAL_INSURANCE_NUMBER.label_ref(),
-        "AB123456C",
-    );
-    assert_match(
-        &text,
-        &entities,
-        builtins::DRIVERS_LICENSE.label_ref(),
-        "MORGA753116SM9IJ",
-    );
-    assert_match(
-        &text,
-        &entities,
-        builtins::POSTAL_CODE.label_ref(),
-        "SW1A 2AA",
-    );
-    assert_match(
-        &text,
-        &entities,
-        builtins::LICENSE_PLATE.label_ref(),
-        "AB51 ABC",
-    );
-}

From 2137db585ba62ae3b64cc3aa923858c1af057bbe Mon Sep 17 00:00:00 2001
From: Oleh Martsokha <o.martsokha@gmail.com>
Date: Tue, 16 Jun 2026 06:57:01 +0200
Subject: [PATCH 11/14] feat(pattern,context): Presidio-aligned audit fixes +
 tabular context propagation

Tier 1 (correctness bugs):

- world/iban regex: extend middle groups from \d{4} to [A-Z0-9]{4}
  and separator \s? to [\s\-]?; accepts IBANs with letters past
  position 8 (UK NWBK, IE, MT, MK, GI) and hyphenated forms that
  the mod-97 validator was already prepared to handle.
- world/private_key: match the full BEGIN..END PEM block instead
  of only the header line; add ENCRYPTED PRIVATE KEY, PGP, SSH2,
  and PuTTY-User-Key-File-{2,3} variants.
- us/medical_license: add \b anchors to both DEA variants; prior
  pattern matched inside longer alphanumeric tokens.
- uk.nino validator: reject O as the position-0 letter (HMRC
  reserved); the character class blocks D/F/I/Q/U/V but allows
  O via the j-p range.
- us/passport: add Presidio's context = [passport, passport#,
  travel document, us passport, united states passport] so the
  0.1-base pattern can boost above threshold.
- us/postal_code: drop score 0.5 -> 0.1, add context, ship a
  us.postal_code validator that rejects 00000.

Tier 2 (coverage + scoring):

- world/bitcoin_address: split legacy (Base58) from Bech32; bump
  Bech32 cap {25,39}->{25,59} for Taproot (bc1p...). Add a
  crypto.btc validator using bs58::decode_check.
- world/credit_card: add Mastercard 2-series (2221-2720); drop
  score 0.5 -> 0.3 to match Presidio's deliberate baseline that
  expects context boost to do the rest.
- world/aws_key: broaden access-key ID prefix to also catch
  ASIA/AIDA/AROA/ANPA/AGPA/AIPA; add a second variant for the
  40-char secret access key; ship Presidio-style context.
- world/github_token: add github_pat_[A-Z0-9_]{82} variant for
  the fine-grained PAT format introduced in 2022.
- world/generic_api_key: accept whitespace separator alongside
  [:=] so `Authorization: Bearer <token>` matches.
- uk/driving_licence + uk/vehicle/registration: add the Presidio
  validators we'd left on the table (99999 surname rejection,
  age-ID range 02-29 / 51-79 for current-format plates).

Validator infrastructure:

- world/phone: replace the regex+length validator with a
  phonenumber-crate-backed region-aware validator. The validator
  parses E.164 directly and falls back to the caller-specified
  country (via RecognizerInput.country) when present.
  Introduces a workspace-wide phonenumber = 0.3 dependency.
- ValidatorRegistry::with_simple convenience for the ten
  context-free validators; with() stays the canonical entry
  point for ctx-aware validators (only phone today).

Context-enhancer architecture:

- Add RecognizerInput.context_hints: Vec<String> for out-of-band
  context strings (CSV column headers, JSON keys, log field
  names) the caller wants treated as in-context.
- nvisy-context::Enhancer::enhance now takes a Context bundle
  (text + tokens + language + hints) instead of four loose
  arguments. The hint path runs as a fallback when the in-text
  word window doesn't fire; at most one boost per rule per
  entity.
- LiftedFromText in nvisy-toolkit gains chunk_hints; Tabular
  surfaces column_name as a hint so a `card` column header
  lifts a per-cell CC=0.3 match to ~0.65 via the existing boost
  pipeline (no synthetic score patching to clear the threshold).

nvisy-context module split:

- enhancer.rs -> enhancer/{mod, context, window}.rs
- matcher.rs -> matching/{mod, matcher, lemma}.rs
- tokens.rs + wrapper.rs -> io/{mod, tokens, wrapper}.rs
- Public surface (Context, Enhancer, BoostRule, KeywordMatcher,
  SubstringMatcher, LemmaMatcher, ContextEnhanced, Token,
  Tokens) stays at the crate root via re-exports.
- Drop 3 redundant enhancer tests (suffix-symmetry duplicate,
  unicode-too-distant duplicate, token-window symmetry); keep
  the 13 unique behaviors plus 2 new hint-path tests.

Known gap: the html_codec_e2e payment_card assertion fails
because HTML is chunked at text-node boundaries, so
`<code>4111...</code>` loses the surrounding "payment card"
context. The fix requires moving chunk_hints from LiftedFromText
onto the Handler trait and overriding it on HtmlHandler to emit
parent-element text as a hint. Tracked as a follow-up.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
---
 Cargo.lock                                    | 159 ++++++++-
 Cargo.toml                                    |   4 +
 crates/nvisy-context/src/enhancer/context.rs  |  68 ++++
 .../src/{enhancer.rs => enhancer/mod.rs}      | 324 ++++++------------
 crates/nvisy-context/src/enhancer/window.rs   | 118 +++++++
 crates/nvisy-context/src/io/mod.rs            |  20 ++
 crates/nvisy-context/src/{ => io}/tokens.rs   |   0
 crates/nvisy-context/src/{ => io}/wrapper.rs  |  16 +-
 crates/nvisy-context/src/lib.rs               |  12 +-
 crates/nvisy-context/src/matcher.rs           | 134 --------
 crates/nvisy-context/src/matching/lemma.rs    |  65 ++++
 crates/nvisy-context/src/matching/matcher.rs  |  70 ++++
 crates/nvisy-context/src/matching/mod.rs      |  19 +
 crates/nvisy-core/src/recognition/input.rs    |  17 +
 crates/nvisy-pattern/Cargo.toml               |   6 +
 .../patterns/uk/identity/driving_licence.toml |   1 +
 .../patterns/uk/vehicle/registration.toml     |   1 +
 .../patterns/us/health/medical_license.toml   |   4 +-
 .../assets/patterns/us/identity/passport.toml |   7 +
 .../patterns/us/identity/postal_code.toml     |  12 +-
 .../patterns/world/credentials/aws_key.toml   |  22 +-
 .../world/credentials/generic_api_key.toml    |   6 +-
 .../world/credentials/github_token.toml       |   8 +
 .../world/credentials/private_key.toml        |  13 +-
 .../world/finance/bitcoin_address.toml        |  13 +-
 .../patterns/world/finance/credit_card.toml   |   7 +-
 .../assets/patterns/world/finance/iban.toml   |   2 +-
 .../src/recognition/recognizer.rs             |   8 +-
 crates/nvisy-pattern/src/validators/btc.rs    |  53 +++
 crates/nvisy-pattern/src/validators/mod.rs    |  99 ++++--
 crates/nvisy-pattern/src/validators/phone.rs  | 109 ++----
 .../src/validators/uk/driving_licence.rs      |  71 ++++
 crates/nvisy-pattern/src/validators/uk/mod.rs |   7 +-
 .../nvisy-pattern/src/validators/uk/nino.rs   |  20 +-
 .../src/validators/uk/vehicle_registration.rs |  69 ++++
 crates/nvisy-pattern/src/validators/us/mod.rs |   2 +
 .../src/validators/us/postal_code.rs          |  40 +++
 .../testdata/builtin/uk/identity.txt          |  12 -
 crates/nvisy-toolkit/src/detection/chunks.rs  |  21 +-
 39 files changed, 1145 insertions(+), 494 deletions(-)
 create mode 100644 crates/nvisy-context/src/enhancer/context.rs
 rename crates/nvisy-context/src/{enhancer.rs => enhancer/mod.rs} (63%)
 create mode 100644 crates/nvisy-context/src/enhancer/window.rs
 create mode 100644 crates/nvisy-context/src/io/mod.rs
 rename crates/nvisy-context/src/{ => io}/tokens.rs (100%)
 rename crates/nvisy-context/src/{ => io}/wrapper.rs (84%)
 delete mode 100644 crates/nvisy-context/src/matcher.rs
 create mode 100644 crates/nvisy-context/src/matching/lemma.rs
 create mode 100644 crates/nvisy-context/src/matching/matcher.rs
 create mode 100644 crates/nvisy-context/src/matching/mod.rs
 create mode 100644 crates/nvisy-pattern/src/validators/btc.rs
 create mode 100644 crates/nvisy-pattern/src/validators/uk/driving_licence.rs
 create mode 100644 crates/nvisy-pattern/src/validators/uk/vehicle_registration.rs
 create mode 100644 crates/nvisy-pattern/src/validators/us/postal_code.rs
 delete mode 100644 crates/nvisy-pattern/testdata/builtin/uk/identity.txt

diff --git a/Cargo.lock b/Cargo.lock
index f987251d..2d9b1229 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -292,6 +292,15 @@ dependencies = [
  "syn",
 ]
 
+[[package]]
+name = "atomic-polyfill"
+version = "1.0.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8cf2bce30dfe09ef0bfaef228b9d414faaf7e563035494d7fe092dba54b300f4"
+dependencies = [
+ "critical-section",
+]
+
 [[package]]
 name = "atomic-waker"
 version = "1.1.2"
@@ -524,6 +533,16 @@ dependencies = [
  "alloc-stdlib",
 ]
 
+[[package]]
+name = "bs58"
+version = "0.5.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "bf88ba1141d185c399bee5288d850d63b8369520c1eafc32a0430b5b6c287bf4"
+dependencies = [
+ "sha2 0.10.9",
+ "tinyvec",
+]
+
 [[package]]
 name = "built"
 version = "0.8.1"
@@ -706,6 +725,15 @@ dependencies = [
  "cc",
 ]
 
+[[package]]
+name = "cobs"
+version = "0.3.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0fa961b519f0b462e3a3b4a34b64d119eeaca1d59af726fe450bbba07a9fc0a1"
+dependencies = [
+ "thiserror",
+]
+
 [[package]]
 name = "color_quant"
 version = "1.1.0"
@@ -841,6 +869,12 @@ dependencies = [
  "cfg-if",
 ]
 
+[[package]]
+name = "critical-section"
+version = "1.2.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "790eea4361631c5e7d22598ecd5723ff611904e3344ce8720784c93e3d83d40b"
+
 [[package]]
 name = "crossbeam-deque"
 version = "0.8.6"
@@ -1234,6 +1268,18 @@ version = "1.16.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "91622ff5e7162018101f2fea40d6ebf4a78bbe5a49736a2020649edf9693679e"
 
+[[package]]
+name = "embedded-io"
+version = "0.4.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ef1a6892d9eef45c8fa6b9e0086428a2cca8491aca8f787c534a3d6d0bcb3ced"
+
+[[package]]
+name = "embedded-io"
+version = "0.6.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "edd0f118536f44f5ccd48bcb8b111bdc3de888b58c74639dfb034a357d0f206d"
+
 [[package]]
 name = "encoding_rs"
 version = "0.8.35"
@@ -1684,6 +1730,15 @@ dependencies = [
  "zerocopy",
 ]
 
+[[package]]
+name = "hash32"
+version = "0.2.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b0c35f58762feb77d74ebe43bdbc3210f09be9fe6742234d573bacc26ed92b67"
+dependencies = [
+ "byteorder",
+]
+
 [[package]]
 name = "hashbrown"
 version = "0.14.5"
@@ -1711,6 +1766,20 @@ version = "0.17.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "ed5909b6e89a2db4456e54cd5f673791d7eca6732202bbf2a9cc504fe2f9b84a"
 
+[[package]]
+name = "heapless"
+version = "0.7.17"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "cdc6457c0eb62c71aac4bc17216026d8410337c4126773b9c5daba343f17964f"
+dependencies = [
+ "atomic-polyfill",
+ "hash32",
+ "rustc_version",
+ "serde",
+ "spin",
+ "stable_deref_trait",
+]
+
 [[package]]
 name = "heck"
 version = "0.4.1"
@@ -2389,6 +2458,12 @@ dependencies = [
  "include_dir",
 ]
 
+[[package]]
+name = "linked-hash-map"
+version = "0.5.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0717cef1bc8b636c6e1c1bbdefc09e6322da8a9321966e8928ef80d20f7f770f"
+
 [[package]]
 name = "linux-raw-sys"
 version = "0.12.1"
@@ -2469,6 +2544,15 @@ dependencies = [
  "weezl",
 ]
 
+[[package]]
+name = "lru-cache"
+version = "0.1.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "31e24f1ad8321ca0e8a1e0ac13f23cb668e6f5466c2c57319f6a5cf1cc8e3b1c"
+dependencies = [
+ "linked-hash-map",
+]
+
 [[package]]
 name = "lru-slab"
 version = "0.1.2"
@@ -3043,11 +3127,13 @@ version = "0.1.0"
 dependencies = [
  "aho-corasick",
  "async-trait",
+ "bs58",
  "csv",
  "derive_builder",
  "derive_more",
  "nvisy-context",
  "nvisy-core",
+ "phonenumber",
  "regex",
  "serde",
  "tokio",
@@ -3126,6 +3212,12 @@ version = "1.70.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "384b8ab6d37215f3c5301a95a4accb5d64aa607f1fcb26a11b5303878451b4fe"
 
+[[package]]
+name = "oncemutex"
+version = "0.1.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "44d11de466f4a3006fe8a5e7ec84e93b79c70cb992ae0aa0eb631ad2df8abfe2"
+
 [[package]]
 name = "opaque-debug"
 version = "0.3.1"
@@ -3285,6 +3377,26 @@ dependencies = [
  "siphasher",
 ]
 
+[[package]]
+name = "phonenumber"
+version = "0.3.9+9.0.21"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9114f9c1683dd09c5f4fa024c89fdad783eaae21d3d52dd23ddaaffa29ffb168"
+dependencies = [
+ "either",
+ "fnv",
+ "nom 7.1.3",
+ "once_cell",
+ "postcard",
+ "quick-xml",
+ "regex",
+ "regex-cache",
+ "serde",
+ "serde_derive",
+ "strum 0.27.2",
+ "thiserror",
+]
+
 [[package]]
 name = "pin-project"
 version = "1.1.13"
@@ -3369,6 +3481,19 @@ dependencies = [
  "portable-atomic",
 ]
 
+[[package]]
+name = "postcard"
+version = "1.1.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6764c3b5dd454e283a30e6dfe78e9b31096d9e32036b5d1eaac7a6119ccb9a24"
+dependencies = [
+ "cobs",
+ "embedded-io 0.4.0",
+ "embedded-io 0.6.1",
+ "heapless",
+ "serde",
+]
+
 [[package]]
 name = "potential_utf"
 version = "0.1.5"
@@ -3508,6 +3633,15 @@ version = "2.0.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "a993555f31e5a609f617c12db6250dedcac1b0a85076912c436e6fc9b2c8e6a3"
 
+[[package]]
+name = "quick-xml"
+version = "0.38.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b66c2058c55a409d601666cffe35f04333cf1013010882cec174a7467cd4e21c"
+dependencies = [
+ "memchr",
+]
+
 [[package]]
 name = "quick_cache"
 version = "0.6.23"
@@ -3801,7 +3935,7 @@ dependencies = [
  "aho-corasick",
  "memchr",
  "regex-automata",
- "regex-syntax",
+ "regex-syntax 0.8.10",
 ]
 
 [[package]]
@@ -3812,7 +3946,19 @@ checksum = "6e1dd4122fc1595e8162618945476892eefca7b88c52820e74af6262213cae8f"
 dependencies = [
  "aho-corasick",
  "memchr",
- "regex-syntax",
+ "regex-syntax 0.8.10",
+]
+
+[[package]]
+name = "regex-cache"
+version = "0.2.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "2f7b62d69743b8b94f353b6b7c3deb4c5582828328bcb8d5fedf214373808793"
+dependencies = [
+ "lru-cache",
+ "oncemutex",
+ "regex",
+ "regex-syntax 0.6.29",
 ]
 
 [[package]]
@@ -3821,6 +3967,12 @@ version = "0.1.9"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "cab834c73d247e67f4fae452806d17d3c7501756d98c8808d7c9c7aa7d18f973"
 
+[[package]]
+name = "regex-syntax"
+version = "0.6.29"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f162c6dd7b008981e4d40210aca20b4bd0f9b60ca9271061b07f78537722f2e1"
+
 [[package]]
 name = "regex-syntax"
 version = "0.8.10"
@@ -4622,6 +4774,9 @@ name = "strum"
 version = "0.27.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "af23d6f6c1a224baef9d3f61e287d2761385a5b88fdab4eb4c6f11aeb54c4bcf"
+dependencies = [
+ "strum_macros 0.27.2",
+]
 
 [[package]]
 name = "strum"
diff --git a/Cargo.toml b/Cargo.toml
index 77a0822a..ee8f2000 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -100,6 +100,10 @@ lingua = { version = "1.8", default-features = false, features = ["english"] }
 unicode-segmentation = { version = "1.13", features = [] }
 unicode-normalization = { version = "0.1", features = [] }
 
+# Checksum / encoding / phone-number parsing
+bs58 = { version = "0.5", features = ["check"] }
+phonenumber = { version = "0.3", default-features = false }
+
 # Tabular document parsing
 csv = { version = "1.0", features = [] }
 calamine = { version = "0.35", features = [] }
diff --git a/crates/nvisy-context/src/enhancer/context.rs b/crates/nvisy-context/src/enhancer/context.rs
new file mode 100644
index 00000000..85278b1c
--- /dev/null
+++ b/crates/nvisy-context/src/enhancer/context.rs
@@ -0,0 +1,68 @@
+//! [`Context`]: per-call inputs bundled for [`Enhancer::enhance`].
+//!
+//! [`Enhancer::enhance`]: super::Enhancer::enhance
+
+use nvisy_core::primitive::LanguageTag;
+
+use crate::io::Token;
+
+/// Per-call inputs bundled together so the enhancer's internal
+/// methods don't drag a long argument list through every layer.
+///
+/// All fields borrow; the value lives for the duration of one
+/// [`Enhancer::enhance`] call.
+///
+/// [`Enhancer::enhance`]: super::Enhancer::enhance
+#[derive(Clone, Copy)]
+pub struct Context<'a> {
+    /// Full text the entities' byte offsets index into.
+    pub text: &'a str,
+    /// Optional token artifact produced by an upstream NLP
+    /// engine. When present, word-window counting walks the token
+    /// stream; when absent, words are derived from `text` via
+    /// Unicode word segmentation.
+    pub tokens: Option<&'a [Token]>,
+    /// Per-call language hint. `None` means "unknown" — every
+    /// per-language rule applies as a permissive fallback.
+    pub language: Option<&'a LanguageTag>,
+    /// Out-of-band context strings (CSV column headers, JSON
+    /// object keys, log field names) the caller wants treated as
+    /// in-context. Each hint is fed to the matcher as its own
+    /// one-string window (no tokens) when the in-text window
+    /// misses; a hit applies the rule's usual boost.
+    pub hints: &'a [String],
+}
+
+impl<'a> Context<'a> {
+    /// Construct a context with just the source text; every
+    /// other field defaults to empty.
+    pub fn new(text: &'a str) -> Self {
+        Self {
+            text,
+            tokens: None,
+            language: None,
+            hints: &[],
+        }
+    }
+
+    /// Attach a token artifact.
+    #[must_use]
+    pub fn with_tokens(mut self, tokens: &'a [Token]) -> Self {
+        self.tokens = Some(tokens);
+        self
+    }
+
+    /// Attach a language hint.
+    #[must_use]
+    pub fn with_language(mut self, language: &'a LanguageTag) -> Self {
+        self.language = Some(language);
+        self
+    }
+
+    /// Attach out-of-band hint strings.
+    #[must_use]
+    pub fn with_hints(mut self, hints: &'a [String]) -> Self {
+        self.hints = hints;
+        self
+    }
+}
diff --git a/crates/nvisy-context/src/enhancer.rs b/crates/nvisy-context/src/enhancer/mod.rs
similarity index 63%
rename from crates/nvisy-context/src/enhancer.rs
rename to crates/nvisy-context/src/enhancer/mod.rs
index 176efcc9..fa744f83 100644
--- a/crates/nvisy-context/src/enhancer.rs
+++ b/crates/nvisy-context/src/enhancer/mod.rs
@@ -5,16 +5,25 @@ use std::collections::HashMap;
 
 use nvisy_core::entity::{Entity, EntityLabelRef, TrailStep};
 use nvisy_core::modality::Text;
-use nvisy_core::primitive::LanguageTag;
-use unicode_segmentation::UnicodeSegmentation;
 
-use super::matcher::KeywordMatcher;
-use super::rule::BoostRule;
-use super::tokens::Token;
+use crate::matching::KeywordMatcher;
+use crate::rule::BoostRule;
+use crate::io::Token;
 
-/// Source name stamped onto every refinement [`TrailStep`] the
-/// enhancer appends.
-const TRAIL_SOURCE: &str = "context";
+mod context;
+mod window;
+
+pub use self::context::Context;
+
+use self::window::{slice_tokens_around, token_span, word_window};
+
+/// Source name stamped onto refinement [`TrailStep`]s the
+/// enhancer appends when the in-text word window fires.
+const TRAIL_SOURCE_WINDOW: &str = "context";
+
+/// Source name stamped onto refinement [`TrailStep`]s the
+/// enhancer appends when an out-of-band hint fires.
+const TRAIL_SOURCE_HINT: &str = "context-hint";
 
 /// Post-recognition enhancer. Holds a label-keyed [`BoostRule`]
 /// map plus the keyword-matching strategy, and lifts the
@@ -31,8 +40,8 @@ const TRAIL_SOURCE: &str = "context";
 /// the enhancer: [`SubstringMatcher`] when no upstream NLP engine
 /// produces tokens, [`LemmaMatcher`] when one does.
 ///
-/// [`SubstringMatcher`]: super::SubstringMatcher
-/// [`LemmaMatcher`]: super::LemmaMatcher
+/// [`SubstringMatcher`]: crate::SubstringMatcher
+/// [`LemmaMatcher`]: crate::LemmaMatcher
 pub struct Enhancer {
     /// Rules bucketed by label. Within one bucket, each entry is
     /// a distinct `(language)` scope; rules sharing the same
@@ -86,67 +95,45 @@ impl Enhancer {
 
     /// Apply boost rules to `entities` in place. For each entity:
     /// walk every rule registered for its label whose language
-    /// scope applies under `language`, walk a window of
+    /// scope applies under `ctx.language`, walk a window of
     /// `prefix_words` words before and `suffix_words` words after
     /// the entity's location, ask the matcher whether any keyword
     /// fires, and on a hit lift confidence by the rule's `boost`
     /// (saturating at the [`Confidence`] ceiling) plus append a
     /// [`Refinement`] trail step.
     ///
-    /// `tokens` is the optional token artifact produced by an
-    /// upstream NLP engine. When present, words are counted
-    /// against the token stream; when absent, words are derived
-    /// from the source text via Unicode word segmentation.
-    ///
-    /// `language` is the per-call language hint. `None` means
-    /// "unknown" — every per-language rule applies as a
-    /// permissive fallback.
+    /// The in-text and hint paths are mutually exclusive: at
+    /// most one boost per rule fires per entity (window first,
+    /// hints only as a fallback), so a keyword found in both
+    /// places can't double-dip.
     ///
     /// [`Confidence`]: nvisy_core::primitive::Confidence
     /// [`Refinement`]: nvisy_core::entity::TrailStepKind::Refinement
-    pub fn enhance(
-        &self,
-        entities: &mut [Entity<Text>],
-        text: &str,
-        tokens: Option<&[Token]>,
-        language: Option<&LanguageTag>,
-    ) {
+    pub fn enhance(&self, entities: &mut [Entity<Text>], ctx: &Context<'_>) {
         if self.rules.is_empty() {
             return;
         }
         for entity in entities {
-            self.enhance_one(entity, text, tokens, language);
+            self.enhance_one(entity, ctx);
         }
     }
 
-    fn enhance_one(
-        &self,
-        entity: &mut Entity<Text>,
-        text: &str,
-        tokens: Option<&[Token]>,
-        language: Option<&LanguageTag>,
-    ) {
+    fn enhance_one(&self, entity: &mut Entity<Text>, ctx: &Context<'_>) {
         let Some(bucket) = self.rules.get(&entity.label) else {
             return;
         };
         for rule in bucket {
-            if !rule.applies_to_language(language) {
+            if !rule.applies_to_language(ctx.language) {
                 continue;
             }
             if rule.keywords.is_empty() {
                 continue;
             }
-            self.apply_rule(entity, rule, text, tokens);
+            self.apply_rule(entity, rule, ctx);
         }
     }
 
-    fn apply_rule(
-        &self,
-        entity: &mut Entity<Text>,
-        rule: &BoostRule,
-        text: &str,
-        tokens: Option<&[Token]>,
-    ) {
+    fn apply_rule(&self, entity: &mut Entity<Text>, rule: &BoostRule, ctx: &Context<'_>) {
         let start = entity.location.start;
         let end = entity.location.end;
 
@@ -156,23 +143,32 @@ impl Enhancer {
         // `tokens: None`, `tokens: Some(&[])`, and the "tokens
         // present but none overlap the entity" case (e.g. NLP
         // engine only tokenized part of the document).
-        let token_slice = tokens
+        let token_slice = ctx
+            .tokens
             .map(|toks| slice_tokens_around(toks, start, end, rule.prefix_words, rule.suffix_words))
             .unwrap_or(&[]);
         let (snippet, tokens_in_window): (&str, &[Token]) = if token_slice.is_empty() {
-            let snippet = word_window(text, start, end, rule.prefix_words, rule.suffix_words);
+            let snippet = word_window(ctx.text, start, end, rule.prefix_words, rule.suffix_words);
             (snippet, &[])
         } else {
-            let snippet = token_span(text, token_slice, start, end);
+            let snippet = token_span(ctx.text, token_slice, start, end);
             (snippet, token_slice)
         };
 
-        if !self
+        let source = if self
             .matcher
             .any_match(snippet, tokens_in_window, &rule.keywords)
         {
+            TRAIL_SOURCE_WINDOW
+        } else if ctx
+            .hints
+            .iter()
+            .any(|h| self.matcher.any_match(h, &[], &rule.keywords))
+        {
+            TRAIL_SOURCE_HINT
+        } else {
             return;
-        }
+        };
 
         let original = entity.confidence;
         let adjusted = original.saturating_add(rule.boost.get());
@@ -182,7 +178,7 @@ impl Enhancer {
         entity.confidence = adjusted;
 
         entity.trail.push(TrailStep::refinement(
-            TRAIL_SOURCE,
+            source,
             original,
             adjusted,
             format!(
@@ -194,97 +190,6 @@ impl Enhancer {
     }
 }
 
-/// Walk `prefix` words before `[start, end)` and `suffix` words
-/// after, via Unicode word segmentation, and return the spanning
-/// substring (including any non-word whitespace and punctuation
-/// between words). The returned slice covers `[start, end)` itself
-/// plus the prefix / suffix words; the entity's own bytes are
-/// always inside.
-fn word_window(text: &str, start: usize, end: usize, prefix: usize, suffix: usize) -> &str {
-    let prefix_text = &text[..start.min(text.len())];
-    let suffix_text = &text[end.min(text.len())..];
-
-    // `unicode_word_indices` yields `(byte_offset, word_str)` for
-    // every "word" (alphanumeric run) in source order. Take the
-    // last `prefix` on the prefix side, the first `suffix` on the
-    // suffix side, and compute the spanning byte range.
-    let prefix_words: Vec<(usize, &str)> = prefix_text.unicode_word_indices().collect();
-    let prefix_take = prefix_words.len().saturating_sub(prefix);
-    let prefix_byte = prefix_words
-        .get(prefix_take)
-        .map(|(idx, _)| *idx)
-        .unwrap_or(start.min(text.len()));
-
-    let suffix_byte = if suffix == 0 {
-        end.min(text.len())
-    } else {
-        suffix_text
-            .unicode_word_indices()
-            .nth(suffix - 1)
-            .map(|(idx, word)| end + idx + word.len())
-            .unwrap_or(text.len())
-    };
-
-    let lo = floor_char_boundary(text, prefix_byte);
-    let hi = ceil_char_boundary(text, suffix_byte.min(text.len()));
-    &text[lo..hi]
-}
-
-fn floor_char_boundary(s: &str, mut pos: usize) -> usize {
-    while pos > 0 && !s.is_char_boundary(pos) {
-        pos -= 1;
-    }
-    pos
-}
-
-fn ceil_char_boundary(s: &str, mut pos: usize) -> usize {
-    while pos < s.len() && !s.is_char_boundary(pos) {
-        pos += 1;
-    }
-    pos
-}
-
-/// Slice tokens by *count*: take `prefix` tokens before the first
-/// token overlapping `[start, end)` and `suffix` tokens after the
-/// last. The returned slice is contiguous.
-fn slice_tokens_around(
-    tokens: &[Token],
-    start: usize,
-    end: usize,
-    prefix: usize,
-    suffix: usize,
-) -> &[Token] {
-    if tokens.is_empty() {
-        return &[];
-    }
-    // First token whose `offset.end > start` overlaps or follows the entity.
-    let first_overlap = tokens.partition_point(|t| t.offset.end <= start);
-    // One past the last token whose `offset.start < end` overlaps the entity.
-    let last_overlap = tokens.partition_point(|t| t.offset.start < end);
-    let lo = first_overlap.saturating_sub(prefix);
-    let hi = (last_overlap + suffix).min(tokens.len());
-    if lo >= hi {
-        return &[];
-    }
-    &tokens[lo..hi]
-}
-
-/// Spanning substring covering `tokens` plus the entity itself.
-/// Used to give the matcher a contiguous text window when slicing
-/// against the token stream.
-///
-/// Precondition: `tokens` is non-empty. Callers must take the
-/// `word_window` fallback path when their token slice is empty —
-/// see `Enhancer::enhance_one`.
-fn token_span<'a>(text: &'a str, tokens: &[Token], start: usize, end: usize) -> &'a str {
-    debug_assert!(!tokens.is_empty(), "token_span requires non-empty slice");
-    let lo = tokens[0].offset.start.min(start);
-    let hi = tokens[tokens.len() - 1].offset.end.max(end);
-    let lo = floor_char_boundary(text, lo.min(text.len()));
-    let hi = ceil_char_boundary(text, hi.min(text.len()));
-    &text[lo..hi]
-}
-
 #[cfg(test)]
 mod tests {
     use nvisy_core::entity::{
@@ -352,7 +257,7 @@ mod tests {
         )]);
         let text = "Your SSN: 123-45-6789";
         let mut entities = vec![entity(govid_label(), 10, 21, 0.6)];
-        enhancer.enhance(&mut entities, text, None, None);
+        enhancer.enhance(&mut entities, &Context::new(text));
         assert!(entities[0].confidence.get() > 0.6);
         assert!(
             entities[0]
@@ -362,18 +267,6 @@ mod tests {
         );
     }
 
-    #[test]
-    fn boosts_entity_when_keyword_in_suffix() {
-        let enhancer = enhancer(vec![rule(govid_label(), &["social"], 0, 5, 0.2)]);
-        let text = "123-45-6789 (social security number)";
-        let mut entities = vec![entity(govid_label(), 0, 11, 0.6)];
-        enhancer.enhance(&mut entities, text, None, None);
-        assert!(
-            entities[0].confidence.get() > 0.6,
-            "trailing keyword within suffix window should boost",
-        );
-    }
-
     #[test]
     fn suffix_zero_ignores_trailing_keyword() {
         // Prefix-only: trailing keyword must not boost.
@@ -381,7 +274,7 @@ mod tests {
         let text = "123-45-6789 (social security number)";
         let mut entities = vec![entity(govid_label(), 0, 11, 0.6)];
         let before = entities[0].confidence.get();
-        enhancer.enhance(&mut entities, text, None, None);
+        enhancer.enhance(&mut entities, &Context::new(text));
         assert_eq!(entities[0].confidence.get(), before);
     }
 
@@ -391,7 +284,7 @@ mod tests {
         let text = "Mr. Smith is named in the report.";
         let mut entities = vec![entity(person_label(), 4, 9, 0.5)];
         let before = entities[0].confidence.get();
-        enhancer.enhance(&mut entities, text, None, None);
+        enhancer.enhance(&mut entities, &Context::new(text));
         assert_eq!(entities[0].confidence.get(), before);
     }
 
@@ -405,7 +298,7 @@ mod tests {
         let xyz_end = xyz_start + "XYZ".len();
         let mut entities = vec![entity(govid_label(), xyz_start, xyz_end, 0.6)];
         let before = entities[0].confidence.get();
-        enhancer.enhance(&mut entities, text, None, None);
+        enhancer.enhance(&mut entities, &Context::new(text));
         assert_eq!(entities[0].confidence.get(), before);
     }
 
@@ -414,7 +307,7 @@ mod tests {
         let enhancer = enhancer(vec![rule(govid_label(), &["here"], 5, 5, 0.9)]);
         let text = "the value is right here in plain sight";
         let mut entities = vec![entity(govid_label(), 16, 21, 0.95)];
-        enhancer.enhance(&mut entities, text, None, None);
+        enhancer.enhance(&mut entities, &Context::new(text));
         assert!((entities[0].confidence.get() - 1.0).abs() < f64::EPSILON);
     }
 
@@ -438,7 +331,7 @@ mod tests {
         let ssn_entity_start = ssn_only.find("123").unwrap();
         let ssn_entity_end = ssn_entity_start + "123-45-6789".len();
         let mut from_first = vec![entity(govid_label(), ssn_entity_start, ssn_entity_end, 0.6)];
-        make_enhancer().enhance(&mut from_first, ssn_only, None, None);
+        make_enhancer().enhance(&mut from_first, &Context::new(ssn_only));
         assert!(
             from_first[0].confidence.get() > 0.6,
             "keyword `ssn` from the first rule must still boost after merge",
@@ -449,7 +342,7 @@ mod tests {
         let tax_entity_start = taxid_only.find("987").unwrap();
         let tax_entity_end = tax_entity_start + "987-65-4329".len();
         let mut from_second = vec![entity(govid_label(), tax_entity_start, tax_entity_end, 0.6)];
-        make_enhancer().enhance(&mut from_second, taxid_only, None, None);
+        make_enhancer().enhance(&mut from_second, &Context::new(taxid_only));
         assert!(
             from_second[0].confidence.get() > 0.6,
             "keyword `tax id` from the second rule must still boost after merge",
@@ -464,38 +357,24 @@ mod tests {
         let entity_start = text.find("123").unwrap();
         let entity_end = entity_start + "123-45-6789".len();
         let mut entities = vec![entity(govid_label(), entity_start, entity_end, 0.6)];
-        enhancer.enhance(&mut entities, text, None, None);
+        enhancer.enhance(&mut entities, &Context::new(text));
         assert!(
             entities[0].confidence.get() > 0.6,
             "unicode word should be reachable within 3-word prefix",
         );
     }
 
-    #[test]
-    fn word_window_excludes_too_distant_unicode() {
-        // 2-word prefix: "café" is the 3rd word before the entity.
-        let enhancer = enhancer(vec![rule(govid_label(), &["café"], 2, 0, 0.2)]);
-        let text = "café naïve resume — 123-45-6789";
-        let entity_start = text.find("123").unwrap();
-        let entity_end = entity_start + "123-45-6789".len();
-        let mut entities = vec![entity(govid_label(), entity_start, entity_end, 0.6)];
-        let before = entities[0].confidence.get();
-        enhancer.enhance(&mut entities, text, None, None);
-        assert_eq!(entities[0].confidence.get(), before);
-    }
-
     #[test]
     fn empty_tokens_slice_matches_none_behaviour() {
-        // Keyword sits in the prefix word-window but outside the
-        // entity bytes. With the empty-slice fix, `Some(&[])` must
-        // not collapse the snippet to the entity bytes — it should
-        // fall back to the word-window path just like `None`.
+        // `Some(&[])` must not collapse the snippet to entity
+        // bytes — it should fall back to the word-window path
+        // just like `None`.
         let enhancer = enhancer(vec![rule(govid_label(), &["ssn"], 5, 5, 0.2)]);
         let text = "Your SSN: 123-45-6789";
         let mut from_none = vec![entity(govid_label(), 10, 21, 0.6)];
         let mut from_empty = vec![entity(govid_label(), 10, 21, 0.6)];
-        enhancer.enhance(&mut from_none, text, None, None);
-        enhancer.enhance(&mut from_empty, text, Some(&[]), None);
+        enhancer.enhance(&mut from_none, &Context::new(text));
+        enhancer.enhance(&mut from_empty, &Context::new(text).with_tokens(&[]));
         assert_eq!(
             from_none[0].confidence.get(),
             from_empty[0].confidence.get(),
@@ -513,9 +392,7 @@ mod tests {
         // prefix reaches is the immediate predecessor token
         // "Your". The tokenizer here treats "social security"
         // as a single compound token outside the window, so the
-        // keyword "social security" must NOT fire — unlike a
-        // hypothetical caller that gave it the word-window path,
-        // which would split on whitespace.
+        // keyword "social security" must NOT fire.
         let enhancer = enhancer(vec![rule(govid_label(), &["social security"], 1, 0, 0.2)]);
         let text = "social security: Your 123-45-6789";
         let entity_start = text.find("123").unwrap();
@@ -527,7 +404,7 @@ mod tests {
         ];
         let mut entities = vec![entity(govid_label(), entity_start, entity_end, 0.6)];
         let before = entities[0].confidence.get();
-        enhancer.enhance(&mut entities, text, Some(&tokens), None);
+        enhancer.enhance(&mut entities, &Context::new(text).with_tokens(&tokens));
         assert_eq!(
             entities[0].confidence.get(),
             before,
@@ -535,27 +412,6 @@ mod tests {
         );
     }
 
-    #[test]
-    fn token_path_boosts_when_keyword_within_token_window() {
-        // Same tokens, 2-word prefix: now the `social security`
-        // token is reachable and the boost fires.
-        let enhancer = enhancer(vec![rule(govid_label(), &["social security"], 2, 0, 0.2)]);
-        let text = "social security: Your 123-45-6789";
-        let entity_start = text.find("123").unwrap();
-        let entity_end = entity_start + "123-45-6789".len();
-        let tokens: Vec<Token> = vec![
-            Token::from_text("social security", 0..15),
-            Token::from_text("Your", 17..21),
-            Token::from_text("123-45-6789", 22..33),
-        ];
-        let mut entities = vec![entity(govid_label(), entity_start, entity_end, 0.6)];
-        enhancer.enhance(&mut entities, text, Some(&tokens), None);
-        assert!(
-            entities[0].confidence.get() > 0.6,
-            "2-word prefix should reach the `social security` token",
-        );
-    }
-
     #[test]
     fn lemma_matcher_boosts_on_morphological_variant() {
         // Substring matcher would miss `running` for keyword
@@ -578,31 +434,22 @@ mod tests {
             Token::from_text("system", 41..47),
         ];
         let mut entities = vec![entity(govid_label(), entity_start, entity_end, 0.6)];
-        enhancer.enhance(&mut entities, text, Some(&tokens), None);
+        enhancer.enhance(&mut entities, &Context::new(text).with_tokens(&tokens));
         assert!(
             entities[0].confidence.get() > 0.6,
             "lemma matcher should match `run` against the `running` token's lemma",
         );
-        assert!(
-            entities[0]
-                .trail
-                .iter()
-                .any(|s| matches!(s.kind, TrailStepKind::Refinement)),
-        );
     }
 
     #[test]
     fn tokens_with_no_overlap_fall_back_to_word_window() {
         // Tokens cover the first half of the document; the entity
-        // is in the second half, outside any token's range.
-        // Without the fallback the token slice would be empty and
-        // the snippet would collapse to entity bytes. With the
-        // fallback, the word-window path reaches the keyword.
+        // is in the second half, outside any token's range. The
+        // word-window path must still reach the keyword.
         let enhancer = enhancer(vec![rule(govid_label(), &["ssn"], 5, 5, 0.2)]);
         let text = "First half of the document. Your SSN: 123-45-6789";
         let entity_start = text.find("123").unwrap();
         let entity_end = entity_start + "123-45-6789".len();
-        // Tokens that cover only the first sentence.
         let tokens: Vec<Token> = vec![
             Token::from_text("First", 0..5),
             Token::from_text("half", 6..10),
@@ -611,10 +458,53 @@ mod tests {
             Token::from_text("document", 18..26),
         ];
         let mut entities = vec![entity(govid_label(), entity_start, entity_end, 0.6)];
-        enhancer.enhance(&mut entities, text, Some(&tokens), None);
+        enhancer.enhance(&mut entities, &Context::new(text).with_tokens(&tokens));
         assert!(
             entities[0].confidence.get() > 0.6,
             "tokens that don't overlap the entity must fall back to the word window",
         );
     }
+
+    #[test]
+    fn out_of_band_hint_boosts_when_window_is_empty() {
+        // Cell-only text has no surrounding context — the word
+        // window walk finds nothing — but the caller supplies the
+        // CSV column header as an out-of-band hint that contains
+        // a rule keyword. Confidence must lift, and the trail
+        // step must mark the source as `context-hint`.
+        let enhancer = enhancer(vec![rule(govid_label(), &["ssn"], 5, 5, 0.2)]);
+        let text = "123-45-6789";
+        let hints = ["ssn".to_owned()];
+        let mut entities = vec![entity(govid_label(), 0, 11, 0.6)];
+        enhancer.enhance(&mut entities, &Context::new(text).with_hints(&hints));
+        assert!(
+            entities[0].confidence.get() > 0.6,
+            "out-of-band hint matching a rule keyword must boost",
+        );
+        assert!(
+            entities[0]
+                .trail
+                .iter()
+                .any(|s| s.source == "context-hint"),
+            "trail step must record the hint-source provenance",
+        );
+    }
+
+    #[test]
+    fn hint_path_is_independent_of_window_path() {
+        // The in-text window already fires, so the hint path
+        // shouldn't double-boost. Exactly one refinement step
+        // appears on the entity.
+        let enhancer = enhancer(vec![rule(govid_label(), &["ssn"], 5, 5, 0.2)]);
+        let text = "Your SSN: 123-45-6789";
+        let hints = ["ssn".to_owned()];
+        let mut entities = vec![entity(govid_label(), 10, 21, 0.6)];
+        enhancer.enhance(&mut entities, &Context::new(text).with_hints(&hints));
+        let refinements = entities[0]
+            .trail
+            .iter()
+            .filter(|s| matches!(s.kind, TrailStepKind::Refinement))
+            .count();
+        assert_eq!(refinements, 1, "rule must boost at most once per entity");
+    }
 }
diff --git a/crates/nvisy-context/src/enhancer/window.rs b/crates/nvisy-context/src/enhancer/window.rs
new file mode 100644
index 00000000..85252fc2
--- /dev/null
+++ b/crates/nvisy-context/src/enhancer/window.rs
@@ -0,0 +1,118 @@
+//! Window-slicing helpers shared by [`Enhancer::apply_rule`].
+//!
+//! Two coordinate systems matter here:
+//!
+//! - **Bytes**: source-text offsets. `word_window` walks Unicode
+//!   word segments to expand an entity's `[start, end)` to
+//!   `prefix`/`suffix` words on either side.
+//! - **Tokens**: pre-tokenized stream from an upstream NLP engine.
+//!   `slice_tokens_around` takes a `prefix`/`suffix` count and
+//!   returns the contiguous token slice that covers the entity
+//!   plus that many neighbours.
+//!
+//! Both paths feed the same downstream [`KeywordMatcher`] —
+//! [`token_span`] reduces a non-empty token slice back to its
+//! spanning substring for matchers that operate on raw text.
+//!
+//! [`Enhancer::apply_rule`]: super::Enhancer
+//! [`KeywordMatcher`]: crate::KeywordMatcher
+
+use unicode_segmentation::UnicodeSegmentation;
+
+use crate::io::Token;
+
+/// Walk `prefix` words before `[start, end)` and `suffix` words
+/// after, via Unicode word segmentation, and return the spanning
+/// substring (including any non-word whitespace and punctuation
+/// between words). The returned slice covers `[start, end)` itself
+/// plus the prefix / suffix words; the entity's own bytes are
+/// always inside.
+///
+/// Never panics on bad offsets: `start` / `end` are clamped to the
+/// text and snapped outward to the nearest char boundaries.
+pub(super) fn word_window(
+    text: &str,
+    start: usize,
+    end: usize,
+    prefix: usize,
+    suffix: usize,
+) -> &str {
+    // Normalize before slicing: indexing a `&str` at a mid-codepoint
+    // offset panics, and `end < start` would invert the final range.
+    let start = floor_char_boundary(text, start.min(text.len()));
+    let end = ceil_char_boundary(text, end.min(text.len())).max(start);
+    let (prefix_text, suffix_text) = (&text[..start], &text[end..]);
+
+    // `unicode_word_indices` yields `(byte_offset, word)` in source
+    // order. The low edge is the `prefix`-th word counting back from
+    // the entity (the first word if fewer exist, `start` if none).
+    let prefix_words: Vec<(usize, &str)> = prefix_text.unicode_word_indices().collect();
+    let prefix_take = prefix_words.len().saturating_sub(prefix);
+    let lo = prefix_words.get(prefix_take).map_or(start, |(idx, _)| *idx);
+
+    // The high edge is the end of the `suffix`-th following word;
+    // with fewer words available the window runs to end of text.
+    let hi = match suffix.checked_sub(1) {
+        None => end,
+        Some(nth) => suffix_text
+            .unicode_word_indices()
+            .nth(nth)
+            .map_or(text.len(), |(idx, word)| end + idx + word.len()),
+    };
+
+    &text[lo..hi]
+}
+
+/// Slice tokens by *count*: take `prefix` tokens before the first
+/// token overlapping `[start, end)` and `suffix` tokens after the
+/// last. The returned slice is contiguous, and empty when no token
+/// falls inside the requested neighbourhood.
+///
+/// `tokens` must be ordered by offset: both edges are located with
+/// a binary search (`partition_point`).
+pub(super) fn slice_tokens_around(
+    tokens: &[Token],
+    start: usize,
+    end: usize,
+    prefix: usize,
+    suffix: usize,
+) -> &[Token] {
+    // First token whose `offset.end > start` overlaps or follows the entity.
+    let first_overlap = tokens.partition_point(|t| t.offset.end <= start);
+    // One past the last token whose `offset.start < end` overlaps the entity.
+    let last_overlap = tokens.partition_point(|t| t.offset.start < end);
+    // Saturate on both sides so a huge "unbounded" count clamps to
+    // the stream edges instead of overflowing.
+    let lo = first_overlap.saturating_sub(prefix);
+    let hi = last_overlap.saturating_add(suffix).min(tokens.len());
+    tokens.get(lo..hi).unwrap_or_default()
+}
+
+/// Spanning substring covering `tokens` plus the entity itself.
+/// Used to give the matcher a contiguous text window when slicing
+/// against the token stream.
+///
+/// `tokens` should be non-empty (debug-asserted; callers fall back to
+/// [`word_window`]); an empty slice yields the entity's own span.
+pub(super) fn token_span<'a>(text: &'a str, tokens: &[Token], start: usize, end: usize) -> &'a str {
+    debug_assert!(!tokens.is_empty(), "token_span requires non-empty slice");
+    let lo = tokens.first().map_or(start, |t| t.offset.start.min(start));
+    let hi = tokens.last().map_or(end, |t| t.offset.end.max(end));
+    let lo = floor_char_boundary(text, lo.min(text.len()));
+    let hi = ceil_char_boundary(text, hi.min(text.len())).max(lo);
+    &text[lo..hi]
+}
+
+fn floor_char_boundary(s: &str, pos: usize) -> usize {
+    // Step left from `pos` until it lands on a UTF-8 boundary;
+    // offset 0 always is one, so the fallback is never reached.
+    let mut downward = (0..=pos).rev();
+    downward.find(|&at| s.is_char_boundary(at)).unwrap_or(0)
+}
+
+fn ceil_char_boundary(s: &str, pos: usize) -> usize {
+    // Step right from `pos` until it lands on a UTF-8 boundary;
+    // offsets at or past `s.len()` are handed back unchanged.
+    let mut upward = pos..s.len();
+    upward.find(|&at| s.is_char_boundary(at)).unwrap_or(pos.max(s.len()))
+}
diff --git a/crates/nvisy-context/src/io/mod.rs b/crates/nvisy-context/src/io/mod.rs
new file mode 100644
index 00000000..df3b9fb0
--- /dev/null
+++ b/crates/nvisy-context/src/io/mod.rs
@@ -0,0 +1,20 @@
+//! Wiring between the [`Enhancer`] and the [`EntityRecognizer`]
+//! pipeline.
+//!
+//! - [`Token`] / [`Tokens`] is the shared NLP token artifact the
+//!   enhancer reads off `RecognizerInput.artifacts`.
+//! - [`ContextEnhanced`] wraps any [`EntityRecognizer<Text>`] so
+//!   the enhancer runs automatically after the inner recognizer's
+//!   pass.
+//!
+//! All three types are re-exported at the crate root.
+//!
+//! [`Enhancer`]: crate::Enhancer
+//! [`EntityRecognizer`]: nvisy_core::recognition::EntityRecognizer
+//! [`EntityRecognizer<Text>`]: nvisy_core::recognition::EntityRecognizer
+
+mod tokens;
+mod wrapper;
+
+pub use self::tokens::{Token, Tokens};
+pub use self::wrapper::ContextEnhanced;
diff --git a/crates/nvisy-context/src/tokens.rs b/crates/nvisy-context/src/io/tokens.rs
similarity index 100%
rename from crates/nvisy-context/src/tokens.rs
rename to crates/nvisy-context/src/io/tokens.rs
diff --git a/crates/nvisy-context/src/wrapper.rs b/crates/nvisy-context/src/io/wrapper.rs
similarity index 84%
rename from crates/nvisy-context/src/wrapper.rs
rename to crates/nvisy-context/src/io/wrapper.rs
index c9d415ec..6329bf5d 100644
--- a/crates/nvisy-context/src/wrapper.rs
+++ b/crates/nvisy-context/src/io/wrapper.rs
@@ -22,7 +22,8 @@ use nvisy_core::Result;
 use nvisy_core::modality::Text;
 use nvisy_core::recognition::{EntityRecognizer, RecognizerInput, RecognizerOutput};
 
-use super::{Enhancer, Tokens};
+use super::Tokens;
+use crate::{Context, Enhancer};
 
 /// Wraps an [`EntityRecognizer<Text>`] with a post-recognition
 /// [`Enhancer`] pass. Implements [`EntityRecognizer<Text>`] so
@@ -68,11 +69,14 @@ where
         if self.enhancer.is_empty() {
             return Ok(output);
         }
-        let text = input.data.text.as_str();
-        let tokens = input.artifacts.get::<Tokens>().map(Tokens::as_slice);
-        let language = input.language.as_ref();
-        self.enhancer
-            .enhance(&mut output.entities, text, tokens, language);
+        let mut ctx = Context::new(input.data.text.as_str()).with_hints(&input.context_hints);
+        if let Some(tokens) = input.artifacts.get::<Tokens>() {
+            ctx = ctx.with_tokens(tokens.as_slice());
+        }
+        if let Some(language) = input.language.as_ref() {
+            ctx = ctx.with_language(language);
+        }
+        self.enhancer.enhance(&mut output.entities, &ctx);
         Ok(output)
     }
 }
diff --git a/crates/nvisy-context/src/lib.rs b/crates/nvisy-context/src/lib.rs
index 244e113c..7c470d06 100644
--- a/crates/nvisy-context/src/lib.rs
+++ b/crates/nvisy-context/src/lib.rs
@@ -3,13 +3,11 @@
 #![doc = include_str!("../README.md")]
 
 mod enhancer;
-mod matcher;
+mod io;
+mod matching;
 mod rule;
-mod tokens;
-mod wrapper;
 
-pub use self::enhancer::Enhancer;
-pub use self::matcher::{KeywordMatcher, LemmaMatcher, SubstringMatcher};
+pub use self::enhancer::{Context, Enhancer};
+pub use self::io::{ContextEnhanced, Token, Tokens};
+pub use self::matching::{KeywordMatcher, LemmaMatcher, SubstringMatcher};
 pub use self::rule::{BoostRule, DEFAULT_BOOST, DEFAULT_PREFIX_WORDS, DEFAULT_SUFFIX_WORDS};
-pub use self::tokens::{Token, Tokens};
-pub use self::wrapper::ContextEnhanced;
diff --git a/crates/nvisy-context/src/matcher.rs b/crates/nvisy-context/src/matcher.rs
deleted file mode 100644
index 06beef22..00000000
--- a/crates/nvisy-context/src/matcher.rs
+++ /dev/null
@@ -1,134 +0,0 @@
-//! [`KeywordMatcher`] strategy + the two shipped implementations.
-//!
-//! - [`SubstringMatcher`] — ASCII case-insensitive substring search
-//!   over the raw text window. The fallback when no token artifact
-//!   is present on `RecognizerInput.artifacts`.
-//! - [`LemmaMatcher`] — matches keywords against lemmatized tokens
-//!   the upstream NLP engine stamped on `RecognizerInput.artifacts`
-//!   as a [`Tokens`] entry. Recognizes morphological variants
-//!   ("running" → "run", "SSNs" → "ssn") substring matching misses.
-//!
-//! Both implementations are stateless; the [`Enhancer`] owns one
-//! as a configured strategy.
-//!
-//! [`Tokens`]: super::Tokens
-//! [`Enhancer`]: super::Enhancer
-
-use hipstr::HipStr;
-
-use super::Token;
-
-/// Decide whether any keyword from `keywords` fires within the
-/// candidate region around an entity match.
-///
-/// The strategy slot that lets the enhancer swap raw substring
-/// matching for lemma-aware matching (or a third-party
-/// fuzzy/word-boundary implementation) without changing its core
-/// pipeline.
-///
-/// Implementations receive both a raw `window` slice of the source
-/// text (for substring strategies) and the `tokens` covering that
-/// same range (for token/lemma strategies). Either or both may be
-/// ignored; `tokens` is empty when no NLP engine produced a token
-/// artifact.
-pub trait KeywordMatcher: Send + Sync {
-    /// `true` if at least one keyword from `keywords` appears in
-    /// the input.
-    fn any_match(&self, window: &str, tokens: &[Token], keywords: &[HipStr<'static>]) -> bool;
-}
-
-/// ASCII case-insensitive substring matcher. The default —
-/// runs whenever no token artifact was stamped on
-/// `RecognizerInput.artifacts`, or whenever the caller explicitly
-/// picks raw matching.
-///
-/// Fast, allocation-light, permissive: the keyword `"email"` fires
-/// inside `"MyEmailAddress"`. Ignores the `tokens` argument.
-#[derive(Debug, Clone, Copy, Default)]
-pub struct SubstringMatcher;
-
-impl KeywordMatcher for SubstringMatcher {
-    fn any_match(&self, window: &str, _tokens: &[Token], keywords: &[HipStr<'static>]) -> bool {
-        let lowered = window.to_ascii_lowercase();
-        keywords
-            .iter()
-            .any(|kw| lowered.contains(kw.as_str().to_ascii_lowercase().as_str()))
-    }
-}
-
-/// Lemma-aware matcher. Compares each lemma in `tokens` against
-/// the keyword list with ASCII case-insensitive equality.
-///
-/// Falls back to [`SubstringMatcher`] semantics when `tokens` is
-/// empty (no shared NLP artifact was produced) so the enhancer
-/// runs uniformly regardless of whether the upstream pass emitted
-/// tokens.
-///
-/// Recognizes morphological variants the substring matcher cannot:
-/// `"running" → "run"`, `"dogs" → "dog"`, `"SSNs" → "ssn"`. Cost
-/// is one lowercase per keyword + one lowercase per lemma per
-/// match attempt.
-#[derive(Debug, Clone, Copy, Default)]
-pub struct LemmaMatcher;
-
-impl KeywordMatcher for LemmaMatcher {
-    fn any_match(&self, window: &str, tokens: &[Token], keywords: &[HipStr<'static>]) -> bool {
-        if tokens.is_empty() {
-            return SubstringMatcher.any_match(window, tokens, keywords);
-        }
-        let lowered_keywords: Vec<String> = keywords
-            .iter()
-            .map(|k| k.as_str().to_ascii_lowercase())
-            .collect();
-        tokens.iter().any(|tok| {
-            let lemma = tok.lemma.as_str().to_ascii_lowercase();
-            lowered_keywords.contains(&lemma)
-        })
-    }
-}
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-
-    fn kws(items: &[&'static str]) -> Vec<HipStr<'static>> {
-        items.iter().copied().map(HipStr::from).collect()
-    }
-
-    #[test]
-    fn substring_matches_case_insensitively() {
-        let m = SubstringMatcher;
-        assert!(m.any_match("Your SSN: 123", &[], &kws(&["ssn"])));
-        assert!(m.any_match(
-            "the SOCIAL SECURITY number",
-            &[],
-            &kws(&["social security"])
-        ));
-        assert!(!m.any_match("nothing here", &[], &kws(&["ssn"])));
-    }
-
-    #[test]
-    fn substring_is_permissive() {
-        let m = SubstringMatcher;
-        assert!(m.any_match("MyEmailAddress", &[], &kws(&["email"])));
-    }
-
-    #[test]
-    fn lemma_matches_morph_variants() {
-        let tokens = vec![
-            Token::from_text("the", 0..3),
-            Token::from_text("running", 4..11).with_lemma("run"),
-            Token::from_text("dogs", 12..16).with_lemma("dog"),
-        ];
-        let m = LemmaMatcher;
-        assert!(m.any_match("", &tokens, &kws(&["run"])));
-        assert!(m.any_match("", &tokens, &kws(&["dog"])));
-        assert!(!m.any_match("", &tokens, &kws(&["cat"])));
-    }
-
-    #[test]
-    fn lemma_falls_back_to_substring_without_tokens() {
-        let m = LemmaMatcher;
-        assert!(m.any_match("Your SSN: 123", &[], &kws(&["ssn"])));
-    }
-}
diff --git a/crates/nvisy-context/src/matching/lemma.rs b/crates/nvisy-context/src/matching/lemma.rs
new file mode 100644
index 00000000..0e1d5b5e
--- /dev/null
+++ b/crates/nvisy-context/src/matching/lemma.rs
@@ -0,0 +1,65 @@
+//! Lemma-aware [`KeywordMatcher`] implementation.
+
+use hipstr::HipStr;
+
+use super::matcher::{KeywordMatcher, SubstringMatcher};
+use crate::io::Token;
+
+/// Lemma-aware matcher. Compares each lemma in `tokens` against
+/// the keyword list with ASCII case-insensitive equality.
+///
+/// Falls back to [`SubstringMatcher`] semantics when `tokens` is
+/// empty (no shared NLP artifact was produced) so the enhancer
+/// runs uniformly regardless of whether the upstream pass emitted
+/// tokens.
+///
+/// Recognizes morphological variants the substring matcher cannot:
+/// `"running" → "run"`, `"dogs" → "dog"`, `"SSNs" → "ssn"`. Cost
+/// is one allocation-free comparison per (lemma, keyword) pair
+/// per match attempt.
+#[derive(Debug, Clone, Copy, Default)]
+pub struct LemmaMatcher;
+
+impl KeywordMatcher for LemmaMatcher {
+    /// `true` if any token lemma equals any keyword ignoring ASCII
+    /// case; defers to [`SubstringMatcher`] when `tokens` is empty.
+    fn any_match(&self, window: &str, tokens: &[Token], keywords: &[HipStr<'static>]) -> bool {
+        if tokens.is_empty() {
+            return SubstringMatcher.any_match(window, tokens, keywords);
+        }
+        // `eq_ignore_ascii_case` compares in place, so no lowered
+        // copy of each keyword or lemma is allocated.
+        tokens.iter().any(|tok| {
+            let lemma = tok.lemma.as_str();
+            keywords.iter().any(|kw| kw.as_str().eq_ignore_ascii_case(lemma))
+        })
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    fn kws(items: &[&'static str]) -> Vec<HipStr<'static>> {
+        items.iter().copied().map(HipStr::from).collect()
+    }
+
+    #[test]
+    fn matches_morph_variants() {
+        let tokens = vec![
+            Token::from_text("the", 0..3),
+            Token::from_text("running", 4..11).with_lemma("run"),
+            Token::from_text("dogs", 12..16).with_lemma("dog"),
+        ];
+        let m = LemmaMatcher;
+        assert!(m.any_match("", &tokens, &kws(&["run"])));
+        assert!(m.any_match("", &tokens, &kws(&["dog"])));
+        assert!(!m.any_match("", &tokens, &kws(&["cat"])));
+    }
+
+    #[test]
+    fn falls_back_to_substring_without_tokens() {
+        let m = LemmaMatcher;
+        assert!(m.any_match("Your SSN: 123", &[], &kws(&["ssn"])));
+    }
+}
diff --git a/crates/nvisy-context/src/matching/matcher.rs b/crates/nvisy-context/src/matching/matcher.rs
new file mode 100644
index 00000000..5e7ac560
--- /dev/null
+++ b/crates/nvisy-context/src/matching/matcher.rs
@@ -0,0 +1,70 @@
+//! [`KeywordMatcher`] trait + the default [`SubstringMatcher`].
+
+use hipstr::HipStr;
+
+use crate::io::Token;
+
+/// Decide whether any keyword from `keywords` fires within the
+/// candidate region around an entity match.
+///
+/// The strategy slot that lets the enhancer swap raw substring
+/// matching for lemma-aware matching (or a third-party
+/// fuzzy/word-boundary implementation) without changing its core
+/// pipeline.
+///
+/// Implementations receive both a raw `window` slice of the source
+/// text (for substring strategies) and the `tokens` covering that
+/// same range (for token/lemma strategies). Either or both may be
+/// ignored; `tokens` is empty when no NLP engine produced a token
+/// artifact.
+pub trait KeywordMatcher: Send + Sync {
+    /// `true` if at least one keyword from `keywords` appears in
+    /// the input.
+    fn any_match(&self, window: &str, tokens: &[Token], keywords: &[HipStr<'static>]) -> bool;
+}
+
+/// Default matcher: plain substring search that ignores ASCII
+/// case. Used whenever no token artifact was stamped on
+/// `RecognizerInput.artifacts`, or whenever the caller explicitly
+/// picks raw matching.
+///
+/// Cheap and deliberately permissive: the keyword `"email"` fires
+/// inside `"MyEmailAddress"`. The `tokens` argument is ignored.
+#[derive(Debug, Clone, Copy, Default)]
+pub struct SubstringMatcher;
+
+impl KeywordMatcher for SubstringMatcher {
+    fn any_match(&self, window: &str, _tokens: &[Token], keywords: &[HipStr<'static>]) -> bool {
+        // Lower the window once; each keyword is lowered on demand.
+        let haystack = window.to_ascii_lowercase();
+        let fires = |kw: &HipStr<'static>| haystack.contains(&kw.as_str().to_ascii_lowercase());
+        keywords.iter().any(fires)
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    fn kws(items: &[&'static str]) -> Vec<HipStr<'static>> {
+        items.iter().copied().map(HipStr::from).collect()
+    }
+
+    #[test]
+    fn substring_matches_case_insensitively() {
+        let m = SubstringMatcher;
+        assert!(m.any_match("Your SSN: 123", &[], &kws(&["ssn"])));
+        assert!(m.any_match(
+            "the SOCIAL SECURITY number",
+            &[],
+            &kws(&["social security"])
+        ));
+        assert!(!m.any_match("nothing here", &[], &kws(&["ssn"])));
+    }
+
+    #[test]
+    fn substring_is_permissive() {
+        let m = SubstringMatcher;
+        assert!(m.any_match("MyEmailAddress", &[], &kws(&["email"])));
+    }
+}
diff --git a/crates/nvisy-context/src/matching/mod.rs b/crates/nvisy-context/src/matching/mod.rs
new file mode 100644
index 00000000..3ffe59cf
--- /dev/null
+++ b/crates/nvisy-context/src/matching/mod.rs
@@ -0,0 +1,19 @@
+//! Keyword-matching strategies plugged into the [`Enhancer`].
+//!
+//! - [`KeywordMatcher`] is the trait the enhancer talks to.
+//! - [`SubstringMatcher`] is the default: ASCII case-insensitive
+//!   substring search over the raw text window. Runs whenever no
+//!   token artifact is present on `RecognizerInput.artifacts`.
+//! - [`LemmaMatcher`] reads lemmatized tokens an upstream NLP
+//!   engine stamped on `RecognizerInput.artifacts`. Recognizes
+//!   morphological variants substring matching misses.
+//!
+//! All three are re-exported at the crate root.
+//!
+//! [`Enhancer`]: crate::Enhancer
+
+mod lemma;
+mod matcher;
+
+pub use self::lemma::LemmaMatcher;
+pub use self::matcher::{KeywordMatcher, SubstringMatcher};
diff --git a/crates/nvisy-core/src/recognition/input.rs b/crates/nvisy-core/src/recognition/input.rs
index 2bbc57ba..6da30fd7 100644
--- a/crates/nvisy-core/src/recognition/input.rs
+++ b/crates/nvisy-core/src/recognition/input.rs
@@ -64,6 +64,14 @@ pub struct RecognizerInput<M: Modality> {
     /// behavior for domain-specific terms; those that don't ignore the
     /// field.
     pub labels: Vec<String>,
+    /// Out-of-band context strings the caller wants treated as
+    /// in-context for confidence boosting (e.g. the column header
+    /// of a CSV cell, the JSON object key of a string value, the
+    /// log field name a value sits under). Recognizers that run a
+    /// context enhancer feed these to the enhancer alongside the
+    /// in-text word window; recognizers without an enhancer ignore
+    /// the field.
+    pub context_hints: Vec<String>,
     /// Correlation UUID propagated through the tracing span for this
     /// call. Recognizer bodies do not read this directly; it's set
     /// on the span by the caller.
@@ -82,6 +90,7 @@ impl<M: Modality> RecognizerInput<M> {
             country: None,
             hints: Vec::new(),
             labels: Vec::new(),
+            context_hints: Vec::new(),
             correlation_id: None,
         }
     }
@@ -128,6 +137,14 @@ impl<M: Modality> RecognizerInput<M> {
         self
     }
 
+    /// Attach out-of-band context hint strings (column headers,
+    /// JSON keys, …) the enhancer should treat as in-context.
+    #[must_use]
+    pub fn with_context_hints(mut self, hints: Vec<String>) -> Self {
+        self.context_hints = hints;
+        self
+    }
+
     /// Set the correlation id propagated through the tracing span.
     #[must_use]
     pub fn with_correlation_id(mut self, id: Uuid) -> Self {
diff --git a/crates/nvisy-pattern/Cargo.toml b/crates/nvisy-pattern/Cargo.toml
index 08fe0410..94c69b56 100644
--- a/crates/nvisy-pattern/Cargo.toml
+++ b/crates/nvisy-pattern/Cargo.toml
@@ -45,5 +45,11 @@ aho-corasick = { workspace = true, features = [] }
 # Tabular document parsing (dictionary loading from CSV)
 csv = { workspace = true, features = [] }
 
+# Base58Check decoder for the crypto.btc validator
+bs58 = { workspace = true, features = ["check"] }
+
+# Region-aware phone-number parsing for the phone validator
+phonenumber = { workspace = true }
+
 [dev-dependencies]
 tokio = { workspace = true, features = ["macros", "rt"] }
diff --git a/crates/nvisy-pattern/assets/patterns/uk/identity/driving_licence.toml b/crates/nvisy-pattern/assets/patterns/uk/identity/driving_licence.toml
index 47ec45b8..85f2e388 100644
--- a/crates/nvisy-pattern/assets/patterns/uk/identity/driving_licence.toml
+++ b/crates/nvisy-pattern/assets/patterns/uk/identity/driving_licence.toml
@@ -22,3 +22,4 @@ context = [
 [[variants]]
 regex = '\b[A-Z9]{5}[0-9](?:0[1-9]|1[0-2]|5[1-9]|6[0-2])(?:0[1-9]|[12][0-9]|3[01])[0-9][A-Z9]{2}[A-Z0-9][A-Z]{2}\b'
 score = 0.5
+validator = "uk.driving_licence"
diff --git a/crates/nvisy-pattern/assets/patterns/uk/vehicle/registration.toml b/crates/nvisy-pattern/assets/patterns/uk/vehicle/registration.toml
index 0d585be7..d23047ee 100644
--- a/crates/nvisy-pattern/assets/patterns/uk/vehicle/registration.toml
+++ b/crates/nvisy-pattern/assets/patterns/uk/vehicle/registration.toml
@@ -29,6 +29,7 @@ context = [
 [[variants]]
 regex = '\b[A-HJ-PR-Y][A-HJ-PR-Y](?:0[1-9]|[1-7][0-9])[- ]?[A-HJ-PR-Z]{3}\b'
 score = 0.3
+validator = "uk.vehicle_registration"
 
 [[variants]]
 regex = '\b[A-HJ-NPR-TV-Y]\d{1,3}[- ]?[A-HJ-PR-Y][A-HJ-PR-Z]{2}\b'
diff --git a/crates/nvisy-pattern/assets/patterns/us/health/medical_license.toml b/crates/nvisy-pattern/assets/patterns/us/health/medical_license.toml
index 80e80390..3859126e 100644
--- a/crates/nvisy-pattern/assets/patterns/us/health/medical_license.toml
+++ b/crates/nvisy-pattern/assets/patterns/us/health/medical_license.toml
@@ -26,11 +26,11 @@ context = [
 # DEA registration type letters: A, B, C, D, E, F, G, H, J, K, L,
 # M, P, R, S, T, U, X (plus mid-2000s practitioner-9 series).
 [[variants]]
-regex = '[abcdefghjklmprstuxABCDEFGHJKLMPRSTUX][a-zA-Z]\d{7}'
+regex = '\b[abcdefghjklmprstuxABCDEFGHJKLMPRSTUX][a-zA-Z]\d{7}\b'
 score = 0.4
 validator = "us.dea_number"
 
 [[variants]]
-regex = '[abcdefghjklmprstuxABCDEFGHJKLMPRSTUX]9\d{7}'
+regex = '\b[abcdefghjklmprstuxABCDEFGHJKLMPRSTUX]9\d{7}\b'
 score = 0.4
 validator = "us.dea_number"
diff --git a/crates/nvisy-pattern/assets/patterns/us/identity/passport.toml b/crates/nvisy-pattern/assets/patterns/us/identity/passport.toml
index 439529a6..7c650847 100644
--- a/crates/nvisy-pattern/assets/patterns/us/identity/passport.toml
+++ b/crates/nvisy-pattern/assets/patterns/us/identity/passport.toml
@@ -1,6 +1,13 @@
 name = "us-passport"
 label = "passport_number"
 countries = ["US"]
+context = [
+  "passport",
+  "passport#",
+  "travel document",
+  "us passport",
+  "united states passport",
+]
 
 [[variants]]
 regex = "\\b[A-Z]\\d{8}\\b"
diff --git a/crates/nvisy-pattern/assets/patterns/us/identity/postal_code.toml b/crates/nvisy-pattern/assets/patterns/us/identity/postal_code.toml
index 53ee38c9..f6fd66f9 100644
--- a/crates/nvisy-pattern/assets/patterns/us/identity/postal_code.toml
+++ b/crates/nvisy-pattern/assets/patterns/us/identity/postal_code.toml
@@ -1,7 +1,17 @@
 name = "us-postal-code"
 label = "postal_code"
 countries = ["US"]
+context = [
+  "zip",
+  "zip code",
+  "zipcode",
+  "postal",
+  "postal code",
+  "address",
+  "mailing",
+]
 
 [[variants]]
 regex = "\\b\\d{5}(?:-\\d{4})?\\b"
-score = 0.5
+score = 0.1
+validator = "us.postal_code"
diff --git a/crates/nvisy-pattern/assets/patterns/world/credentials/aws_key.toml b/crates/nvisy-pattern/assets/patterns/world/credentials/aws_key.toml
index 189aacc9..b4320c3c 100644
--- a/crates/nvisy-pattern/assets/patterns/world/credentials/aws_key.toml
+++ b/crates/nvisy-pattern/assets/patterns/world/credentials/aws_key.toml
@@ -1,6 +1,26 @@
 name = "aws-key"
 label = "api_key"
+context = [
+  "aws",
+  "amazon",
+  "access key",
+  "secret access key",
+  "aws_access_key_id",
+  "aws_secret_access_key",
+]
 
+# AWS access key ID — 20 chars, fixed 4-letter principal prefix
+# plus 16 base32-ish chars. Prefixes per AWS docs: AKIA (IAM
+# user), ASIA (STS temporary), AIDA (IAM user identifier), AROA
+# (IAM role), ANPA / AGPA (managed policy / group), AIPA (EC2
+# instance profile).
 [[variants]]
-regex = "\\bAKIA[0-9A-Z]{16}\\b"
+regex = "\\b(?:AKIA|ASIA|AIDA|AROA|ANPA|AGPA|AIPA)[0-9A-Z]{16}\\b"
 score = 0.95
+
+# AWS secret access key — 40 chars of standard (not URL-safe) base64.
+# Without context this collides with any 40-char base64 string (hashes,
+# tokens), so the score is modest; the boost layer lifts colocated hits.
+[[variants]]
+regex = "\\b[A-Za-z0-9/+=]{40}\\b"
+score = 0.3
diff --git a/crates/nvisy-pattern/assets/patterns/world/credentials/generic_api_key.toml b/crates/nvisy-pattern/assets/patterns/world/credentials/generic_api_key.toml
index bcd59d63..c67d4956 100644
--- a/crates/nvisy-pattern/assets/patterns/world/credentials/generic_api_key.toml
+++ b/crates/nvisy-pattern/assets/patterns/world/credentials/generic_api_key.toml
@@ -1,6 +1,10 @@
 name = "generic-api-key"
 label = "api_key"
 
+# Separator after the keyword is either `:` / `=` (assignment
+# style: `api_key="…"`) or one-or-more spaces (header style:
+# `Authorization: Bearer <token>`). Accepting whitespace-only is
+# what catches the dominant Authorization-header leak form.
 [[variants]]
-regex = "(?i)(?:api[_\\-]?key|api[_\\-]?secret|access[_\\-]?token|secret[_\\-]?key|bearer)\\s*[:=]\\s*[\"']?([a-zA-Z0-9_\\-]{20,})[\"']?"
+regex = "(?i)(?:api[_\\-]?key|api[_\\-]?secret|access[_\\-]?token|secret[_\\-]?key|bearer)(?:\\s*[:=]\\s*|\\s+)[\"']?([a-zA-Z0-9_\\-\\.]{20,})[\"']?"
 score = 0.4
diff --git a/crates/nvisy-pattern/assets/patterns/world/credentials/github_token.toml b/crates/nvisy-pattern/assets/patterns/world/credentials/github_token.toml
index ba247e60..a1a76b41 100644
--- a/crates/nvisy-pattern/assets/patterns/world/credentials/github_token.toml
+++ b/crates/nvisy-pattern/assets/patterns/world/credentials/github_token.toml
@@ -1,6 +1,14 @@
 name = "github-token"
 label = "auth_token"
 
+# Classic prefix tokens — gh{p,o,u,s,r}_ for PAT, OAuth,
+# user-to-server, server-to-server, refresh.
 [[variants]]
 regex = "\\bgh[pousr]_[a-zA-Z0-9]{36}\\b"
 score = 0.95
+
+# Fine-grained personal access token (introduced 2022) — current
+# recommended PAT form, 82-char mixed-case body over [A-Za-z0-9_].
+[[variants]]
+regex = "\\bgithub_pat_[A-Za-z0-9_]{82}\\b"
+score = 0.95
diff --git a/crates/nvisy-pattern/assets/patterns/world/credentials/private_key.toml b/crates/nvisy-pattern/assets/patterns/world/credentials/private_key.toml
index 61d6977e..7f0aaf59 100644
--- a/crates/nvisy-pattern/assets/patterns/world/credentials/private_key.toml
+++ b/crates/nvisy-pattern/assets/patterns/world/credentials/private_key.toml
@@ -2,5 +2,16 @@ name = "private-key"
 label = "private_key"
 
 [[variants]]
-regex = "-----BEGIN (?:RSA |EC |DSA |OPENSSH )?PRIVATE KEY-----"
+regex = "-----BEGIN (?:RSA |EC |DSA |OPENSSH |ENCRYPTED |PGP )?PRIVATE KEY(?: BLOCK)?-----[\\s\\S]*?-----END (?:RSA |EC |DSA |OPENSSH |ENCRYPTED |PGP )?PRIVATE KEY(?: BLOCK)?-----"
+score = 0.98
+
+# RFC 4716 SSH2 private key (Tectia, older SSH implementations).
+[[variants]]
+regex = "---- BEGIN SSH2 ENCRYPTED PRIVATE KEY ----[\\s\\S]*?---- END SSH2 ENCRYPTED PRIVATE KEY ----"
+score = 0.98
+
+# PuTTY .ppk (PuTTY-User-Key-File-2 or -3). Body extends to the
+# `Private-MAC:` trailer which closes the keystore.
+[[variants]]
+regex = "PuTTY-User-Key-File-[23]:[\\s\\S]*?Private-MAC: [0-9a-f]+"
 score = 0.98
diff --git a/crates/nvisy-pattern/assets/patterns/world/finance/bitcoin_address.toml b/crates/nvisy-pattern/assets/patterns/world/finance/bitcoin_address.toml
index 31f6fad6..c4c3118c 100644
--- a/crates/nvisy-pattern/assets/patterns/world/finance/bitcoin_address.toml
+++ b/crates/nvisy-pattern/assets/patterns/world/finance/bitcoin_address.toml
@@ -1,6 +1,17 @@
 name = "bitcoin-address"
 label = "crypto_address"
 
+# Legacy Base58 addresses (P2PKH `1…`, P2SH `3…`) carry a
+# four-byte double-SHA256 checksum that the `crypto.btc`
+# validator verifies.
 [[variants]]
-regex = "\\b(?:bc1[a-z0-9]{25,39}|[13][a-km-zA-HJ-NP-Z1-9]{25,34})\\b"
+regex = "\\b[13][a-km-zA-HJ-NP-Z1-9]{25,34}\\b"
+score = 0.5
+validator = "crypto.btc"
+
+# Bech32 / Bech32m segwit + Taproot. Length window covers
+# v0 P2WPKH/P2WSH (42/62 chars total, body 39/59) and
+# v1 P2TR Taproot (62 chars total, body 59).
+[[variants]]
+regex = "\\bbc1[a-z0-9]{25,59}\\b"
 score = 0.5
diff --git a/crates/nvisy-pattern/assets/patterns/world/finance/credit_card.toml b/crates/nvisy-pattern/assets/patterns/world/finance/credit_card.toml
index d3412b4f..a06bd0fe 100644
--- a/crates/nvisy-pattern/assets/patterns/world/finance/credit_card.toml
+++ b/crates/nvisy-pattern/assets/patterns/world/finance/credit_card.toml
@@ -13,7 +13,10 @@ es = ["tarjeta", "crédito", "credito", "débito", "debito", "pago", "visa", "ma
 de = ["karte", "kredit", "kreditkarte", "debit", "zahlung", "visa", "mastercard", "amex"]
 fr = ["carte", "crédit", "credit", "débit", "debit", "paiement", "visa", "mastercard", "amex"]
 
+# Brand BIN ranges: Visa (4), Mastercard (51-55 and 2-series
+# 2221-2720 introduced 2017), Discover (6), Amex/Diners (3),
+# and a loose 1xxx catch-all that Luhn filters down.
 [[variants]]
-regex = '\b(?:(?:4\d{3})|(?:5[0-5]\d{2})|(?:6\d{3})|(?:1\d{3})|(?:3\d{3}))[- ]?(?:\d{3,4})[- ]?(?:\d{3,4})[- ]?(?:\d{3,5})\b'
-score = 0.5
+regex = '\b(?:(?:4\d{3})|(?:5[0-5]\d{2})|(?:2[2-7]\d{2})|(?:6\d{3})|(?:1\d{3})|(?:3\d{3}))[- ]?(?:\d{3,4})[- ]?(?:\d{3,4})[- ]?(?:\d{3,5})\b'
+score = 0.3
 validator = "luhn"
diff --git a/crates/nvisy-pattern/assets/patterns/world/finance/iban.toml b/crates/nvisy-pattern/assets/patterns/world/finance/iban.toml
index 3680ee9e..79d2bf30 100644
--- a/crates/nvisy-pattern/assets/patterns/world/finance/iban.toml
+++ b/crates/nvisy-pattern/assets/patterns/world/finance/iban.toml
@@ -3,6 +3,6 @@ label = "iban"
 context = ["iban", "bank", "account", "transfer", "swift"]
 
 [[variants]]
-regex = "\\b[A-Z]{2}\\d{2}\\s?[A-Z0-9]{4}\\s?(?:\\d{4}\\s?){2,7}\\d{1,4}\\b"
+regex = "\\b[A-Z]{2}\\d{2}[\\s\\-]?[A-Z0-9]{4}[\\s\\-]?(?:[A-Z0-9]{4}[\\s\\-]?){2,7}[A-Z0-9]{1,4}\\b"
 score = 0.5
 validator = "iban"
diff --git a/crates/nvisy-pattern/src/recognition/recognizer.rs b/crates/nvisy-pattern/src/recognition/recognizer.rs
index 9f6bbb3c..17d280f6 100644
--- a/crates/nvisy-pattern/src/recognition/recognizer.rs
+++ b/crates/nvisy-pattern/src/recognition/recognizer.rs
@@ -13,7 +13,7 @@ use super::compiled::{CompiledDictionary, CompiledPattern, has_word_boundaries};
 use super::dictionary::Dictionary;
 use super::regex::Regex;
 use crate::shipped;
-use crate::validators::ValidatorRegistry;
+use crate::validators::{ValidationContext, ValidatorRegistry};
 
 /// Runtime text recognizer composed of a regex pool and an
 /// Aho-Corasick automaton.
@@ -434,9 +434,13 @@ impl EntityRecognizer<Text> for PatternRecognizer {
                 if !input.applies_to_country(&pat.countries) {
                     continue;
                 }
+                let ctx = ValidationContext {
+                    country: input.country,
+                    language: input.language.clone(),
+                };
                 for m in pat.regex.find_iter(text) {
                     if let Some(validator) = pat.validator.as_ref()
-                        && !validator.validate(m.as_str())
+                        && !validator.validate(m.as_str(), &ctx)
                     {
                         continue;
                     }
diff --git a/crates/nvisy-pattern/src/validators/btc.rs b/crates/nvisy-pattern/src/validators/btc.rs
new file mode 100644
index 00000000..7abc6d67
--- /dev/null
+++ b/crates/nvisy-pattern/src/validators/btc.rs
@@ -0,0 +1,53 @@
+//! Bitcoin legacy-address (Base58Check) checksum validator.
+//!
+//! Validates P2PKH (`1…`) and P2SH (`3…`) addresses by decoding
+//! the Base58 payload and verifying its trailing four-byte
+//! double-SHA256 checksum. Bech32 / Bech32m addresses (`bc1…`)
+//! are not handled here — those use a different polynomial check.
+
+/// Return `true` if `value` is a structurally valid Base58Check
+/// Bitcoin address.
+///
+/// Accepts P2PKH (version byte `0x00`, `1…`) and P2SH
+/// (version byte `0x05`, `3…`) on mainnet. Rejects mismatched
+/// version bytes, broken Base58, and bad checksums.
+pub fn btc(value: &str) -> bool {
+    match bs58::decode(value.trim()).with_check(None).into_vec() {
+        Ok(bytes) if bytes.len() == 21 => matches!(bytes[0], 0x00 | 0x05),
+        _ => false,
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn accepts_known_p2pkh() {
+        // Satoshi's genesis-block coinbase address.
+        assert!(btc("1A1zP1eP5QGefi2DMPTfTL5SLmv7DivfNa"));
+    }
+
+    #[test]
+    fn accepts_known_p2sh() {
+        assert!(btc("3J98t1WpEZ73CNmQviecrnyiWrnqRhWNLy"));
+    }
+
+    #[test]
+    fn rejects_bad_checksum() {
+        // Final char flipped.
+        assert!(!btc("1A1zP1eP5QGefi2DMPTfTL5SLmv7DivfNb"));
+    }
+
+    #[test]
+    fn rejects_non_base58() {
+        assert!(!btc("1A1zP1eP5QGefi2DMPTfTL5SLmv7Divf0a"));
+        assert!(!btc(""));
+    }
+
+    #[test]
+    fn rejects_unknown_version() {
+        // Bitcoin testnet P2PKH (version byte `0x6f`).
+        assert!(!btc("mipcBbFg9gMiCh81Kj8tqqdgoZub1ZJRfn"));
+    }
+}
diff --git a/crates/nvisy-pattern/src/validators/mod.rs b/crates/nvisy-pattern/src/validators/mod.rs
index 4eb8d853..39722f54 100644
--- a/crates/nvisy-pattern/src/validators/mod.rs
+++ b/crates/nvisy-pattern/src/validators/mod.rs
@@ -8,16 +8,19 @@
 //! can't.
 //!
 //! [`ValidatorRegistry::builtin`] ships universal validators
-//! ([`luhn`], [`iban`], [`phone`], [`date`]) plus jurisdiction-
-//! scoped sets re-exported from [`us`] (`"us.ssn"`,
-//! `"us.aba_routing"`, `"us.npi"`, `"us.dea_number"`) and [`uk`]
-//! (`"uk.nhs"`, `"uk.nino"`). Each validator is also re-exported
+//! ([`luhn`], [`iban`], [`phone`], [`date`], [`btc`]) plus
+//! jurisdiction-scoped sets re-exported from [`us`] (`"us.ssn"`,
+//! `"us.aba_routing"`, `"us.npi"`, `"us.dea_number"`,
+//! `"us.postal_code"`) and [`uk`]
+//! (`"uk.nhs"`, `"uk.nino"`, `"uk.driving_licence"`,
+//! `"uk.vehicle_registration"`). Each validator is also re-exported
 //! as a free function so consumers can compose a custom registry
 //! without taking the full set.
 //!
 //! [`Variant`]: crate::Variant
 //! [`Regex`]: crate::Regex
 
+mod btc;
 mod date;
 mod iban;
 mod luhn;
@@ -30,29 +33,52 @@ use std::borrow::Cow;
 use std::collections::HashMap;
 use std::sync::Arc;
 
+use nvisy_core::primitive::{CountryCode, LanguageTag};
+
+pub use self::btc::btc;
 pub use self::date::date;
 pub use self::iban::iban;
 pub use self::luhn::luhn;
 pub use self::phone::phone;
 
+/// Per-call hints supplied to validators alongside the matched
+/// string.
+///
+/// Carries the caller's [`RecognizerInput`] jurisdiction and
+/// language so validators that need region-aware semantics
+/// (e.g. `phone`) can honour the caller's intent instead of
+/// guessing across a fixed fallback set. Validators that don't
+/// need either field can ignore it via `_ctx`.
+///
+/// [`RecognizerInput`]: crate::recognition::RecognizerInput
+#[derive(Debug, Clone, Default)]
+pub struct ValidationContext {
+    /// ISO 3166-1 alpha-2 jurisdiction associated with the input,
+    /// when the caller specified one.
+    pub country: Option<CountryCode>,
+    /// BCP-47 language tag associated with the input, when the
+    /// caller specified one.
+    pub language: Option<LanguageTag>,
+}
+
 /// Post-match validator returning whether a matched string is
 /// structurally valid.
 ///
-/// Implemented by every `Fn(&str) -> bool + Send + Sync` via the
-/// blanket impl, so plain function pointers slot in without a
-/// wrapper type. Implement directly for types that need to carry
-/// state (e.g. a remote-lookup client).
+/// Implemented by every `Fn(&str, &ValidationContext) -> bool +
+/// Send + Sync` via the blanket impl, so plain function pointers
+/// slot in without a wrapper type. Implement directly for types
+/// that need to carry state (e.g. a remote-lookup client).
 pub trait Validator: Send + Sync {
     /// Return `true` to keep the match, `false` to drop it.
-    fn validate(&self, matched: &str) -> bool;
+    fn validate(&self, matched: &str, ctx: &ValidationContext) -> bool;
 }
 
 impl<F> Validator for F
 where
-    F: Fn(&str) -> bool + Send + Sync,
+    F: Fn(&str, &ValidationContext) -> bool + Send + Sync,
 {
-    fn validate(&self, matched: &str) -> bool {
-        self(matched)
+    fn validate(&self, matched: &str, ctx: &ValidationContext) -> bool {
+        self(matched, ctx)
     }
 }
 
@@ -81,32 +107,38 @@ impl ValidatorRegistry {
     /// Construct a registry pre-loaded with the shipped built-in
     /// validators.
     ///
-    /// Universal keys: `"luhn"`, `"iban"`, `"phone"`, `"date"`.
+    /// Universal keys: `"luhn"`, `"iban"`, `"phone"`, `"date"`,
+    /// `"crypto.btc"`.
     ///
     /// US-scoped: `"us.ssn"`, `"us.aba_routing"`, `"us.npi"`,
-    /// `"us.dea_number"`.
+    /// `"us.dea_number"`, `"us.postal_code"`.
     ///
-    /// UK-scoped: `"uk.nhs"`, `"uk.nino"`.
+    /// UK-scoped: `"uk.nhs"`, `"uk.nino"`,
+    /// `"uk.driving_licence"`, `"uk.vehicle_registration"`.
     #[must_use]
     pub fn builtin() -> Self {
         Self::empty()
-            .with("luhn", luhn)
-            .with("iban", iban)
+            .with_simple("luhn", luhn)
+            .with_simple("iban", iban)
             .with("phone", phone)
-            .with("date", date)
-            .with("us.ssn", us::ssn)
-            .with("us.aba_routing", us::aba_routing)
-            .with("us.npi", us::npi)
-            .with("us.dea_number", us::dea_number)
-            .with("uk.nhs", uk::nhs)
-            .with("uk.nino", uk::nino)
+            .with_simple("date", date)
+            .with_simple("crypto.btc", btc)
+            .with_simple("us.ssn", us::ssn)
+            .with_simple("us.aba_routing", us::aba_routing)
+            .with_simple("us.npi", us::npi)
+            .with_simple("us.dea_number", us::dea_number)
+            .with_simple("us.postal_code", us::postal_code)
+            .with_simple("uk.nhs", uk::nhs)
+            .with_simple("uk.nino", uk::nino)
+            .with_simple("uk.driving_licence", uk::driving_licence)
+            .with_simple("uk.vehicle_registration", uk::vehicle_registration)
     }
 
-    /// Register `validator` under `name`, overwriting any previous
-    /// entry with the same key.
+    /// Register a context-aware `validator` under `name`,
+    /// overwriting any previous entry with the same key.
     ///
     /// Override a built-in by registering under the same name
-    /// (e.g. `"luhn"`).
+    /// (e.g. `"phone"`).
     #[must_use]
     pub fn with<N, V>(mut self, name: N, validator: V) -> Self
     where
@@ -117,6 +149,19 @@ impl ValidatorRegistry {
         self
     }
 
+    /// Register a context-free `Fn(&str) -> bool` validator under
+    /// `name`. Convenience wrapper around [`Self::with`] for the
+    /// common case where the validator ignores
+    /// [`ValidationContext`].
+    #[must_use]
+    pub fn with_simple<N, F>(self, name: N, validator: F) -> Self
+    where
+        N: Into<Cow<'static, str>>,
+        F: Fn(&str) -> bool + Send + Sync + 'static,
+    {
+        self.with(name, move |s: &str, _: &ValidationContext| validator(s))
+    }
+
     /// Look up a validator by name.
     ///
     /// Returns `None` when the name is unregistered; the
diff --git a/crates/nvisy-pattern/src/validators/phone.rs b/crates/nvisy-pattern/src/validators/phone.rs
index 51d4cdd6..9539f0d4 100644
--- a/crates/nvisy-pattern/src/validators/phone.rs
+++ b/crates/nvisy-pattern/src/validators/phone.rs
@@ -1,73 +1,38 @@
-//! Phone-number structural validator.
-
-/// Return `true` if `value` has a plausible phone-number structure.
-///
-/// All non-digit characters are stripped, then checks:
-///
-/// - 7 to 15 digits (the ITU-T E.164 range).
-/// - When the original begins with `+` (explicit E.164), the
-///   digits must not start with `0` — no country code is `0…`.
-///   National formats such as UK `020 7946 0958` keep their
-///   trunk-prefix zero and remain valid.
-pub fn phone(value: &str) -> bool {
-    let digits: String = value.chars().filter(|c| c.is_ascii_digit()).collect();
-    let len = digits.len();
-
-    if !(7..=15).contains(&len) {
-        return false;
-    }
-
-    if value.trim_start().starts_with('+') && digits.starts_with('0') {
-        return false;
-    }
-
-    true
-}
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-
-    #[test]
-    fn valid_us_numbers() {
-        assert!(phone("+1-555-123-4567"));
-        assert!(phone("(555) 123-4567"));
-        assert!(phone("555.123.4567"));
-        assert!(phone("5551234567"));
-    }
-
-    #[test]
-    fn valid_international() {
-        assert!(phone("+44 20 7946 0958"));
-        assert!(phone("+49 30 12345678"));
-        assert!(phone("+81 3 1234 5678"));
-    }
-
-    #[test]
-    fn too_few_digits() {
-        assert!(!phone("12345"));
-        assert!(!phone("123-45"));
-    }
-
-    #[test]
-    fn too_many_digits() {
-        assert!(!phone("1234567890123456"));
-    }
-
-    #[test]
-    fn e164_starting_with_zero_rejected() {
-        assert!(!phone("+0123456789012"));
-    }
-
-    #[test]
-    fn national_format_with_trunk_zero_accepted() {
-        // UK national format keeps the leading 0 trunk prefix.
-        assert!(phone("020 7946 0958"));
-        assert!(phone("0207946 0958"));
-    }
-
-    #[test]
-    fn local_number_with_seven_digits() {
-        assert!(phone("123-4567"));
-    }
+//! Region-aware phone-number validator backed by the
+//! `phonenumber` crate (Rust port of Google's libphonenumber).
+//!
+//! Two paths:
+//!
+//! 1. Inputs that parse as E.164 (carry their own `+CC` prefix)
+//!    validate directly, regardless of caller context.
+//! 2. Inputs in national format (no leading `+`) need a region
+//!    hint. When [`ValidationContext::country`] is set we use it;
+//!    otherwise we fail closed — region-less national-format
+//!    matching is genuinely ambiguous (a 13-digit run can be a
+//!    valid IL/IN phone *and* the leading 13 digits of a Visa
+//!    PAN), so without a country signal we'd rather miss a
+//!    handful of national-format numbers than mislabel card and
+//!    account numbers as phones.
+
+use phonenumber::country::Id;
+use phonenumber::parse;
+use std::str::FromStr;
+
+use super::ValidationContext;
+
+/// Return `true` when `value` parses as a valid phone number
+/// for the caller's jurisdiction (or as E.164 with an explicit
+/// `+CC` prefix).
+pub fn phone(value: &str, ctx: &ValidationContext) -> bool {
+    let trimmed = value.trim();
+
+    if parse(None, trimmed).map(|n| n.is_valid()).unwrap_or(false) {
+        return true;
+    }
+
+    ctx.country
+        .and_then(|c| Id::from_str(c.as_str()).ok())
+        .and_then(|region| parse(Some(region), trimmed).ok())
+        .map(|n| n.is_valid())
+        .unwrap_or(false)
 }
diff --git a/crates/nvisy-pattern/src/validators/uk/driving_licence.rs b/crates/nvisy-pattern/src/validators/uk/driving_licence.rs
new file mode 100644
index 00000000..a914a47a
--- /dev/null
+++ b/crates/nvisy-pattern/src/validators/uk/driving_licence.rs
@@ -0,0 +1,71 @@
+//! UK Driving Licence (DVLA) structural validator.
+//!
+//! The 16-char DVLA number opens with a 5-char surname slot —
+//! letters padded on the right with `9`s when the surname is
+//! shorter than five characters. A licence whose surname slot
+//! is *all* `9`s, or that places a `9` before a letter (e.g.
+//! `9ABCD…`, `A9BCD…`), violates the padding rule and is
+//! structurally invalid.
+
+/// Return `true` when the leading 5-char surname slot of a
+/// 16-char DVLA driving licence number is structurally valid.
+///
+/// Rejects an all-`9` surname and any `9` that appears before a
+/// letter within the slot. Does not re-validate the rest of the
+/// regex-matched number — that is the regex's job.
+pub fn driving_licence(value: &str) -> bool {
+    let surname: Vec<char> = value
+        .chars()
+        .filter(|c| !c.is_ascii_whitespace())
+        .take(5)
+        .collect();
+    if surname.len() != 5 {
+        return false;
+    }
+    if surname.iter().all(|c| *c == '9') {
+        return false;
+    }
+    let mut padding_started = false;
+    for c in &surname {
+        match c {
+            '9' => padding_started = true,
+            c if c.is_ascii_uppercase() => {
+                if padding_started {
+                    return false;
+                }
+            }
+            _ => return false,
+        }
+    }
+    true
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn accepts_padded_short_surname() {
+        // 4-letter surname `MORG` padded with one `9`.
+        assert!(driving_licence("MORG9753116SM9IJ"));
+        // 5-letter surname `MORGA`, no padding.
+        assert!(driving_licence("MORGA753116SM9IJ"));
+    }
+
+    #[test]
+    fn rejects_all_nine_surname() {
+        assert!(!driving_licence("99999753116SM9IJ"));
+    }
+
+    #[test]
+    fn rejects_padding_before_letter() {
+        // `9` precedes a letter in the surname slot.
+        assert!(!driving_licence("9MORG753116SM9IJ"));
+        assert!(!driving_licence("A9ORG753116SM9IJ"));
+    }
+
+    #[test]
+    fn rejects_non_alpha_padding_in_surname() {
+        assert!(!driving_licence("M0RGA753116SM9IJ"));
+    }
+}
diff --git a/crates/nvisy-pattern/src/validators/uk/mod.rs b/crates/nvisy-pattern/src/validators/uk/mod.rs
index 37eadd66..44f7c352 100644
--- a/crates/nvisy-pattern/src/validators/uk/mod.rs
+++ b/crates/nvisy-pattern/src/validators/uk/mod.rs
@@ -1,12 +1,17 @@
 //! UK-specific post-match validators.
 //!
 //! Registered under the [`ValidatorRegistry::builtin`] set with
-//! dotted names — `"uk.nhs"`, `"uk.nino"`.
+//! dotted names — `"uk.nhs"`, `"uk.nino"`,
+//! `"uk.driving_licence"`, `"uk.vehicle_registration"`.
 //!
 //! [`ValidatorRegistry::builtin`]: super::ValidatorRegistry::builtin
 
+mod driving_licence;
 mod nhs;
 mod nino;
+mod vehicle_registration;
 
+pub use self::driving_licence::driving_licence;
 pub use self::nhs::nhs;
 pub use self::nino::nino;
+pub use self::vehicle_registration::vehicle_registration;
diff --git a/crates/nvisy-pattern/src/validators/uk/nino.rs b/crates/nvisy-pattern/src/validators/uk/nino.rs
index 0ff86604..4933c2b8 100644
--- a/crates/nvisy-pattern/src/validators/uk/nino.rs
+++ b/crates/nvisy-pattern/src/validators/uk/nino.rs
@@ -6,9 +6,14 @@
 /// Return `true` when `value`'s leading two-letter prefix is not
 /// a reserved NINO prefix.
 ///
-/// Reserved prefixes (case-insensitive): `BG`, `GB`, `NK`, `KN`,
-/// `NT`, `TN`, `ZZ`. The check is structural only — it does not
-/// confirm the trailing suffix letter or any HMRC issuance state.
+/// Reserved prefixes (case-insensitive):
+///
+/// - Whole pair: `BG`, `GB`, `NK`, `KN`, `NT`, `TN`, `ZZ`.
+/// - First letter `O` (HMRC reserved; not blocked by the regex
+///   character class, which spans `j-p`).
+///
+/// The check is structural only — it does not confirm the
+/// trailing suffix letter or any HMRC issuance state.
 pub fn nino(value: &str) -> bool {
     let prefix: String = value
         .chars()
@@ -19,6 +24,9 @@ pub fn nino(value: &str) -> bool {
         return false;
     }
     let upper = prefix.to_ascii_uppercase();
+    if upper.starts_with('O') {
+        return false;
+    }
     !matches!(
         upper.as_str(),
         "BG" | "GB" | "NK" | "KN" | "NT" | "TN" | "ZZ"
@@ -54,4 +62,10 @@ mod tests {
         assert!(!nino("12345678A"));
         assert!(!nino(""));
     }
+
+    #[test]
+    fn rejects_o_at_position_zero() {
+        assert!(!nino("OA123456A"));
+        assert!(!nino("oa123456A"));
+    }
 }
diff --git a/crates/nvisy-pattern/src/validators/uk/vehicle_registration.rs b/crates/nvisy-pattern/src/validators/uk/vehicle_registration.rs
new file mode 100644
index 00000000..7ff755e5
--- /dev/null
+++ b/crates/nvisy-pattern/src/validators/uk/vehicle_registration.rs
@@ -0,0 +1,69 @@
+//! UK current-format Vehicle Registration Mark (VRM) age-ID
+//! validator.
+//!
+//! Current (2001+) plates encode the issuance half-year as a
+//! 2-digit "age identifier" at 1-based positions 3-4:
+//!
+//! - March issue: `02..=29` (March 2002 through March 2029)
+//! - September issue: `51..=79` (September 2001 through September
+//!   2029)
+//!
+//! The recognizer regex permits the broader range `01..=79`
+//! (cheap to express); this validator narrows it to the issued
+//! windows that the DVLA actually allocates.
+
+/// Return `true` when the 2-digit age identifier embedded in a
+/// 7-char current-format UK plate falls inside an issued range.
+///
+/// Strips whitespace and `-`, then reads the characters at
+/// zero-based indices 2 and 3 of the canonicalized string.
+pub fn vehicle_registration(value: &str) -> bool {
+    let chars: Vec<char> = value
+        .chars()
+        .filter(|c| !c.is_ascii_whitespace() && *c != '-')
+        .collect();
+    if chars.len() != 7 {
+        return false;
+    }
+    let age = match (chars[2].to_digit(10), chars[3].to_digit(10)) {
+        (Some(a), Some(b)) => a * 10 + b,
+        _ => return false,
+    };
+    matches!(age, 2..=29 | 51..=79)
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn accepts_march_window() {
+        assert!(vehicle_registration("AB02ABC"));
+        assert!(vehicle_registration("AB29ABC"));
+        assert!(vehicle_registration("AB 15 ABC"));
+    }
+
+    #[test]
+    fn accepts_september_window() {
+        assert!(vehicle_registration("AB51ABC"));
+        assert!(vehicle_registration("AB79ABC"));
+        assert!(vehicle_registration("AB-65-ABC"));
+    }
+
+    #[test]
+    fn rejects_out_of_range() {
+        // `01` was never issued (the format launched with `51` in
+        // September 2001; March starts at `02`); presidio rejects it too.
+        assert!(!vehicle_registration("AB01ABC"));
+        assert!(!vehicle_registration("AB30ABC"));
+        assert!(!vehicle_registration("AB50ABC"));
+        assert!(!vehicle_registration("AB80ABC"));
+    }
+
+    #[test]
+    fn rejects_wrong_length() {
+        assert!(!vehicle_registration("AB51AB"));
+        assert!(!vehicle_registration("AB51ABCD"));
+        assert!(!vehicle_registration(""));
+    }
+}
diff --git a/crates/nvisy-pattern/src/validators/us/mod.rs b/crates/nvisy-pattern/src/validators/us/mod.rs
index 60b1d300..1a4d007c 100644
--- a/crates/nvisy-pattern/src/validators/us/mod.rs
+++ b/crates/nvisy-pattern/src/validators/us/mod.rs
@@ -8,9 +8,11 @@
 mod aba_routing;
 mod dea_number;
 mod npi;
+mod postal_code;
 mod ssn;
 
 pub use self::aba_routing::aba_routing;
 pub use self::dea_number::dea_number;
 pub use self::npi::npi;
+pub use self::postal_code::postal_code;
 pub use self::ssn::ssn;
diff --git a/crates/nvisy-pattern/src/validators/us/postal_code.rs b/crates/nvisy-pattern/src/validators/us/postal_code.rs
new file mode 100644
index 00000000..96a193a6
--- /dev/null
+++ b/crates/nvisy-pattern/src/validators/us/postal_code.rs
@@ -0,0 +1,40 @@
+//! US ZIP / ZIP+4 sanity validator.
+
+/// Return `true` if `value` is a plausible US ZIP code.
+///
+/// Accepts the 5-digit and 5-4 (`12345-1234`) forms; rejects the
+/// reserved all-zeros prefix (`00000`) which is not assigned by the
+/// USPS but is a frequent stand-in for "unknown".
+pub fn postal_code(value: &str) -> bool {
+    let digits: Vec<char> = value.chars().filter(char::is_ascii_digit).collect();
+    if digits.len() != 5 && digits.len() != 9 {
+        return false;
+    }
+    !digits[..5].iter().all(|c| *c == '0')
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn accepts_valid() {
+        assert!(postal_code("90210"));
+        assert!(postal_code("97477-1234"));
+        // USPS lowest assigned prefix is 00501 (Holtsville, NY).
+        assert!(postal_code("00501"));
+    }
+
+    #[test]
+    fn rejects_all_zero_prefix() {
+        assert!(!postal_code("00000"));
+        assert!(!postal_code("00000-1234"));
+    }
+
+    #[test]
+    fn rejects_wrong_length() {
+        assert!(!postal_code("1234"));
+        assert!(!postal_code("123456"));
+        assert!(!postal_code(""));
+    }
+}
diff --git a/crates/nvisy-pattern/testdata/builtin/uk/identity.txt b/crates/nvisy-pattern/testdata/builtin/uk/identity.txt
deleted file mode 100644
index 05dc0b42..00000000
--- a/crates/nvisy-pattern/testdata/builtin/uk/identity.txt
+++ /dev/null
@@ -1,12 +0,0 @@
-Patient handover for Mrs A. Patel.
-
-Personal details:
-  - NHS number: 943 476 5919
-  - NINO: AB123456C
-  - Driving licence (DVLA): MORGA753116SM9IJ
-  - Address: 10 Downing Street, London SW1A 2AA
-
-Vehicle: BMW 3 Series, registration AB51 ABC, V5C on file.
-
-Please update the patient record (national health service form 4)
-and bill the National Insurance reference shown above.
diff --git a/crates/nvisy-toolkit/src/detection/chunks.rs b/crates/nvisy-toolkit/src/detection/chunks.rs
index 6ae8f07c..2567b26b 100644
--- a/crates/nvisy-toolkit/src/detection/chunks.rs
+++ b/crates/nvisy-toolkit/src/detection/chunks.rs
@@ -71,7 +71,8 @@ impl RecognizerRegistryExt for RecognizerRegistry {
     {
         let mut out = Vec::new();
         while let Some(chunk) = handler.next_chunk().await? {
-            let input = RecognizerInput::new(chunk.data.clone().into());
+            let input = RecognizerInput::new(chunk.data.clone().into())
+                .with_context_hints(M::chunk_hints(&chunk.location));
             let text_entities = self.run::<Text>(input).await?;
             for text_entity in text_entities {
                 let Some(loc) = handler
@@ -108,6 +109,20 @@ pub trait LiftedFromText: Modality + Sized {
     /// against the source bytes of a chunk, plus the pre-lifted
     /// location, and produce a `Self`-modality entity.
     fn from_text(text_entity: Entity<Text>, location: Self::Location) -> Entity<Self>;
+
+    /// Out-of-band context strings the recognizer should treat
+    /// as in-context for a chunk at `location`. Surfaces handler
+    /// metadata that doesn't live inside the chunk's payload —
+    /// notably the column header of a CSV/XLSX cell, which lifts
+    /// confidence on a low-base-score regex match the way the
+    /// surrounding sentence would in plain text.
+    ///
+    /// Default returns an empty `Vec`; modalities whose chunks
+    /// don't carry out-of-band metadata (`Text`, image regions,
+    /// audio segments) keep that default.
+    fn chunk_hints(_location: &Self::Location) -> Vec<String> {
+        Vec::new()
+    }
 }
 
 impl LiftedFromText for Text {
@@ -133,4 +148,8 @@ impl LiftedFromText for Tabular {
         }
         builder.build().expect("entity reshape")
     }
+
+    fn chunk_hints(location: &TabularLocation) -> Vec<String> {
+        location.column_name.iter().cloned().collect()
+    }
 }

From c63a0e40722e93acd9905c7309f430a48653f188 Mon Sep 17 00:00:00 2001
From: Oleh Martsokha <o.martsokha@gmail.com>
Date: Tue, 16 Jun 2026 08:12:30 +0200
Subject: [PATCH 12/14] feat(codec,toolkit): chunk-level context hints for HTML
 + JSON
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Extend the tabular-cell hint mechanism to all chunked formats by
making hints a first-class field on Chunk<M>, populated alongside
data + location during next_chunk.

Architecture:

- Chunk<M> gains pub hints: Vec<String>. Hints are metadata the
  chunk's structural neighbours surface — CSV column headers,
  HTML parent-element text, JSON object keys — for downstream
  context-aware recognizers.
- nvisy-toolkit detect() reads chunk.hints directly. The earlier
  LiftedFromText::chunk_hints and the briefly-added
  Handler::chunk_hints methods are both removed; the field on
  Chunk avoids a second handler call to recompute information
  next_chunk already had.
- Handlers without useful out-of-band metadata (TXT, PDF, image,
  audio) initialise the field with Vec::new().

CSV handler:

- Populates chunk.hints from chunk.location.column_name.
  Replaces the prior LiftedFromText::chunk_hints<TabularLocation>
  override, which had the same effect via a less-direct path.

HTML handler:

- RedactableItem gains pub hints: Vec<String>. The DOM walk in
  build_items computes a per-text-node hint by collecting the
  text of the node's nearest block-level ancestor (excluding
  the node's own text). nearest_block_ancestor walks parents
  until it finds a tag in is_block_element's curated set (p,
  div, li, td, th, h1-h6, blockquote, dt, dd, section, article,
  aside, header, footer, main, nav, figcaption, caption).
- Stopping at the immediate inline parent would yield only the
  chunk's own text — the surrounding sentence lives in the
  enclosing block. `<p>...the payment card <code>4111…</code>
  is on file</p>` gives the <code> chunk a hint of "the payment
  card is on file", which lifts CC=0.3 above threshold via the
  existing context-enhancer.
- Note: neither html5ever, markup5ever, nor scraper exposes a
  block/inline classifier; the curated list is the simplest
  honest implementation. Future HTML elements not in the list
  gracefully degrade (walk continues to root, hints stay empty)
  rather than corrupting detection.

JSON handler:

- Leaf gains pub hints: Vec<String>. parse_value and parse_array
  now thread an Option<&str> key_context; parse_object captures
  the just-parsed key and passes it to parse_value for the
  value. Array elements inherit the containing object's key so
  {"cards": ["4111…", "5555…"]} gives both PANs the "cards"
  hint. Top-level scalars and keys themselves stay hint-less.
- The leaf's hint is copied onto Chunk.hints in next_chunk so
  recognizers see it via input.context_hints.

Resolves codec_e2e_html and codec_e2e_json payment_card
assertions without touching scores.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
---
 crates/nvisy-codec/src/core/handler.rs        | 10 +-
 .../src/handler/audio/mp3_handler.rs          |  2 +-
 .../src/handler/audio/wav_handler.rs          |  2 +-
 .../nvisy-codec/src/handler/image/macros.rs   |  1 +
 .../src/handler/rich/pdf_handler.rs           |  1 +
 .../src/handler/tabular/csv_handler.rs        |  3 +-
 .../src/handler/text/html_handler.rs          | 11 ++-
 .../src/handler/text/html_loader.rs           | 91 +++++++++++++++++++
 .../src/handler/text/json_handler.rs          | 34 +++++--
 .../src/handler/text/txt_handler.rs           |  1 +
 crates/nvisy-toolkit/src/detection/chunks.rs  | 20 +---
 11 files changed, 144 insertions(+), 32 deletions(-)

diff --git a/crates/nvisy-codec/src/core/handler.rs b/crates/nvisy-codec/src/core/handler.rs
index 83106862..9ecd97b6 100644
--- a/crates/nvisy-codec/src/core/handler.rs
+++ b/crates/nvisy-codec/src/core/handler.rs
@@ -29,13 +29,21 @@ use crate::content::{ContentData, ContentSource};
 ///
 /// `data` is the per-modality wire payload; `location` is the
 /// coordinate the handler will accept in [`Handler::read`] /
-/// [`Handler::redact`] to address the same chunk again.
+/// [`Handler::redact`] to address the same chunk again. `hints`
+/// carries out-of-band context strings the chunk's structural
+/// neighbours surface — CSV/XLSX column headers, JSON object
+/// keys, HTML parent-element text — for downstream context-aware
+/// recognizers; handlers without such metadata leave it empty.
 #[derive(Debug, Clone, PartialEq)]
 pub struct Chunk<M: Modality> {
     /// Coordinate addressing this chunk inside the handler.
     pub location: M::Location,
     /// Wire payload at the chunk's location.
     pub data: M::Data,
+    /// Out-of-band context strings recognizers should treat as
+    /// in-context (column headers, parent element text, …).
+    /// Empty when the handler has no such metadata to surface.
+    pub hints: Vec<String>,
 }
 
 /// Per-modality capability trait every format handler implements.
diff --git a/crates/nvisy-codec/src/handler/audio/mp3_handler.rs b/crates/nvisy-codec/src/handler/audio/mp3_handler.rs
index e3d376a6..daa9894c 100644
--- a/crates/nvisy-codec/src/handler/audio/mp3_handler.rs
+++ b/crates/nvisy-codec/src/handler/audio/mp3_handler.rs
@@ -108,7 +108,7 @@ impl Handler<Audio> for Mp3Handler {
         let location = AudioLocation::new(TimeSpan::new(0, duration_us));
         let data = AudioData::new(self.bytes.clone()).with_filename(self.filename.clone());
         self.yielded = true;
-        Ok(Some(Chunk { location, data }))
+        Ok(Some(Chunk { location, data, hints: Vec::new() }))
     }
 
     async fn read(&self, _location: &AudioLocation) -> Result<Option<AudioData>, Error> {
diff --git a/crates/nvisy-codec/src/handler/audio/wav_handler.rs b/crates/nvisy-codec/src/handler/audio/wav_handler.rs
index 4eff21ad..85b2b084 100644
--- a/crates/nvisy-codec/src/handler/audio/wav_handler.rs
+++ b/crates/nvisy-codec/src/handler/audio/wav_handler.rs
@@ -111,7 +111,7 @@ impl Handler<Audio> for WavHandler {
         let location = AudioLocation::new(TimeSpan::new(0, duration_us));
         let data = AudioData::new(self.bytes.clone()).with_filename(self.filename.clone());
         self.yielded = true;
-        Ok(Some(Chunk { location, data }))
+        Ok(Some(Chunk { location, data, hints: Vec::new() }))
     }
 
     async fn read(&self, _location: &AudioLocation) -> Result<Option<AudioData>, Error> {
diff --git a/crates/nvisy-codec/src/handler/image/macros.rs b/crates/nvisy-codec/src/handler/image/macros.rs
index 97e76628..e32d3e48 100644
--- a/crates/nvisy-codec/src/handler/image/macros.rs
+++ b/crates/nvisy-codec/src/handler/image/macros.rs
@@ -105,6 +105,7 @@ macro_rules! impl_image_handler {
                 Ok(Some($crate::Chunk {
                     location,
                     data,
+                    hints: ::std::vec::Vec::new(),
                 }))
             }
 
diff --git a/crates/nvisy-codec/src/handler/rich/pdf_handler.rs b/crates/nvisy-codec/src/handler/rich/pdf_handler.rs
index 62fe64d3..f67c283d 100644
--- a/crates/nvisy-codec/src/handler/rich/pdf_handler.rs
+++ b/crates/nvisy-codec/src/handler/rich/pdf_handler.rs
@@ -195,6 +195,7 @@ impl Handler<Text> for PdfHandler {
                 ..Default::default()
             },
             data: TextData::from(text.as_str()),
+            hints: Vec::new(),
         }))
     }
 
diff --git a/crates/nvisy-codec/src/handler/tabular/csv_handler.rs b/crates/nvisy-codec/src/handler/tabular/csv_handler.rs
index 80924422..8180163f 100644
--- a/crates/nvisy-codec/src/handler/tabular/csv_handler.rs
+++ b/crates/nvisy-codec/src/handler/tabular/csv_handler.rs
@@ -113,9 +113,10 @@ impl Handler<Tabular> for CsvHandler {
         };
         let cell = self.cell_at(row, col).expect("bounds checked above");
         let data = TextData::from(cell.to_owned());
+        let hints = location.column_name.iter().cloned().collect();
 
         self.cursor.col += 1;
-        Ok(Some(Chunk { location, data }))
+        Ok(Some(Chunk { location, data, hints }))
     }
 
     fn lift_chunk(
diff --git a/crates/nvisy-codec/src/handler/text/html_handler.rs b/crates/nvisy-codec/src/handler/text/html_handler.rs
index ab9039b4..c47ecad5 100644
--- a/crates/nvisy-codec/src/handler/text/html_handler.rs
+++ b/crates/nvisy-codec/src/handler/text/html_handler.rs
@@ -63,6 +63,12 @@ pub struct RedactableItem {
     /// Text-node text, comment body, attribute value, or script /
     /// style element text.
     pub value: String,
+    /// Out-of-band context strings surfaced from the item's
+    /// structural neighbours — currently, for text nodes, the text
+    /// of the nearest block-level ancestor minus the item's own.
+    /// Empty for items without useful surrounding context (e.g.
+    /// non-text-node items, or a block holding only this item).
+    pub hints: Vec<String>,
 }
 
 /// Where a [`RedactableItem`] lives inside the parsed HTML
@@ -150,7 +156,9 @@ impl Handler<Text> for HtmlHandler {
         let i = self.cursor;
         let start = self.item_starts[i];
         let end = self.item_starts[i + 1];
-        let value = &self.data.items[i].value;
+        let item = &self.data.items[i];
+        let value = &item.value;
+        let hints = item.hints.clone();
         self.cursor += 1;
         Ok(Some(Chunk {
             location: TextLocation {
@@ -159,6 +167,7 @@ impl Handler<Text> for HtmlHandler {
                 ..Default::default()
             },
             data: TextData::from(value.as_str()),
+            hints,
         }))
     }
 
diff --git a/crates/nvisy-codec/src/handler/text/html_loader.rs b/crates/nvisy-codec/src/handler/text/html_loader.rs
index 5b544750..ba255872 100644
--- a/crates/nvisy-codec/src/handler/text/html_loader.rs
+++ b/crates/nvisy-codec/src/handler/text/html_loader.rs
@@ -99,9 +99,11 @@ fn build_items(dom: &Html, loader: &HtmlLoader) -> Vec<RedactableItem> {
         match node.value() {
             Node::Text(t) => {
                 if !skip_text_under(node) {
+                    let hints = sibling_text_hint(node, &t.text);
                     items.push(RedactableItem {
                         kind: RedactableKind::TextNode { index: text_index },
                         value: t.text.to_string(),
+                        hints,
                     });
                 }
                 text_index += 1;
@@ -112,6 +114,7 @@ fn build_items(dom: &Html, loader: &HtmlLoader) -> Vec<RedactableItem> {
                         index: comment_index,
                     },
                     value: c.comment.to_string(),
+                    hints: Vec::new(),
                 });
                 comment_index += 1;
             }
@@ -131,6 +134,7 @@ fn build_items(dom: &Html, loader: &HtmlLoader) -> Vec<RedactableItem> {
                             },
                         },
                         value: val.to_string(),
+                        hints: Vec::new(),
                     });
                 }
 
@@ -154,6 +158,7 @@ fn build_items(dom: &Html, loader: &HtmlLoader) -> Vec<RedactableItem> {
                             target,
                         },
                         value: body,
+                        hints: Vec::new(),
                     });
                 }
 
@@ -166,6 +171,92 @@ fn build_items(dom: &Html, loader: &HtmlLoader) -> Vec<RedactableItem> {
     items
 }
 
+/// Build the out-of-band hint for a text node from the text of
+/// its nearest block-level ancestor.
+///
+/// Used by [`build_items`] to surface the surrounding sentence
+/// (`"the payment card 4111… is on file"`) as an out-of-band
+/// hint when a text node sits inside an inline wrapper
+/// (`<code>4111…</code>`) that splits the prose into multiple
+/// chunks. The ancestor comes from `nearest_block_ancestor`
+/// (tags listed in `is_block_element`) — stopping at the immediate
+/// inline parent would yield only the chunk's own text.
+///
+/// Every text node under that ancestor is joined with a single
+/// space, skipping nodes whose text equals `own_text` (matched by
+/// content, so identically-worded siblings are skipped too).
+/// Returns an empty `Vec` when there is no block ancestor or the
+/// joined text trims to nothing; otherwise a one-element `Vec`.
+fn sibling_text_hint(text_node: NodeRef<'_, Node>, own_text: &str) -> Vec<String> {
+    let Some(ancestor) = nearest_block_ancestor(text_node) else {
+        return Vec::new();
+    };
+    let mut buf = String::new();
+    for descendant in ancestor.descendants() {
+        if let Node::Text(t) = descendant.value() {
+            let chunk = t.text.as_ref();
+            if chunk == own_text {
+                continue;
+            }
+            if !buf.is_empty() {
+                buf.push(' ');
+            }
+            buf.push_str(chunk);
+        }
+    }
+    let trimmed = buf.trim();
+    if trimmed.is_empty() {
+        Vec::new()
+    } else {
+        vec![trimmed.to_owned()]
+    }
+}
+
+/// Walk up from `text_node` to the first ancestor element whose tag
+/// `is_block_element` accepts; `None` if the chain ends without one.
+/// This is the "sentence boundary" around inline wrappers like `<code>`.
+fn nearest_block_ancestor(text_node: NodeRef<'_, Node>) -> Option<NodeRef<'_, Node>> {
+    let mut current = text_node.parent();
+    while let Some(node) = current {
+        if let Node::Element(e) = node.value() {
+            if is_block_element(e.name.local.as_ref()) {
+                return Some(node);
+            }
+        }
+        current = node.parent();
+    }
+    None
+}
+
+/// Whether `name` is one of the tags treated as a block-level boundary.
+fn is_block_element(name: &str) -> bool {
+    matches!(
+        name,
+        "p" | "div"
+            | "li"
+            | "td"
+            | "th"
+            | "h1"
+            | "h2"
+            | "h3"
+            | "h4"
+            | "h5"
+            | "h6"
+            | "blockquote"
+            | "dt"
+            | "dd"
+            | "section"
+            | "article"
+            | "aside"
+            | "header"
+            | "footer"
+            | "main"
+            | "nav"
+            | "figcaption"
+            | "caption"
+    )
+}
+
 /// Don't emit text-node items for text that lives directly inside
 /// a `<script>` or `<style>` element — those bodies are handled by
 /// the script / style policy on the parent element instead. The
diff --git a/crates/nvisy-codec/src/handler/text/json_handler.rs b/crates/nvisy-codec/src/handler/text/json_handler.rs
index 827f3b9f..f99ff666 100644
--- a/crates/nvisy-codec/src/handler/text/json_handler.rs
+++ b/crates/nvisy-codec/src/handler/text/json_handler.rs
@@ -62,6 +62,11 @@ pub(super) struct Leaf {
     /// `"…"` with `\\` / `\"` escapes; for [`LeafKind::Scalar`]
     /// it is the bare literal.
     pub serialized: String,
+    /// Out-of-band context strings (currently the enclosing
+    /// object key) surfaced to recognizers as hints; empty for
+    /// keys and for value leaves outside any object (e.g. a
+    /// top-level scalar).
+    pub hints: Vec<String>,
 }
 
 #[derive(Debug, Clone, Copy, PartialEq, Eq)]
@@ -129,6 +134,7 @@ impl Handler<Text> for JsonHandler {
                         ..Default::default()
                     },
                     data: TextData::from(leaf.value.as_str()),
+                    hints: leaf.hints.clone(),
                 }));
             }
         }
@@ -379,7 +385,7 @@ fn value_to_source_offset(leaf: &Leaf, slot_start: usize, value_offset: usize) -
 /// well-formed JSON.
 pub(super) fn parse_slots(src: &str) -> Result<Vec<Slot>, Error> {
     let mut p = SlotParser::new(src);
-    p.parse_value()?;
+    p.parse_value(None)?;
     p.flush_passthrough();
     p.consume_whitespace();
     p.flush_passthrough();
@@ -452,18 +458,24 @@ impl<'a> SlotParser<'a> {
         Ok(())
     }
 
-    fn parse_value(&mut self) -> Result<(), Error> {
+    fn parse_value(&mut self, key_context: Option<&str>) -> Result<(), Error> {
         self.consume_whitespace();
         match self.peek() {
             Some(b'{') => self.parse_object(),
-            Some(b'[') => self.parse_array(),
+            Some(b'[') => self.parse_array(key_context),
             Some(b'"') => {
-                let leaf = self.parse_string_leaf(LeafKind::StringValue)?;
+                let mut leaf = self.parse_string_leaf(LeafKind::StringValue)?;
+                if let Some(k) = key_context {
+                    leaf.hints.push(k.to_owned());
+                }
                 self.push_leaf(leaf);
                 Ok(())
             }
             Some(b't') | Some(b'f') | Some(b'n') | Some(b'-') | Some(b'0'..=b'9') => {
-                let leaf = self.parse_scalar()?;
+                let mut leaf = self.parse_scalar()?;
+                if let Some(k) = key_context {
+                    leaf.hints.push(k.to_owned());
+                }
                 self.push_leaf(leaf);
                 Ok(())
             }
@@ -488,10 +500,11 @@ impl<'a> SlotParser<'a> {
         loop {
             self.consume_whitespace();
             let key = self.parse_string_leaf(LeafKind::Key)?;
+            let key_value = key.value.clone();
             self.push_leaf(key);
             self.consume_whitespace();
             self.consume_punct(b':')?;
-            self.parse_value()?;
+            self.parse_value(Some(&key_value))?;
             self.consume_whitespace();
             match self.peek() {
                 Some(b',') => {
@@ -511,7 +524,7 @@ impl<'a> SlotParser<'a> {
         }
     }
 
-    fn parse_array(&mut self) -> Result<(), Error> {
+    fn parse_array(&mut self, key_context: Option<&str>) -> Result<(), Error> {
         self.consume_punct(b'[')?;
         self.consume_whitespace();
         if self.peek() == Some(b']') {
@@ -519,7 +532,10 @@ impl<'a> SlotParser<'a> {
             return Ok(());
         }
         loop {
-            self.parse_value()?;
+            // Array elements inherit the containing object key as
+            // their hint — `{"cards": ["4111…", "5555…"]}` should
+            // treat both PANs as living under `cards`.
+            self.parse_value(key_context)?;
             self.consume_whitespace();
             match self.peek() {
                 Some(b',') => {
@@ -558,6 +574,7 @@ impl<'a> SlotParser<'a> {
                         kind,
                         value,
                         serialized,
+                        hints: Vec::new(),
                     });
                 }
                 Some(b'\\') => {
@@ -639,6 +656,7 @@ impl<'a> SlotParser<'a> {
             kind: LeafKind::Scalar,
             value: literal.clone(),
             serialized: literal,
+            hints: Vec::new(),
         })
     }
 }
diff --git a/crates/nvisy-codec/src/handler/text/txt_handler.rs b/crates/nvisy-codec/src/handler/text/txt_handler.rs
index 21bc8de8..ed5b6e2e 100644
--- a/crates/nvisy-codec/src/handler/text/txt_handler.rs
+++ b/crates/nvisy-codec/src/handler/text/txt_handler.rs
@@ -84,6 +84,7 @@ impl Handler<Text> for TxtHandler {
                 ..Default::default()
             },
             data: TextData::from(line.as_str()),
+            hints: Vec::new(),
         }))
     }
 
diff --git a/crates/nvisy-toolkit/src/detection/chunks.rs b/crates/nvisy-toolkit/src/detection/chunks.rs
index 2567b26b..e16f71b8 100644
--- a/crates/nvisy-toolkit/src/detection/chunks.rs
+++ b/crates/nvisy-toolkit/src/detection/chunks.rs
@@ -72,7 +72,7 @@ impl RecognizerRegistryExt for RecognizerRegistry {
         let mut out = Vec::new();
         while let Some(chunk) = handler.next_chunk().await? {
             let input = RecognizerInput::new(chunk.data.clone().into())
-                .with_context_hints(M::chunk_hints(&chunk.location));
+                .with_context_hints(chunk.hints.clone());
             let text_entities = self.run::<Text>(input).await?;
             for text_entity in text_entities {
                 let Some(loc) = handler
@@ -109,20 +109,6 @@ pub trait LiftedFromText: Modality + Sized {
     /// against the source bytes of a chunk, plus the pre-lifted
     /// location, and produce a `Self`-modality entity.
     fn from_text(text_entity: Entity<Text>, location: Self::Location) -> Entity<Self>;
-
-    /// Out-of-band context strings the recognizer should treat
-    /// as in-context for a chunk at `location`. Surfaces handler
-    /// metadata that doesn't live inside the chunk's payload —
-    /// notably the column header of a CSV/XLSX cell, which lifts
-    /// confidence on a low-base-score regex match the way the
-    /// surrounding sentence would in plain text.
-    ///
-    /// Default returns an empty `Vec`; modalities whose chunks
-    /// don't carry out-of-band metadata (`Text`, image regions,
-    /// audio segments) keep that default.
-    fn chunk_hints(_location: &Self::Location) -> Vec<String> {
-        Vec::new()
-    }
 }
 
 impl LiftedFromText for Text {
@@ -148,8 +134,4 @@ impl LiftedFromText for Tabular {
         }
         builder.build().expect("entity reshape")
     }
-
-    fn chunk_hints(location: &TabularLocation) -> Vec<String> {
-        location.column_name.iter().cloned().collect()
-    }
 }

From e34babce25c659629f7b6113e9b501a28b932eb7 Mon Sep 17 00:00:00 2001
From: Oleh Martsokha <o.martsokha@gmail.com>
Date: Tue, 16 Jun 2026 11:06:51 +0200
Subject: [PATCH 13/14] refactor: hoist fully-qualified paths into use imports
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Apply the prefer-use-imports style across the workspace: replace
inline `foo::bar::Baz::method(...)` and `impl std::fmt::Debug ...`
patterns with `use` lines at the top of each file, then refer to
the imported name directly.

Touches 51 files across nvisy-core, nvisy-context, nvisy-codec,
nvisy-engine, nvisy-fake, nvisy-llm, nvisy-pattern, nvisy-server,
nvisy-toolkit. Hoisted both crate paths (axum, aide, tower_http,
rig, image, symphonia, nvisy_core::*) and std paths
(std::fmt, std::cmp::{Ordering, Reverse}, std::marker::PhantomData,
std::any::type_name, std::io::ErrorKind, std::path::Path,
std::slice, std::vec).

Exceptions preserved:

- `#[async_trait::async_trait]` attributes stay inlined.
- `tracing::*` macros and attributes stay inlined.
- `*macros.rs` files keep fully-qualified paths for macro hygiene.
- String literals inside `#[serde(...)]` attributes are not
  code-position paths.

For collisions with locally-defined types (e.g. `Path`, `Json` in
the server crate), imports use aliasing — `axum::extract::Path
as AxumPath` rather than inlining.

Drive-by simplification in nvisy-llm/src/backend/rig/mod.rs: rig
0.38 now implements CompletionClient for Gemini, so the
`gemini::completion::CompletionModel::new(...)` workaround from
rig-core 0.31 is gone and the build site uses
`client.completion_model(...)` like the other providers.

No behavior changes; workspace test, clippy, doc all clean.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
---
 crates/nvisy-codec/src/core/loader.rs         |  5 ++--
 crates/nvisy-codec/src/document/mod.rs        |  9 ++++--
 .../src/handler/audio/mp3_codec.rs            | 17 +++++------
 .../src/handler/audio/mp3_handler.rs          |  6 +++-
 .../src/handler/audio/wav_handler.rs          |  6 +++-
 .../nvisy-codec/src/handler/image/redact.rs   |  8 ++---
 .../src/handler/tabular/csv_handler.rs        |  6 +++-
 crates/nvisy-context/src/enhancer/mod.rs      |  8 ++---
 crates/nvisy-context/src/io/tokens.rs         |  5 ++--
 .../src/entity/label/entity_label.rs          |  8 +++--
 crates/nvisy-core/src/modality/audio.rs       |  6 ++--
 crates/nvisy-core/src/modality/image.rs       |  4 ++-
 crates/nvisy-core/src/modality/tabular.rs     |  6 ++--
 crates/nvisy-core/src/modality/text.rs        |  5 ++--
 .../src/primitive/confidence/value.rs         |  3 +-
 crates/nvisy-engine/src/core/policy_store.rs  | 17 ++++++-----
 .../src/detection/phases/detection.rs         |  6 ++--
 crates/nvisy-engine/src/detection/pipeline.rs |  3 +-
 crates/nvisy-engine/src/detection/state.rs    |  3 +-
 .../nvisy-engine/src/redaction/phases/mod.rs  |  4 +--
 crates/nvisy-engine/src/redaction/state.rs    |  3 +-
 crates/nvisy-fake/src/anonymizer/mod.rs       | 11 ++++---
 crates/nvisy-llm/src/backend/http/mod.rs      |  3 +-
 crates/nvisy-llm/src/backend/rig/inner.rs     | 16 +++++-----
 crates/nvisy-llm/src/backend/rig/mod.rs       |  6 +---
 crates/nvisy-llm/src/error.rs                 |  3 +-
 .../nvisy-llm/src/provider/unauthenticated.rs |  5 ++--
 .../nvisy-llm/src/recognition/file_prompt.rs  |  7 ++---
 crates/nvisy-llm/tests/file_prompt.rs         | 12 ++++----
 .../src/recognition/recognizer.rs             |  5 +++-
 .../src/shipped/dictionaries/world.rs         |  3 +-
 .../nvisy-pattern/src/shipped/patterns/mod.rs | 12 ++++++--
 .../nvisy-pattern/src/shipped/patterns/uk.rs  |  3 +-
 .../nvisy-pattern/src/shipped/patterns/us.rs  |  3 +-
 .../src/shipped/patterns/world.rs             |  3 +-
 crates/nvisy-pattern/src/validators/mod.rs    |  5 ++--
 crates/nvisy-pattern/src/validators/phone.rs  |  3 +-
 crates/nvisy-pattern/src/validators/uk/nhs.rs |  6 +---
 .../src/validators/us/dea_number.rs           |  5 +++-
 crates/nvisy-pattern/tests/fixtures/mod.rs    | 12 ++------
 crates/nvisy-server/src/extract/json.rs       | 30 +++++++++----------
 crates/nvisy-server/src/extract/path.rs       | 17 +++++------
 .../src/handler/error/http_error.rs           | 14 +++++----
 crates/nvisy-server/src/handler/mod.rs        |  3 +-
 .../src/middleware/observability.rs           |  4 +--
 .../src/middleware/specification.rs           |  4 +--
 crates/nvisy-toolkit/src/deduplication/mod.rs | 18 ++++++-----
 crates/nvisy-toolkit/src/extraction/ocr.rs    |  3 +-
 .../nvisy-toolkit/src/redaction/registry.rs   |  5 ++--
 .../nvisy-toolkit/tests/fixtures/pipeline.rs  |  3 +-
 .../tests/recognition_registry.rs             |  4 +--
 51 files changed, 196 insertions(+), 170 deletions(-)

diff --git a/crates/nvisy-codec/src/core/loader.rs b/crates/nvisy-codec/src/core/loader.rs
index bd3e0a4f..f0dc5120 100644
--- a/crates/nvisy-codec/src/core/loader.rs
+++ b/crates/nvisy-codec/src/core/loader.rs
@@ -22,6 +22,7 @@
 //! [`Format::loader`]: super::Format::loader
 //! [`FormatId`]: super::FormatId
 
+use std::marker::PhantomData;
 use std::sync::Arc;
 
 use nvisy_core::Error;
@@ -100,7 +101,7 @@ where
 {
     Arc::new(LoaderAdapter {
         loader,
-        _phantom: std::marker::PhantomData,
+        _phantom: PhantomData,
     })
 }
 
@@ -109,7 +110,7 @@ where
 /// [`erase`]; not part of the public API.
 struct LoaderAdapter<M: Modality, L: Loader<M>> {
     loader: L,
-    _phantom: std::marker::PhantomData<fn() -> M>,
+    _phantom: PhantomData<fn() -> M>,
 }
 
 #[async_trait::async_trait]
diff --git a/crates/nvisy-codec/src/document/mod.rs b/crates/nvisy-codec/src/document/mod.rs
index 7afdcff4..7d0a668f 100644
--- a/crates/nvisy-codec/src/document/mod.rs
+++ b/crates/nvisy-codec/src/document/mod.rs
@@ -29,6 +29,9 @@ mod tabular;
 #[cfg(feature = "internal_text")]
 mod text;
 
+use std::any::type_name;
+use std::fmt;
+
 use derive_more::From;
 #[cfg(feature = "internal_audio")]
 use nvisy_core::modality::Audio;
@@ -226,11 +229,11 @@ impl<M: Modality> DocumentHandle<M> {
     }
 }
 
-impl<M: Modality> std::fmt::Debug for DocumentHandle<M> {
-    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+impl<M: Modality> fmt::Debug for DocumentHandle<M> {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
         f.debug_struct("DocumentHandle")
             .field("format_id", &self.format_id)
-            .field("modality", &std::any::type_name::<M>())
+            .field("modality", &type_name::<M>())
             .finish()
     }
 }
diff --git a/crates/nvisy-codec/src/handler/audio/mp3_codec.rs b/crates/nvisy-codec/src/handler/audio/mp3_codec.rs
index ed59c87d..e8b6f798 100644
--- a/crates/nvisy-codec/src/handler/audio/mp3_codec.rs
+++ b/crates/nvisy-codec/src/handler/audio/mp3_codec.rs
@@ -16,12 +16,13 @@
 //! redaction of MP3 streams. Callers wanting bit-perfect preservation
 //! of unredacted regions should not round-trip.
 
-use std::io::Cursor;
+use std::io::{Cursor, ErrorKind as IoErrorKind};
 
 use bytes::Bytes;
 use mp3lame_encoder::{Builder, FlushNoGap, InterleavedPcm, MonoPcm};
 use nvisy_core::Error;
-use symphonia::core::audio::{Audio as _, GenericAudioBufferRef};
+use symphonia::core::audio::conv::{ConvertibleSample, FromSample};
+use symphonia::core::audio::{Audio as _, AudioBuffer, GenericAudioBufferRef};
 use symphonia::core::codecs::audio::AudioDecoderOptions;
 use symphonia::core::errors::Error as SymError;
 use symphonia::core::formats::probe::Hint;
@@ -139,9 +140,7 @@ pub(super) fn decode_to_pcm(bytes: &Bytes) -> Result<DecodedMp3, Error> {
         let packet = match reader.next_packet() {
             Ok(Some(p)) => p,
             Ok(None) => break,
-            Err(SymError::IoError(io_err))
-                if io_err.kind() == std::io::ErrorKind::UnexpectedEof =>
-            {
+            Err(SymError::IoError(io_err)) if io_err.kind() == IoErrorKind::UnexpectedEof => {
                 break;
             }
             Err(e) => {
@@ -198,14 +197,12 @@ fn append_interleaved_f32(
     channels: usize,
     out: &mut Vec<f32>,
 ) {
-    use symphonia::core::audio::conv::ConvertibleSample;
-
     fn extend<S: ConvertibleSample + Copy>(
-        buf: &symphonia::core::audio::AudioBuffer<S>,
+        buf: &AudioBuffer<S>,
         channels: usize,
         out: &mut Vec<f32>,
     ) where
-        f32: symphonia::core::audio::conv::FromSample<S>,
+        f32: FromSample<S>,
     {
         let frames = buf.frames();
         out.reserve(frames * channels);
@@ -213,7 +210,7 @@ fn append_interleaved_f32(
             for ch in 0..channels {
                 let plane = buf.plane(ch).expect("plane for known channel index");
                 let sample = plane[frame];
-                out.push(<f32 as symphonia::core::audio::conv::FromSample<S>>::from_sample(sample));
+                out.push(<f32 as FromSample<S>>::from_sample(sample));
             }
         }
     }
diff --git a/crates/nvisy-codec/src/handler/audio/mp3_handler.rs b/crates/nvisy-codec/src/handler/audio/mp3_handler.rs
index daa9894c..c0b83422 100644
--- a/crates/nvisy-codec/src/handler/audio/mp3_handler.rs
+++ b/crates/nvisy-codec/src/handler/audio/mp3_handler.rs
@@ -108,7 +108,11 @@ impl Handler<Audio> for Mp3Handler {
         let location = AudioLocation::new(TimeSpan::new(0, duration_us));
         let data = AudioData::new(self.bytes.clone()).with_filename(self.filename.clone());
         self.yielded = true;
-        Ok(Some(Chunk { location, data, hints: Vec::new() }))
+        Ok(Some(Chunk {
+            location,
+            data,
+            hints: Vec::new(),
+        }))
     }
 
     async fn read(&self, _location: &AudioLocation) -> Result<Option<AudioData>, Error> {
diff --git a/crates/nvisy-codec/src/handler/audio/wav_handler.rs b/crates/nvisy-codec/src/handler/audio/wav_handler.rs
index 85b2b084..99297a0d 100644
--- a/crates/nvisy-codec/src/handler/audio/wav_handler.rs
+++ b/crates/nvisy-codec/src/handler/audio/wav_handler.rs
@@ -111,7 +111,11 @@ impl Handler<Audio> for WavHandler {
         let location = AudioLocation::new(TimeSpan::new(0, duration_us));
         let data = AudioData::new(self.bytes.clone()).with_filename(self.filename.clone());
         self.yielded = true;
-        Ok(Some(Chunk { location, data, hints: Vec::new() }))
+        Ok(Some(Chunk {
+            location,
+            data,
+            hints: Vec::new(),
+        }))
     }
 
     async fn read(&self, _location: &AudioLocation) -> Result<Option<AudioData>, Error> {
diff --git a/crates/nvisy-codec/src/handler/image/redact.rs b/crates/nvisy-codec/src/handler/image/redact.rs
index 82dcc7b5..84869913 100644
--- a/crates/nvisy-codec/src/handler/image/redact.rs
+++ b/crates/nvisy-codec/src/handler/image/redact.rs
@@ -6,6 +6,7 @@
 //! the [`ImageOps`] trait in [`super::image_ops`].
 
 use image::DynamicImage;
+use image::imageops::FilterType;
 use nvisy_core::primitive::BoundingBox;
 use nvisy_core::redaction::ImageReplacement;
 
@@ -47,11 +48,8 @@ pub(crate) fn apply(
                     return;
                 }
             };
-            let resized = replacement_img.resize_exact(
-                region.width,
-                region.height,
-                image::imageops::FilterType::Lanczos3,
-            );
+            let resized =
+                replacement_img.resize_exact(region.width, region.height, FilterType::Lanczos3);
             image::imageops::overlay(img, &resized, region.x as i64, region.y as i64);
         }
     }
diff --git a/crates/nvisy-codec/src/handler/tabular/csv_handler.rs b/crates/nvisy-codec/src/handler/tabular/csv_handler.rs
index 8180163f..43f114fd 100644
--- a/crates/nvisy-codec/src/handler/tabular/csv_handler.rs
+++ b/crates/nvisy-codec/src/handler/tabular/csv_handler.rs
@@ -116,7 +116,11 @@ impl Handler<Tabular> for CsvHandler {
         let hints = location.column_name.iter().cloned().collect();
 
         self.cursor.col += 1;
-        Ok(Some(Chunk { location, data, hints }))
+        Ok(Some(Chunk {
+            location,
+            data,
+            hints,
+        }))
     }
 
     fn lift_chunk(
diff --git a/crates/nvisy-context/src/enhancer/mod.rs b/crates/nvisy-context/src/enhancer/mod.rs
index fa744f83..1c94dcfb 100644
--- a/crates/nvisy-context/src/enhancer/mod.rs
+++ b/crates/nvisy-context/src/enhancer/mod.rs
@@ -6,15 +6,14 @@ use std::collections::HashMap;
 use nvisy_core::entity::{Entity, EntityLabelRef, TrailStep};
 use nvisy_core::modality::Text;
 
+use crate::io::Token;
 use crate::matching::KeywordMatcher;
 use crate::rule::BoostRule;
-use crate::io::Token;
 
 mod context;
 mod window;
 
 pub use self::context::Context;
-
 use self::window::{slice_tokens_around, token_span, word_window};
 
 /// Source name stamped onto refinement [`TrailStep`]s the
@@ -482,10 +481,7 @@ mod tests {
             "out-of-band hint matching a rule keyword must boost",
         );
         assert!(
-            entities[0]
-                .trail
-                .iter()
-                .any(|s| s.source == "context-hint"),
+            entities[0].trail.iter().any(|s| s.source == "context-hint"),
             "trail step must record the hint-source provenance",
         );
     }
diff --git a/crates/nvisy-context/src/io/tokens.rs b/crates/nvisy-context/src/io/tokens.rs
index eb490adc..6f491574 100644
--- a/crates/nvisy-context/src/io/tokens.rs
+++ b/crates/nvisy-context/src/io/tokens.rs
@@ -31,6 +31,7 @@
 //! engine.
 
 use std::ops::Range;
+use std::{slice, vec};
 
 use hipstr::HipStr;
 
@@ -144,7 +145,7 @@ impl Tokens {
     }
 
     /// Iterate tokens in source order.
-    pub fn iter(&self) -> std::slice::Iter<'_, Token> {
+    pub fn iter(&self) -> slice::Iter<'_, Token> {
         self.0.iter()
     }
 }
@@ -156,7 +157,7 @@ impl FromIterator<Token> for Tokens {
 }
 
 impl IntoIterator for Tokens {
-    type IntoIter = std::vec::IntoIter<Token>;
+    type IntoIter = vec::IntoIter<Token>;
     type Item = Token;
 
     fn into_iter(self) -> Self::IntoIter {
diff --git a/crates/nvisy-core/src/entity/label/entity_label.rs b/crates/nvisy-core/src/entity/label/entity_label.rs
index 2503bf4a..f282a0fa 100644
--- a/crates/nvisy-core/src/entity/label/entity_label.rs
+++ b/crates/nvisy-core/src/entity/label/entity_label.rs
@@ -26,6 +26,8 @@
 //! [`description`]: EntityLabel::description
 //! [`tags`]: EntityLabel::tags
 
+use std::fmt;
+
 use hipstr::HipStr;
 use schemars::JsonSchema;
 use serde::{Deserialize, Serialize};
@@ -126,9 +128,9 @@ impl AsRef<str> for EntityLabel {
     }
 }
 
-impl std::fmt::Display for EntityLabel {
-    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
-        std::fmt::Display::fmt(&self.name, f)
+impl fmt::Display for EntityLabel {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        fmt::Display::fmt(&self.name, f)
     }
 }
 
diff --git a/crates/nvisy-core/src/modality/audio.rs b/crates/nvisy-core/src/modality/audio.rs
index 42e7fe46..e9b9fa6b 100644
--- a/crates/nvisy-core/src/modality/audio.rs
+++ b/crates/nvisy-core/src/modality/audio.rs
@@ -2,6 +2,8 @@
 //! [`AudioData`] per-call payload, and [`AudioExtraction`] provenance
 //! enum.
 
+use std::cmp::Ordering;
+
 use bytes::Bytes;
 use hipstr::HipStr;
 use schemars::JsonSchema;
@@ -60,14 +62,14 @@ impl Eq for AudioLocation {}
 impl Ord for AudioLocation {
     /// Lex order over `(time_span.start_us, time_span.end_us)`.
     /// `speaker_id` and `audio_id` are ignored.
-    fn cmp(&self, other: &Self) -> std::cmp::Ordering {
+    fn cmp(&self, other: &Self) -> Ordering {
         (self.time_span.start_us, self.time_span.end_us)
             .cmp(&(other.time_span.start_us, other.time_span.end_us))
     }
 }
 
 impl PartialOrd for AudioLocation {
-    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
+    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
         Some(self.cmp(other))
     }
 }
diff --git a/crates/nvisy-core/src/modality/image.rs b/crates/nvisy-core/src/modality/image.rs
index 2f853055..69e4b857 100644
--- a/crates/nvisy-core/src/modality/image.rs
+++ b/crates/nvisy-core/src/modality/image.rs
@@ -2,6 +2,8 @@
 //! [`ImageData`] per-call payload, and [`ImageExtraction`] provenance
 //! enum.
 
+use std::cmp::Ordering;
+
 use bytes::Bytes;
 use hipstr::HipStr;
 use schemars::JsonSchema;
@@ -73,7 +75,7 @@ impl PartialOrd for ImageLocation {
     /// `width`. Float fields compare via [`f64::total_cmp`]. `Ord`
     /// is not implemented — `f64` is not `Eq`, so a total order
     /// can't be promised at the trait level.
-    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
+    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
         let bb = &self.bounding_box;
         let ob = &other.bounding_box;
         Some(
diff --git a/crates/nvisy-core/src/modality/tabular.rs b/crates/nvisy-core/src/modality/tabular.rs
index 9ded5d2c..fc992763 100644
--- a/crates/nvisy-core/src/modality/tabular.rs
+++ b/crates/nvisy-core/src/modality/tabular.rs
@@ -1,6 +1,8 @@
 //! [`Tabular`] modality marker, [`TabularLocation`] coordinate type,
 //! and the [`TabularExtraction`] provenance enum.
 
+use std::cmp::Ordering;
+
 use schemars::JsonSchema;
 use serde::{Deserialize, Serialize};
 
@@ -79,7 +81,7 @@ impl Ord for TabularLocation {
     /// start_offset, end_offset)`. Absent intra-cell offsets sort
     /// as `0` / `usize::MAX` respectively so a whole-cell location
     /// brackets any sub-cell range. `column_name` is ignored.
-    fn cmp(&self, other: &Self) -> std::cmp::Ordering {
+    fn cmp(&self, other: &Self) -> Ordering {
         let (s1, e1) = self.cell_range();
         let (s2, e2) = other.cell_range();
         (&self.sheet_name, self.row_index, self.column_index, s1, e1).cmp(&(
@@ -93,7 +95,7 @@ impl Ord for TabularLocation {
 }
 
 impl PartialOrd for TabularLocation {
-    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
+    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
         Some(self.cmp(other))
     }
 }
diff --git a/crates/nvisy-core/src/modality/text.rs b/crates/nvisy-core/src/modality/text.rs
index e32b531b..9a86a0c7 100644
--- a/crates/nvisy-core/src/modality/text.rs
+++ b/crates/nvisy-core/src/modality/text.rs
@@ -2,6 +2,7 @@
 //! [`TextData`] per-call payload, and [`TextExtraction`] provenance
 //! enum.
 
+use std::cmp::Ordering;
 use std::ops::Range;
 
 use derive_more::{AsRef, Deref, Display, From};
@@ -122,13 +123,13 @@ impl TextLocation {
 impl Ord for TextLocation {
     /// Lex order over `(start, end)`. `context` and `page_number`
     /// are ignored.
-    fn cmp(&self, other: &Self) -> std::cmp::Ordering {
+    fn cmp(&self, other: &Self) -> Ordering {
         (self.start, self.end).cmp(&(other.start, other.end))
     }
 }
 
 impl PartialOrd for TextLocation {
-    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
+    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
         Some(self.cmp(other))
     }
 }
diff --git a/crates/nvisy-core/src/primitive/confidence/value.rs b/crates/nvisy-core/src/primitive/confidence/value.rs
index 540fc7e4..928e02b6 100644
--- a/crates/nvisy-core/src/primitive/confidence/value.rs
+++ b/crates/nvisy-core/src/primitive/confidence/value.rs
@@ -1,6 +1,7 @@
 //! [`Confidence`] — a validated `[0.0, 1.0]` confidence score.
 
 use schemars::JsonSchema;
+use serde::de::Error as DeError;
 use serde::{Deserialize, Deserializer, Serialize};
 
 /// A confidence score in the closed range `[0.0, 1.0]`.
@@ -43,7 +44,7 @@ impl<'de> Deserialize<'de> for Confidence {
     fn deserialize<D: Deserializer<'de>>(deserializer: D) -> Result<Self, D::Error> {
         let value = f64::deserialize(deserializer)?;
         Self::new(value).ok_or_else(|| {
-            serde::de::Error::custom(format!(
+            DeError::custom(format!(
                 "confidence {value} out of range [0.0, 1.0] or non-finite"
             ))
         })
diff --git a/crates/nvisy-engine/src/core/policy_store.rs b/crates/nvisy-engine/src/core/policy_store.rs
index 60c251f3..55357b10 100644
--- a/crates/nvisy-engine/src/core/policy_store.rs
+++ b/crates/nvisy-engine/src/core/policy_store.rs
@@ -17,11 +17,12 @@
 //! [`SharedData`]: super::SharedData
 //! [`ModalityRedactions::operator_for`]: crate::policy::redaction::ModalityRedactions::operator_for
 
+use std::fmt;
 use std::sync::Arc;
 
 use hipstr::HipStr;
 use nvisy_codec::content::ContentDescriptor;
-use nvisy_core::entity::Entity;
+use nvisy_core::entity::{Entity, EntityLabelCatalog};
 
 use crate::modality::DocumentModality;
 use crate::policy::redaction::{ModalityRedactions, ProjectRedaction};
@@ -53,8 +54,8 @@ impl PolicyStore {
     /// validated at detection-time submission.
     ///
     /// [`EntityLabelCatalog`]: nvisy_core::entity::EntityLabelCatalog
-    pub(crate) fn catalog(&self) -> nvisy_core::entity::EntityLabelCatalog {
-        let mut catalog = nvisy_core::entity::EntityLabelCatalog::new();
+    pub(crate) fn catalog(&self) -> EntityLabelCatalog {
+        let mut catalog = EntityLabelCatalog::new();
         for p in &self.policies {
             for l in &p.labels {
                 catalog.insert(l.clone());
@@ -88,7 +89,7 @@ impl PolicyStore {
     pub(crate) fn resolve<M: DocumentModality + ProjectRedaction>(
         &self,
         entity: &Entity<M>,
-        catalog: &nvisy_core::entity::EntityLabelCatalog,
+        catalog: &EntityLabelCatalog,
         default_operators: &ModalityRedactions,
         document_labels: &[&str],
         descriptor: &ContentDescriptor,
@@ -117,8 +118,8 @@ impl PolicyStore {
     }
 }
 
-impl std::fmt::Debug for PolicyStore {
-    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+impl fmt::Debug for PolicyStore {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
         f.debug_struct("PolicyStore")
             .field("count", &self.policies.len())
             .finish()
@@ -197,7 +198,7 @@ fn decide<M: DocumentModality + ProjectRedaction>(
 fn rule_matches<M: DocumentModality>(
     rule: &PolicyRule,
     entity: &Entity<M>,
-    catalog: &nvisy_core::entity::EntityLabelCatalog,
+    catalog: &EntityLabelCatalog,
     document_labels: &[&str],
     descriptor: &ContentDescriptor,
 ) -> bool {
@@ -255,7 +256,7 @@ mod tests {
         let store = PolicyStore::default();
         let entity = Entity::<Text>::test_builder(0, 4).test_build();
         let descriptor = ContentDescriptor::new();
-        let catalog = nvisy_core::entity::EntityLabelCatalog::new();
+        let catalog = EntityLabelCatalog::new();
         let defaults = ModalityRedactions::default();
         assert!(matches!(
             store.resolve::<Text>(&entity, &catalog, &defaults, &[], &descriptor),
diff --git a/crates/nvisy-engine/src/detection/phases/detection.rs b/crates/nvisy-engine/src/detection/phases/detection.rs
index ecbd163f..8e1f1f58 100644
--- a/crates/nvisy-engine/src/detection/phases/detection.rs
+++ b/crates/nvisy-engine/src/detection/phases/detection.rs
@@ -14,8 +14,8 @@ use std::sync::Arc;
 use nvisy_core::Result;
 use nvisy_core::entity::Entity;
 use nvisy_core::modality::{
-    Audio, AudioLocation, Image, ImageLocation, Overlap, Tabular, TabularLocation, Text, TextData,
-    TextLocation,
+    Audio, AudioLocation, Image, ImageLocation, Modality, Overlap, Tabular, TabularLocation, Text,
+    TextData, TextLocation,
 };
 use nvisy_core::recognition::RecognizerInput;
 use nvisy_toolkit::detection::RecognizerRegistry;
@@ -237,7 +237,7 @@ pub trait LiftFromBlock: DocumentModality + Sized {
         spans: &[Span<Self>],
         start: usize,
         end: usize,
-    ) -> Option<<Self as nvisy_core::modality::Modality>::Location>;
+    ) -> Option<<Self as Modality>::Location>;
 }
 
 impl LiftFromBlock for Text {
diff --git a/crates/nvisy-engine/src/detection/pipeline.rs b/crates/nvisy-engine/src/detection/pipeline.rs
index 43fdd871..6ff0a9dd 100644
--- a/crates/nvisy-engine/src/detection/pipeline.rs
+++ b/crates/nvisy-engine/src/detection/pipeline.rs
@@ -11,6 +11,7 @@ use std::sync::Arc;
 use jiff::Timestamp;
 use nvisy_codec::CodecRegistry;
 use nvisy_core::Error;
+use nvisy_core::entity::EntityLabelCatalog;
 use nvisy_toolkit::extraction::ExtractorRegistry;
 use tokio_util::sync::CancellationToken;
 use uuid::Uuid;
@@ -234,7 +235,7 @@ pub(crate) struct PreparedDetection {
     actor_id: Uuid,
     policies: Arc<PolicyStore>,
     policy_digests: Vec<PolicyDigest>,
-    catalog: nvisy_core::entity::EntityLabelCatalog,
+    catalog: EntityLabelCatalog,
     imports: Vec<ImportFile>,
     plan: crate::detection::DetectionPlan,
 }
diff --git a/crates/nvisy-engine/src/detection/state.rs b/crates/nvisy-engine/src/detection/state.rs
index 4109a26c..e8965146 100644
--- a/crates/nvisy-engine/src/detection/state.rs
+++ b/crates/nvisy-engine/src/detection/state.rs
@@ -5,6 +5,7 @@
 //! that's frozen into a [`DetectionResult`] once the pass reaches
 //! a terminal state.
 
+use std::cmp::Reverse;
 use std::collections::HashMap;
 use std::sync::Arc;
 
@@ -222,7 +223,7 @@ impl DetectionState {
             .filter(|(_, r)| filter.status.is_none_or(|s| r.status == s))
             .map(|(id, r)| r.to_entry(*id))
             .collect();
-        out.sort_by_key(|e| std::cmp::Reverse(e.created_at));
+        out.sort_by_key(|e| Reverse(e.created_at));
         out
     }
 
diff --git a/crates/nvisy-engine/src/redaction/phases/mod.rs b/crates/nvisy-engine/src/redaction/phases/mod.rs
index 41ffe888..3f41c379 100644
--- a/crates/nvisy-engine/src/redaction/phases/mod.rs
+++ b/crates/nvisy-engine/src/redaction/phases/mod.rs
@@ -47,7 +47,7 @@ pub mod validation;
 
 use nvisy_codec::content::ContentDescriptor;
 use nvisy_core::Result;
-use nvisy_core::entity::is_excluded;
+use nvisy_core::entity::{EntityLabelCatalog, is_excluded};
 use nvisy_core::extraction::{DataAt, TextAt};
 use nvisy_core::modality::Overlap;
 use nvisy_core::primitive::ConfidenceThreshold;
@@ -77,7 +77,7 @@ pub(crate) async fn run_redaction<M>(
     tree: &mut DocumentTree<M>,
     descriptor: &ContentDescriptor,
     policies: &PolicyStore,
-    catalog: &nvisy_core::entity::EntityLabelCatalog,
+    catalog: &EntityLabelCatalog,
     default_operators: &ModalityRedactions,
     registry: &RedactionRegistry<M>,
 ) -> Result<()>
diff --git a/crates/nvisy-engine/src/redaction/state.rs b/crates/nvisy-engine/src/redaction/state.rs
index 00983a76..f95ea69f 100644
--- a/crates/nvisy-engine/src/redaction/state.rs
+++ b/crates/nvisy-engine/src/redaction/state.rs
@@ -1,5 +1,6 @@
 //! Volatile in-memory state for active redaction passes.
 
+use std::cmp::Reverse;
 use std::collections::HashMap;
 use std::sync::Arc;
 
@@ -115,7 +116,7 @@ impl RedactionState {
             .filter(|(_, r)| filter.detection_id.is_none_or(|d| r.detection_id == d))
             .map(|(id, r)| r.to_entry(*id))
             .collect();
-        out.sort_by_key(|e| std::cmp::Reverse(e.created_at));
+        out.sort_by_key(|e| Reverse(e.created_at));
         out
     }
 
diff --git a/crates/nvisy-fake/src/anonymizer/mod.rs b/crates/nvisy-fake/src/anonymizer/mod.rs
index e1f0f456..e73c6363 100644
--- a/crates/nvisy-fake/src/anonymizer/mod.rs
+++ b/crates/nvisy-fake/src/anonymizer/mod.rs
@@ -1,6 +1,7 @@
 //! [`Fake`]: text-modality [`Anonymizer`] that swaps detected
 //! entities for plausible fake values.
 
+use std::fmt;
 use std::hash::{DefaultHasher, Hash, Hasher};
 use std::str::FromStr;
 use std::sync::Arc;
@@ -43,8 +44,8 @@ pub struct Fake {
     seed: u64,
 }
 
-impl std::fmt::Debug for Fake {
-    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+impl fmt::Debug for Fake {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
         f.debug_struct("Fake")
             .field("default_language", &self.default_language)
             .field("seed", &self.seed)
@@ -224,6 +225,8 @@ mod tests {
     use std::collections::HashSet;
 
     use nvisy_core::entity::{EntityLabelRef, builtins};
+    use nvisy_core::modality::TabularLocation;
+    use nvisy_core::primitive::Confidence;
     use nvisy_core::redaction::Anonymizer as _;
     use nvisy_toolkit::redaction::anonymizer::{Mask, Replace};
 
@@ -359,7 +362,7 @@ mod tests {
         let op = fake();
         let entity = Entity::<Tabular>::builder()
             .with_label(builtins::PERSON_NAME.label_ref())
-            .with_location(nvisy_core::modality::TabularLocation {
+            .with_location(TabularLocation {
                 row_index: 0u32,
                 column_index: 0u32,
                 start_offset: None,
@@ -367,7 +370,7 @@ mod tests {
                 column_name: None,
                 sheet_name: None,
             })
-            .with_confidence(nvisy_core::primitive::Confidence::clamped(1.0))
+            .with_confidence(Confidence::clamped(1.0))
             .build()
             .unwrap();
         let out = op.apply(&entity, &TextData::new("alice")).await.unwrap();
diff --git a/crates/nvisy-llm/src/backend/http/mod.rs b/crates/nvisy-llm/src/backend/http/mod.rs
index 448352af..3be58cc9 100644
--- a/crates/nvisy-llm/src/backend/http/mod.rs
+++ b/crates/nvisy-llm/src/backend/http/mod.rs
@@ -15,6 +15,7 @@ mod config;
 mod middleware;
 
 use nvisy_core::{Error, Result};
+use reqwest_middleware::reqwest::Client;
 use reqwest_middleware::{ClientBuilder, ClientWithMiddleware};
 
 pub use self::config::HttpConfig;
@@ -45,7 +46,7 @@ pub fn build_http_client(config: &HttpConfig) -> Result<ClientWithMiddleware> {
 
     let policy = backoff_policy(config.max_retries);
 
-    let client = reqwest_middleware::reqwest::Client::builder()
+    let client = Client::builder()
         .timeout(config.timeout)
         .connect_timeout(config.connect_timeout)
         .pool_idle_timeout(config.idle_timeout)
diff --git a/crates/nvisy-llm/src/backend/rig/inner.rs b/crates/nvisy-llm/src/backend/rig/inner.rs
index 9700dbc5..537c1a18 100644
--- a/crates/nvisy-llm/src/backend/rig/inner.rs
+++ b/crates/nvisy-llm/src/backend/rig/inner.rs
@@ -7,21 +7,21 @@
 use reqwest_middleware::ClientWithMiddleware;
 use rig::agent::Agent;
 #[cfg(feature = "anthropic-claude")]
-use rig::providers::anthropic;
+use rig::providers::anthropic::completion::CompletionModel as AnthropicCompletionModel;
 #[cfg(feature = "google-gemini")]
-use rig::providers::gemini;
-use rig::providers::ollama;
+use rig::providers::gemini::completion::CompletionModel as GeminiCompletionModel;
+use rig::providers::ollama::CompletionModel as OllamaCompletionModel;
 #[cfg(feature = "openai-gpt")]
-use rig::providers::openai;
+use rig::providers::openai::completion::CompletionModel as OpenAiCompletionModel;
 
 pub(super) enum RigInner {
     #[cfg(feature = "openai-gpt")]
-    OpenAi(Agent<openai::completion::CompletionModel<ClientWithMiddleware>>),
+    OpenAi(Agent<OpenAiCompletionModel<ClientWithMiddleware>>),
     #[cfg(feature = "anthropic-claude")]
-    Anthropic(Agent<anthropic::completion::CompletionModel<ClientWithMiddleware>>),
+    Anthropic(Agent<AnthropicCompletionModel<ClientWithMiddleware>>),
     #[cfg(feature = "google-gemini")]
-    Gemini(Agent<gemini::completion::CompletionModel<ClientWithMiddleware>>),
-    Ollama(Agent<ollama::CompletionModel<ClientWithMiddleware>>),
+    Gemini(Agent<GeminiCompletionModel<ClientWithMiddleware>>),
+    Ollama(Agent<OllamaCompletionModel<ClientWithMiddleware>>),
 }
 
 macro_rules! dispatch {
diff --git a/crates/nvisy-llm/src/backend/rig/mod.rs b/crates/nvisy-llm/src/backend/rig/mod.rs
index dc6502a2..b3303621 100644
--- a/crates/nvisy-llm/src/backend/rig/mod.rs
+++ b/crates/nvisy-llm/src/backend/rig/mod.rs
@@ -17,8 +17,6 @@ use nvisy_core::{Error as CoreError, Result};
 use rig::agent::{Agent, AgentBuilder};
 use rig::client::CompletionClient;
 use rig::completion::{AssistantContent, Completion, CompletionModel, Message};
-#[cfg(feature = "google-gemini")]
-use rig::providers::gemini;
 
 pub use self::config::LlmConfig;
 pub use self::context::ContextWindow;
@@ -193,9 +191,7 @@ impl RigBackendBuilder {
             #[cfg(feature = "google-gemini")]
             LlmProvider::Gemini(p) => {
                 let client = p.gemini_client(http).map_err(crate::error::convert)?;
-                // rig-core 0.31: Gemini's Capabilities doesn't propagate H,
-                // so CompletionClient is unavailable for non-default H.
-                let model = gemini::completion::CompletionModel::new(client, p.model.as_str());
+                let model = client.completion_model(p.model.as_str());
                 RigInner::Gemini(build_agent(model, &config, preamble))
             }
             LlmProvider::Ollama(p) => {
diff --git a/crates/nvisy-llm/src/error.rs b/crates/nvisy-llm/src/error.rs
index 495c92b2..636b5fda 100644
--- a/crates/nvisy-llm/src/error.rs
+++ b/crates/nvisy-llm/src/error.rs
@@ -2,6 +2,7 @@
 
 use nvisy_core::{Error as CoreError, ErrorKind as CoreErrorKind};
 use rig::completion::{CompletionError, PromptError, StructuredOutputError};
+use rig::http_client::Error as HttpClientError;
 
 /// Internal error type for LLM provider interactions.
 ///
@@ -13,7 +14,7 @@ use rig::completion::{CompletionError, PromptError, StructuredOutputError};
 pub(crate) enum Error {
     /// An HTTP / network error from the LLM provider.
     #[error("HTTP error: {0}")]
-    Http(rig::http_client::Error),
+    Http(HttpClientError),
 
     /// A JSON (de)serialization error.
     #[error("JSON error: {0}")]
diff --git a/crates/nvisy-llm/src/provider/unauthenticated.rs b/crates/nvisy-llm/src/provider/unauthenticated.rs
index 01cebb2c..b89e22ca 100644
--- a/crates/nvisy-llm/src/provider/unauthenticated.rs
+++ b/crates/nvisy-llm/src/provider/unauthenticated.rs
@@ -1,6 +1,7 @@
 //! LLM providers that do not require an API key.
 
 use reqwest_middleware::ClientWithMiddleware;
+use rig::client::Nothing;
 use rig::providers::ollama;
 use schemars::JsonSchema;
 use serde::{Deserialize, Serialize};
@@ -24,9 +25,7 @@ impl UnauthenticatedProvider {
         &self,
         http: ClientWithMiddleware,
     ) -> Result<ollama::Client<ClientWithMiddleware>, Error> {
-        let mut b = ollama::Client::builder()
-            .api_key(rig::client::Nothing)
-            .http_client(http);
+        let mut b = ollama::Client::builder().api_key(Nothing).http_client(http);
         if let Some(url) = &self.base_url {
             b = b.base_url(url);
         }
diff --git a/crates/nvisy-llm/src/recognition/file_prompt.rs b/crates/nvisy-llm/src/recognition/file_prompt.rs
index ff153840..bd78f709 100644
--- a/crates/nvisy-llm/src/recognition/file_prompt.rs
+++ b/crates/nvisy-llm/src/recognition/file_prompt.rs
@@ -48,7 +48,7 @@ use std::path::Path;
 use base64::Engine;
 use base64::engine::general_purpose::STANDARD;
 use minijinja::{Environment, context};
-use nvisy_core::entity::Entity;
+use nvisy_core::entity::{Entity, EntityLabelRef};
 use nvisy_core::modality::{Image, Text};
 use nvisy_core::recognition::{LabelMap, RecognizerInput};
 use nvisy_core::{Error, Result};
@@ -114,10 +114,7 @@ impl<M> FilePrompt<M> {
         let mut label_map = LabelMap::new();
         if let Some(entries) = parsed.label_map {
             for (model_label, entity_label) in entries {
-                label_map = label_map.with_entry(
-                    model_label,
-                    nvisy_core::entity::EntityLabelRef::from(entity_label),
-                );
+                label_map = label_map.with_entry(model_label, EntityLabelRef::from(entity_label));
             }
         }
 
diff --git a/crates/nvisy-llm/tests/file_prompt.rs b/crates/nvisy-llm/tests/file_prompt.rs
index 2d5a99cb..4710071b 100644
--- a/crates/nvisy-llm/tests/file_prompt.rs
+++ b/crates/nvisy-llm/tests/file_prompt.rs
@@ -10,7 +10,7 @@
 //! `label_map` / `labels_to_ignore` policy on the lift side.
 
 use nvisy_core::entity::{EntityLabelRef, builtins};
-use nvisy_core::modality::{ImageData, ImageLocation, TextData, TextLocation};
+use nvisy_core::modality::{Image, ImageData, ImageLocation, Text, TextData, TextLocation};
 use nvisy_core::primitive::{BoundingBox, Dimensions};
 use nvisy_core::recognition::{Hint, RecognizerInput};
 use nvisy_llm::backend::LlmResponse;
@@ -20,15 +20,14 @@ const VLM_TOML: &str = include_str!("../testdata/prompts/vlm.toml");
 
 #[test]
 fn text_prompt_renders_template_and_lifts_entities() {
-    let prompt =
-        FilePrompt::<nvisy_core::modality::Text>::from_toml(NER_TOML).expect("ner.toml parses");
+    let prompt = FilePrompt::<Text>::from_toml(NER_TOML).expect("ner.toml parses");
 
     // Realistic doc text. Hint coordinates pick out "Alice Carter".
     let body = "From: Alice Carter <alice.carter@acme.test>\nSubject: hello";
     let alice_start = body.find("Alice Carter").expect("alice substring");
     let alice_end = alice_start + "Alice Carter".len();
 
-    let hint = Hint::<nvisy_core::modality::Text>::new(TextLocation::new(alice_start, alice_end))
+    let hint = Hint::<Text>::new(TextLocation::new(alice_start, alice_end))
         .with_name("uploader-alice")
         .with_label(builtins::PERSON_NAME.label_ref());
 
@@ -94,14 +93,13 @@ fn text_prompt_renders_template_and_lifts_entities() {
 
 #[test]
 fn image_prompt_renders_template_and_lifts_entities() {
-    let prompt =
-        FilePrompt::<nvisy_core::modality::Image>::from_toml(VLM_TOML).expect("vlm.toml parses");
+    let prompt = FilePrompt::<Image>::from_toml(VLM_TOML).expect("vlm.toml parses");
 
     // Tiny PNG-shaped payload — the prompt only base64-encodes it.
     let bytes = b"\x89PNG\r\n\x1a\nfake-image-bytes".to_vec();
     let dims = Dimensions::new(640, 480);
 
-    let hint = Hint::<nvisy_core::modality::Image>::new(ImageLocation::new(BoundingBox::new(
+    let hint = Hint::<Image>::new(ImageLocation::new(BoundingBox::new(
         10.0, 20.0, 100.0, 50.0,
     )))
     .with_name("uploader-face")
diff --git a/crates/nvisy-pattern/src/recognition/recognizer.rs b/crates/nvisy-pattern/src/recognition/recognizer.rs
index 17d280f6..5a01dd2f 100644
--- a/crates/nvisy-pattern/src/recognition/recognizer.rs
+++ b/crates/nvisy-pattern/src/recognition/recognizer.rs
@@ -576,7 +576,10 @@ mod tests {
             regex = "\\d+"
         "#;
         let regex = crate::Regex::from_toml(toml).expect("TOML parses");
-        assert!(regex.countries.is_empty(), "default countries must be empty");
+        assert!(
+            regex.countries.is_empty(),
+            "default countries must be empty"
+        );
     }
 
     #[test]
diff --git a/crates/nvisy-pattern/src/shipped/dictionaries/world.rs b/crates/nvisy-pattern/src/shipped/dictionaries/world.rs
index 9941d11b..7504a4e0 100644
--- a/crates/nvisy-pattern/src/shipped/dictionaries/world.rs
+++ b/crates/nvisy-pattern/src/shipped/dictionaries/world.rs
@@ -1,7 +1,6 @@
 //! Universal dictionaries — apply regardless of jurisdiction.
 
-use crate::Dictionary;
-use crate::__shipped_dictionary as shipped_dictionary;
+use crate::{__shipped_dictionary as shipped_dictionary, Dictionary};
 
 shipped_dictionary!(
     /// Cryptocurrency names and ticker symbols (BTC, Bitcoin, ETH,
diff --git a/crates/nvisy-pattern/src/shipped/patterns/mod.rs b/crates/nvisy-pattern/src/shipped/patterns/mod.rs
index 4ee09223..e0a82402 100644
--- a/crates/nvisy-pattern/src/shipped/patterns/mod.rs
+++ b/crates/nvisy-pattern/src/shipped/patterns/mod.rs
@@ -86,7 +86,11 @@ mod tests {
     fn us_patterns_are_country_scoped_to_us() {
         for pattern in us::all() {
             assert_eq!(
-                pattern.countries.iter().map(|c| c.as_str()).collect::<Vec<_>>(),
+                pattern
+                    .countries
+                    .iter()
+                    .map(|c| c.as_str())
+                    .collect::<Vec<_>>(),
                 vec!["US"],
                 "US-scoped pattern `{}` must declare countries = [US]",
                 pattern.name,
@@ -98,7 +102,11 @@ mod tests {
     fn uk_patterns_are_country_scoped_to_gb() {
         for pattern in uk::all() {
             assert_eq!(
-                pattern.countries.iter().map(|c| c.as_str()).collect::<Vec<_>>(),
+                pattern
+                    .countries
+                    .iter()
+                    .map(|c| c.as_str())
+                    .collect::<Vec<_>>(),
                 vec!["GB"],
                 "UK-scoped pattern `{}` must declare countries = [GB]",
                 pattern.name,
diff --git a/crates/nvisy-pattern/src/shipped/patterns/uk.rs b/crates/nvisy-pattern/src/shipped/patterns/uk.rs
index d10dcd01..d5e0fff0 100644
--- a/crates/nvisy-pattern/src/shipped/patterns/uk.rs
+++ b/crates/nvisy-pattern/src/shipped/patterns/uk.rs
@@ -2,8 +2,7 @@
 //!
 //! See `assets/NOTICE.md` for third-party attribution.
 
-use crate::Regex;
-use crate::__shipped_pattern as shipped_pattern;
+use crate::{__shipped_pattern as shipped_pattern, Regex};
 
 shipped_pattern!(
     /// UK NHS numbers (10-digit, mod-11 validated).
diff --git a/crates/nvisy-pattern/src/shipped/patterns/us.rs b/crates/nvisy-pattern/src/shipped/patterns/us.rs
index 7f3215a8..afde142f 100644
--- a/crates/nvisy-pattern/src/shipped/patterns/us.rs
+++ b/crates/nvisy-pattern/src/shipped/patterns/us.rs
@@ -1,7 +1,6 @@
 //! United States — patterns scoped to US jurisdictional formats.
 
-use crate::Regex;
-use crate::__shipped_pattern as shipped_pattern;
+use crate::{__shipped_pattern as shipped_pattern, Regex};
 
 shipped_pattern!(
     /// US bank routing numbers (ABA RTN, mod-10 validated).
diff --git a/crates/nvisy-pattern/src/shipped/patterns/world.rs b/crates/nvisy-pattern/src/shipped/patterns/world.rs
index d4f928a2..c023f540 100644
--- a/crates/nvisy-pattern/src/shipped/patterns/world.rs
+++ b/crates/nvisy-pattern/src/shipped/patterns/world.rs
@@ -1,7 +1,6 @@
 //! Universal patterns — apply regardless of jurisdiction.
 
-use crate::Regex;
-use crate::__shipped_pattern as shipped_pattern;
+use crate::{__shipped_pattern as shipped_pattern, Regex};
 
 shipped_pattern!(
     /// Email address (RFC-loose).
diff --git a/crates/nvisy-pattern/src/validators/mod.rs b/crates/nvisy-pattern/src/validators/mod.rs
index 39722f54..d833d6b8 100644
--- a/crates/nvisy-pattern/src/validators/mod.rs
+++ b/crates/nvisy-pattern/src/validators/mod.rs
@@ -31,6 +31,7 @@ pub mod us;
 
 use std::borrow::Cow;
 use std::collections::HashMap;
+use std::fmt;
 use std::sync::Arc;
 
 use nvisy_core::primitive::{CountryCode, LanguageTag};
@@ -173,8 +174,8 @@ impl ValidatorRegistry {
     }
 }
 
-impl std::fmt::Debug for ValidatorRegistry {
-    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+impl fmt::Debug for ValidatorRegistry {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
         let names: Vec<&str> = self.table.keys().map(AsRef::as_ref).collect();
         f.debug_struct("ValidatorRegistry")
             .field("validators", &names)
diff --git a/crates/nvisy-pattern/src/validators/phone.rs b/crates/nvisy-pattern/src/validators/phone.rs
index 9539f0d4..19cd4bd4 100644
--- a/crates/nvisy-pattern/src/validators/phone.rs
+++ b/crates/nvisy-pattern/src/validators/phone.rs
@@ -14,9 +14,10 @@
 //!    handful of national-format numbers than mislabel card and
 //!    account numbers as phones.
 
+use std::str::FromStr;
+
 use phonenumber::country::Id;
 use phonenumber::parse;
-use std::str::FromStr;
 
 use super::ValidationContext;
 
diff --git a/crates/nvisy-pattern/src/validators/uk/nhs.rs b/crates/nvisy-pattern/src/validators/uk/nhs.rs
index 582683e4..da184202 100644
--- a/crates/nvisy-pattern/src/validators/uk/nhs.rs
+++ b/crates/nvisy-pattern/src/validators/uk/nhs.rs
@@ -23,11 +23,7 @@ pub fn nhs(value: &str) -> bool {
     if digits.len() != 10 {
         return false;
     }
-    let total: u32 = digits
-        .iter()
-        .zip((1..=10).rev())
-        .map(|(d, w)| d * w)
-        .sum();
+    let total: u32 = digits.iter().zip((1..=10).rev()).map(|(d, w)| d * w).sum();
     total.is_multiple_of(11)
 }
 
diff --git a/crates/nvisy-pattern/src/validators/us/dea_number.rs b/crates/nvisy-pattern/src/validators/us/dea_number.rs
index ecb8a68d..1e6e56e3 100644
--- a/crates/nvisy-pattern/src/validators/us/dea_number.rs
+++ b/crates/nvisy-pattern/src/validators/us/dea_number.rs
@@ -28,7 +28,10 @@ pub fn dea_number(value: &str) -> bool {
     if !first.is_ascii_alphabetic() || !second.is_ascii_alphabetic() {
         return false;
     }
-    let digits: Vec<u32> = chars.map(|c| c.to_digit(10)).collect::<Option<Vec<_>>>().unwrap_or_default();
+    let digits: Vec<u32> = chars
+        .map(|c| c.to_digit(10))
+        .collect::<Option<Vec<_>>>()
+        .unwrap_or_default();
     if digits.len() != 7 {
         return false;
     }
diff --git a/crates/nvisy-pattern/tests/fixtures/mod.rs b/crates/nvisy-pattern/tests/fixtures/mod.rs
index 1b27792e..1e257e47 100644
--- a/crates/nvisy-pattern/tests/fixtures/mod.rs
+++ b/crates/nvisy-pattern/tests/fixtures/mod.rs
@@ -28,12 +28,7 @@ pub async fn scan(text: &str) -> (String, Vec<Entity<Text>>) {
 }
 
 #[track_caller]
-pub fn assert_match(
-    text: &str,
-    entities: &[Entity<Text>],
-    label: EntityLabelRef,
-    needle: &str,
-) {
+pub fn assert_match(text: &str, entities: &[Entity<Text>], label: EntityLabelRef, needle: &str) {
     let hit = entities
         .iter()
         .any(|e| e.label == label && &text[e.location.start..e.location.end] == needle);
@@ -52,9 +47,6 @@ pub fn assert_label_present(entities: &[Entity<Text>], label: EntityLabelRef) {
     assert!(
         entities.iter().any(|e| e.label == label),
         "expected at least one {label:?} entity; got labels: {:?}",
-        entities
-            .iter()
-            .map(|e| e.label.clone())
-            .collect::<Vec<_>>()
+        entities.iter().map(|e| e.label.clone()).collect::<Vec<_>>()
     );
 }
diff --git a/crates/nvisy-server/src/extract/json.rs b/crates/nvisy-server/src/extract/json.rs
index f40041cf..e3511531 100644
--- a/crates/nvisy-server/src/extract/json.rs
+++ b/crates/nvisy-server/src/extract/json.rs
@@ -6,7 +6,10 @@
 //! [`Json`]: axum::Json
 //! [`ErrorResponse`]: crate::handler::response::ErrorResponse
 
-use aide::OperationInput;
+use aide::generate::GenContext;
+use aide::openapi::{Operation, Response as OpenApiResponse};
+use aide::{OperationInput, OperationOutput};
+use axum::Json as AxumJson;
 use axum::extract::rejection::JsonRejection;
 use axum::extract::{FromRequest, Request};
 use axum::response::{IntoResponse, Response};
@@ -28,40 +31,37 @@ pub struct Json<T>(pub T);
 impl<S, T> FromRequest<S> for Json<T>
 where
     S: Send + Sync,
-    axum::Json<T>: FromRequest<S, Rejection = JsonRejection>,
+    AxumJson<T>: FromRequest<S, Rejection = JsonRejection>,
 {
     type Rejection = Error<'static>;
 
     async fn from_request(req: Request, state: &S) -> Result<Self, Self::Rejection> {
-        axum::Json::<T>::from_request(req, state)
+        AxumJson::<T>::from_request(req, state)
             .await
-            .map(|axum::Json(v)| Self(v))
+            .map(|AxumJson(v)| Self(v))
             .map_err(|rejection| ErrorKind::BadRequest.with_message(rejection.body_text()))
     }
 }
 
 impl<T: serde::Serialize> IntoResponse for Json<T> {
     fn into_response(self) -> Response {
-        axum::Json(self.0).into_response()
+        AxumJson(self.0).into_response()
     }
 }
 
 impl<T: schemars::JsonSchema> OperationInput for Json<T> {
-    fn operation_input(
-        ctx: &mut aide::generate::GenContext,
-        operation: &mut aide::openapi::Operation,
-    ) {
-        axum::Json::<T>::operation_input(ctx, operation);
+    fn operation_input(ctx: &mut GenContext, operation: &mut Operation) {
+        AxumJson::<T>::operation_input(ctx, operation);
     }
 }
 
-impl<T: schemars::JsonSchema + serde::Serialize> aide::OperationOutput for Json<T> {
+impl<T: schemars::JsonSchema + serde::Serialize> OperationOutput for Json<T> {
     type Inner = T;
 
     fn operation_response(
-        ctx: &mut aide::generate::GenContext,
-        operation: &mut aide::openapi::Operation,
-    ) -> Option<aide::openapi::Response> {
-        axum::Json::<T>::operation_response(ctx, operation)
+        ctx: &mut GenContext,
+        operation: &mut Operation,
+    ) -> Option<OpenApiResponse> {
+        AxumJson::<T>::operation_response(ctx, operation)
     }
 }
diff --git a/crates/nvisy-server/src/extract/path.rs b/crates/nvisy-server/src/extract/path.rs
index 1543afba..d698c064 100644
--- a/crates/nvisy-server/src/extract/path.rs
+++ b/crates/nvisy-server/src/extract/path.rs
@@ -8,8 +8,10 @@
 //! [`ErrorResponse`]: crate::handler::response::ErrorResponse
 
 use aide::OperationInput;
-use axum::extract::FromRequestParts;
+use aide::generate::GenContext;
+use aide::openapi::Operation;
 use axum::extract::rejection::PathRejection;
+use axum::extract::{FromRequestParts, Path as AxumPath};
 use axum::http::request::Parts;
 
 use crate::handler::error::{Error, ErrorKind};
@@ -26,23 +28,20 @@ pub struct Path<T>(pub T);
 impl<S, T> FromRequestParts<S> for Path<T>
 where
     S: Send + Sync,
-    axum::extract::Path<T>: FromRequestParts<S, Rejection = PathRejection>,
+    AxumPath<T>: FromRequestParts<S, Rejection = PathRejection>,
 {
     type Rejection = Error<'static>;
 
     async fn from_request_parts(parts: &mut Parts, state: &S) -> Result<Self, Self::Rejection> {
-        axum::extract::Path::<T>::from_request_parts(parts, state)
+        AxumPath::<T>::from_request_parts(parts, state)
             .await
-            .map(|axum::extract::Path(v)| Self(v))
+            .map(|AxumPath(v)| Self(v))
             .map_err(|rejection| ErrorKind::MissingPathParam.with_message(rejection.body_text()))
     }
 }
 
 impl<T: schemars::JsonSchema> OperationInput for Path<T> {
-    fn operation_input(
-        ctx: &mut aide::generate::GenContext,
-        operation: &mut aide::openapi::Operation,
-    ) {
-        axum::extract::Path::<T>::operation_input(ctx, operation);
+    fn operation_input(ctx: &mut GenContext, operation: &mut Operation) {
+        AxumPath::<T>::operation_input(ctx, operation);
     }
 }
diff --git a/crates/nvisy-server/src/handler/error/http_error.rs b/crates/nvisy-server/src/handler/error/http_error.rs
index ad9f4eb4..1c378135 100644
--- a/crates/nvisy-server/src/handler/error/http_error.rs
+++ b/crates/nvisy-server/src/handler/error/http_error.rs
@@ -3,6 +3,8 @@
 use std::borrow::Cow;
 use std::{error, fmt, result};
 
+use aide::generate::GenContext;
+use aide::openapi::{Operation, Response as OpenApiResponse, StatusCode};
 use axum::response::{IntoResponse, Response};
 
 use super::http_kind::ErrorKind;
@@ -205,16 +207,16 @@ impl<'a> aide::OperationOutput for Error<'a> {
     type Inner = ErrorResponse<'static>;
 
     fn operation_response(
-        ctx: &mut aide::generate::GenContext,
-        operation: &mut aide::openapi::Operation,
-    ) -> Option<aide::openapi::Response> {
+        ctx: &mut GenContext,
+        operation: &mut Operation,
+    ) -> Option<OpenApiResponse> {
         axum::Json::<ErrorResponse<'static>>::operation_response(ctx, operation)
     }
 
     fn inferred_responses(
-        _ctx: &mut aide::generate::GenContext,
-        _operation: &mut aide::openapi::Operation,
-    ) -> Vec<(Option<aide::openapi::StatusCode>, aide::openapi::Response)> {
+        _ctx: &mut GenContext,
+        _operation: &mut Operation,
+    ) -> Vec<(Option<StatusCode>, OpenApiResponse)> {
         Vec::new()
     }
 }
diff --git a/crates/nvisy-server/src/handler/mod.rs b/crates/nvisy-server/src/handler/mod.rs
index 941363ac..7b40b45b 100644
--- a/crates/nvisy-server/src/handler/mod.rs
+++ b/crates/nvisy-server/src/handler/mod.rs
@@ -22,6 +22,7 @@ mod request;
 mod response;
 
 use aide::axum::ApiRouter;
+use axum::http::Uri;
 
 pub use self::error::{Error, ErrorKind, Result};
 use crate::service::ServiceState;
@@ -68,7 +69,7 @@ fn v1_routes() -> ApiRouter<ServiceState> {
 ///
 /// Returns 404 with guidance pointing to the current API version
 /// for `/api/*` paths.
-async fn api_version_fallback(uri: axum::http::Uri) -> Result<()> {
+async fn api_version_fallback(uri: Uri) -> Result<()> {
     let path = uri.path();
     if path.starts_with("/api/") {
         Err(ErrorKind::NotFound.with_message(format!(
diff --git a/crates/nvisy-server/src/middleware/observability.rs b/crates/nvisy-server/src/middleware/observability.rs
index 783132bb..dd814b51 100644
--- a/crates/nvisy-server/src/middleware/observability.rs
+++ b/crates/nvisy-server/src/middleware/observability.rs
@@ -12,7 +12,7 @@
 
 use axum::Router;
 use axum::http::header;
-use tower_http::classify::SharedClassifier;
+use tower_http::classify::{ServerErrorsAsFailures, SharedClassifier};
 use tower_http::request_id::{MakeRequestUuid, PropagateRequestIdLayer, SetRequestIdLayer};
 use tower_http::sensitive_headers::SetSensitiveRequestHeadersLayer;
 use tower_http::trace::{self, TraceLayer};
@@ -55,7 +55,7 @@ where
 }
 
 /// Builds the [`TraceLayer`] with structured spans and callbacks.
-fn trace_layer() -> TraceLayer<SharedClassifier<tower_http::classify::ServerErrorsAsFailures>> {
+fn trace_layer() -> TraceLayer<SharedClassifier<ServerErrorsAsFailures>> {
     TraceLayer::new_for_http()
         .make_span_with(
             trace::DefaultMakeSpan::new()
diff --git a/crates/nvisy-server/src/middleware/specification.rs b/crates/nvisy-server/src/middleware/specification.rs
index 4f8f276b..95d5ef13 100644
--- a/crates/nvisy-server/src/middleware/specification.rs
+++ b/crates/nvisy-server/src/middleware/specification.rs
@@ -16,7 +16,7 @@
 //! [`ApiRouter`]: aide::axum::ApiRouter
 
 use aide::axum::ApiRouter;
-use aide::openapi::{OpenApi, Tag};
+use aide::openapi::{Info, OpenApi, Tag};
 use aide::scalar::Scalar;
 use aide::transform::TransformOpenApi;
 use axum::routing::get;
@@ -105,7 +105,7 @@ async fn serve_spec(Extension(api): Extension<OpenApi>) -> Json<OpenApi> {
 /// Takes ownership of `config` so it can set string fields on the spec
 /// without lifetime issues from the `finish_api_with` closure.
 fn api_docs(mut api: TransformOpenApi<'_>, config: OpenApiConfig) -> TransformOpenApi<'_> {
-    api.inner_mut().info = aide::openapi::Info {
+    api.inner_mut().info = Info {
         title: config.title,
         version: config.version,
         description: config.description,
diff --git a/crates/nvisy-toolkit/src/deduplication/mod.rs b/crates/nvisy-toolkit/src/deduplication/mod.rs
index c5cb05ab..26b6784f 100644
--- a/crates/nvisy-toolkit/src/deduplication/mod.rs
+++ b/crates/nvisy-toolkit/src/deduplication/mod.rs
@@ -49,18 +49,22 @@ mod params;
 mod pipeline;
 mod span_size;
 
+#[cfg(test)]
+use std::marker::PhantomData;
+
+#[cfg(test)]
+use nvisy_core::extraction::TextAt;
+#[cfg(test)]
+use nvisy_core::modality::Modality;
+
 pub use self::layer::{Layer, LayerContext};
 pub use self::params::LayerParams;
 pub use self::pipeline::LayerPipeline;
 pub use self::span_size::SpanSize;
 
 #[cfg(test)]
-pub(crate) fn test_resolver<M: nvisy_core::modality::Modality>()
--> Box<dyn nvisy_core::extraction::TextAt<M>> {
-    use nvisy_core::extraction::TextAt;
-    use nvisy_core::modality::Modality;
-
-    struct Noop<M>(std::marker::PhantomData<M>);
+pub(crate) fn test_resolver<M: Modality>() -> Box<dyn TextAt<M>> {
+    struct Noop<M>(PhantomData<M>);
 
     #[async_trait::async_trait]
     impl<M: Modality> TextAt<M> for Noop<M> {
@@ -69,5 +73,5 @@ pub(crate) fn test_resolver<M: nvisy_core::modality::Modality>()
         }
     }
 
-    Box::new(Noop::<M>(std::marker::PhantomData))
+    Box::new(Noop::<M>(PhantomData))
 }
diff --git a/crates/nvisy-toolkit/src/extraction/ocr.rs b/crates/nvisy-toolkit/src/extraction/ocr.rs
index 9ae5595d..57c125af 100644
--- a/crates/nvisy-toolkit/src/extraction/ocr.rs
+++ b/crates/nvisy-toolkit/src/extraction/ocr.rs
@@ -7,6 +7,7 @@
 //! `nvisy_toolkit::extraction::ocr::backend::NoopBackend`, etc. are
 //! all reachable here.
 
+use nvisy_ocr::backend::OcrResponse;
 pub use nvisy_ocr::*;
 
 /// Output shape produced by every image-modality extractor.
@@ -17,4 +18,4 @@ pub use nvisy_ocr::*;
 /// rather than hiding behind generics.
 ///
 /// [`OcrResponse`]: nvisy_ocr::backend::OcrResponse
-pub type ImageExtractorOutput = nvisy_ocr::backend::OcrResponse;
+pub type ImageExtractorOutput = OcrResponse;
diff --git a/crates/nvisy-toolkit/src/redaction/registry.rs b/crates/nvisy-toolkit/src/redaction/registry.rs
index 7d3a56f0..5c783275 100644
--- a/crates/nvisy-toolkit/src/redaction/registry.rs
+++ b/crates/nvisy-toolkit/src/redaction/registry.rs
@@ -37,6 +37,7 @@
 //! [`Anonymizer<M>`]: super::Anonymizer
 
 use std::collections::HashMap;
+use std::fmt;
 use std::sync::Arc;
 
 use nvisy_core::Result;
@@ -214,8 +215,8 @@ impl<M: Modality> Clone for RedactionRegistry<M> {
     }
 }
 
-impl<M: Modality> std::fmt::Debug for RedactionRegistry<M> {
-    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+impl<M: Modality> fmt::Debug for RedactionRegistry<M> {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
         f.debug_struct("RedactionRegistry")
             .field("labels", &self.by_label.len())
             .field("ids", &self.by_id.len())
diff --git a/crates/nvisy-toolkit/tests/fixtures/pipeline.rs b/crates/nvisy-toolkit/tests/fixtures/pipeline.rs
index 9963227e..3004737c 100644
--- a/crates/nvisy-toolkit/tests/fixtures/pipeline.rs
+++ b/crates/nvisy-toolkit/tests/fixtures/pipeline.rs
@@ -6,6 +6,7 @@
 //! writes the redacted output next to the fixture as
 //! `{stem}.redacted.{ext}` for human inspection.
 
+use std::path::Path;
 use std::str::from_utf8;
 
 use nvisy_codec::{CodecRegistry, DocumentHandle};
@@ -130,7 +131,7 @@ impl Fixture {
     /// for human inspection. Gitignored under
     /// `**/testdata/**/*.redacted.*`.
     fn write_redacted_artifact(&self, redacted: &str) {
-        let path = std::path::Path::new(self.path);
+        let path = Path::new(self.path);
         let stem = path
             .file_stem()
             .and_then(|s| s.to_str())
diff --git a/crates/nvisy-toolkit/tests/recognition_registry.rs b/crates/nvisy-toolkit/tests/recognition_registry.rs
index 640afb74..7f2c1332 100644
--- a/crates/nvisy-toolkit/tests/recognition_registry.rs
+++ b/crates/nvisy-toolkit/tests/recognition_registry.rs
@@ -22,7 +22,7 @@
 
 use std::env;
 
-use nvisy_core::entity::Entity;
+use nvisy_core::entity::{Entity, EntityLabelCatalog};
 use nvisy_core::modality::{Text, TextData};
 use nvisy_core::recognition::RecognizerInput;
 use nvisy_llm::backend::rig::RigBackend;
@@ -57,7 +57,7 @@ fn build_registry() -> RecognizerRegistry {
         .with_name("ner")
         .with_engine(bento_backend)
         .with_supported_labels(
-            nvisy_core::entity::EntityLabelCatalog::with_builtins()
+            EntityLabelCatalog::with_builtins()
                 .iter()
                 .map(|l| l.label_ref())
                 .collect::<Vec<_>>(),

From de31318e521878f991e292f3f1a9f740d61c0e0b Mon Sep 17 00:00:00 2001
From: Oleh Martsokha <o.martsokha@gmail.com>
Date: Tue, 16 Jun 2026 11:13:56 +0200
Subject: [PATCH 14/14] fix(codec): collapse nested if in
 nearest_block_ancestor

CI clippy on Rust 1.95 flags the nested
  if let Node::Element(e) = node.value() {
      if is_block_element(...) { ... }
  }
as `clippy::collapsible_if`. Combine the two conditions with `&&`
so the `if let` pattern and the boolean check form a single let chain.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
---
 crates/nvisy-codec/src/handler/text/html_loader.rs | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/crates/nvisy-codec/src/handler/text/html_loader.rs b/crates/nvisy-codec/src/handler/text/html_loader.rs
index ba255872..1667de01 100644
--- a/crates/nvisy-codec/src/handler/text/html_loader.rs
+++ b/crates/nvisy-codec/src/handler/text/html_loader.rs
@@ -219,10 +219,10 @@ fn sibling_text_hint(text_node: NodeRef<'_, Node>, own_text: &str) -> Vec<String
 fn nearest_block_ancestor(text_node: NodeRef<'_, Node>) -> Option<NodeRef<'_, Node>> {
     let mut current = text_node.parent();
     while let Some(node) = current {
-        if let Node::Element(e) = node.value() {
-            if is_block_element(e.name.local.as_ref()) {
-                return Some(node);
-            }
+        if let Node::Element(e) = node.value()
+            && is_block_element(e.name.local.as_ref())
+        {
+            return Some(node);
         }
         current = node.parent();
     }