From e8cd7d286f1ebcdeac7d8d39a75a8d2fd48388bb Mon Sep 17 00:00:00 2001 From: Oleh Martsokha Date: Sat, 13 Jun 2026 12:17:26 +0200 Subject: [PATCH 1/2] refactor(engine): inline policies on DetectionInput; drop /policies resource MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Policies stop being a persisted resource: the caller submits the full inline policy bodies on every detect call, the engine snapshots digests onto the persisted detection record, and the audit references rules by `(policy_name, rule_name)` instead of `(policy_id, rank)`. New types in `nvisy_engine::policy`: - `AnyPolicy` — modality-erased sum (Text/Tabular/Image/Audio) mirroring `AnyAudit`. Submitted inline on every detect call. - `PolicyDigest { name, version }` — header card stored on `DetectionResult`. No rule bodies. - `PolicyDecisionRef { policy_name, rule_name: Option }` — stamped on every policy-driven `AuditEntry`. `rule_name` is `None` when the policy's `default_action` fallback fired. - `validate_policy_namespace` — runs at the top of `Engine::detect`, rejects duplicate policy names within a submission and duplicate rule names within a policy. The audit's identity-by-name guarantee depends on these invariants. `Policy::id: Uuid` replaced with `Policy::name: HipStr<'static>`. `PolicyRule` gains a required `name: HipStr<'static>` field. `RuleRank` deleted. `HipStr` chosen for the names so audit-heavy passes share refcounts rather than allocating per-entity. Server-side: - `/api/v1/policies[/{id}]` removed (4 endpoints + request/response types + path helper + `service/mod.rs` doc trail). - `POST /api/v1/detections` body's `policies: Vec` becomes `policies: Vec` — inline submission only. Registry-side: - `register_policy`/`read_policy`/`unregister_policy`/ `unregister_all_policies`/`list_policies`/ `list_policies_with_summary` deleted. - `policies_ks` Fjall keyspace + `policy_cache` removed. - `ResourceCache`/`ResourceGuard` deleted (policy persistence was its only consumer). The detect→redact handoff carries the full `Vec` through an internal `DetectionHandoff` shape so the redact pipeline can re-evaluate operator specs without the caller re-submitting them. After a process restart the in-memory record is gone and the redaction can't proceed — that was already the existing semantic; this PR just makes it explicit. Co-Authored-By: Claude Opus 4.7 --- Cargo.lock | 1 + Cargo.toml | 2 +- crates/nvisy-engine/Cargo.toml | 6 + crates/nvisy-engine/src/core/policy_store.rs | 51 +-- .../src/document/provenance/entry.rs | 10 +- .../nvisy-engine/src/phases/redaction/mod.rs | 17 +- .../src/pipeline/detection/input.rs | 9 +- .../src/pipeline/detection/mod.rs | 2 +- .../src/pipeline/detection/pipeline.rs | 58 +-- .../src/pipeline/detection/result.rs | 12 +- .../src/pipeline/detection/state.rs | 40 +- crates/nvisy-engine/src/pipeline/engine.rs | 3 + .../src/pipeline/redaction/applicator.rs | 17 +- .../src/pipeline/redaction/pipeline.rs | 34 +- crates/nvisy-engine/src/policy/mod.rs | 279 ++++++++++--- crates/nvisy-engine/src/policy/rule.rs | 14 +- crates/nvisy-engine/src/registry/mod.rs | 15 +- .../src/registry/registry_store.rs | 112 ------ .../src/registry/resource_cache.rs | 366 ------------------ crates/nvisy-engine/tests/redaction_policy.rs | 3 +- crates/nvisy-server/src/handler/mod.rs | 11 +- crates/nvisy-server/src/handler/policies.rs | 177 --------- .../src/handler/request/detections.rs | 8 +- .../nvisy-server/src/handler/request/mod.rs | 4 +- .../nvisy-server/src/handler/request/path.rs | 7 - .../src/handler/request/policies.rs | 14 - .../nvisy-server/src/handler/response/mod.rs | 2 - .../src/handler/response/policies.rs | 44 --- 28 files changed, 408 insertions(+), 910 deletions(-) delete mode 100644 crates/nvisy-engine/src/registry/resource_cache.rs delete mode 100644 crates/nvisy-server/src/handler/policies.rs delete mode 100644 crates/nvisy-server/src/handler/request/policies.rs delete mode 100644 crates/nvisy-server/src/handler/response/policies.rs diff --git a/Cargo.lock b/Cargo.lock index ba457817..d5056aea 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2924,6 +2924,7 @@ dependencies = [ "derive_more", "fjall", "futures", + "hipstr", "humantime-serde", "jiff", "nvisy-codec", diff --git a/Cargo.toml b/Cargo.toml index bd0ccd49..9a236f0f 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -88,7 +88,7 @@ strum = { version = "0.28", features = ["derive"] } # Primitive datatypes uuid = { version = "1.23", features = ["serde", "v4", "v7"] } bytes = { version = "1.0", features = ["serde"] } -hipstr = { version = "0.8", features = [] } +hipstr = { version = "0.8", features = ["serde"] } jiff = { version = "0.2", features = ["serde"] } semver = { version = "1.0", features = ["serde"] } oxilangtag = { version = "0.1", features = ["serde"] } diff --git a/crates/nvisy-engine/Cargo.toml b/crates/nvisy-engine/Cargo.toml index 753d6d8f..ff30c194 100644 --- a/crates/nvisy-engine/Cargo.toml +++ b/crates/nvisy-engine/Cargo.toml @@ -56,6 +56,12 @@ fjall = { workspace = true, features = [] } # Encoding base64 = { workspace = true, features = [] } +# Cheap-clone strings (refcounted / inline / borrowed). Used on +# policy / rule names and the audit's [`PolicyDecisionRef`] so +# audit-heavy passes share refcounts rather than allocating +# per-entity. +hipstr = { workspace = true, features = [] } + # (De)serialization serde = { workspace = true, features = [] } serde_json = { workspace = true, features = [] } diff --git a/crates/nvisy-engine/src/core/policy_store.rs b/crates/nvisy-engine/src/core/policy_store.rs index a712cd32..1609be44 100644 --- a/crates/nvisy-engine/src/core/policy_store.rs +++ b/crates/nvisy-engine/src/core/policy_store.rs @@ -17,13 +17,13 @@ use std::sync::Arc; +use hipstr::HipStr; use nvisy_codec::content::ContentDescriptor; use nvisy_core::entity::Entity; use type_map::concurrent::TypeMap; -use uuid::Uuid; use crate::modality::DocumentModality; -use crate::policy::{Action, Condition, Policy, PolicyRule, RuleRank}; +use crate::policy::{Action, Condition, Policy, PolicyRule}; /// Heterogeneous container of policies across all modalities, /// stored as `Arc>` so that multiple per-run stores can @@ -102,36 +102,36 @@ impl PolicyStore { document_labels: &[&str], descriptor: &ContentDescriptor, ) -> Decision { - for (policy_idx, policy) in self.get::().iter().enumerate() { - let policy_index = u32::try_from(policy_idx).unwrap_or(u32::MAX); - for (rule_idx, rule) in policy.rules.iter().enumerate() { + for policy in self.get::() { + for rule in &policy.rules { if !rule_matches(rule, entity, document_labels, descriptor) { continue; } - let rule_index = u32::try_from(rule_idx).unwrap_or(u32::MAX); - let rank = RuleRank::new(policy_index, rule_index); + let policy_name = policy.name.clone(); + let rule_name = Some(rule.name.clone()); return match &rule.action { Action::Redact { operator } => Decision::Redact { - policy_id: policy.id, - rank, + policy_name, + rule_name, operator: operator.clone(), }, Action::Suppress => Decision::Suppress { - policy_id: policy.id, - rank, + policy_name, + rule_name, }, }; } if let Some(default) = policy.default_action.as_ref() { + let policy_name = policy.name.clone(); return match default { Action::Redact { operator } => Decision::Redact { - policy_id: policy.id, - rank: RuleRank::for_default(policy_index), + policy_name, + rule_name: None, operator: operator.clone(), }, Action::Suppress => Decision::Suppress { - policy_id: policy.id, - rank: RuleRank::for_default(policy_index), + policy_name, + rule_name: None, }, }; } @@ -164,15 +164,20 @@ impl std::fmt::Debug for PolicyStore { /// [`RedactionRegistry`]: nvisy_toolkit::redaction::RedactionRegistry pub(crate) enum Decision { /// A rule chose to redact. `operator` is the per-modality - /// operator spec the winning rule carried; `rank` locates the - /// producing rule inside the chain. + /// operator spec the winning rule carried; `policy_name` + + /// `rule_name` locate the producing rule. `rule_name` is `None` + /// when the policy's `default_action` fallback fired. Redact { - policy_id: Uuid, - rank: RuleRank, + policy_name: HipStr<'static>, + rule_name: Option>, operator: M::Redaction, }, /// A `Suppress` rule fired; the caller records the suppression. - Suppress { policy_id: Uuid, rank: RuleRank }, + /// Same naming semantics as [`Decision::Redact`]. + Suppress { + policy_name: HipStr<'static>, + rule_name: Option>, + }, /// No policy in the chain produced a decision. The caller falls /// back to its default-threshold path. Fallthrough, @@ -223,8 +228,7 @@ mod tests { fn text_policy() -> Arc> { Arc::new(Policy:: { - id: uuid::Uuid::nil(), - name: "test".into(), + name: HipStr::from("test"), version: Version::new(1, 0, 0), description: None, rules: Vec::new(), @@ -235,8 +239,7 @@ mod tests { fn image_policy() -> Arc> { Arc::new(Policy:: { - id: uuid::Uuid::nil(), - name: "test".into(), + name: HipStr::from("test"), version: Version::new(1, 0, 0), description: None, rules: Vec::new(), diff --git a/crates/nvisy-engine/src/document/provenance/entry.rs b/crates/nvisy-engine/src/document/provenance/entry.rs index 8d59b826..db83f250 100644 --- a/crates/nvisy-engine/src/document/provenance/entry.rs +++ b/crates/nvisy-engine/src/document/provenance/entry.rs @@ -19,7 +19,7 @@ use uuid::Uuid; use super::override_decision::RedactionDecision; use crate::modality::DocumentModality; -use crate::policy::{Action, RuleRank}; +use crate::policy::{Action, PolicyDecisionRef}; /// A per-entity redaction record produced during a pipeline run. /// @@ -64,15 +64,11 @@ impl AuditEntry { #[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)] #[serde(rename_all = "camelCase")] pub struct Decision { - /// Identifier of the policy that produced this decision. `None` + /// Reference to the rule that produced this decision. `None` /// when the decision came from a source outside the policy chain /// (e.g. the default-threshold fallback path). #[serde(default, skip_serializing_if = "Option::is_none")] - pub policy_id: Option, - /// Position of the producing rule in the per-run policy chain. - /// `None` for non-policy-driven decisions. - #[serde(default, skip_serializing_if = "Option::is_none")] - pub rank: Option, + pub policy_ref: Option, /// The action the matching rule picked (with its operator spec /// for [`Action::Redact`]). pub action: Action, diff --git a/crates/nvisy-engine/src/phases/redaction/mod.rs b/crates/nvisy-engine/src/phases/redaction/mod.rs index 13368fb4..89fb75f7 100644 --- a/crates/nvisy-engine/src/phases/redaction/mod.rs +++ b/crates/nvisy-engine/src/phases/redaction/mod.rs @@ -49,7 +49,7 @@ use crate::document::provenance::{ AuditEntry, Decision as AuditDecision, EntryMetadata, Execution, }; use crate::modality::DocumentModality; -use crate::policy::Action; +use crate::policy::{Action, PolicyDecisionRef}; use crate::policy::redaction::Instantiate; pub(crate) const TARGET: &str = "nvisy_engine::redaction"; @@ -108,22 +108,23 @@ where let decision = policies.resolve::(&record.entity, &document_labels, descriptor); let entry = match decision { Decision::Redact { - policy_id, - rank, + policy_name, + rule_name, operator, } => AuditEntry { decision: AuditDecision { - policy_id: Some(policy_id), - rank: Some(rank), + policy_ref: Some(PolicyDecisionRef::new(policy_name, rule_name)), action: Action::Redact { operator }, }, execution: Execution::Pending, metadata: EntryMetadata::now(), }, - Decision::Suppress { policy_id, rank } => AuditEntry { + Decision::Suppress { + policy_name, + rule_name, + } => AuditEntry { decision: AuditDecision { - policy_id: Some(policy_id), - rank: Some(rank), + policy_ref: Some(PolicyDecisionRef::new(policy_name, rule_name)), action: Action::Suppress, }, execution: Execution::Suppressed, diff --git a/crates/nvisy-engine/src/pipeline/detection/input.rs b/crates/nvisy-engine/src/pipeline/detection/input.rs index 08a2c9be..739ab7e3 100644 --- a/crates/nvisy-engine/src/pipeline/detection/input.rs +++ b/crates/nvisy-engine/src/pipeline/detection/input.rs @@ -6,6 +6,7 @@ use uuid::Uuid; use crate::phases::ingestion::ImportFile; use crate::pipeline::Plan; +use crate::policy::AnyPolicy; /// Input required to execute a detection pass. /// @@ -22,9 +23,11 @@ use crate::pipeline::Plan; pub struct DetectionInput { /// Identity of the human or service account initiating the run. pub actor_id: Uuid, - /// Previously uploaded policies to apply, in precedence order: - /// index `0` is highest precedence. - pub policies: Vec, + /// Policies to apply, in precedence order: index `0` is highest + /// precedence. Submitted inline with their full rule bodies — + /// the engine does not persist policies as a resource. Callers + /// reuse policies by re-submitting the same bytes. + pub policies: Vec, /// Content sources to ingest at the start of the run. pub imports: Vec, /// Per-phase behaviour knobs the pipeline reads for each diff --git a/crates/nvisy-engine/src/pipeline/detection/mod.rs b/crates/nvisy-engine/src/pipeline/detection/mod.rs index e7980f73..fe0a4fb7 100644 --- a/crates/nvisy-engine/src/pipeline/detection/mod.rs +++ b/crates/nvisy-engine/src/pipeline/detection/mod.rs @@ -24,7 +24,7 @@ mod state; mod status; pub use self::input::DetectionInput; -pub(crate) use self::pipeline::{DetectionEngineState, DetectionPipeline}; +pub(crate) use self::pipeline::{DetectionEngineState, DetectionPipeline, build_policy_store}; pub use self::result::{DetectionEntry, DetectionFilter, DetectionResult, DetectionSnapshot}; pub(crate) use self::state::DetectionState; pub use self::status::DetectionStatus; diff --git a/crates/nvisy-engine/src/pipeline/detection/pipeline.rs b/crates/nvisy-engine/src/pipeline/detection/pipeline.rs index 30f03e0c..04c4709e 100644 --- a/crates/nvisy-engine/src/pipeline/detection/pipeline.rs +++ b/crates/nvisy-engine/src/pipeline/detection/pipeline.rs @@ -11,7 +11,6 @@ use std::sync::Arc; use jiff::Timestamp; use nvisy_codec::CodecRegistry; use nvisy_core::Error; -use nvisy_core::modality::Text; use nvisy_toolkit::detection::RecognizerRegistry; use nvisy_toolkit::extraction::ExtractorRegistry; use tokio_util::sync::CancellationToken; @@ -28,7 +27,7 @@ use crate::phases::ingestion::encryption::SharedKeyProvider; use crate::phases::redaction::RedactionRegistries; use crate::pipeline::RedactionConfig; use crate::pipeline::config::RuntimeConfig; -use crate::policy::Policy; +use crate::policy::AnyPolicy; use crate::registry::Registry; const TARGET: &str = "nvisy_engine::pipeline::detection::pipeline"; @@ -101,14 +100,8 @@ impl DetectionPipeline { input: DetectionInput, ) -> Result<(Vec, u64, DetectionStatus), Error> { let actor_id = input.actor_id; - let policy_ids = input.policies.clone(); - let _policy_guard = self.acquire_policies(actor_id, &policy_ids).await; - let cached_policies = self.registry.policy_cache().resolve(&policy_ids).await; - let text_policies: Vec>> = cached_policies; - - let mut policy_store = PolicyStore::new(); - policy_store.set::(text_policies); + let policy_store = build_policy_store(&input.policies); let mut shared_data = SharedData { run_id: self.detection_id, @@ -159,7 +152,7 @@ impl DetectionPipeline { let result = DetectionResult { id: self.detection_id, actor_id, - policies: input.policies.clone(), + policies: input.policies.iter().map(AnyPolicy::digest).collect(), imports: input.imports.clone(), audits: audits.clone(), entities_detected, @@ -193,22 +186,33 @@ impl DetectionPipeline { Ok((audits, entities_detected, status)) } - async fn acquire_policies( - &self, - actor_id: Uuid, - policy_ids: &[Uuid], - ) -> crate::registry::ResourceGuard> { - self.registry - .policy_cache() - .acquire(policy_ids, |id| async move { - match self.registry.read_policy(actor_id, id).await { - Ok(policy) => Some(policy), - Err(e) => { - tracing::warn!(%id, error = %e, "failed to load policy"); - None - } - } - }) - .await +} + +/// Distribute the modality-erased policies the caller submitted +/// into the typed [`PolicyStore`] buckets the evaluator reads at +/// rule-resolution time. One [`Arc::new`] per policy — the original +/// `AnyPolicy` values are kept alive through the detection record; +/// the store holds independent Arcs purely for the per-run hot +/// path. +pub(crate) fn build_policy_store(policies: &[AnyPolicy]) -> PolicyStore { + use crate::modality::{Audio, Image, Tabular, Text}; + + let mut text = Vec::new(); + let mut tabular = Vec::new(); + let mut image = Vec::new(); + let mut audio = Vec::new(); + for any in policies { + match any { + AnyPolicy::Text(p) => text.push(Arc::new(p.clone())), + AnyPolicy::Tabular(p) => tabular.push(Arc::new(p.clone())), + AnyPolicy::Image(p) => image.push(Arc::new(p.clone())), + AnyPolicy::Audio(p) => audio.push(Arc::new(p.clone())), + } } + let mut store = PolicyStore::new(); + store.set::(text); + store.set::(tabular); + store.set::(image); + store.set::