diff --git a/Cargo.lock b/Cargo.lock index ba457817..d5056aea 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2924,6 +2924,7 @@ dependencies = [ "derive_more", "fjall", "futures", + "hipstr", "humantime-serde", "jiff", "nvisy-codec", diff --git a/Cargo.toml b/Cargo.toml index bd0ccd49..9a236f0f 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -88,7 +88,7 @@ strum = { version = "0.28", features = ["derive"] } # Primitive datatypes uuid = { version = "1.23", features = ["serde", "v4", "v7"] } bytes = { version = "1.0", features = ["serde"] } -hipstr = { version = "0.8", features = [] } +hipstr = { version = "0.8", features = ["serde"] } jiff = { version = "0.2", features = ["serde"] } semver = { version = "1.0", features = ["serde"] } oxilangtag = { version = "0.1", features = ["serde"] } diff --git a/crates/nvisy-engine/Cargo.toml b/crates/nvisy-engine/Cargo.toml index 753d6d8f..ff30c194 100644 --- a/crates/nvisy-engine/Cargo.toml +++ b/crates/nvisy-engine/Cargo.toml @@ -56,6 +56,12 @@ fjall = { workspace = true, features = [] } # Encoding base64 = { workspace = true, features = [] } +# Cheap-clone strings (refcounted / inline / borrowed). Used on +# policy / rule names and the audit's [`PolicyDecisionRef`] so +# audit-heavy passes share refcounts rather than allocating +# per-entity. +hipstr = { workspace = true, features = [] } + # (De)serialization serde = { workspace = true, features = [] } serde_json = { workspace = true, features = [] } diff --git a/crates/nvisy-engine/src/core/policy_store.rs b/crates/nvisy-engine/src/core/policy_store.rs index a712cd32..14b95157 100644 --- a/crates/nvisy-engine/src/core/policy_store.rs +++ b/crates/nvisy-engine/src/core/policy_store.rs @@ -2,136 +2,121 @@ //! modality, backed by a [`TypeMap`], plus the per-entity decision //! resolver that walks it. //! -//! `Policy` is generic over its modality; engine state ([`SharedData`]) -//! needs to hold policies for any modality without exposing a generic -//! 
surface or a fixed per-modality field set. `PolicyStore` provides -//! a single uniform container with typed `insert`/`get`/`len` -//! accessors parameterised over `M`, and a [`PolicyStore::resolve`] -//! method that walks the per-modality chain to pick a [`Decision`] -//! for a single entity. +//! Built from a `Vec<AnyPolicy>` submission via +//! [`PolicyStore::from_any_policies`], which consumes the submitted +//! policies and wraps each in an `Arc<Policy<M>>` — no deep clones. +//! Detection and redaction pipelines share a single store via +//! `Arc<PolicyStore>`; per-call handoff is a refcount bump. //! -//! Internally one `Vec<Policy<M>>` is stored per modality; lookups -//! cost a single `TypeId` hash. +//! Internally one `Vec<Arc<Policy<M>>>` is stored per modality; +//! lookups cost a single `TypeId` hash. The only crate-public +//! operation is [`PolicyStore::resolve`]. //! //! [`SharedData`]: super::SharedData use std::sync::Arc; +use hipstr::HipStr; use nvisy_codec::content::ContentDescriptor; use nvisy_core::entity::Entity; use type_map::concurrent::TypeMap; -use uuid::Uuid; use crate::modality::DocumentModality; -use crate::policy::{Action, Condition, Policy, PolicyRule, RuleRank}; +use crate::policy::{Action, AnyPolicy, Condition, Policy, PolicyRule}; /// Heterogeneous container of policies across all modalities, -/// stored as `Arc<Policy<M>>` so that multiple per-run stores can -/// share the same loaded policy instances cheaply (the registry's -/// cross-run cache hands out `Arc<Policy<M>>` clones). +/// stored as `Arc<Policy<M>>` so handoff between detection and +/// redaction pipelines is a refcount bump rather than a deep clone. /// /// # Type-safe per-modality storage /// /// Backed by `type_map::TypeMap`, which stores at most one value -/// per concrete type. The accessors are parameterised over `M`, so -/// `insert::<Text>(...)` and `insert::<Image>(...)` go into -/// independent buckets; `get::<Text>()` is statically guaranteed -/// to return text policies — there is no way to retrieve an -/// `Image` bucket through a `Text` type parameter. 
The compiler -/// rejects mismatched-modality calls at the call site, not at -/// runtime. +/// per concrete type. The crate-internal `resolve::` method +/// looks the right bucket up by `TypeId`, so adding a new modality +/// is purely an `AnyPolicy::NewM(...)` arm in the crate-internal +/// constructor — no hardcoded fields or per-modality methods to +/// maintain. #[derive(Default)] pub struct PolicyStore { inner: TypeMap, } impl PolicyStore { - /// Create an empty store. - pub fn new() -> Self { - Self::default() - } + /// Construct a store from a `Vec` submission, taking + /// ownership of the policies (so each [`Policy`] is moved + /// straight into its [`Arc`] — no deep clone). + pub(crate) fn from_any_policies(policies: Vec) -> Self { + use crate::modality::{Audio, Image, Tabular, Text}; - /// Append a policy for modality `M`. Order within a modality is - /// preserved (callers feed policies in precedence order). The - /// policy is held by [`Arc`] so it can also live in the - /// registry's cross-run cache without copying. - pub fn insert(&mut self, policy: Arc>) { - self.bucket_mut::().push(policy); + let mut store = Self::default(); + for any in policies { + match any { + AnyPolicy::Text(p) => store.push::(Arc::new(p)), + AnyPolicy::Tabular(p) => store.push::(Arc::new(p)), + AnyPolicy::Image(p) => store.push::(Arc::new(p)), + AnyPolicy::Audio(p) => store.push::