Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@ strum = { version = "0.28", features = ["derive"] }
# Primitive datatypes
uuid = { version = "1.23", features = ["serde", "v4", "v7"] }
bytes = { version = "1.0", features = ["serde"] }
hipstr = { version = "0.8", features = [] }
hipstr = { version = "0.8", features = ["serde"] }
jiff = { version = "0.2", features = ["serde"] }
semver = { version = "1.0", features = ["serde"] }
oxilangtag = { version = "0.1", features = ["serde"] }
Expand Down
6 changes: 6 additions & 0 deletions crates/nvisy-engine/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,12 @@ fjall = { workspace = true, features = [] }
# Encoding
base64 = { workspace = true, features = [] }

# Cheap-clone strings (refcounted / inline / borrowed). Used on
# policy / rule names and the audit's [`PolicyDecisionRef`] so
# audit-heavy passes share refcounts rather than allocating
# per-entity.
hipstr = { workspace = true, features = [] }

# (De)serialization
serde = { workspace = true, features = [] }
serde_json = { workspace = true, features = [] }
Expand Down
194 changes: 84 additions & 110 deletions crates/nvisy-engine/src/core/policy_store.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,136 +2,121 @@
//! modality, backed by a [`TypeMap`], plus the per-entity decision
//! resolver that walks it.
//!
//! `Policy<M>` is generic over its modality; engine state ([`SharedData`])
//! needs to hold policies for any modality without exposing a generic
//! surface or a fixed per-modality field set. `PolicyStore` provides
//! a single uniform container with typed `insert`/`get`/`len`
//! accessors parameterised over `M`, and a [`PolicyStore::resolve`]
//! method that walks the per-modality chain to pick a [`Decision`]
//! for a single entity.
//! Built from a `Vec<AnyPolicy>` submission via
//! [`PolicyStore::from_any_policies`], which consumes the submitted
//! policies and wraps each in an `Arc<Policy<M>>` — no deep clones.
//! Detection and redaction pipelines share a single store via
//! `Arc<PolicyStore>`; per-call handoff is a refcount bump.
//!
//! Internally one `Vec<Policy<M>>` is stored per modality; lookups
//! cost a single `TypeId` hash.
//! Internally one `Vec<Arc<Policy<M>>>` is stored per modality;
//! lookups cost a single `TypeId` hash. The only crate-public
//! operation is [`PolicyStore::resolve`].
//!
//! [`SharedData`]: super::SharedData

use std::sync::Arc;

use hipstr::HipStr;
use nvisy_codec::content::ContentDescriptor;
use nvisy_core::entity::Entity;
use type_map::concurrent::TypeMap;
use uuid::Uuid;

use crate::modality::DocumentModality;
use crate::policy::{Action, Condition, Policy, PolicyRule, RuleRank};
use crate::policy::{Action, AnyPolicy, Condition, Policy, PolicyRule};

/// Heterogeneous container of policies across all modalities,
/// stored as `Arc<Policy<M>>` so that multiple per-run stores can
/// share the same loaded policy instances cheaply (the registry's
/// cross-run cache hands out `Arc<Policy<M>>` clones).
/// stored as `Arc<Policy<M>>` so handoff between detection and
/// redaction pipelines is a refcount bump rather than a deep clone.
///
/// # Type-safe per-modality storage
///
/// Backed by `type_map::TypeMap`, which stores at most one value
/// per concrete type. The accessors are parameterised over `M`, so
/// `insert::<Text>(...)` and `insert::<Image>(...)` go into
/// independent buckets; `get::<Text>()` is statically guaranteed
/// to return text policies — there is no way to retrieve an
/// `Image` bucket through a `Text` type parameter. The compiler
/// rejects mismatched-modality calls at the call site, not at
/// runtime.
/// per concrete type. The crate-internal `resolve::<M>` method
/// looks the right bucket up by `TypeId`, so adding a new modality
/// is purely an `AnyPolicy::NewM(...)` arm in the crate-internal
/// constructor — no hardcoded fields or per-modality methods to
/// maintain.
#[derive(Default)]
pub struct PolicyStore {
// Type-keyed map; `chain` reads one `Vec<Arc<Policy<M>>>` entry per
// modality `M` out of it, so each modality has its own bucket.
inner: TypeMap,
}

impl PolicyStore {
/// Create an empty store.
pub fn new() -> Self {
Self::default()
}
/// Construct a store from a `Vec<AnyPolicy>` submission, taking
/// ownership of the policies (so each [`Policy<M>`] is moved
/// straight into its [`Arc`] — no deep clone).
pub(crate) fn from_any_policies(policies: Vec<AnyPolicy>) -> Self {
use crate::modality::{Audio, Image, Tabular, Text};

/// Append a policy for modality `M`. Order within a modality is
/// preserved (callers feed policies in precedence order). The
/// policy is held by [`Arc`] so it can also live in the
/// registry's cross-run cache without copying.
pub fn insert<M: DocumentModality>(&mut self, policy: Arc<Policy<M>>) {
self.bucket_mut::<M>().push(policy);
let mut store = Self::default();
for any in policies {
match any {
AnyPolicy::Text(p) => store.push::<Text>(Arc::new(p)),
AnyPolicy::Tabular(p) => store.push::<Tabular>(Arc::new(p)),
AnyPolicy::Image(p) => store.push::<Image>(Arc::new(p)),
AnyPolicy::Audio(p) => store.push::<Audio>(Arc::new(p)),
}
}
store
}

/// Replace the policy stack for modality `M`.
pub fn set<M: DocumentModality>(&mut self, policies: Vec<Arc<Policy<M>>>) {
self.inner.insert::<Vec<Arc<Policy<M>>>>(policies);
fn push<M: DocumentModality>(&mut self, policy: Arc<Policy<M>>) {
    // `bucket_mut` is defined outside this view; presumably it hands back
    // the mutable per-modality bucket for `M` (confirm against its body).
    let bucket = self.bucket_mut::<M>();
    bucket.push(policy);
}

/// Borrow the policy stack for modality `M`. Returns an empty
/// slice when no policies of that modality have been inserted.
/// Each element is an `Arc<Policy<M>>` — deref through it to
/// read fields.
pub fn get<M: DocumentModality>(&self) -> &[Arc<Policy<M>>] {
fn chain<M: DocumentModality>(&self) -> &[Arc<Policy<M>>] {
    // A modality that never received a policy has no entry in the map;
    // callers see that case as an empty slice rather than an `Option`.
    match self.inner.get::<Vec<Arc<Policy<M>>>>() {
        Some(bucket) => bucket.as_slice(),
        None => &[],
    }
}

/// Number of policies stored for modality `M`.
pub fn len<M: DocumentModality>(&self) -> usize {
self.get::<M>().len()
}

/// `true` when no policies for modality `M` are stored.
pub fn is_empty<M: DocumentModality>(&self) -> bool {
self.get::<M>().is_empty()
}

/// Resolve a single entity against the per-modality policy
/// chain. Walks layers in precedence order; within a layer,
/// chain. Walks policies in precedence order; within a policy,
/// walks rules in declaration order. First matching rule wins;
/// when no rule in a layer matches, falls back to that layer's
/// [`Policy::default_action`] (if any) before descending to
/// the next layer.
/// when no rule in a policy matches, falls back to that policy's
/// [`Policy::default_action`] (if any) before descending to the
/// next policy.
///
/// Returns [`Decision::Fallthrough`] when no policy in the
/// chain produced a decision; the caller's default-threshold
/// path takes over. Crate-internal — the evaluator in
/// `redaction::evaluate` is the only caller.
/// path takes over.
pub(crate) fn resolve<M: DocumentModality>(
&self,
entity: &Entity<M>,
document_labels: &[&str],
descriptor: &ContentDescriptor,
) -> Decision<M> {
for (policy_idx, policy) in self.get::<M>().iter().enumerate() {
let policy_index = u32::try_from(policy_idx).unwrap_or(u32::MAX);
for (rule_idx, rule) in policy.rules.iter().enumerate() {
for policy in self.chain::<M>() {
for rule in &policy.rules {
if !rule_matches(rule, entity, document_labels, descriptor) {
continue;
}
let rule_index = u32::try_from(rule_idx).unwrap_or(u32::MAX);
let rank = RuleRank::new(policy_index, rule_index);
let policy_name = policy.name.clone();
let rule_name = Some(rule.name.clone());
return match &rule.action {
Action::Redact { operator } => Decision::Redact {
policy_id: policy.id,
rank,
policy_name,
rule_name,
operator: operator.clone(),
},
Action::Suppress => Decision::Suppress {
policy_id: policy.id,
rank,
policy_name,
rule_name,
},
};
}
if let Some(default) = policy.default_action.as_ref() {
let policy_name = policy.name.clone();
return match default {
Action::Redact { operator } => Decision::Redact {
policy_id: policy.id,
rank: RuleRank::for_default(policy_index),
policy_name,
rule_name: None,
operator: operator.clone(),
},
Action::Suppress => Decision::Suppress {
policy_id: policy.id,
rank: RuleRank::for_default(policy_index),
policy_name,
rule_name: None,
},
};
}
Expand Down Expand Up @@ -164,15 +149,20 @@ impl std::fmt::Debug for PolicyStore {
/// [`RedactionRegistry<M>`]: nvisy_toolkit::redaction::RedactionRegistry
pub(crate) enum Decision<M: DocumentModality> {
/// A rule chose to redact. `operator` is the per-modality
/// operator spec the winning rule carried; `rank` locates the
/// producing rule inside the chain.
/// operator spec the winning rule carried; `policy_name` +
/// `rule_name` locate the producing rule. `rule_name` is `None`
/// when the policy's `default_action` fallback fired.
Redact {
policy_id: Uuid,
rank: RuleRank,
policy_name: HipStr<'static>,
rule_name: Option<HipStr<'static>>,
operator: M::Redaction,
},
/// A `Suppress` rule fired; the caller records the suppression.
Suppress { policy_id: Uuid, rank: RuleRank },
/// Same naming semantics as [`Decision::Redact`].
Suppress {
policy_name: HipStr<'static>,
rule_name: Option<HipStr<'static>>,
},
/// No policy in the chain produced a decision. The caller falls
/// back to its default-threshold path.
Fallthrough,
Expand Down Expand Up @@ -221,63 +211,47 @@ mod tests {

use super::*;

fn text_policy() -> Arc<Policy<Text>> {
Arc::new(Policy::<Text> {
id: uuid::Uuid::nil(),
name: "test".into(),
fn text_policy(name: &str) -> Policy<Text> {
Policy::<Text> {
name: HipStr::from(name),
version: Version::new(1, 0, 0),
description: None,
rules: Vec::new(),
default_action: None,
retention: Vec::new(),
})
}
}

fn image_policy() -> Arc<Policy<Image>> {
Arc::new(Policy::<Image> {
id: uuid::Uuid::nil(),
name: "test".into(),
fn image_policy(name: &str) -> Policy<Image> {
Policy::<Image> {
name: HipStr::from(name),
version: Version::new(1, 0, 0),
description: None,
rules: Vec::new(),
default_action: None,
retention: Vec::new(),
})
}

#[test]
fn empty_store_returns_empty_slice() {
let store = PolicyStore::new();
assert!(store.get::<Text>().is_empty());
assert_eq!(store.len::<Text>(), 0);
}

#[test]
fn insert_and_get_per_modality() {
let mut store = PolicyStore::new();
store.insert(text_policy());
store.insert(image_policy());
store.insert(text_policy());
assert_eq!(store.len::<Text>(), 2);
assert_eq!(store.len::<Image>(), 1);
}

#[test]
fn set_replaces_bucket() {
let mut store = PolicyStore::new();
store.insert(text_policy());
store.set::<Text>(vec![text_policy(), text_policy(), text_policy()]);
assert_eq!(store.len::<Text>(), 3);
}
}

#[test]
fn resolve_empty_chain_returns_fallthrough() {
let store = PolicyStore::new();
fn empty_chain_returns_fallthrough() {
let store = PolicyStore::default();
let entity = Entity::<Text>::test_builder(0, 4).test_build();
let descriptor = ContentDescriptor::new();
assert!(matches!(
store.resolve::<Text>(&entity, &[], &descriptor),
Decision::Fallthrough
));
}

#[test]
fn from_any_policies_partitions_by_modality() {
    // The submission interleaves modalities on purpose: two text policies
    // with an image policy between them.
    let submitted = vec![
        AnyPolicy::Text(text_policy("text-1")),
        AnyPolicy::Image(image_policy("image-1")),
        AnyPolicy::Text(text_policy("text-2")),
    ];
    let store = PolicyStore::from_any_policies(submitted);

    // Each modality's chain holds only the policies submitted for it.
    let text_count = store.chain::<Text>().len();
    let image_count = store.chain::<Image>().len();
    assert_eq!(text_count, 2);
    assert_eq!(image_count, 1);
}
}
Loading