Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
15 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
The table of contents is too big for display.
Diff view
Diff view
  •  
  •  
  •  
1,946 changes: 399 additions & 1,547 deletions Cargo.lock

Large diffs are not rendered by default.

38 changes: 19 additions & 19 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -3,19 +3,11 @@
[workspace]
resolver = "3"
members = [
"./crates/elide-bento",
"./crates/nvisy-cli",
"./crates/nvisy-codec",
"./crates/nvisy-context",
"./crates/nvisy-core",
"./crates/nvisy-engine",
"./crates/nvisy-fake",
"./crates/nvisy-llm",
"./crates/nvisy-ner",
"./crates/nvisy-ocr",
"./crates/nvisy-pattern",
"./crates/nvisy-server",
"./crates/nvisy-stt",
"./crates/nvisy-toolkit",
]

[workspace.package]
Expand All @@ -36,19 +28,27 @@ documentation = "https://docs.rs/nvisy-runtime"
#
# See for more details: https://github.com/rust-lang/cargo/issues/11329

# Elide toolkit (upstream)
elide = { git = "https://github.com/nvisycom/elide", branch = "main", default-features = false }
elide-core = { git = "https://github.com/nvisycom/elide", branch = "main" }
# `elide-ner` + `elide-ocr` are pulled directly only by `elide-bento`, which
# implements the per-backend traits these crates export. Engine and other
# consumers reach the same types through `elide::recognition::{ner, ocr}`.
elide-ner = { git = "https://github.com/nvisycom/elide", branch = "main", default-features = false }
elide-ocr = { git = "https://github.com/nvisycom/elide", branch = "main", default-features = false }
elide-stt = { git = "https://github.com/nvisycom/elide", branch = "main", default-features = false }

# Runtime-owned elide extensions
elide-bento = { path = "./crates/elide-bento", version = "0.1.0" }

# Internal crates
nvisy-codec = { path = "./crates/nvisy-codec", version = "0.1.0", default-features = false }
nvisy-context = { path = "./crates/nvisy-context", version = "0.1.0" }
nvisy-core = { path = "./crates/nvisy-core", version = "0.1.0" }
nvisy-engine = { path = "./crates/nvisy-engine", version = "0.1.0" }
nvisy-fake = { path = "./crates/nvisy-fake", version = "0.1.0" }
nvisy-llm = { path = "./crates/nvisy-llm", version = "0.1.0" }
nvisy-ner = { path = "./crates/nvisy-ner", version = "0.1.0" }
nvisy-ocr = { path = "./crates/nvisy-ocr", version = "0.1.0" }
nvisy-pattern = { path = "./crates/nvisy-pattern", version = "0.1.0" }
nvisy-server = { path = "./crates/nvisy-server", version = "0.1.0" }
nvisy-stt = { path = "./crates/nvisy-stt", version = "0.1.0" }
nvisy-toolkit = { path = "./crates/nvisy-toolkit", version = "0.1.0" }

# Internal crate being deleted: kept here as a path-dep so consumer
# manifests (nvisy-engine/server/cli) parse during the migration.
# Leaves the workspace once engine's source no longer imports it.
nvisy-core = { path = "./crates/nvisy-core", version = "0.1.0" }

# Serialization
serde = { version = "1.0", features = ["derive"] }
Expand Down
179 changes: 69 additions & 110 deletions Nvisy.example.toml
Original file line number Diff line number Diff line change
@@ -1,13 +1,11 @@
# Nvisy configuration
#
# Copy to Nvisy.toml (or run `make generate-config`), then edit as needed.
# The server loads Nvisy.toml by default; override with --config or NVISY_CONFIG.
# Copy to Nvisy.toml (or run `make generate-config`), then edit as
# needed. The server loads Nvisy.toml by default; override with
# --config or NVISY_CONFIG.
#
# All sections are optional: omitted sections use runtime defaults.
# All sections are optional: omitted sections use server defaults.

version = "0.1.0"

# Server binding and storage.
# CLI flags (--host, --port, --data-dir) override these.
[server]
host = "0.0.0.0"
Expand All @@ -24,107 +22,68 @@ body_limit_mb = 4
request_timeout = "5m"
cors = { allowed_origins = ["*"], max_age = "1h" }

# Engine-level resource limits and shared infrastructure.
[engine]

# Run-level resource limits.
[engine.limits]
concurrency = 4 # max parallel documents in flight
run_timeout = "60s"

# Shared HTTP client for all downstream provider calls.
[engine.http]
max_retries = 3
timeout = "120s"
connect_timeout = "10s"
idle_timeout = "90s"

# Extraction registry.
#
# Each opted-in `[extraction.*]` section is built once at server
# startup. Set `enabled = false` to keep the config but skip
# construction and dispatch. The pipeline `Extraction` step carries
# per-call flags only.

# Extraction: OCR (optical character recognition, images only).
[extraction.ocr]
enabled = true

[extraction.ocr.backend]
kind = "bento"
base_url = "http://localhost:3001"

# Extraction: STT (speech-to-text, audio only).
[extraction.stt]
enabled = true

[extraction.stt.backend]
kind = "noop"

# Detection: pattern-based (regex + dictionary + checksum).
# Optional section — pattern detection runs by default. Include this
# block to narrow it (named patterns, tag filters) or to disable it
# entirely with `enabled = false`.
[detection.pattern]
enabled = true
patterns = [] # empty = all built-in patterns

# Detection: NER (named entity recognition).
[detection.ner]
enabled = true

[detection.ner.backend]
kind = "bento"
base_url = "http://localhost:3000"

# Detection: LLM (large language model).
[detection.llm]
enabled = true
unresolved_policy = "drop" # drop | first-match

[detection.llm.provider]
kind = "open-ai"
api_key = "sk-example-replace-me"
model = "gpt-4o"

[detection.llm.detect]
enabled = true
temperature = 0.1
max_tokens = 4096
max_retries = 3
# context_window = { ... } # optional, for chunking large inputs
# preamble = "..." # optional, overrides the agent's default system prompt

[detection.llm.verify]
enabled = true
temperature = 0.1
max_tokens = 4096
max_retries = 3
# context_window = { ... }
# preamble = "..."

# Detection: VLM (vision-language model, images only).
[detection.vlm]
enabled = true

[detection.vlm.provider]
kind = "open-ai"
api_key = "sk-example-replace-me"
model = "gpt-4o"

[detection.vlm.detect]
enabled = true
temperature = 0.1
max_tokens = 4096
max_retries = 3

[detection.vlm.verify]
enabled = true
temperature = 0.1
max_tokens = 4096
max_retries = 3

# Redaction params.
[redaction]
confidence_threshold = 0.5
process_metadata = false
# The deployment default for AnalyzerParams. Requests carry
# per-field `analyzer` overrides on top of this default (inherit
# the slot, replace it, remove it from an optional slot, or patch
# a list with extend + remove selectors). Omit the whole
# [analyzer] section and the server falls back to the type-level
# default — no recognizers, no enrichers, default dedup + scope,
# empty catalog — useful for tests, not useful in production.

# Caller-asserted scope threaded into every recognizer's context.
# Add languages and country codes the analyzer should consider
# (empty = no restriction).
[analyzer.scope]
languages = []
jurisdictions = []

# Deduplication pipeline applied after recognition:
# calibrate → fuse → resolve → filter.
[analyzer.deduplication]
fusion = "max_confidence" # max_confidence | mean | noisy_or
resolution = "highest_confidence" # highest_confidence | longest_span
min_confidence = 0.7 # filter threshold, 0.0..=1.0

# Per-request label catalog. Entries are LabelSchema records:
# { name, description?, tags?, metadata? }. The server unions
# every submitted policy's labels with this catalog at request
# time; entries here are the baseline.
# [[analyzer.label_catalog]]
# name = "email_address"
# description = "RFC 5322 email"
# tags = ["pii", "contact"]

# Pattern recognizer: at-most-one per analyzer. Loads every
# shipped pattern + dictionary and wraps the recognizer in
# elide's context-boost layer.
[analyzer.recognizers.pattern]
builtins = true
context_enhanced = true

# NER recognizer list. Each entry needs a unique `name`; multiple
# entries run in parallel (e.g. one English model, one Spanish).
# [[analyzer.recognizers.ner]]
# name = "default_ner"
# backend = { kind = "mock" }
# # backend = { kind = "bento", base_url = "http://localhost:3000", model = "..." }

# LLM recognizer list. Same shape; backend chooses provider.
# [[analyzer.recognizers.llm]]
# name = "default_llm"
# backend = { kind = "mock" }

# Language enricher (writes the document's detected languages
# into the recognizer context, driving jurisdiction-aware
# dispatch). At-most-one per analyzer.
# [analyzer.enrichers.language]
# min_confidence = 0.6 # 0.0..=1.0; omit to let the engine choose

# OCR enricher (image modality only). At-most-one per analyzer.
# [analyzer.enrichers.ocr]
# backend = { kind = "mock" }
# # backend = { kind = "bento", base_url = "http://localhost:3001", model = "..." }

# STT enricher (audio modality only). At-most-one per analyzer.
# [analyzer.enrichers.stt]
# backend = { kind = "mock" }
# # backend = { kind = "bento", base_url = "http://localhost:3002", model = "..." }
59 changes: 59 additions & 0 deletions crates/elide-bento/Cargo.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
# https://doc.rust-lang.org/cargo/reference/manifest.html

[package]
name = "elide-bento"
description = "BentoML-backed NER and OCR backends for the elide toolkit"
keywords = ["elide", "bento", "bentoml", "ner", "ocr"]
categories = ["api-bindings"]
readme = "README.md"

version = { workspace = true }
rust-version = { workspace = true }
edition = { workspace = true }
license = { workspace = true }
publish = { workspace = true }

authors = { workspace = true }
repository = { workspace = true }
homepage = { workspace = true }
documentation = { workspace = true }

[package.metadata.docs.rs]
all-features = true
rustdoc-args = ["--cfg", "docsrs"]

[features]
default = ["ner", "ocr"]
## Ship the NER backend (implements `elide_ner::backend::NerBackend`
## against the `nvisy-inference-ner` BentoML service).
ner = ["dep:elide-ner"]
## Ship the OCR backend (implements `elide_ocr::OcrBackend`
## against the `nvisy-inference-ocr` BentoML service).
ocr = ["dep:elide-ocr", "dep:base64", "elide-core/image"]

[dependencies]
# Elide toolkit (upstream)
elide-core = { workspace = true, features = [] }
elide-ner = { workspace = true, features = [], optional = true }
elide-ocr = { workspace = true, features = [], optional = true }

# Serialization
serde = { workspace = true, features = ["derive"] }

# Primitive datatypes
hipstr = { workspace = true, features = [] }

# Async runtime
async-trait = { workspace = true, features = [] }

# Error handling
thiserror = { workspace = true, features = [] }

# Image bytes → base64 (OCR only)
base64 = { workspace = true, features = [], optional = true }

# BentoML client
bentoml = { workspace = true, default-features = false, features = ["rustls-tls", "tracing"] }

[dev-dependencies]
tokio = { workspace = true, features = ["rt", "macros"] }
7 changes: 7 additions & 0 deletions crates/elide-bento/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# elide-bento

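BentoML-backed NER and OCR backends for the elide toolkit.

The per-modality backends live in this crate, each behind a cargo
feature that is enabled by default: `ner` ships `BentoNer` (implements
the backend trait exported by `elide-ner`) and `ocr` ships `BentoOcr`
(implements the backend trait exported by `elide-ocr`). Both backends
share the crate's common HTTP client, params validation, and error
translation.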
39 changes: 39 additions & 0 deletions crates/elide-bento/src/error.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
//! Error translation: `bentoml` errors → [`elide_core::Error`].
//!
//! Crate-private — the public API of every backend reports
//! [`elide_core::Error`]; this enum is the internal seam the
//! per-route helpers use before bubbling up.

use elide_core::{Error, ErrorKind};

/// Errors surfaced internally by the bento backends.
///
/// Two structural categories, mapped onto [`ErrorKind`] by the
/// `From<BentoError>` impl for [`Error`] when bubbling up:
/// transport (HTTP / network / client construction) and protocol
/// (service answered but the body did not match the contract —
/// decode error, batch length mismatch, …).
///
/// The type is `pub(crate)`: it never appears in a public
/// signature of this crate.
#[derive(Debug, thiserror::Error)]
pub(crate) enum BentoError {
/// HTTP / transport failure — client construction, network
/// I/O, status-code rejections.
///
/// `#[from]` derives `From<bentoml::Error>`, so `?` on a
/// `bentoml` result converts into this variant automatically.
#[error("bento transport error: {0}")]
Transport(#[from] bentoml::Error),
/// Protocol failure — the service answered but the body did not
/// match the contract.
///
/// The `String` payload is the description rendered after the
/// `bento protocol error:` prefix in the display message.
#[error("bento protocol error: {0}")]
Protocol(String),
}

impl From<BentoError> for Error {
    /// Converts an internal [`BentoError`] into the public [`Error`].
    ///
    /// The variant selects the [`ErrorKind`]: `Transport` becomes
    /// [`ErrorKind::Transport`] and `Protocol` becomes
    /// [`ErrorKind::Validation`]. The original error is then handed
    /// to [`Error::new`] together with that kind.
    fn from(err: BentoError) -> Self {
        // Classify through a borrow so `err` itself is still
        // available to be moved into the resulting error.
        let category = match &err {
            BentoError::Protocol(_) => ErrorKind::Validation,
            BentoError::Transport(_) => ErrorKind::Transport,
        };
        Self::new(category, err)
    }
}
18 changes: 18 additions & 0 deletions crates/elide-bento/src/lib.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
// `forbid` (unlike `deny`) cannot be relaxed by a nested `allow`,
// so no `unsafe` code can be introduced anywhere in this crate.
#![forbid(unsafe_code)]
// When built with `--cfg docsrs`, turn on the `doc_cfg` rustdoc
// feature used by the `doc(cfg(...))` attributes below.
#![cfg_attr(docsrs, feature(doc_cfg))]
// The crate-level documentation is the README itself.
#![doc = include_str!("../README.md")]

// Private error module; nothing from it is re-exported here.
mod error;

// NER backend module: compiled only when the `ner` feature is on.
#[cfg(feature = "ner")]
#[cfg_attr(docsrs, doc(cfg(feature = "ner")))]
pub mod ner;

// OCR backend module: compiled only when the `ocr` feature is on.
#[cfg(feature = "ocr")]
#[cfg_attr(docsrs, doc(cfg(feature = "ocr")))]
pub mod ocr;

// Convenience re-exports at the crate root, gated by the same
// features as the modules they come from.
#[cfg(feature = "ner")]
pub use self::ner::BentoNer;
#[cfg(feature = "ocr")]
pub use self::ocr::BentoOcr;
Loading