From cb4c74458ba761f3395188d075fffe61212c4b37 Mon Sep 17 00:00:00 2001 From: Oleh Martsokha Date: Mon, 22 Jun 2026 16:18:24 +0200 Subject: [PATCH 01/14] chore(deps): add elide as upstream toolkit; wire engine to elide deps MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Workspace gains `elide`, `elide-core`, `elide-llm` as git deps tracking `nvisycom/elide`'s main branch. `nvisy-engine`/`nvisy-server`/`nvisy-cli` drop the per-modality `rich` feature (gone from elide; collapsed into parent modalities with sub-handlers) and the LLM provider toggles (`openai`/`anthropic`/`google`/`bento`) — all provider backends now enabled by default through elide-llm. Engine's manifest now consumes elide+elide-core+elide-llm in place of the local `nvisy-{core,context,pattern,ner,llm,codec,toolkit}` crates. Those local crates remain on disk and as workspace path-deps so the not-yet-migrated consumers (server/cli/fake/ocr/stt/toolkit) keep parsing; each leaves the workspace alongside its consumer's migration. Engine source still imports `nvisy_*` paths and will not compile until the import rewire pass lands (E3.2c). 
Co-Authored-By: Claude Opus 4.7 --- Cargo.lock | 263 +++++++++++++++++++++++++++++++-- Cargo.toml | 19 ++- crates/nvisy-cli/Cargo.toml | 19 +-- crates/nvisy-engine/Cargo.toml | 32 ++-- crates/nvisy-server/Cargo.toml | 15 +- 5 files changed, 275 insertions(+), 73 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 857e87ed..83879da4 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -817,6 +817,15 @@ dependencies = [ "unicode-segmentation", ] +[[package]] +name = "convert_case" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "affbf0190ed2caf063e3def54ff444b449371d55c58e513a95ab98eca50adb49" +dependencies = [ + "unicode-segmentation", +] + [[package]] name = "core-foundation" version = "0.10.1" @@ -1174,7 +1183,7 @@ version = "2.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "799a97264921d8623a957f6c3b9011f3b5492f557bbb7a5a19b7fa6d06ba8dcb" dependencies = [ - "convert_case", + "convert_case 0.10.0", "proc-macro2", "quote", "rustc_version", @@ -1268,6 +1277,128 @@ version = "1.16.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "91622ff5e7162018101f2fea40d6ebf4a78bbe5a49736a2020649edf9693679e" +[[package]] +name = "elide" +version = "0.1.0" +source = "git+https://github.com/nvisycom/elide?branch=main#d3937a17f3c1f570a4de474d716456507e2d8dca" +dependencies = [ + "bytes", + "derive_builder", + "elide-codec", + "elide-core", + "elide-llm", + "elide-ner", + "elide-pattern", + "erased-serde", + "futures", + "hex", + "serde", + "sha2 0.11.0", + "tracing", + "uuid", +] + +[[package]] +name = "elide-codec" +version = "0.1.0" +source = "git+https://github.com/nvisycom/elide?branch=main#d3937a17f3c1f570a4de474d716456507e2d8dca" +dependencies = [ + "bytes", + "csv", + "ego-tree", + "elide-core", + "hex", + "hound", + "image", + "imageproc", + "mp3lame-encoder", + "quick-xml 0.40.1", + "scraper", + "serde_json", + "sha2 0.11.0", + "symphonia", + "tracing", +] + 
+[[package]] +name = "elide-context" +version = "0.1.0" +source = "git+https://github.com/nvisycom/elide?branch=main#d3937a17f3c1f570a4de474d716456507e2d8dca" +dependencies = [ + "elide-core", + "hipstr", + "unicode-segmentation", +] + +[[package]] +name = "elide-core" +version = "0.1.0" +source = "git+https://github.com/nvisycom/elide?branch=main#d3937a17f3c1f570a4de474d716456507e2d8dca" +dependencies = [ + "bytes", + "celes", + "derive_builder", + "hipstr", + "jiff", + "oxilangtag", + "serde", + "type-map", + "uuid", +] + +[[package]] +name = "elide-llm" +version = "0.1.0" +source = "git+https://github.com/nvisycom/elide?branch=main#d3937a17f3c1f570a4de474d716456507e2d8dca" +dependencies = [ + "async-trait", + "derive_builder", + "elide-core", + "reqwest-middleware", + "reqwest-retry", + "reqwest-tracing", + "rig 0.39.0", + "schemars", + "serde", + "thiserror", + "tracing", + "unicode-normalization", +] + +[[package]] +name = "elide-ner" +version = "0.1.0" +source = "git+https://github.com/nvisycom/elide?branch=main#d3937a17f3c1f570a4de474d716456507e2d8dca" +dependencies = [ + "async-trait", + "derive_builder", + "elide-core", + "hipstr", + "serde", + "tracing", + "uuid", +] + +[[package]] +name = "elide-pattern" +version = "0.1.0" +source = "git+https://github.com/nvisycom/elide?branch=main#d3937a17f3c1f570a4de474d716456507e2d8dca" +dependencies = [ + "aho-corasick", + "async-trait", + "bs58", + "csv", + "derive_builder", + "derive_more", + "elide-context", + "elide-core", + "iban_validate", + "phonenumber", + "regex", + "serde", + "toml", +] + [[package]] name = "embedded-io" version = "0.4.0" @@ -1327,6 +1458,17 @@ version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f" +[[package]] +name = "erased-serde" +version = "0.4.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"d2add8a07dd6a8d93ff627029c51de145e12686fbc36ecb298ac22e74cf02dec" +dependencies = [ + "serde", + "serde_core", + "typeid", +] + [[package]] name = "errno" version = "0.3.14" @@ -1555,6 +1697,10 @@ name = "futures-timer" version = "3.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "af43fadb8a98512d547e37b4e92e0ced13e205c061b87b4623eff01d918d6968" +dependencies = [ + "gloo-timers", + "send_wrapper", +] [[package]] name = "futures-util" @@ -1700,6 +1846,18 @@ version = "0.3.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0cc23270f6e1808e30a928bdc84dea0b9b4136a8bc82338574f23baf47bbd280" +[[package]] +name = "gloo-timers" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "482ce8a491a501da4cd806bd190275363d674f2845005c6ddbd5d3e1dd54495d" +dependencies = [ + "futures-channel", + "futures-core", + "js-sys", + "wasm-bindgen", +] + [[package]] name = "h2" version = "0.4.14" @@ -3034,20 +3192,15 @@ dependencies = [ "bytes", "derive_builder", "derive_more", + "elide", + "elide-core", + "elide-llm", "fjall", "futures", "hipstr", "humantime-serde", "jiff", - "nvisy-codec", - "nvisy-core", "nvisy-engine", - "nvisy-llm", - "nvisy-ner", - "nvisy-ocr", - "nvisy-pattern", - "nvisy-stt", - "nvisy-toolkit", "rand 0.10.1", "schemars", "semver", @@ -3089,7 +3242,7 @@ dependencies = [ "reqwest-middleware", "reqwest-retry", "reqwest-tracing", - "rig", + "rig 0.38.1", "schemars", "serde", "serde_json", @@ -3398,7 +3551,7 @@ dependencies = [ "nom 7.1.3", "once_cell", "postcard", - "quick-xml", + "quick-xml 0.38.4", "regex", "regex-cache", "serde", @@ -3652,6 +3805,15 @@ dependencies = [ "memchr", ] +[[package]] +name = "quick-xml" +version = "0.40.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2474bd2e5029e7ccb6abb2ba48cf2383a333851dedf495901544281590c7da7f" +dependencies = [ + "memchr", +] + [[package]] name = "quick_cache" version = "0.6.23" @@ 
-4106,7 +4268,16 @@ version = "0.38.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f5510d91a645a97d37ea03ddfd9c1c7f55d5eafbc7661b71bb210eba078b6966" dependencies = [ - "rig-core", + "rig-core 0.38.1", +] + +[[package]] +name = "rig" +version = "0.39.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e98e2e8f01c4c5bc23f577983634fa4d5244ffb070ea14c23b1ea5bd406e5cac" +dependencies = [ + "rig-core 0.39.0", ] [[package]] @@ -4132,7 +4303,43 @@ dependencies = [ "pin-project-lite", "reqwest", "reqwest-middleware", - "rig-derive", + "rig-derive 0.38.1", + "schemars", + "serde", + "serde_json", + "thiserror", + "tokio", + "tokio-tungstenite", + "tracing", + "tracing-futures", + "url", +] + +[[package]] +name = "rig-core" +version = "0.39.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "80a4bc7a93b329c4e1a66d5fd211d79990e7331e3c701f057c29f135f548686d" +dependencies = [ + "as-any", + "async-stream", + "base64", + "bytes", + "eventsource-stream", + "fastrand", + "futures", + "futures-timer", + "glob", + "http", + "indexmap", + "mime", + "mime_guess", + "nanoid", + "ordered-float", + "pin-project-lite", + "reqwest", + "reqwest-middleware", + "rig-derive 0.39.0", "schemars", "serde", "serde_json", @@ -4150,7 +4357,23 @@ version = "0.38.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9551846a623524ffddfab42cac15f03d0547b6d5f81a66549341126205e4b721" dependencies = [ - "convert_case", + "convert_case 0.10.0", + "deluxe", + "indoc", + "proc-macro-crate 3.5.0", + "proc-macro2", + "quote", + "serde_json", + "syn", +] + +[[package]] +name = "rig-derive" +version = "0.39.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5531bfa887b371eab658a92de7db35003370bbeee208ff5e68bbb81a5ae92d3d" +dependencies = [ + "convert_case 0.11.0", "deluxe", "indoc", "proc-macro-crate 3.5.0", @@ -4452,6 +4675,12 @@ dependencies = [ "serde_core", ] 
+[[package]] +name = "send_wrapper" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cd0b0ec5f1c1ca621c432a25813d8d60c88abe6d3e08a3eb9cf37d97a0fe3d73" + [[package]] name = "serde" version = "1.0.228" @@ -5384,6 +5613,12 @@ dependencies = [ "rustc-hash", ] +[[package]] +name = "typeid" +version = "1.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bc7d623258602320d5c55d1bc22793b57daff0ec7efc270ea7d55ce1d5f5471c" + [[package]] name = "typenum" version = "1.20.1" diff --git a/Cargo.toml b/Cargo.toml index 8a6555cf..8e86cdb8 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -36,18 +36,27 @@ documentation = "https://docs.rs/nvisy-runtime" # # See for more details: https://github.com/rust-lang/cargo/issues/11329 +# Elide toolkit (upstream) +elide = { git = "https://github.com/nvisycom/elide", branch = "main", default-features = false } +elide-core = { git = "https://github.com/nvisycom/elide", branch = "main" } +elide-llm = { git = "https://github.com/nvisycom/elide", branch = "main", default-features = false } + # Internal crates +nvisy-engine = { path = "./crates/nvisy-engine", version = "0.1.0" } +nvisy-fake = { path = "./crates/nvisy-fake", version = "0.1.0" } +nvisy-ocr = { path = "./crates/nvisy-ocr", version = "0.1.0" } +nvisy-server = { path = "./crates/nvisy-server", version = "0.1.0" } +nvisy-stt = { path = "./crates/nvisy-stt", version = "0.1.0" } + +# Internal crates being deleted: kept here as path-deps so consumer +# manifests (nvisy-server/cli/fake/ocr/stt/toolkit) parse during the +# migration. Each entry leaves with its consumer's migration task. 
nvisy-codec = { path = "./crates/nvisy-codec", version = "0.1.0", default-features = false } nvisy-context = { path = "./crates/nvisy-context", version = "0.1.0" } nvisy-core = { path = "./crates/nvisy-core", version = "0.1.0" } -nvisy-engine = { path = "./crates/nvisy-engine", version = "0.1.0" } -nvisy-fake = { path = "./crates/nvisy-fake", version = "0.1.0" } nvisy-llm = { path = "./crates/nvisy-llm", version = "0.1.0" } nvisy-ner = { path = "./crates/nvisy-ner", version = "0.1.0" } -nvisy-ocr = { path = "./crates/nvisy-ocr", version = "0.1.0" } nvisy-pattern = { path = "./crates/nvisy-pattern", version = "0.1.0" } -nvisy-server = { path = "./crates/nvisy-server", version = "0.1.0" } -nvisy-stt = { path = "./crates/nvisy-stt", version = "0.1.0" } nvisy-toolkit = { path = "./crates/nvisy-toolkit", version = "0.1.0" } # Serialization diff --git a/crates/nvisy-cli/Cargo.toml b/crates/nvisy-cli/Cargo.toml index c8fa9aec..04abbf38 100644 --- a/crates/nvisy-cli/Cargo.toml +++ b/crates/nvisy-cli/Cargo.toml @@ -19,11 +19,7 @@ homepage = { workspace = true } documentation = { workspace = true } [features] -default = [ - "tabular", "image", "audio", "rich", - "openai", "anthropic", "google", - "bento", -] +default = ["tabular", "image", "audio"] ## Tabular modality — csv, xlsx. tabular = ["nvisy-server/tabular"] @@ -31,19 +27,6 @@ tabular = ["nvisy-server/tabular"] image = ["nvisy-server/image"] ## Audio modality — wav, mp3. audio = ["nvisy-server/audio"] -## Rich-document modality — pdf, docx. -rich = ["nvisy-server/rich"] - -## Enable all OpenAI providers (GPT, Whisper STT). -openai = ["nvisy-server/openai"] -## Enable Anthropic Claude completion provider. -anthropic = ["nvisy-server/anthropic"] -## Enable Google Gemini. -google = ["nvisy-server/google"] - -## Enable the externalized BentoML backends (NER + OCR) — forwards -## to `nvisy-server/bento`. 
-bento = ["nvisy-server/bento"] [package.metadata.docs.rs] all-features = true diff --git a/crates/nvisy-engine/Cargo.toml b/crates/nvisy-engine/Cargo.toml index 97a99643..f49b4a39 100644 --- a/crates/nvisy-engine/Cargo.toml +++ b/crates/nvisy-engine/Cargo.toml @@ -19,34 +19,22 @@ homepage = { workspace = true } documentation = { workspace = true } [features] -default = ["tabular", "image", "audio", "rich"] +default = ["tabular", "image", "audio"] test-utils = ["dep:tempfile"] -tabular = ["nvisy-codec/tabular"] -image = ["nvisy-codec/image"] -audio = ["nvisy-codec/audio"] -rich = ["image", "nvisy-codec/rich"] - -openai = ["nvisy-llm/openai-gpt"] -anthropic = ["nvisy-llm/anthropic-claude"] -google = ["nvisy-llm/google-gemini"] - -bento = ["nvisy-toolkit/bento", "nvisy-ner/bento", "nvisy-ocr/bento"] +tabular = ["elide/codec-tabular"] +image = ["elide/codec-image"] +audio = ["elide/codec-audio"] [package.metadata.docs.rs] all-features = true rustdoc-args = ["--cfg", "docsrs"] [dependencies] -# Internal crates -nvisy-codec = { workspace = true, features = ["text"] } -nvisy-core = { workspace = true, features = [] } -nvisy-llm = { workspace = true, features = [] } -nvisy-ner = { workspace = true, features = [] } -nvisy-ocr = { workspace = true, features = [] } -nvisy-pattern = { workspace = true, features = [] } -nvisy-stt = { workspace = true, features = [] } -nvisy-toolkit = { workspace = true, features = [] } +# Elide toolkit (upstream) +elide = { workspace = true, features = ["codec", "codec-text", "pattern", "ner", "llm", "serde"] } +elide-core = { workspace = true, features = [] } +elide-llm = { workspace = true, features = ["openai-gpt", "anthropic-claude", "google-gemini"] } # Serialization serde = { workspace = true, features = [] } @@ -91,8 +79,8 @@ tempfile = { workspace = true, features = [], optional = true } [dev-dependencies] nvisy-engine = { path = ".", features = ["test-utils"] } -nvisy-core = { workspace = true, features = ["test-utils"] } -nvisy-codec 
= { workspace = true, features = ["txt", "csv", "test-utils"] } +elide-core = { workspace = true, features = [] } +elide = { workspace = true, features = ["codec", "codec-txt", "codec-csv", "pattern", "ner", "llm", "serde"] } tempfile = { workspace = true, features = [] } [package.metadata.cargo-machete] diff --git a/crates/nvisy-server/Cargo.toml b/crates/nvisy-server/Cargo.toml index eb2e7628..9787fb82 100644 --- a/crates/nvisy-server/Cargo.toml +++ b/crates/nvisy-server/Cargo.toml @@ -19,7 +19,7 @@ homepage = { workspace = true } documentation = { workspace = true } [features] -default = ["tabular", "image", "audio", "rich"] +default = ["tabular", "image", "audio"] ## Tabular modality — csv, xlsx. tabular = ["nvisy-engine/tabular"] @@ -27,19 +27,6 @@ tabular = ["nvisy-engine/tabular"] image = ["nvisy-engine/image"] ## Audio modality — wav, mp3. audio = ["nvisy-engine/audio"] -## Rich-document modality — pdf, docx. -rich = ["nvisy-engine/rich"] - -## Enable all OpenAI providers (GPT, Whisper STT). -openai = ["nvisy-engine/openai"] -## Enable Anthropic Claude completion provider. -anthropic = ["nvisy-engine/anthropic"] -## Enable Google Gemini. -google = ["nvisy-engine/google"] - -## Enable the externalized BentoML backends (NER + OCR) — forwards -## to `nvisy-engine/bento`. -bento = ["nvisy-engine/bento"] [package.metadata.docs.rs] all-features = true From 7b84825461ab8ddc81cfcc17d9fde321a101d034 Mon Sep 17 00:00:00 2001 From: Oleh Martsokha Date: Mon, 22 Jun 2026 16:39:09 +0200 Subject: [PATCH 02/14] chore(workspace): delete superseded crates; add elide-bento; slim nvisy-core Bulk teardown of the runtime's toolkit half now that elide ships upstream equivalents. 
Deleted: - nvisy-{pattern,llm,ner,ocr,stt}: superseded by elide-{pattern,llm,ner,ocr,stt} on nvisycom/elide main - nvisy-codec: superseded by elide-codec - nvisy-context: superseded by elide-context - nvisy-toolkit: superseded by the elide umbrella crate (Analyzer + Anonymizer + deduplication layers + operators all ship there) Renamed: - nvisy-fake -> elide-fake (runtime-owned extension over elide types; source still uses nvisy_core paths and will be reworked in its own pass) Created: - elide-bento: shared BentoML HTTP client wrapper for elide backends (per E0.3 plan; minimal boilerplate -- BentoClient + BentoParams + BentoError; per-modality backends compose from this in the consuming crates) Slimmed nvisy-core to {error, health, policy}: - dropped entity/extraction/modality/primitive/recognition/redaction (all re-exported from elide-core at consumer sites) - moved nvisy-engine/src/policy/ -> nvisy-core/src/policy/ (policy is the runtime's public governance contract; engine consumes it) - added elide-core as nvisy-core's only upstream dep so Policy types reference elide_core::entity::Label directly State of the migration: workspace parses; elide-bento is the only crate that compiles end-to-end. nvisy-core/engine/server/cli/elide-fake source still imports deleted nvisy-* paths and will be redesigned crate-by-crate on top of elide's Analyzer/Anonymizer/Orchestrator surface. 
Co-Authored-By: Claude Opus 4.7 --- Cargo.lock | 706 +------------ Cargo.toml | 30 +- crates/{nvisy-stt => elide-bento}/Cargo.toml | 31 +- crates/elide-bento/README.md | 7 + crates/elide-bento/src/client.rs | 49 + crates/elide-bento/src/error.rs | 31 + crates/elide-bento/src/lib.rs | 9 + crates/{nvisy-fake => elide-fake}/Cargo.toml | 12 +- crates/{nvisy-fake => elide-fake}/README.md | 0 .../src/anonymizer/mod.rs | 0 .../src/generator/case_id.rs | 0 .../src/generator/contact.rs | 0 .../src/generator/device.rs | 0 .../src/generator/dispatch.rs | 0 .../src/generator/finance.rs | 0 .../src/generator/identity.rs | 0 .../src/generator/mod.rs | 0 .../src/generator/pattern.rs | 0 .../src/generator/temporal.rs | 0 crates/{nvisy-fake => elide-fake}/src/lib.rs | 0 .../src/locale/mod.rs | 0 crates/nvisy-codec/Cargo.toml | 148 --- crates/nvisy-codec/README.md | 48 - crates/nvisy-codec/src/content/bundle.rs | 114 --- .../nvisy-codec/src/content/content_data.rs | 268 ----- .../src/content/content_metadata.rs | 176 ---- crates/nvisy-codec/src/content/encoding.rs | 23 - crates/nvisy-codec/src/content/mod.rs | 29 - crates/nvisy-codec/src/core/format.rs | 179 ---- crates/nvisy-codec/src/core/handler.rs | 127 --- crates/nvisy-codec/src/core/loader.rs | 129 --- crates/nvisy-codec/src/core/mod.rs | 27 - crates/nvisy-codec/src/core/registry.rs | 173 ---- crates/nvisy-codec/src/document/audio.rs | 36 - crates/nvisy-codec/src/document/image.rs | 36 - crates/nvisy-codec/src/document/mod.rs | 239 ----- crates/nvisy-codec/src/document/tabular.rs | 43 - crates/nvisy-codec/src/document/text.rs | 43 - .../nvisy-codec/src/handler/audio/duration.rs | 152 --- crates/nvisy-codec/src/handler/audio/mod.rs | 37 - .../src/handler/audio/mp3_codec.rs | 373 ------- .../src/handler/audio/mp3_handler.rs | 282 ------ .../src/handler/audio/mp3_loader.rs | 86 -- .../nvisy-codec/src/handler/audio/redact.rs | 150 --- .../src/handler/audio/wav_handler.rs | 326 ------ .../src/handler/audio/wav_loader.rs | 26 - 
.../src/handler/image/image_ops.rs | 83 -- .../src/handler/image/jpeg_handler.rs | 25 - .../src/handler/image/jpeg_loader.rs | 31 - .../nvisy-codec/src/handler/image/macros.rs | 204 ---- crates/nvisy-codec/src/handler/image/mod.rs | 39 - .../src/handler/image/png_handler.rs | 33 - .../src/handler/image/png_loader.rs | 31 - .../nvisy-codec/src/handler/image/redact.rs | 56 - .../src/handler/image/tiff_handler.rs | 25 - .../src/handler/image/tiff_loader.rs | 31 - crates/nvisy-codec/src/handler/mod.rs | 18 - .../src/handler/rich/docx_handler.rs | 99 -- .../src/handler/rich/docx_loader.rs | 29 - crates/nvisy-codec/src/handler/rich/mod.rs | 21 - .../src/handler/rich/pdf_handler.rs | 339 ------- .../src/handler/rich/pdf_loader.rs | 96 -- .../src/handler/rich/pdf_render.rs | 152 --- .../src/handler/tabular/csv_handler.rs | 571 ----------- .../src/handler/tabular/csv_loader.rs | 204 ---- crates/nvisy-codec/src/handler/tabular/mod.rs | 57 -- .../src/handler/tabular/xlsx_handler.rs | 85 -- .../src/handler/tabular/xlsx_loader.rs | 25 - .../src/handler/text/html_encode.rs | 172 ---- .../src/handler/text/html_handler.rs | 437 -------- .../src/handler/text/html_loader.rs | 282 ------ .../src/handler/text/json_handler.rs | 953 ------------------ .../src/handler/text/json_loader.rs | 38 - .../src/handler/text/markdown_loader.rs | 50 - crates/nvisy-codec/src/handler/text/mod.rs | 45 - crates/nvisy-codec/src/handler/text/redact.rs | 76 -- .../src/handler/text/txt_handler.rs | 446 -------- .../src/handler/text/txt_loader.rs | 80 -- crates/nvisy-codec/src/lib.rs | 78 -- crates/nvisy-context/Cargo.toml | 39 - crates/nvisy-context/README.md | 44 - crates/nvisy-context/src/enhancer/context.rs | 68 -- crates/nvisy-context/src/enhancer/mod.rs | 506 ---------- crates/nvisy-context/src/enhancer/window.rs | 118 --- crates/nvisy-context/src/io/mod.rs | 20 - crates/nvisy-context/src/io/tokens.rs | 166 --- crates/nvisy-context/src/io/wrapper.rs | 82 -- crates/nvisy-context/src/lib.rs | 13 - 
crates/nvisy-context/src/matching/lemma.rs | 65 -- crates/nvisy-context/src/matching/matcher.rs | 70 -- crates/nvisy-context/src/matching/mod.rs | 19 - crates/nvisy-context/src/rule.rs | 181 ---- crates/nvisy-core/Cargo.toml | 3 + crates/nvisy-core/src/entity/annotation.rs | 311 ------ .../nvisy-core/src/entity/label/builtins.rs | 180 ---- crates/nvisy-core/src/entity/label/catalog.rs | 115 --- .../src/entity/label/entity_label.rs | 178 ---- crates/nvisy-core/src/entity/label/mod.rs | 31 - .../nvisy-core/src/entity/label/reference.rs | 148 --- crates/nvisy-core/src/entity/method/mod.rs | 22 - .../src/entity/method/provenance.rs | 124 --- crates/nvisy-core/src/entity/method/trail.rs | 242 ----- crates/nvisy-core/src/entity/mod.rs | 161 --- crates/nvisy-core/src/entity/source.rs | 108 -- crates/nvisy-core/src/extraction/artifacts.rs | 63 -- crates/nvisy-core/src/extraction/data_at.rs | 34 - crates/nvisy-core/src/extraction/mod.rs | 62 -- crates/nvisy-core/src/extraction/output.rs | 33 - crates/nvisy-core/src/extraction/span.rs | 79 -- crates/nvisy-core/src/extraction/text_at.rs | 36 - crates/nvisy-core/src/lib.rs | 7 +- crates/nvisy-core/src/modality/audio.rs | 163 --- crates/nvisy-core/src/modality/image.rs | 183 ---- crates/nvisy-core/src/modality/mod.rs | 190 ---- crates/nvisy-core/src/modality/tabular.rs | 193 ---- crates/nvisy-core/src/modality/text.rs | 259 ----- .../src/policy/audit.rs | 0 .../src/policy/condition.rs | 0 .../src/policy/mod.rs | 0 .../src/policy/redaction/any.rs | 0 .../src/policy/redaction/audio.rs | 0 .../src/policy/redaction/image.rs | 0 .../src/policy/redaction/mod.rs | 0 .../src/policy/redaction/tabular.rs | 0 .../src/policy/redaction/text.rs | 0 .../src/policy/retention/duration.rs | 0 .../src/policy/retention/mod.rs | 0 .../src/policy/rule.rs | 0 .../src/policy/selector.rs | 0 .../src/policy/suppress.rs | 0 .../src/primitive/confidence/mod.rs | 14 - .../src/primitive/confidence/threshold.rs | 159 --- .../src/primitive/confidence/value.rs 
| 204 ---- crates/nvisy-core/src/primitive/country.rs | 179 ---- .../src/primitive/geometry/bounding_box.rs | 205 ---- .../src/primitive/geometry/dimensions.rs | 34 - .../nvisy-core/src/primitive/geometry/mod.rs | 27 - .../geometry/normalized_bounding_box.rs | 59 -- .../src/primitive/geometry/polygon.rs | 27 - .../src/primitive/language/detection.rs | 71 -- .../src/primitive/language/detections.rs | 72 -- .../nvisy-core/src/primitive/language/mod.rs | 15 - .../nvisy-core/src/primitive/language/tag.rs | 113 --- crates/nvisy-core/src/primitive/mod.rs | 30 - .../src/primitive/rendering/color.rs | 28 - .../nvisy-core/src/primitive/rendering/dpi.rs | 44 - .../nvisy-core/src/primitive/rendering/mod.rs | 12 - crates/nvisy-core/src/primitive/time_span.rs | 204 ---- crates/nvisy-core/src/recognition/hint.rs | 65 -- crates/nvisy-core/src/recognition/input.rs | 202 ---- .../nvisy-core/src/recognition/label_map.rs | 156 --- crates/nvisy-core/src/recognition/mod.rs | 63 -- crates/nvisy-core/src/recognition/output.rs | 53 - crates/nvisy-core/src/redaction/anonymizer.rs | 31 - .../nvisy-core/src/redaction/deanonymizer.rs | 52 - .../nvisy-core/src/redaction/leak_profile.rs | 30 - crates/nvisy-core/src/redaction/memoized.rs | 86 -- crates/nvisy-core/src/redaction/mod.rs | 52 - crates/nvisy-core/src/redaction/redact_at.rs | 32 - crates/nvisy-core/src/redaction/redactions.rs | 110 -- .../nvisy-core/src/redaction/replacement.rs | 137 --- crates/nvisy-core/src/redaction/store.rs | 32 - crates/nvisy-engine/src/lib.rs | 1 - crates/nvisy-llm/Cargo.toml | 76 -- crates/nvisy-llm/README.md | 41 - crates/nvisy-llm/src/backend/http/config.rs | 72 -- .../nvisy-llm/src/backend/http/middleware.rs | 23 - crates/nvisy-llm/src/backend/http/mod.rs | 60 -- crates/nvisy-llm/src/backend/mod.rs | 42 - crates/nvisy-llm/src/backend/request.rs | 20 - crates/nvisy-llm/src/backend/response.rs | 26 - crates/nvisy-llm/src/backend/rig/config.rs | 54 - crates/nvisy-llm/src/backend/rig/context.rs | 74 -- 
crates/nvisy-llm/src/backend/rig/inner.rs | 41 - crates/nvisy-llm/src/backend/rig/mod.rs | 243 ----- crates/nvisy-llm/src/backend/rig/usage.rs | 83 -- crates/nvisy-llm/src/error.rs | 175 ---- crates/nvisy-llm/src/lib.rs | 13 - .../nvisy-llm/src/provider/authenticated.rs | 99 -- crates/nvisy-llm/src/provider/llm.rs | 103 -- crates/nvisy-llm/src/provider/mod.rs | 18 - .../nvisy-llm/src/provider/unauthenticated.rs | 34 - .../nvisy-llm/src/recognition/candidates.rs | 67 -- .../src/recognition/default_prompt.rs | 74 -- .../nvisy-llm/src/recognition/file_prompt.rs | 324 ------ crates/nvisy-llm/src/recognition/lift.rs | 147 --- .../src/recognition/llm_recognizer.rs | 117 --- crates/nvisy-llm/src/recognition/localize.rs | 172 ---- crates/nvisy-llm/src/recognition/mod.rs | 26 - crates/nvisy-llm/src/recognition/prompt.rs | 52 - .../src/recognition/response_parser.rs | 46 - crates/nvisy-llm/src/recognition/schemas.rs | 29 - .../nvisy-llm/src/recognition/text_prompt.rs | 105 -- .../nvisy-llm/src/recognition/vlm_prompt.rs | 66 -- crates/nvisy-llm/testdata/prompts/ner.toml | 33 - crates/nvisy-llm/testdata/prompts/vlm.toml | 25 - crates/nvisy-llm/tests/file_prompt.rs | 185 ---- crates/nvisy-ner/Cargo.toml | 60 -- crates/nvisy-ner/README.md | 40 - crates/nvisy-ner/src/backend/bento_backend.rs | 123 --- crates/nvisy-ner/src/backend/bento_types.rs | 78 -- crates/nvisy-ner/src/backend/mod.rs | 24 - crates/nvisy-ner/src/backend/ner_backend.rs | 107 -- crates/nvisy-ner/src/backend/ner_span.rs | 45 - crates/nvisy-ner/src/backend/noop_backend.rs | 26 - crates/nvisy-ner/src/lib.rs | 11 - crates/nvisy-ner/src/nlp/capabilities.rs | 72 -- crates/nvisy-ner/src/nlp/engine.rs | 86 -- crates/nvisy-ner/src/nlp/lingua_detector.rs | 175 ---- crates/nvisy-ner/src/nlp/lingua_engine.rs | 144 --- crates/nvisy-ner/src/nlp/mod.rs | 37 - .../nvisy-ner/src/recognition/aggregation.rs | 59 -- crates/nvisy-ner/src/recognition/config.rs | 102 -- crates/nvisy-ner/src/recognition/mod.rs | 21 - 
.../nvisy-ner/src/recognition/recognizer.rs | 204 ---- crates/nvisy-ocr/Cargo.toml | 56 - crates/nvisy-ocr/README.md | 41 - crates/nvisy-ocr/src/backend/bento_backend.rs | 91 -- crates/nvisy-ocr/src/backend/bento_types.rs | 20 - crates/nvisy-ocr/src/backend/mod.rs | 39 - crates/nvisy-ocr/src/backend/noop_backend.rs | 36 - crates/nvisy-ocr/src/backend/ocr_backend.rs | 155 --- crates/nvisy-ocr/src/extraction/extractor.rs | 99 -- crates/nvisy-ocr/src/extraction/mod.rs | 14 - crates/nvisy-ocr/src/lib.rs | 9 - crates/nvisy-ocr/src/types/mod.rs | 12 - crates/nvisy-ocr/src/types/raw_block.rs | 71 -- crates/nvisy-pattern/Cargo.toml | 54 - crates/nvisy-pattern/README.md | 58 -- crates/nvisy-pattern/assets/PRESIDIO.md | 41 - crates/nvisy-pattern/assets/README.md | 94 -- .../dictionaries/en/finance/currencies.csv | 40 - .../dictionaries/en/finance/currencies.toml | 4 - .../dictionaries/en/personal/languages.csv | 104 -- .../dictionaries/en/personal/languages.toml | 8 - .../en/personal/nationalities.toml | 4 - .../en/personal/nationalities.txt | 152 --- .../dictionaries/en/personal/religions.toml | 4 - .../dictionaries/en/personal/religions.txt | 34 - .../world/finance/cryptocurrencies.csv | 19 - .../world/finance/cryptocurrencies.toml | 3 - .../assets/patterns/au/finance/abn.toml | 25 - .../assets/patterns/au/finance/acn.toml | 24 - .../assets/patterns/au/health/medicare.toml | 25 - .../assets/patterns/au/identity/tfn.toml | 24 - .../patterns/ca/contact/postal_code.toml | 20 - .../assets/patterns/ca/identity/sin.toml | 32 - .../assets/patterns/de/contact/plz.toml | 21 - .../patterns/de/finance/handelsregister.toml | 23 - .../assets/patterns/de/health/bsnr.toml | 29 - .../patterns/de/health/health_insurance.toml | 33 - .../assets/patterns/de/health/lanr.toml | 31 - .../patterns/de/identity/fuehrerschein.toml | 34 - .../assets/patterns/de/identity/id_card.toml | 36 - .../assets/patterns/de/identity/passport.toml | 23 - .../patterns/de/identity/social_security.toml | 25 - 
.../assets/patterns/de/identity/tax_id.toml | 24 - .../patterns/de/identity/tax_number.toml | 34 - .../assets/patterns/de/identity/vat_id.toml | 32 - .../assets/patterns/de/vehicle/kfz.toml | 44 - .../patterns/es/contact/codigo_postal.toml | 19 - .../assets/patterns/es/finance/cif.toml | 21 - .../assets/patterns/es/identity/nie.toml | 21 - .../assets/patterns/es/identity/nif.toml | 22 - .../assets/patterns/es/identity/passport.toml | 18 - .../assets/patterns/fi/identity/hetu.toml | 24 - .../assets/patterns/in/finance/gstin.toml | 20 - .../assets/patterns/in/identity/aadhaar.toml | 29 - .../assets/patterns/in/identity/pan.toml | 19 - .../assets/patterns/in/identity/passport.toml | 19 - .../assets/patterns/in/identity/voter.toml | 25 - .../patterns/in/vehicle/registration.toml | 31 - .../assets/patterns/it/finance/vat_code.toml | 20 - .../patterns/it/identity/driving_licence.toml | 22 - .../patterns/it/identity/fiscal_code.toml | 23 - .../patterns/it/identity/identity_card.toml | 33 - .../assets/patterns/it/identity/passport.toml | 19 - .../assets/patterns/kr/finance/brn.toml | 31 - .../patterns/kr/identity/driver_license.toml | 28 - .../assets/patterns/kr/identity/frn.toml | 27 - .../assets/patterns/kr/identity/passport.toml | 27 - .../assets/patterns/kr/identity/rrn.toml | 29 - .../assets/patterns/ng/identity/nin.toml | 22 - .../patterns/ng/vehicle/registration.toml | 20 - .../patterns/pl/contact/kod_pocztowy.toml | 18 - .../assets/patterns/pl/finance/nip.toml | 31 - .../assets/patterns/pl/finance/regon.toml | 26 - .../assets/patterns/pl/identity/pesel.toml | 21 - .../patterns/se/contact/postnummer.toml | 23 - .../se/finance/organisationsnummer.toml | 21 - .../patterns/se/identity/personnummer.toml | 29 - .../patterns/sg/contact/postal_code.toml | 18 - .../assets/patterns/sg/finance/uen.toml | 37 - .../assets/patterns/sg/identity/nric.toml | 31 - .../patterns/th/contact/postal_code.toml | 19 - .../patterns/th/identity/national_id.toml | 30 - 
.../patterns/tr/contact/posta_kodu.toml | 18 - .../assets/patterns/tr/identity/tckn.toml | 24 - .../patterns/tr/vehicle/license_plate.toml | 28 - .../assets/patterns/uk/contact/postcode.toml | 23 - .../patterns/uk/identity/driving_licence.toml | 25 - .../assets/patterns/uk/identity/nhs.toml | 17 - .../assets/patterns/uk/identity/nino.toml | 16 - .../assets/patterns/uk/identity/passport.toml | 26 - .../patterns/uk/vehicle/registration.toml | 40 - .../patterns/us/finance/bank_account.toml | 26 - .../patterns/us/finance/bank_routing.toml | 32 - .../assets/patterns/us/health/mbi.toml | 27 - .../patterns/us/health/medical_license.toml | 36 - .../assets/patterns/us/health/npi.toml | 28 - .../patterns/us/identity/drivers_license.toml | 26 - .../assets/patterns/us/identity/itin.toml | 28 - .../assets/patterns/us/identity/passport.toml | 14 - .../patterns/us/identity/postal_code.toml | 17 - .../assets/patterns/us/identity/ssn.toml | 14 - .../assets/patterns/world/contact/email.toml | 12 - .../assets/patterns/world/contact/phone.toml | 13 - .../assets/patterns/world/contact/url.toml | 6 - .../patterns/world/credentials/aws_key.toml | 26 - .../world/credentials/generic_api_key.toml | 10 - .../world/credentials/github_token.toml | 14 - .../world/credentials/private_key.toml | 17 - .../world/credentials/stripe_key.toml | 6 - .../world/finance/bitcoin_address.toml | 17 - .../patterns/world/finance/credit_card.toml | 22 - .../world/finance/ethereum_address.toml | 6 - .../assets/patterns/world/finance/iban.toml | 8 - .../patterns/world/finance/swift_code.toml | 6 - .../assets/patterns/world/network/ipv4.toml | 8 - .../assets/patterns/world/network/ipv6.toml | 25 - .../patterns/world/network/mac_address.toml | 16 - .../world/personal/date_of_birth.toml | 13 - .../patterns/world/personal/datetime.toml | 12 - crates/nvisy-pattern/src/lib.rs | 13 - .../nvisy-pattern/src/recognition/compiled.rs | 172 ---- .../nvisy-pattern/src/recognition/context.rs | 163 --- 
.../src/recognition/dictionary.rs | 246 ----- crates/nvisy-pattern/src/recognition/mod.rs | 22 - .../src/recognition/recognizer.rs | 858 ---------------- crates/nvisy-pattern/src/recognition/regex.rs | 181 ---- crates/nvisy-pattern/src/recognition/term.rs | 104 -- .../src/shipped/dictionaries/en.rs | 51 - .../src/shipped/dictionaries/mod.rs | 80 -- .../src/shipped/dictionaries/world.rs | 22 - crates/nvisy-pattern/src/shipped/mod.rs | 14 - .../nvisy-pattern/src/shipped/patterns/au.rs | 32 - .../nvisy-pattern/src/shipped/patterns/ca.rs | 22 - .../nvisy-pattern/src/shipped/patterns/de.rs | 87 -- .../nvisy-pattern/src/shipped/patterns/es.rs | 34 - .../nvisy-pattern/src/shipped/patterns/fi.rs | 17 - .../nvisy-pattern/src/shipped/patterns/in.rs | 49 - .../nvisy-pattern/src/shipped/patterns/it.rs | 43 - .../nvisy-pattern/src/shipped/patterns/kr.rs | 36 - .../nvisy-pattern/src/shipped/patterns/mod.rs | 358 ------- .../nvisy-pattern/src/shipped/patterns/ng.rs | 22 - .../nvisy-pattern/src/shipped/patterns/pl.rs | 32 - .../nvisy-pattern/src/shipped/patterns/se.rs | 27 - .../nvisy-pattern/src/shipped/patterns/sg.rs | 27 - .../nvisy-pattern/src/shipped/patterns/th.rs | 23 - .../nvisy-pattern/src/shipped/patterns/tr.rs | 27 - .../nvisy-pattern/src/shipped/patterns/uk.rs | 45 - .../nvisy-pattern/src/shipped/patterns/us.rs | 63 -- .../src/shipped/patterns/world.rs | 105 -- crates/nvisy-pattern/src/validators/au/abn.rs | 71 -- crates/nvisy-pattern/src/validators/au/acn.rs | 63 -- .../src/validators/au/medicare.rs | 76 -- crates/nvisy-pattern/src/validators/au/mod.rs | 17 - crates/nvisy-pattern/src/validators/au/tfn.rs | 61 -- crates/nvisy-pattern/src/validators/btc.rs | 53 - crates/nvisy-pattern/src/validators/ca/mod.rs | 10 - crates/nvisy-pattern/src/validators/ca/sin.rs | 77 -- crates/nvisy-pattern/src/validators/date.rs | 135 --- .../nvisy-pattern/src/validators/de/bsnr.rs | 51 - .../src/validators/de/health_insurance.rs | 81 -- .../nvisy-pattern/src/validators/de/icao.rs | 37 - 
.../src/validators/de/id_card.rs | 85 -- .../nvisy-pattern/src/validators/de/lanr.rs | 67 -- crates/nvisy-pattern/src/validators/de/mod.rs | 29 - .../src/validators/de/passport.rs | 67 -- crates/nvisy-pattern/src/validators/de/plz.rs | 50 - .../src/validators/de/social_security.rs | 100 -- .../nvisy-pattern/src/validators/de/tax_id.rs | 87 -- .../nvisy-pattern/src/validators/de/vat_id.rs | 85 -- crates/nvisy-pattern/src/validators/es/cif.rs | 119 --- crates/nvisy-pattern/src/validators/es/mod.rs | 14 - crates/nvisy-pattern/src/validators/es/nie.rs | 83 -- crates/nvisy-pattern/src/validators/es/nif.rs | 71 -- .../nvisy-pattern/src/validators/fi/hetu.rs | 90 -- crates/nvisy-pattern/src/validators/fi/mod.rs | 10 - crates/nvisy-pattern/src/validators/iban.rs | 67 -- .../src/validators/in/aadhaar.rs | 72 -- .../nvisy-pattern/src/validators/in/gstin.rs | 89 -- crates/nvisy-pattern/src/validators/in/mod.rs | 14 - crates/nvisy-pattern/src/validators/in/pan.rs | 71 -- .../src/validators/it/fiscal_code.rs | 109 -- crates/nvisy-pattern/src/validators/it/mod.rs | 12 - .../src/validators/it/vat_code.rs | 76 -- crates/nvisy-pattern/src/validators/kr/brn.rs | 68 -- .../src/validators/kr/driver_license.rs | 56 - crates/nvisy-pattern/src/validators/kr/frn.rs | 70 -- crates/nvisy-pattern/src/validators/kr/mod.rs | 17 - crates/nvisy-pattern/src/validators/kr/rrn.rs | 86 -- crates/nvisy-pattern/src/validators/luhn.rs | 92 -- crates/nvisy-pattern/src/validators/mod.rs | 281 ------ crates/nvisy-pattern/src/validators/ng/mod.rs | 10 - crates/nvisy-pattern/src/validators/ng/nin.rs | 59 -- crates/nvisy-pattern/src/validators/phone.rs | 39 - crates/nvisy-pattern/src/validators/pl/mod.rs | 14 - crates/nvisy-pattern/src/validators/pl/nip.rs | 65 -- .../nvisy-pattern/src/validators/pl/pesel.rs | 53 - .../nvisy-pattern/src/validators/pl/regon.rs | 80 -- .../nvisy-pattern/src/validators/se/luhn.rs | 18 - crates/nvisy-pattern/src/validators/se/mod.rs | 14 - 
.../src/validators/se/organisationsnummer.rs | 62 -- .../src/validators/se/personnummer.rs | 94 -- crates/nvisy-pattern/src/validators/sg/mod.rs | 12 - .../nvisy-pattern/src/validators/sg/nric.rs | 115 --- crates/nvisy-pattern/src/validators/sg/uen.rs | 140 --- crates/nvisy-pattern/src/validators/th/mod.rs | 10 - .../src/validators/th/national_id.rs | 85 -- crates/nvisy-pattern/src/validators/tr/mod.rs | 10 - .../nvisy-pattern/src/validators/tr/tckn.rs | 86 -- .../src/validators/uk/driving_licence.rs | 71 -- crates/nvisy-pattern/src/validators/uk/mod.rs | 17 - crates/nvisy-pattern/src/validators/uk/nhs.rs | 63 -- .../nvisy-pattern/src/validators/uk/nino.rs | 71 -- .../src/validators/uk/vehicle_registration.rs | 69 -- .../src/validators/us/aba_routing.rs | 64 -- .../src/validators/us/dea_number.rs | 86 -- crates/nvisy-pattern/src/validators/us/mod.rs | 18 - crates/nvisy-pattern/src/validators/us/npi.rs | 66 -- .../src/validators/us/postal_code.rs | 40 - crates/nvisy-pattern/src/validators/us/ssn.rs | 84 -- .../nvisy-pattern/src/validators/verhoeff.rs | 40 - .../testdata/dictionaries/product_codes.csv | 4 - .../testdata/dictionaries/product_codes.toml | 3 - .../testdata/inputs/au/finance.txt | 8 - .../testdata/inputs/au/health.txt | 4 - .../testdata/inputs/au/identity.txt | 4 - .../testdata/inputs/ca/contact.txt | 7 - .../testdata/inputs/ca/identity.txt | 4 - .../nvisy-pattern/testdata/inputs/contact.txt | 10 - .../testdata/inputs/credentials.txt | 16 - .../testdata/inputs/de/contact.txt | 7 - .../testdata/inputs/de/finance.txt | 8 - .../testdata/inputs/de/health.txt | 6 - .../testdata/inputs/de/identity.txt | 9 - .../testdata/inputs/de/vehicle.txt | 8 - .../testdata/inputs/es/contact.txt | 7 - .../testdata/inputs/es/finance.txt | 7 - .../testdata/inputs/es/identity.txt | 7 - .../testdata/inputs/fi/identity.txt | 4 - .../nvisy-pattern/testdata/inputs/finance.txt | 13 - .../testdata/inputs/in/finance.txt | 8 - .../testdata/inputs/in/identity.txt | 8 - 
.../testdata/inputs/in/vehicle.txt | 7 - .../testdata/inputs/internal.txt | 6 - .../testdata/inputs/it/finance.txt | 7 - .../testdata/inputs/it/identity.txt | 7 - .../testdata/inputs/kr/finance.txt | 7 - .../testdata/inputs/kr/identity.txt | 7 - .../nvisy-pattern/testdata/inputs/network.txt | 11 - .../testdata/inputs/ng/identity.txt | 4 - .../testdata/inputs/ng/vehicle.txt | 7 - .../testdata/inputs/personal.txt | 7 - .../testdata/inputs/pl/contact.txt | 7 - .../testdata/inputs/pl/finance.txt | 8 - .../testdata/inputs/pl/identity.txt | 5 - .../testdata/inputs/se/contact.txt | 7 - .../testdata/inputs/se/finance.txt | 7 - .../testdata/inputs/se/identity.txt | 4 - .../testdata/inputs/sg/contact.txt | 6 - .../testdata/inputs/sg/finance.txt | 7 - .../testdata/inputs/sg/identity.txt | 4 - .../testdata/inputs/th/contact.txt | 7 - .../testdata/inputs/th/identity.txt | 3 - .../testdata/inputs/tr/contact.txt | 7 - .../testdata/inputs/tr/identity.txt | 4 - .../testdata/inputs/uk/contact.txt | 7 - .../testdata/inputs/uk/identity.txt | 8 - .../testdata/inputs/uk/vehicle.txt | 8 - .../testdata/inputs/us/finance.txt | 10 - .../testdata/inputs/us/health.txt | 15 - .../testdata/inputs/us/identity.txt | 12 - .../testdata/patterns/employee_id.toml | 6 - .../testdata/patterns/product_codes.toml | 6 - crates/nvisy-pattern/tests/builtin.rs | 143 --- crates/nvisy-pattern/tests/builtin_au.rs | 52 - crates/nvisy-pattern/tests/builtin_ca.rs | 35 - crates/nvisy-pattern/tests/builtin_de.rs | 114 --- crates/nvisy-pattern/tests/builtin_es.rs | 61 -- crates/nvisy-pattern/tests/builtin_fi.rs | 19 - crates/nvisy-pattern/tests/builtin_in.rs | 60 -- crates/nvisy-pattern/tests/builtin_it.rs | 61 -- crates/nvisy-pattern/tests/builtin_kr.rs | 48 - crates/nvisy-pattern/tests/builtin_ng.rs | 30 - crates/nvisy-pattern/tests/builtin_pl.rs | 60 -- crates/nvisy-pattern/tests/builtin_se.rs | 41 - crates/nvisy-pattern/tests/builtin_sg.rs | 47 - crates/nvisy-pattern/tests/builtin_th.rs | 25 - 
crates/nvisy-pattern/tests/builtin_tr.rs | 31 - crates/nvisy-pattern/tests/builtin_uk.rs | 65 -- crates/nvisy-pattern/tests/builtin_us.rs | 85 -- .../nvisy-pattern/tests/enhancer_roundtrip.rs | 75 -- crates/nvisy-pattern/tests/fixtures/mod.rs | 52 - crates/nvisy-pattern/tests/user_rules.rs | 91 -- crates/nvisy-stt/README.md | 34 - crates/nvisy-stt/src/backend/mod.rs | 16 - crates/nvisy-stt/src/backend/noop_backend.rs | 24 - crates/nvisy-stt/src/backend/stt_backend.rs | 77 -- .../src/backend/transcribed_segment.rs | 47 - crates/nvisy-stt/src/extraction/mod.rs | 82 -- .../nvisy-stt/src/extraction/transcription.rs | 39 - crates/nvisy-stt/src/lib.rs | 8 - crates/nvisy-toolkit/Cargo.toml | 102 -- crates/nvisy-toolkit/README.md | 40 - crates/nvisy-toolkit/examples/pipeline.rs | 144 --- .../src/deduplication/calibrate/mod.rs | 246 ----- .../src/deduplication/filter/mod.rs | 169 ---- .../src/deduplication/fuse/group.rs | 214 ---- .../src/deduplication/fuse/key.rs | 43 - .../src/deduplication/fuse/mod.rs | 172 ---- .../src/deduplication/fuse/strategy.rs | 184 ---- .../nvisy-toolkit/src/deduplication/layer.rs | 99 -- crates/nvisy-toolkit/src/deduplication/mod.rs | 77 -- .../nvisy-toolkit/src/deduplication/params.rs | 70 -- .../src/deduplication/pipeline.rs | 196 ---- .../src/deduplication/resolve/mod.rs | 188 ---- .../src/deduplication/resolve/strategy.rs | 40 - .../src/deduplication/span_size.rs | 117 --- .../src/deduplication/suppress/mod.rs | 366 ------- .../src/deduplication/suppress/params.rs | 82 -- crates/nvisy-toolkit/src/detection/chunks.rs | 137 --- crates/nvisy-toolkit/src/detection/llm.rs | 13 - crates/nvisy-toolkit/src/detection/mod.rs | 33 - crates/nvisy-toolkit/src/detection/ner.rs | 9 - crates/nvisy-toolkit/src/detection/pattern.rs | 10 - .../nvisy-toolkit/src/detection/registry.rs | 170 ---- crates/nvisy-toolkit/src/extraction/mod.rs | 31 - crates/nvisy-toolkit/src/extraction/ocr.rs | 21 - .../nvisy-toolkit/src/extraction/registry.rs | 93 -- 
crates/nvisy-toolkit/src/extraction/stt.rs | 12 - crates/nvisy-toolkit/src/ingestion/mod.rs | 31 - crates/nvisy-toolkit/src/lib.rs | 10 - .../src/redaction/anonymizer/encrypt.rs | 134 --- .../src/redaction/anonymizer/hash.rs | 158 --- .../src/redaction/anonymizer/keep.rs | 31 - .../src/redaction/anonymizer/mask.rs | 198 ---- .../src/redaction/anonymizer/mod.rs | 27 - .../src/redaction/anonymizer/redact.rs | 26 - .../src/redaction/anonymizer/replace.rs | 127 --- .../src/redaction/deanonymizer/decrypt.rs | 207 ---- .../src/redaction/deanonymizer/mod.rs | 19 - crates/nvisy-toolkit/src/redaction/id.rs | 125 --- crates/nvisy-toolkit/src/redaction/mod.rs | 49 - .../nvisy-toolkit/src/redaction/registry.rs | 355 ------- crates/nvisy-toolkit/src/redaction/store.rs | 114 --- crates/nvisy-toolkit/src/validation/check.rs | 152 --- .../nvisy-toolkit/src/validation/leak/mod.rs | 206 ---- crates/nvisy-toolkit/src/validation/mod.rs | 30 - .../nvisy-toolkit/src/validation/pipeline.rs | 66 -- crates/nvisy-toolkit/testdata/contact.csv | 3 - crates/nvisy-toolkit/testdata/contact.html | 26 - crates/nvisy-toolkit/testdata/contact.json | 23 - crates/nvisy-toolkit/testdata/contact.txt | 14 - crates/nvisy-toolkit/tests/codec_e2e_csv.rs | 138 --- crates/nvisy-toolkit/tests/codec_e2e_html.rs | 155 --- crates/nvisy-toolkit/tests/codec_e2e_json.rs | 111 -- crates/nvisy-toolkit/tests/codec_e2e_txt.rs | 69 -- .../nvisy-toolkit/tests/fixtures/asserts.rs | 87 -- crates/nvisy-toolkit/tests/fixtures/mod.rs | 36 - .../nvisy-toolkit/tests/fixtures/pipeline.rs | 160 --- .../tests/fixtures/registries.rs | 63 -- .../tests/recognition_registry.rs | 116 --- 574 files changed, 160 insertions(+), 38901 deletions(-) rename crates/{nvisy-stt => elide-bento}/Cargo.toml (53%) create mode 100644 crates/elide-bento/README.md create mode 100644 crates/elide-bento/src/client.rs create mode 100644 crates/elide-bento/src/error.rs create mode 100644 crates/elide-bento/src/lib.rs rename crates/{nvisy-fake => 
elide-fake}/Cargo.toml (72%) rename crates/{nvisy-fake => elide-fake}/README.md (100%) rename crates/{nvisy-fake => elide-fake}/src/anonymizer/mod.rs (100%) rename crates/{nvisy-fake => elide-fake}/src/generator/case_id.rs (100%) rename crates/{nvisy-fake => elide-fake}/src/generator/contact.rs (100%) rename crates/{nvisy-fake => elide-fake}/src/generator/device.rs (100%) rename crates/{nvisy-fake => elide-fake}/src/generator/dispatch.rs (100%) rename crates/{nvisy-fake => elide-fake}/src/generator/finance.rs (100%) rename crates/{nvisy-fake => elide-fake}/src/generator/identity.rs (100%) rename crates/{nvisy-fake => elide-fake}/src/generator/mod.rs (100%) rename crates/{nvisy-fake => elide-fake}/src/generator/pattern.rs (100%) rename crates/{nvisy-fake => elide-fake}/src/generator/temporal.rs (100%) rename crates/{nvisy-fake => elide-fake}/src/lib.rs (100%) rename crates/{nvisy-fake => elide-fake}/src/locale/mod.rs (100%) delete mode 100644 crates/nvisy-codec/Cargo.toml delete mode 100644 crates/nvisy-codec/README.md delete mode 100644 crates/nvisy-codec/src/content/bundle.rs delete mode 100644 crates/nvisy-codec/src/content/content_data.rs delete mode 100644 crates/nvisy-codec/src/content/content_metadata.rs delete mode 100644 crates/nvisy-codec/src/content/encoding.rs delete mode 100644 crates/nvisy-codec/src/content/mod.rs delete mode 100644 crates/nvisy-codec/src/core/format.rs delete mode 100644 crates/nvisy-codec/src/core/handler.rs delete mode 100644 crates/nvisy-codec/src/core/loader.rs delete mode 100644 crates/nvisy-codec/src/core/mod.rs delete mode 100644 crates/nvisy-codec/src/core/registry.rs delete mode 100644 crates/nvisy-codec/src/document/audio.rs delete mode 100644 crates/nvisy-codec/src/document/image.rs delete mode 100644 crates/nvisy-codec/src/document/mod.rs delete mode 100644 crates/nvisy-codec/src/document/tabular.rs delete mode 100644 crates/nvisy-codec/src/document/text.rs delete mode 100644 
crates/nvisy-codec/src/handler/audio/duration.rs delete mode 100644 crates/nvisy-codec/src/handler/audio/mod.rs delete mode 100644 crates/nvisy-codec/src/handler/audio/mp3_codec.rs delete mode 100644 crates/nvisy-codec/src/handler/audio/mp3_handler.rs delete mode 100644 crates/nvisy-codec/src/handler/audio/mp3_loader.rs delete mode 100644 crates/nvisy-codec/src/handler/audio/redact.rs delete mode 100644 crates/nvisy-codec/src/handler/audio/wav_handler.rs delete mode 100644 crates/nvisy-codec/src/handler/audio/wav_loader.rs delete mode 100644 crates/nvisy-codec/src/handler/image/image_ops.rs delete mode 100644 crates/nvisy-codec/src/handler/image/jpeg_handler.rs delete mode 100644 crates/nvisy-codec/src/handler/image/jpeg_loader.rs delete mode 100644 crates/nvisy-codec/src/handler/image/macros.rs delete mode 100644 crates/nvisy-codec/src/handler/image/mod.rs delete mode 100644 crates/nvisy-codec/src/handler/image/png_handler.rs delete mode 100644 crates/nvisy-codec/src/handler/image/png_loader.rs delete mode 100644 crates/nvisy-codec/src/handler/image/redact.rs delete mode 100644 crates/nvisy-codec/src/handler/image/tiff_handler.rs delete mode 100644 crates/nvisy-codec/src/handler/image/tiff_loader.rs delete mode 100644 crates/nvisy-codec/src/handler/mod.rs delete mode 100644 crates/nvisy-codec/src/handler/rich/docx_handler.rs delete mode 100644 crates/nvisy-codec/src/handler/rich/docx_loader.rs delete mode 100644 crates/nvisy-codec/src/handler/rich/mod.rs delete mode 100644 crates/nvisy-codec/src/handler/rich/pdf_handler.rs delete mode 100644 crates/nvisy-codec/src/handler/rich/pdf_loader.rs delete mode 100644 crates/nvisy-codec/src/handler/rich/pdf_render.rs delete mode 100644 crates/nvisy-codec/src/handler/tabular/csv_handler.rs delete mode 100644 crates/nvisy-codec/src/handler/tabular/csv_loader.rs delete mode 100644 crates/nvisy-codec/src/handler/tabular/mod.rs delete mode 100644 crates/nvisy-codec/src/handler/tabular/xlsx_handler.rs delete mode 100644 
crates/nvisy-codec/src/handler/tabular/xlsx_loader.rs delete mode 100644 crates/nvisy-codec/src/handler/text/html_encode.rs delete mode 100644 crates/nvisy-codec/src/handler/text/html_handler.rs delete mode 100644 crates/nvisy-codec/src/handler/text/html_loader.rs delete mode 100644 crates/nvisy-codec/src/handler/text/json_handler.rs delete mode 100644 crates/nvisy-codec/src/handler/text/json_loader.rs delete mode 100644 crates/nvisy-codec/src/handler/text/markdown_loader.rs delete mode 100644 crates/nvisy-codec/src/handler/text/mod.rs delete mode 100644 crates/nvisy-codec/src/handler/text/redact.rs delete mode 100644 crates/nvisy-codec/src/handler/text/txt_handler.rs delete mode 100644 crates/nvisy-codec/src/handler/text/txt_loader.rs delete mode 100644 crates/nvisy-codec/src/lib.rs delete mode 100644 crates/nvisy-context/Cargo.toml delete mode 100644 crates/nvisy-context/README.md delete mode 100644 crates/nvisy-context/src/enhancer/context.rs delete mode 100644 crates/nvisy-context/src/enhancer/mod.rs delete mode 100644 crates/nvisy-context/src/enhancer/window.rs delete mode 100644 crates/nvisy-context/src/io/mod.rs delete mode 100644 crates/nvisy-context/src/io/tokens.rs delete mode 100644 crates/nvisy-context/src/io/wrapper.rs delete mode 100644 crates/nvisy-context/src/lib.rs delete mode 100644 crates/nvisy-context/src/matching/lemma.rs delete mode 100644 crates/nvisy-context/src/matching/matcher.rs delete mode 100644 crates/nvisy-context/src/matching/mod.rs delete mode 100644 crates/nvisy-context/src/rule.rs delete mode 100644 crates/nvisy-core/src/entity/annotation.rs delete mode 100644 crates/nvisy-core/src/entity/label/builtins.rs delete mode 100644 crates/nvisy-core/src/entity/label/catalog.rs delete mode 100644 crates/nvisy-core/src/entity/label/entity_label.rs delete mode 100644 crates/nvisy-core/src/entity/label/mod.rs delete mode 100644 crates/nvisy-core/src/entity/label/reference.rs delete mode 100644 crates/nvisy-core/src/entity/method/mod.rs 
delete mode 100644 crates/nvisy-core/src/entity/method/provenance.rs delete mode 100644 crates/nvisy-core/src/entity/method/trail.rs delete mode 100644 crates/nvisy-core/src/entity/mod.rs delete mode 100644 crates/nvisy-core/src/entity/source.rs delete mode 100644 crates/nvisy-core/src/extraction/artifacts.rs delete mode 100644 crates/nvisy-core/src/extraction/data_at.rs delete mode 100644 crates/nvisy-core/src/extraction/mod.rs delete mode 100644 crates/nvisy-core/src/extraction/output.rs delete mode 100644 crates/nvisy-core/src/extraction/span.rs delete mode 100644 crates/nvisy-core/src/extraction/text_at.rs delete mode 100644 crates/nvisy-core/src/modality/audio.rs delete mode 100644 crates/nvisy-core/src/modality/image.rs delete mode 100644 crates/nvisy-core/src/modality/mod.rs delete mode 100644 crates/nvisy-core/src/modality/tabular.rs delete mode 100644 crates/nvisy-core/src/modality/text.rs rename crates/{nvisy-engine => nvisy-core}/src/policy/audit.rs (100%) rename crates/{nvisy-engine => nvisy-core}/src/policy/condition.rs (100%) rename crates/{nvisy-engine => nvisy-core}/src/policy/mod.rs (100%) rename crates/{nvisy-engine => nvisy-core}/src/policy/redaction/any.rs (100%) rename crates/{nvisy-engine => nvisy-core}/src/policy/redaction/audio.rs (100%) rename crates/{nvisy-engine => nvisy-core}/src/policy/redaction/image.rs (100%) rename crates/{nvisy-engine => nvisy-core}/src/policy/redaction/mod.rs (100%) rename crates/{nvisy-engine => nvisy-core}/src/policy/redaction/tabular.rs (100%) rename crates/{nvisy-engine => nvisy-core}/src/policy/redaction/text.rs (100%) rename crates/{nvisy-engine => nvisy-core}/src/policy/retention/duration.rs (100%) rename crates/{nvisy-engine => nvisy-core}/src/policy/retention/mod.rs (100%) rename crates/{nvisy-engine => nvisy-core}/src/policy/rule.rs (100%) rename crates/{nvisy-engine => nvisy-core}/src/policy/selector.rs (100%) rename crates/{nvisy-engine => nvisy-core}/src/policy/suppress.rs (100%) delete mode 100644 
crates/nvisy-core/src/primitive/confidence/mod.rs delete mode 100644 crates/nvisy-core/src/primitive/confidence/threshold.rs delete mode 100644 crates/nvisy-core/src/primitive/confidence/value.rs delete mode 100644 crates/nvisy-core/src/primitive/country.rs delete mode 100644 crates/nvisy-core/src/primitive/geometry/bounding_box.rs delete mode 100644 crates/nvisy-core/src/primitive/geometry/dimensions.rs delete mode 100644 crates/nvisy-core/src/primitive/geometry/mod.rs delete mode 100644 crates/nvisy-core/src/primitive/geometry/normalized_bounding_box.rs delete mode 100644 crates/nvisy-core/src/primitive/geometry/polygon.rs delete mode 100644 crates/nvisy-core/src/primitive/language/detection.rs delete mode 100644 crates/nvisy-core/src/primitive/language/detections.rs delete mode 100644 crates/nvisy-core/src/primitive/language/mod.rs delete mode 100644 crates/nvisy-core/src/primitive/language/tag.rs delete mode 100644 crates/nvisy-core/src/primitive/mod.rs delete mode 100644 crates/nvisy-core/src/primitive/rendering/color.rs delete mode 100644 crates/nvisy-core/src/primitive/rendering/dpi.rs delete mode 100644 crates/nvisy-core/src/primitive/rendering/mod.rs delete mode 100644 crates/nvisy-core/src/primitive/time_span.rs delete mode 100644 crates/nvisy-core/src/recognition/hint.rs delete mode 100644 crates/nvisy-core/src/recognition/input.rs delete mode 100644 crates/nvisy-core/src/recognition/label_map.rs delete mode 100644 crates/nvisy-core/src/recognition/mod.rs delete mode 100644 crates/nvisy-core/src/recognition/output.rs delete mode 100644 crates/nvisy-core/src/redaction/anonymizer.rs delete mode 100644 crates/nvisy-core/src/redaction/deanonymizer.rs delete mode 100644 crates/nvisy-core/src/redaction/leak_profile.rs delete mode 100644 crates/nvisy-core/src/redaction/memoized.rs delete mode 100644 crates/nvisy-core/src/redaction/mod.rs delete mode 100644 crates/nvisy-core/src/redaction/redact_at.rs delete mode 100644 
crates/nvisy-core/src/redaction/redactions.rs delete mode 100644 crates/nvisy-core/src/redaction/replacement.rs delete mode 100644 crates/nvisy-core/src/redaction/store.rs delete mode 100644 crates/nvisy-llm/Cargo.toml delete mode 100644 crates/nvisy-llm/README.md delete mode 100644 crates/nvisy-llm/src/backend/http/config.rs delete mode 100644 crates/nvisy-llm/src/backend/http/middleware.rs delete mode 100644 crates/nvisy-llm/src/backend/http/mod.rs delete mode 100644 crates/nvisy-llm/src/backend/mod.rs delete mode 100644 crates/nvisy-llm/src/backend/request.rs delete mode 100644 crates/nvisy-llm/src/backend/response.rs delete mode 100644 crates/nvisy-llm/src/backend/rig/config.rs delete mode 100644 crates/nvisy-llm/src/backend/rig/context.rs delete mode 100644 crates/nvisy-llm/src/backend/rig/inner.rs delete mode 100644 crates/nvisy-llm/src/backend/rig/mod.rs delete mode 100644 crates/nvisy-llm/src/backend/rig/usage.rs delete mode 100644 crates/nvisy-llm/src/error.rs delete mode 100644 crates/nvisy-llm/src/lib.rs delete mode 100644 crates/nvisy-llm/src/provider/authenticated.rs delete mode 100644 crates/nvisy-llm/src/provider/llm.rs delete mode 100644 crates/nvisy-llm/src/provider/mod.rs delete mode 100644 crates/nvisy-llm/src/provider/unauthenticated.rs delete mode 100644 crates/nvisy-llm/src/recognition/candidates.rs delete mode 100644 crates/nvisy-llm/src/recognition/default_prompt.rs delete mode 100644 crates/nvisy-llm/src/recognition/file_prompt.rs delete mode 100644 crates/nvisy-llm/src/recognition/lift.rs delete mode 100644 crates/nvisy-llm/src/recognition/llm_recognizer.rs delete mode 100644 crates/nvisy-llm/src/recognition/localize.rs delete mode 100644 crates/nvisy-llm/src/recognition/mod.rs delete mode 100644 crates/nvisy-llm/src/recognition/prompt.rs delete mode 100644 crates/nvisy-llm/src/recognition/response_parser.rs delete mode 100644 crates/nvisy-llm/src/recognition/schemas.rs delete mode 100644 crates/nvisy-llm/src/recognition/text_prompt.rs 
delete mode 100644 crates/nvisy-llm/src/recognition/vlm_prompt.rs delete mode 100644 crates/nvisy-llm/testdata/prompts/ner.toml delete mode 100644 crates/nvisy-llm/testdata/prompts/vlm.toml delete mode 100644 crates/nvisy-llm/tests/file_prompt.rs delete mode 100644 crates/nvisy-ner/Cargo.toml delete mode 100644 crates/nvisy-ner/README.md delete mode 100644 crates/nvisy-ner/src/backend/bento_backend.rs delete mode 100644 crates/nvisy-ner/src/backend/bento_types.rs delete mode 100644 crates/nvisy-ner/src/backend/mod.rs delete mode 100644 crates/nvisy-ner/src/backend/ner_backend.rs delete mode 100644 crates/nvisy-ner/src/backend/ner_span.rs delete mode 100644 crates/nvisy-ner/src/backend/noop_backend.rs delete mode 100644 crates/nvisy-ner/src/lib.rs delete mode 100644 crates/nvisy-ner/src/nlp/capabilities.rs delete mode 100644 crates/nvisy-ner/src/nlp/engine.rs delete mode 100644 crates/nvisy-ner/src/nlp/lingua_detector.rs delete mode 100644 crates/nvisy-ner/src/nlp/lingua_engine.rs delete mode 100644 crates/nvisy-ner/src/nlp/mod.rs delete mode 100644 crates/nvisy-ner/src/recognition/aggregation.rs delete mode 100644 crates/nvisy-ner/src/recognition/config.rs delete mode 100644 crates/nvisy-ner/src/recognition/mod.rs delete mode 100644 crates/nvisy-ner/src/recognition/recognizer.rs delete mode 100644 crates/nvisy-ocr/Cargo.toml delete mode 100644 crates/nvisy-ocr/README.md delete mode 100644 crates/nvisy-ocr/src/backend/bento_backend.rs delete mode 100644 crates/nvisy-ocr/src/backend/bento_types.rs delete mode 100644 crates/nvisy-ocr/src/backend/mod.rs delete mode 100644 crates/nvisy-ocr/src/backend/noop_backend.rs delete mode 100644 crates/nvisy-ocr/src/backend/ocr_backend.rs delete mode 100644 crates/nvisy-ocr/src/extraction/extractor.rs delete mode 100644 crates/nvisy-ocr/src/extraction/mod.rs delete mode 100644 crates/nvisy-ocr/src/lib.rs delete mode 100644 crates/nvisy-ocr/src/types/mod.rs delete mode 100644 crates/nvisy-ocr/src/types/raw_block.rs delete mode 
100644 crates/nvisy-pattern/Cargo.toml delete mode 100644 crates/nvisy-pattern/README.md delete mode 100644 crates/nvisy-pattern/assets/PRESIDIO.md delete mode 100644 crates/nvisy-pattern/assets/README.md delete mode 100644 crates/nvisy-pattern/assets/dictionaries/en/finance/currencies.csv delete mode 100644 crates/nvisy-pattern/assets/dictionaries/en/finance/currencies.toml delete mode 100644 crates/nvisy-pattern/assets/dictionaries/en/personal/languages.csv delete mode 100644 crates/nvisy-pattern/assets/dictionaries/en/personal/languages.toml delete mode 100644 crates/nvisy-pattern/assets/dictionaries/en/personal/nationalities.toml delete mode 100644 crates/nvisy-pattern/assets/dictionaries/en/personal/nationalities.txt delete mode 100644 crates/nvisy-pattern/assets/dictionaries/en/personal/religions.toml delete mode 100644 crates/nvisy-pattern/assets/dictionaries/en/personal/religions.txt delete mode 100644 crates/nvisy-pattern/assets/dictionaries/world/finance/cryptocurrencies.csv delete mode 100644 crates/nvisy-pattern/assets/dictionaries/world/finance/cryptocurrencies.toml delete mode 100644 crates/nvisy-pattern/assets/patterns/au/finance/abn.toml delete mode 100644 crates/nvisy-pattern/assets/patterns/au/finance/acn.toml delete mode 100644 crates/nvisy-pattern/assets/patterns/au/health/medicare.toml delete mode 100644 crates/nvisy-pattern/assets/patterns/au/identity/tfn.toml delete mode 100644 crates/nvisy-pattern/assets/patterns/ca/contact/postal_code.toml delete mode 100644 crates/nvisy-pattern/assets/patterns/ca/identity/sin.toml delete mode 100644 crates/nvisy-pattern/assets/patterns/de/contact/plz.toml delete mode 100644 crates/nvisy-pattern/assets/patterns/de/finance/handelsregister.toml delete mode 100644 crates/nvisy-pattern/assets/patterns/de/health/bsnr.toml delete mode 100644 crates/nvisy-pattern/assets/patterns/de/health/health_insurance.toml delete mode 100644 crates/nvisy-pattern/assets/patterns/de/health/lanr.toml delete mode 100644 
crates/nvisy-pattern/assets/patterns/de/identity/fuehrerschein.toml delete mode 100644 crates/nvisy-pattern/assets/patterns/de/identity/id_card.toml delete mode 100644 crates/nvisy-pattern/assets/patterns/de/identity/passport.toml delete mode 100644 crates/nvisy-pattern/assets/patterns/de/identity/social_security.toml delete mode 100644 crates/nvisy-pattern/assets/patterns/de/identity/tax_id.toml delete mode 100644 crates/nvisy-pattern/assets/patterns/de/identity/tax_number.toml delete mode 100644 crates/nvisy-pattern/assets/patterns/de/identity/vat_id.toml delete mode 100644 crates/nvisy-pattern/assets/patterns/de/vehicle/kfz.toml delete mode 100644 crates/nvisy-pattern/assets/patterns/es/contact/codigo_postal.toml delete mode 100644 crates/nvisy-pattern/assets/patterns/es/finance/cif.toml delete mode 100644 crates/nvisy-pattern/assets/patterns/es/identity/nie.toml delete mode 100644 crates/nvisy-pattern/assets/patterns/es/identity/nif.toml delete mode 100644 crates/nvisy-pattern/assets/patterns/es/identity/passport.toml delete mode 100644 crates/nvisy-pattern/assets/patterns/fi/identity/hetu.toml delete mode 100644 crates/nvisy-pattern/assets/patterns/in/finance/gstin.toml delete mode 100644 crates/nvisy-pattern/assets/patterns/in/identity/aadhaar.toml delete mode 100644 crates/nvisy-pattern/assets/patterns/in/identity/pan.toml delete mode 100644 crates/nvisy-pattern/assets/patterns/in/identity/passport.toml delete mode 100644 crates/nvisy-pattern/assets/patterns/in/identity/voter.toml delete mode 100644 crates/nvisy-pattern/assets/patterns/in/vehicle/registration.toml delete mode 100644 crates/nvisy-pattern/assets/patterns/it/finance/vat_code.toml delete mode 100644 crates/nvisy-pattern/assets/patterns/it/identity/driving_licence.toml delete mode 100644 crates/nvisy-pattern/assets/patterns/it/identity/fiscal_code.toml delete mode 100644 crates/nvisy-pattern/assets/patterns/it/identity/identity_card.toml delete mode 100644 
crates/nvisy-pattern/assets/patterns/it/identity/passport.toml delete mode 100644 crates/nvisy-pattern/assets/patterns/kr/finance/brn.toml delete mode 100644 crates/nvisy-pattern/assets/patterns/kr/identity/driver_license.toml delete mode 100644 crates/nvisy-pattern/assets/patterns/kr/identity/frn.toml delete mode 100644 crates/nvisy-pattern/assets/patterns/kr/identity/passport.toml delete mode 100644 crates/nvisy-pattern/assets/patterns/kr/identity/rrn.toml delete mode 100644 crates/nvisy-pattern/assets/patterns/ng/identity/nin.toml delete mode 100644 crates/nvisy-pattern/assets/patterns/ng/vehicle/registration.toml delete mode 100644 crates/nvisy-pattern/assets/patterns/pl/contact/kod_pocztowy.toml delete mode 100644 crates/nvisy-pattern/assets/patterns/pl/finance/nip.toml delete mode 100644 crates/nvisy-pattern/assets/patterns/pl/finance/regon.toml delete mode 100644 crates/nvisy-pattern/assets/patterns/pl/identity/pesel.toml delete mode 100644 crates/nvisy-pattern/assets/patterns/se/contact/postnummer.toml delete mode 100644 crates/nvisy-pattern/assets/patterns/se/finance/organisationsnummer.toml delete mode 100644 crates/nvisy-pattern/assets/patterns/se/identity/personnummer.toml delete mode 100644 crates/nvisy-pattern/assets/patterns/sg/contact/postal_code.toml delete mode 100644 crates/nvisy-pattern/assets/patterns/sg/finance/uen.toml delete mode 100644 crates/nvisy-pattern/assets/patterns/sg/identity/nric.toml delete mode 100644 crates/nvisy-pattern/assets/patterns/th/contact/postal_code.toml delete mode 100644 crates/nvisy-pattern/assets/patterns/th/identity/national_id.toml delete mode 100644 crates/nvisy-pattern/assets/patterns/tr/contact/posta_kodu.toml delete mode 100644 crates/nvisy-pattern/assets/patterns/tr/identity/tckn.toml delete mode 100644 crates/nvisy-pattern/assets/patterns/tr/vehicle/license_plate.toml delete mode 100644 crates/nvisy-pattern/assets/patterns/uk/contact/postcode.toml delete mode 100644 
crates/nvisy-pattern/assets/patterns/uk/identity/driving_licence.toml delete mode 100644 crates/nvisy-pattern/assets/patterns/uk/identity/nhs.toml delete mode 100644 crates/nvisy-pattern/assets/patterns/uk/identity/nino.toml delete mode 100644 crates/nvisy-pattern/assets/patterns/uk/identity/passport.toml delete mode 100644 crates/nvisy-pattern/assets/patterns/uk/vehicle/registration.toml delete mode 100644 crates/nvisy-pattern/assets/patterns/us/finance/bank_account.toml delete mode 100644 crates/nvisy-pattern/assets/patterns/us/finance/bank_routing.toml delete mode 100644 crates/nvisy-pattern/assets/patterns/us/health/mbi.toml delete mode 100644 crates/nvisy-pattern/assets/patterns/us/health/medical_license.toml delete mode 100644 crates/nvisy-pattern/assets/patterns/us/health/npi.toml delete mode 100644 crates/nvisy-pattern/assets/patterns/us/identity/drivers_license.toml delete mode 100644 crates/nvisy-pattern/assets/patterns/us/identity/itin.toml delete mode 100644 crates/nvisy-pattern/assets/patterns/us/identity/passport.toml delete mode 100644 crates/nvisy-pattern/assets/patterns/us/identity/postal_code.toml delete mode 100644 crates/nvisy-pattern/assets/patterns/us/identity/ssn.toml delete mode 100644 crates/nvisy-pattern/assets/patterns/world/contact/email.toml delete mode 100644 crates/nvisy-pattern/assets/patterns/world/contact/phone.toml delete mode 100644 crates/nvisy-pattern/assets/patterns/world/contact/url.toml delete mode 100644 crates/nvisy-pattern/assets/patterns/world/credentials/aws_key.toml delete mode 100644 crates/nvisy-pattern/assets/patterns/world/credentials/generic_api_key.toml delete mode 100644 crates/nvisy-pattern/assets/patterns/world/credentials/github_token.toml delete mode 100644 crates/nvisy-pattern/assets/patterns/world/credentials/private_key.toml delete mode 100644 crates/nvisy-pattern/assets/patterns/world/credentials/stripe_key.toml delete mode 100644 crates/nvisy-pattern/assets/patterns/world/finance/bitcoin_address.toml 
delete mode 100644 crates/nvisy-pattern/assets/patterns/world/finance/credit_card.toml delete mode 100644 crates/nvisy-pattern/assets/patterns/world/finance/ethereum_address.toml delete mode 100644 crates/nvisy-pattern/assets/patterns/world/finance/iban.toml delete mode 100644 crates/nvisy-pattern/assets/patterns/world/finance/swift_code.toml delete mode 100644 crates/nvisy-pattern/assets/patterns/world/network/ipv4.toml delete mode 100644 crates/nvisy-pattern/assets/patterns/world/network/ipv6.toml delete mode 100644 crates/nvisy-pattern/assets/patterns/world/network/mac_address.toml delete mode 100644 crates/nvisy-pattern/assets/patterns/world/personal/date_of_birth.toml delete mode 100644 crates/nvisy-pattern/assets/patterns/world/personal/datetime.toml delete mode 100644 crates/nvisy-pattern/src/lib.rs delete mode 100644 crates/nvisy-pattern/src/recognition/compiled.rs delete mode 100644 crates/nvisy-pattern/src/recognition/context.rs delete mode 100644 crates/nvisy-pattern/src/recognition/dictionary.rs delete mode 100644 crates/nvisy-pattern/src/recognition/mod.rs delete mode 100644 crates/nvisy-pattern/src/recognition/recognizer.rs delete mode 100644 crates/nvisy-pattern/src/recognition/regex.rs delete mode 100644 crates/nvisy-pattern/src/recognition/term.rs delete mode 100644 crates/nvisy-pattern/src/shipped/dictionaries/en.rs delete mode 100644 crates/nvisy-pattern/src/shipped/dictionaries/mod.rs delete mode 100644 crates/nvisy-pattern/src/shipped/dictionaries/world.rs delete mode 100644 crates/nvisy-pattern/src/shipped/mod.rs delete mode 100644 crates/nvisy-pattern/src/shipped/patterns/au.rs delete mode 100644 crates/nvisy-pattern/src/shipped/patterns/ca.rs delete mode 100644 crates/nvisy-pattern/src/shipped/patterns/de.rs delete mode 100644 crates/nvisy-pattern/src/shipped/patterns/es.rs delete mode 100644 crates/nvisy-pattern/src/shipped/patterns/fi.rs delete mode 100644 crates/nvisy-pattern/src/shipped/patterns/in.rs delete mode 100644 
crates/nvisy-pattern/src/shipped/patterns/it.rs delete mode 100644 crates/nvisy-pattern/src/shipped/patterns/kr.rs delete mode 100644 crates/nvisy-pattern/src/shipped/patterns/mod.rs delete mode 100644 crates/nvisy-pattern/src/shipped/patterns/ng.rs delete mode 100644 crates/nvisy-pattern/src/shipped/patterns/pl.rs delete mode 100644 crates/nvisy-pattern/src/shipped/patterns/se.rs delete mode 100644 crates/nvisy-pattern/src/shipped/patterns/sg.rs delete mode 100644 crates/nvisy-pattern/src/shipped/patterns/th.rs delete mode 100644 crates/nvisy-pattern/src/shipped/patterns/tr.rs delete mode 100644 crates/nvisy-pattern/src/shipped/patterns/uk.rs delete mode 100644 crates/nvisy-pattern/src/shipped/patterns/us.rs delete mode 100644 crates/nvisy-pattern/src/shipped/patterns/world.rs delete mode 100644 crates/nvisy-pattern/src/validators/au/abn.rs delete mode 100644 crates/nvisy-pattern/src/validators/au/acn.rs delete mode 100644 crates/nvisy-pattern/src/validators/au/medicare.rs delete mode 100644 crates/nvisy-pattern/src/validators/au/mod.rs delete mode 100644 crates/nvisy-pattern/src/validators/au/tfn.rs delete mode 100644 crates/nvisy-pattern/src/validators/btc.rs delete mode 100644 crates/nvisy-pattern/src/validators/ca/mod.rs delete mode 100644 crates/nvisy-pattern/src/validators/ca/sin.rs delete mode 100644 crates/nvisy-pattern/src/validators/date.rs delete mode 100644 crates/nvisy-pattern/src/validators/de/bsnr.rs delete mode 100644 crates/nvisy-pattern/src/validators/de/health_insurance.rs delete mode 100644 crates/nvisy-pattern/src/validators/de/icao.rs delete mode 100644 crates/nvisy-pattern/src/validators/de/id_card.rs delete mode 100644 crates/nvisy-pattern/src/validators/de/lanr.rs delete mode 100644 crates/nvisy-pattern/src/validators/de/mod.rs delete mode 100644 crates/nvisy-pattern/src/validators/de/passport.rs delete mode 100644 crates/nvisy-pattern/src/validators/de/plz.rs delete mode 100644 crates/nvisy-pattern/src/validators/de/social_security.rs 
delete mode 100644 crates/nvisy-pattern/src/validators/de/tax_id.rs delete mode 100644 crates/nvisy-pattern/src/validators/de/vat_id.rs delete mode 100644 crates/nvisy-pattern/src/validators/es/cif.rs delete mode 100644 crates/nvisy-pattern/src/validators/es/mod.rs delete mode 100644 crates/nvisy-pattern/src/validators/es/nie.rs delete mode 100644 crates/nvisy-pattern/src/validators/es/nif.rs delete mode 100644 crates/nvisy-pattern/src/validators/fi/hetu.rs delete mode 100644 crates/nvisy-pattern/src/validators/fi/mod.rs delete mode 100644 crates/nvisy-pattern/src/validators/iban.rs delete mode 100644 crates/nvisy-pattern/src/validators/in/aadhaar.rs delete mode 100644 crates/nvisy-pattern/src/validators/in/gstin.rs delete mode 100644 crates/nvisy-pattern/src/validators/in/mod.rs delete mode 100644 crates/nvisy-pattern/src/validators/in/pan.rs delete mode 100644 crates/nvisy-pattern/src/validators/it/fiscal_code.rs delete mode 100644 crates/nvisy-pattern/src/validators/it/mod.rs delete mode 100644 crates/nvisy-pattern/src/validators/it/vat_code.rs delete mode 100644 crates/nvisy-pattern/src/validators/kr/brn.rs delete mode 100644 crates/nvisy-pattern/src/validators/kr/driver_license.rs delete mode 100644 crates/nvisy-pattern/src/validators/kr/frn.rs delete mode 100644 crates/nvisy-pattern/src/validators/kr/mod.rs delete mode 100644 crates/nvisy-pattern/src/validators/kr/rrn.rs delete mode 100644 crates/nvisy-pattern/src/validators/luhn.rs delete mode 100644 crates/nvisy-pattern/src/validators/mod.rs delete mode 100644 crates/nvisy-pattern/src/validators/ng/mod.rs delete mode 100644 crates/nvisy-pattern/src/validators/ng/nin.rs delete mode 100644 crates/nvisy-pattern/src/validators/phone.rs delete mode 100644 crates/nvisy-pattern/src/validators/pl/mod.rs delete mode 100644 crates/nvisy-pattern/src/validators/pl/nip.rs delete mode 100644 crates/nvisy-pattern/src/validators/pl/pesel.rs delete mode 100644 crates/nvisy-pattern/src/validators/pl/regon.rs delete mode 
100644 crates/nvisy-pattern/src/validators/se/luhn.rs delete mode 100644 crates/nvisy-pattern/src/validators/se/mod.rs delete mode 100644 crates/nvisy-pattern/src/validators/se/organisationsnummer.rs delete mode 100644 crates/nvisy-pattern/src/validators/se/personnummer.rs delete mode 100644 crates/nvisy-pattern/src/validators/sg/mod.rs delete mode 100644 crates/nvisy-pattern/src/validators/sg/nric.rs delete mode 100644 crates/nvisy-pattern/src/validators/sg/uen.rs delete mode 100644 crates/nvisy-pattern/src/validators/th/mod.rs delete mode 100644 crates/nvisy-pattern/src/validators/th/national_id.rs delete mode 100644 crates/nvisy-pattern/src/validators/tr/mod.rs delete mode 100644 crates/nvisy-pattern/src/validators/tr/tckn.rs delete mode 100644 crates/nvisy-pattern/src/validators/uk/driving_licence.rs delete mode 100644 crates/nvisy-pattern/src/validators/uk/mod.rs delete mode 100644 crates/nvisy-pattern/src/validators/uk/nhs.rs delete mode 100644 crates/nvisy-pattern/src/validators/uk/nino.rs delete mode 100644 crates/nvisy-pattern/src/validators/uk/vehicle_registration.rs delete mode 100644 crates/nvisy-pattern/src/validators/us/aba_routing.rs delete mode 100644 crates/nvisy-pattern/src/validators/us/dea_number.rs delete mode 100644 crates/nvisy-pattern/src/validators/us/mod.rs delete mode 100644 crates/nvisy-pattern/src/validators/us/npi.rs delete mode 100644 crates/nvisy-pattern/src/validators/us/postal_code.rs delete mode 100644 crates/nvisy-pattern/src/validators/us/ssn.rs delete mode 100644 crates/nvisy-pattern/src/validators/verhoeff.rs delete mode 100644 crates/nvisy-pattern/testdata/dictionaries/product_codes.csv delete mode 100644 crates/nvisy-pattern/testdata/dictionaries/product_codes.toml delete mode 100644 crates/nvisy-pattern/testdata/inputs/au/finance.txt delete mode 100644 crates/nvisy-pattern/testdata/inputs/au/health.txt delete mode 100644 crates/nvisy-pattern/testdata/inputs/au/identity.txt delete mode 100644 
crates/nvisy-pattern/testdata/inputs/ca/contact.txt delete mode 100644 crates/nvisy-pattern/testdata/inputs/ca/identity.txt delete mode 100644 crates/nvisy-pattern/testdata/inputs/contact.txt delete mode 100644 crates/nvisy-pattern/testdata/inputs/credentials.txt delete mode 100644 crates/nvisy-pattern/testdata/inputs/de/contact.txt delete mode 100644 crates/nvisy-pattern/testdata/inputs/de/finance.txt delete mode 100644 crates/nvisy-pattern/testdata/inputs/de/health.txt delete mode 100644 crates/nvisy-pattern/testdata/inputs/de/identity.txt delete mode 100644 crates/nvisy-pattern/testdata/inputs/de/vehicle.txt delete mode 100644 crates/nvisy-pattern/testdata/inputs/es/contact.txt delete mode 100644 crates/nvisy-pattern/testdata/inputs/es/finance.txt delete mode 100644 crates/nvisy-pattern/testdata/inputs/es/identity.txt delete mode 100644 crates/nvisy-pattern/testdata/inputs/fi/identity.txt delete mode 100644 crates/nvisy-pattern/testdata/inputs/finance.txt delete mode 100644 crates/nvisy-pattern/testdata/inputs/in/finance.txt delete mode 100644 crates/nvisy-pattern/testdata/inputs/in/identity.txt delete mode 100644 crates/nvisy-pattern/testdata/inputs/in/vehicle.txt delete mode 100644 crates/nvisy-pattern/testdata/inputs/internal.txt delete mode 100644 crates/nvisy-pattern/testdata/inputs/it/finance.txt delete mode 100644 crates/nvisy-pattern/testdata/inputs/it/identity.txt delete mode 100644 crates/nvisy-pattern/testdata/inputs/kr/finance.txt delete mode 100644 crates/nvisy-pattern/testdata/inputs/kr/identity.txt delete mode 100644 crates/nvisy-pattern/testdata/inputs/network.txt delete mode 100644 crates/nvisy-pattern/testdata/inputs/ng/identity.txt delete mode 100644 crates/nvisy-pattern/testdata/inputs/ng/vehicle.txt delete mode 100644 crates/nvisy-pattern/testdata/inputs/personal.txt delete mode 100644 crates/nvisy-pattern/testdata/inputs/pl/contact.txt delete mode 100644 crates/nvisy-pattern/testdata/inputs/pl/finance.txt delete mode 100644 
crates/nvisy-pattern/testdata/inputs/pl/identity.txt delete mode 100644 crates/nvisy-pattern/testdata/inputs/se/contact.txt delete mode 100644 crates/nvisy-pattern/testdata/inputs/se/finance.txt delete mode 100644 crates/nvisy-pattern/testdata/inputs/se/identity.txt delete mode 100644 crates/nvisy-pattern/testdata/inputs/sg/contact.txt delete mode 100644 crates/nvisy-pattern/testdata/inputs/sg/finance.txt delete mode 100644 crates/nvisy-pattern/testdata/inputs/sg/identity.txt delete mode 100644 crates/nvisy-pattern/testdata/inputs/th/contact.txt delete mode 100644 crates/nvisy-pattern/testdata/inputs/th/identity.txt delete mode 100644 crates/nvisy-pattern/testdata/inputs/tr/contact.txt delete mode 100644 crates/nvisy-pattern/testdata/inputs/tr/identity.txt delete mode 100644 crates/nvisy-pattern/testdata/inputs/uk/contact.txt delete mode 100644 crates/nvisy-pattern/testdata/inputs/uk/identity.txt delete mode 100644 crates/nvisy-pattern/testdata/inputs/uk/vehicle.txt delete mode 100644 crates/nvisy-pattern/testdata/inputs/us/finance.txt delete mode 100644 crates/nvisy-pattern/testdata/inputs/us/health.txt delete mode 100644 crates/nvisy-pattern/testdata/inputs/us/identity.txt delete mode 100644 crates/nvisy-pattern/testdata/patterns/employee_id.toml delete mode 100644 crates/nvisy-pattern/testdata/patterns/product_codes.toml delete mode 100644 crates/nvisy-pattern/tests/builtin.rs delete mode 100644 crates/nvisy-pattern/tests/builtin_au.rs delete mode 100644 crates/nvisy-pattern/tests/builtin_ca.rs delete mode 100644 crates/nvisy-pattern/tests/builtin_de.rs delete mode 100644 crates/nvisy-pattern/tests/builtin_es.rs delete mode 100644 crates/nvisy-pattern/tests/builtin_fi.rs delete mode 100644 crates/nvisy-pattern/tests/builtin_in.rs delete mode 100644 crates/nvisy-pattern/tests/builtin_it.rs delete mode 100644 crates/nvisy-pattern/tests/builtin_kr.rs delete mode 100644 crates/nvisy-pattern/tests/builtin_ng.rs delete mode 100644 
crates/nvisy-pattern/tests/builtin_pl.rs delete mode 100644 crates/nvisy-pattern/tests/builtin_se.rs delete mode 100644 crates/nvisy-pattern/tests/builtin_sg.rs delete mode 100644 crates/nvisy-pattern/tests/builtin_th.rs delete mode 100644 crates/nvisy-pattern/tests/builtin_tr.rs delete mode 100644 crates/nvisy-pattern/tests/builtin_uk.rs delete mode 100644 crates/nvisy-pattern/tests/builtin_us.rs delete mode 100644 crates/nvisy-pattern/tests/enhancer_roundtrip.rs delete mode 100644 crates/nvisy-pattern/tests/fixtures/mod.rs delete mode 100644 crates/nvisy-pattern/tests/user_rules.rs delete mode 100644 crates/nvisy-stt/README.md delete mode 100644 crates/nvisy-stt/src/backend/mod.rs delete mode 100644 crates/nvisy-stt/src/backend/noop_backend.rs delete mode 100644 crates/nvisy-stt/src/backend/stt_backend.rs delete mode 100644 crates/nvisy-stt/src/backend/transcribed_segment.rs delete mode 100644 crates/nvisy-stt/src/extraction/mod.rs delete mode 100644 crates/nvisy-stt/src/extraction/transcription.rs delete mode 100644 crates/nvisy-stt/src/lib.rs delete mode 100644 crates/nvisy-toolkit/Cargo.toml delete mode 100644 crates/nvisy-toolkit/README.md delete mode 100644 crates/nvisy-toolkit/examples/pipeline.rs delete mode 100644 crates/nvisy-toolkit/src/deduplication/calibrate/mod.rs delete mode 100644 crates/nvisy-toolkit/src/deduplication/filter/mod.rs delete mode 100644 crates/nvisy-toolkit/src/deduplication/fuse/group.rs delete mode 100644 crates/nvisy-toolkit/src/deduplication/fuse/key.rs delete mode 100644 crates/nvisy-toolkit/src/deduplication/fuse/mod.rs delete mode 100644 crates/nvisy-toolkit/src/deduplication/fuse/strategy.rs delete mode 100644 crates/nvisy-toolkit/src/deduplication/layer.rs delete mode 100644 crates/nvisy-toolkit/src/deduplication/mod.rs delete mode 100644 crates/nvisy-toolkit/src/deduplication/params.rs delete mode 100644 crates/nvisy-toolkit/src/deduplication/pipeline.rs delete mode 100644 
crates/nvisy-toolkit/src/deduplication/resolve/mod.rs delete mode 100644 crates/nvisy-toolkit/src/deduplication/resolve/strategy.rs delete mode 100644 crates/nvisy-toolkit/src/deduplication/span_size.rs delete mode 100644 crates/nvisy-toolkit/src/deduplication/suppress/mod.rs delete mode 100644 crates/nvisy-toolkit/src/deduplication/suppress/params.rs delete mode 100644 crates/nvisy-toolkit/src/detection/chunks.rs delete mode 100644 crates/nvisy-toolkit/src/detection/llm.rs delete mode 100644 crates/nvisy-toolkit/src/detection/mod.rs delete mode 100644 crates/nvisy-toolkit/src/detection/ner.rs delete mode 100644 crates/nvisy-toolkit/src/detection/pattern.rs delete mode 100644 crates/nvisy-toolkit/src/detection/registry.rs delete mode 100644 crates/nvisy-toolkit/src/extraction/mod.rs delete mode 100644 crates/nvisy-toolkit/src/extraction/ocr.rs delete mode 100644 crates/nvisy-toolkit/src/extraction/registry.rs delete mode 100644 crates/nvisy-toolkit/src/extraction/stt.rs delete mode 100644 crates/nvisy-toolkit/src/ingestion/mod.rs delete mode 100644 crates/nvisy-toolkit/src/lib.rs delete mode 100644 crates/nvisy-toolkit/src/redaction/anonymizer/encrypt.rs delete mode 100644 crates/nvisy-toolkit/src/redaction/anonymizer/hash.rs delete mode 100644 crates/nvisy-toolkit/src/redaction/anonymizer/keep.rs delete mode 100644 crates/nvisy-toolkit/src/redaction/anonymizer/mask.rs delete mode 100644 crates/nvisy-toolkit/src/redaction/anonymizer/mod.rs delete mode 100644 crates/nvisy-toolkit/src/redaction/anonymizer/redact.rs delete mode 100644 crates/nvisy-toolkit/src/redaction/anonymizer/replace.rs delete mode 100644 crates/nvisy-toolkit/src/redaction/deanonymizer/decrypt.rs delete mode 100644 crates/nvisy-toolkit/src/redaction/deanonymizer/mod.rs delete mode 100644 crates/nvisy-toolkit/src/redaction/id.rs delete mode 100644 crates/nvisy-toolkit/src/redaction/mod.rs delete mode 100644 crates/nvisy-toolkit/src/redaction/registry.rs delete mode 100644 
crates/nvisy-toolkit/src/redaction/store.rs delete mode 100644 crates/nvisy-toolkit/src/validation/check.rs delete mode 100644 crates/nvisy-toolkit/src/validation/leak/mod.rs delete mode 100644 crates/nvisy-toolkit/src/validation/mod.rs delete mode 100644 crates/nvisy-toolkit/src/validation/pipeline.rs delete mode 100644 crates/nvisy-toolkit/testdata/contact.csv delete mode 100644 crates/nvisy-toolkit/testdata/contact.html delete mode 100644 crates/nvisy-toolkit/testdata/contact.json delete mode 100644 crates/nvisy-toolkit/testdata/contact.txt delete mode 100644 crates/nvisy-toolkit/tests/codec_e2e_csv.rs delete mode 100644 crates/nvisy-toolkit/tests/codec_e2e_html.rs delete mode 100644 crates/nvisy-toolkit/tests/codec_e2e_json.rs delete mode 100644 crates/nvisy-toolkit/tests/codec_e2e_txt.rs delete mode 100644 crates/nvisy-toolkit/tests/fixtures/asserts.rs delete mode 100644 crates/nvisy-toolkit/tests/fixtures/mod.rs delete mode 100644 crates/nvisy-toolkit/tests/fixtures/pipeline.rs delete mode 100644 crates/nvisy-toolkit/tests/fixtures/registries.rs delete mode 100644 crates/nvisy-toolkit/tests/recognition_registry.rs diff --git a/Cargo.lock b/Cargo.lock index 83879da4..ac346f19 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -135,15 +135,6 @@ dependencies = [ "alloc-no-stdlib", ] -[[package]] -name = "android_system_properties" -version = "0.1.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "819e7219dbd41043ac279b19830f2efc897156490d7fd6ea916720117ee66311" -dependencies = [ - "libc", -] - [[package]] name = "anstream" version = "1.0.0" @@ -503,15 +494,6 @@ dependencies = [ "hybrid-array", ] -[[package]] -name = "block-padding" -version = "0.3.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a8894febbff9f758034a5b8e12d87918f56dfc64a8e1fe757d65e29041538d93" -dependencies = [ - "generic-array", -] - [[package]] name = "brotli" version = "8.0.3" @@ -588,15 +570,6 @@ version = "0.10.1" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "1c53ba0f290bfc610084c05582d9c5d421662128fc69f4bf236707af6fd321b9" -[[package]] -name = "cbc" -version = "0.1.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "26b52a9543ae338f279b96b0b9fed9c8093744685043739079ce85cd58f289a6" -dependencies = [ - "cipher", -] - [[package]] name = "cc" version = "1.2.63" @@ -619,17 +592,6 @@ dependencies = [ "serde", ] -[[package]] -name = "cfb" -version = "0.7.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d38f2da7a0a2c4ccf0065be06397cc26a81f4e528be095826eee9d4adbb8c60f" -dependencies = [ - "byteorder", - "fnv", - "uuid", -] - [[package]] name = "cfg-if" version = "1.0.4" @@ -653,19 +615,6 @@ dependencies = [ "rand_core 0.10.1", ] -[[package]] -name = "chrono" -version = "0.4.45" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1aa79e62e7697b8e29b513a68abacf485adcd1fe8284a4316c5ae868e6633327" -dependencies = [ - "iana-time-zone", - "js-sys", - "num-traits", - "wasm-bindgen", - "windows-link", -] - [[package]] name = "cipher" version = "0.4.4" @@ -782,26 +731,6 @@ version = "0.4.32" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cc14f565cf027a105f7a44ccf9e5b424348421a1d8952a8fc9d499d313107789" -[[package]] -name = "console_error_panic_hook" -version = "0.1.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a06aeb73f470f66dcdbf7223caeebb85984942f22f1adb2a088cf9668146bbbc" -dependencies = [ - "cfg-if", - "wasm-bindgen", -] - -[[package]] -name = "console_log" -version = "1.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "be8aed40e4edbf4d3b4431ab260b63fdc40f5780a4766824329ea0f1eefe3c0f" -dependencies = [ - "log", - "web-sys", -] - [[package]] name = "const-oid" version = "0.10.2" @@ -842,15 +771,6 @@ version = "0.8.7" source = "registry+https://github.com/rust-lang/crates.io-index" 
checksum = "773648b94d0e5d620f64f280777445740e61fe701025087ec8b57f45c791888b" -[[package]] -name = "counter" -version = "0.7.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "66e8e052be91f1c8aae2c1d81307d9f6e67f5f37001e3ddee419e971e73f03bc" -dependencies = [ - "num-traits", -] - [[package]] name = "cpufeatures" version = "0.2.17" @@ -1128,15 +1048,6 @@ dependencies = [ "syn", ] -[[package]] -name = "deranged" -version = "0.5.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7cd812cc2bc1d69d4764bd80df88b4317eaef9e773c75226407d9bc0876b211c" -dependencies = [ - "powerfmt", -] - [[package]] name = "derive_builder" version = "0.20.2" @@ -1256,15 +1167,6 @@ version = "1.0.20" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d0881ea181b1df73ff77ffaaf9c7544ecc11e82fba9b5f27b262a3c73a332555" -[[package]] -name = "ecb" -version = "0.1.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1a8bfa975b1aec2145850fcaa1c6fe269a16578c44705a532ae3edc92b8881c7" -dependencies = [ - "cipher", -] - [[package]] name = "ego-tree" version = "0.11.0" @@ -1298,6 +1200,19 @@ dependencies = [ "uuid", ] +[[package]] +name = "elide-bento" +version = "0.1.0" +dependencies = [ + "async-trait", + "bentoml", + "elide-core", + "thiserror", + "tokio", + "tracing", + "url", +] + [[package]] name = "elide-codec" version = "0.1.0" @@ -1346,6 +1261,17 @@ dependencies = [ "uuid", ] +[[package]] +name = "elide-fake" +version = "0.1.0" +dependencies = [ + "async-trait", + "elide-core", + "fake", + "tokio", + "uuid", +] + [[package]] name = "elide-llm" version = "0.1.0" @@ -1357,7 +1283,7 @@ dependencies = [ "reqwest-middleware", "reqwest-retry", "reqwest-tracing", - "rig 0.39.0", + "rig", "schemars", "serde", "thiserror", @@ -1527,9 +1453,6 @@ name = "fastrand" version = "2.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"9f1f227452a390804cdb637b74a86990f2a7d7ba4b7d5693aac9b4dd6defd8d6" -dependencies = [ - "getrandom 0.3.4", -] [[package]] name = "fax" @@ -1615,12 +1538,6 @@ version = "1.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "42703706b716c37f96a77aea830392ad231f44c9e9a67872fa5548707e11b11c" -[[package]] -name = "fst" -version = "0.4.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7ab85b9b05e3978cc9a9cf8fea7f01b494e1a09ed3037e16ba39edc7a29eb61a" - [[package]] name = "futures" version = "0.3.32" @@ -2112,30 +2029,6 @@ dependencies = [ "tracing", ] -[[package]] -name = "iana-time-zone" -version = "0.1.65" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e31bc9ad994ba00e440a8aa5c9ef0ec67d5cb5e5cb0cc7f8b744a35b389cc470" -dependencies = [ - "android_system_properties", - "core-foundation-sys", - "iana-time-zone-haiku", - "js-sys", - "log", - "wasm-bindgen", - "windows-core", -] - -[[package]] -name = "iana-time-zone-haiku" -version = "0.1.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f31827a206f56af32e590ba56d5d2d085f558508192593743f16b2306495269f" -dependencies = [ - "cc", -] - [[package]] name = "iban_validate" version = "5.0.1" @@ -2325,25 +2218,6 @@ version = "1.12.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "40fac9d56ed6437b198fddba683305e8e2d651aa42647f00f5ae542e7f5c94a2" -[[package]] -name = "include_dir" -version = "0.7.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "923d117408f1e49d914f1a379a309cffe4f18c05cf4e3d12e613a15fc81bd0dd" -dependencies = [ - "include_dir_macros", -] - -[[package]] -name = "include_dir_macros" -version = "0.7.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7cab85a7ed0bd5f0e76d93846e0147172bed2e2d3f859bcc33a8d9699cad1a75" -dependencies = [ - "proc-macro2", - "quote", -] - [[package]] name = "indexmap" version = 
"2.14.0" @@ -2365,22 +2239,12 @@ dependencies = [ "rustversion", ] -[[package]] -name = "infer" -version = "0.19.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a588916bfdfd92e71cacef98a63d9b1f0d74d6599980d11894290e7ddefffcf7" -dependencies = [ - "cfb", -] - [[package]] name = "inout" version = "0.1.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "879f10e63c20629ecabbb64a8010319738c66a5cd0c29b02d63d272b03751d01" dependencies = [ - "block-padding", "generic-array", ] @@ -2577,54 +2441,12 @@ dependencies = [ "cc", ] -[[package]] -name = "libloading" -version = "0.9.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "754ca22de805bb5744484a5b151a9e1a8e837d5dc232c2d7d8c2e3492edc8b60" -dependencies = [ - "cfg-if", - "windows-link", -] - [[package]] name = "libm" version = "0.2.16" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b6d2cec3eae94f9f509c767b45932f1ada8350c4bdb85af2fcab4a3c14807981" -[[package]] -name = "lingua" -version = "1.8.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f40d9129bb9fe42c95d1bd420d6891607eaff17df16ee15674aed2d05b0ec8f4" -dependencies = [ - "counter", - "dashmap", - "fastrand", - "fst", - "include_dir", - "itertools", - "lingua-english-language-model", - "maplit", - "rayon", - "regex", - "serde", - "serde-wasm-bindgen", - "strum 0.27.2", - "strum_macros 0.27.2", - "wasm-bindgen", -] - -[[package]] -name = "lingua-english-language-model" -version = "1.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "97102de08b134a49f1cce05a1b6f5bf08ef21fe858074ae2b794e7892c43dd4b" -dependencies = [ - "include_dir", -] - [[package]] name = "linked-hash-map" version = "0.5.6" @@ -2680,37 +2502,6 @@ dependencies = [ "imgref", ] -[[package]] -name = "lopdf" -version = "0.41.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"67513274c50a2b51e5f75d9e682fcf4ab064a8a9c9ae2c3c59309084882bb24d" -dependencies = [ - "aes", - "bitflags", - "cbc", - "chrono", - "ecb", - "encoding_rs", - "flate2", - "getrandom 0.4.2", - "indexmap", - "itoa", - "jiff", - "log", - "md-5", - "nom 8.0.0", - "rand 0.10.1", - "rangemap", - "rayon", - "sha2 0.10.9", - "stringprep", - "thiserror", - "time", - "ttf-parser", - "weezl", -] - [[package]] name = "lru-cache" version = "0.1.2" @@ -2757,12 +2548,6 @@ dependencies = [ "twox-hash", ] -[[package]] -name = "maplit" -version = "1.0.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3e2e65a1a2e43cfcb47a895c4c8b10d1f4a61097f9f254f183aee60cad9c651d" - [[package]] name = "markup5ever" version = "0.39.0" @@ -2799,12 +2584,6 @@ dependencies = [ "rawpointer", ] -[[package]] -name = "maybe-owned" -version = "0.3.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4facc753ae494aeb6e3c22f839b158aebd4f9270f55cd3c79906c45476c47ab4" - [[package]] name = "maybe-rayon" version = "0.1.1" @@ -2815,28 +2594,12 @@ dependencies = [ "rayon", ] -[[package]] -name = "md-5" -version = "0.10.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d89e7ee0cfbedfc4da3340218492196241d89eefb6dab27de5df917a6d2e78cf" -dependencies = [ - "cfg-if", - "digest 0.10.7", -] - [[package]] name = "memchr" version = "2.8.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6b947ae49db0d222b1dbc6b113ce7248a3fc3a6ca21b696717bfc000ba4484d8" -[[package]] -name = "memo-map" -version = "0.3.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "38d1115007560874e373613744c6fba374c17688327a71c1476d1a5954cc857b" - [[package]] name = "mime" version = "0.3.17" @@ -2853,16 +2616,6 @@ dependencies = [ "unicase", ] -[[package]] -name = "minijinja" -version = "2.20.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"2929e494b2280e1e18959bb2e121da03347ae896896fdfaceaab43c88a02803f" -dependencies = [ - "memo-map", - "serde", -] - [[package]] name = "minimal-lexical" version = "0.2.1" @@ -3046,12 +2799,6 @@ dependencies = [ "num-traits", ] -[[package]] -name = "num-conv" -version = "0.2.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "521739c6d2bac4aa25192232afe6841231376b2b26d4d9fae5ecf8ca5772e441" - [[package]] name = "num-derive" version = "0.4.2" @@ -3122,45 +2869,6 @@ dependencies = [ "tracing-subscriber", ] -[[package]] -name = "nvisy-codec" -version = "0.1.0" -dependencies = [ - "async-trait", - "bytes", - "csv", - "derive_more", - "ego-tree", - "hex", - "hound", - "image", - "imageproc", - "infer", - "lopdf", - "mp3lame-encoder", - "nvisy-core", - "pdfium-render", - "rayon", - "schemars", - "scraper", - "serde", - "serde_json", - "sha2 0.11.0", - "symphonia", - "tokio", - "tracing", -] - -[[package]] -name = "nvisy-context" -version = "0.1.0" -dependencies = [ - "async-trait", - "hipstr", - "nvisy-core", - "unicode-segmentation", -] - [[package]] name = "nvisy-core" version = "0.1.0" @@ -3170,6 +2878,7 @@ dependencies = [ "celes", "derive_builder", "derive_more", + "elide-core", "hipstr", "oxilangtag", "schemars", @@ -3217,94 +2926,7 @@ dependencies = [ ] [[package]] -name = "nvisy-fake" -version = "0.1.0" -dependencies = [ - "async-trait", - "fake", - "nvisy-core", - "nvisy-toolkit", - "tokio", - "uuid", -] - -[[package]] -name = "nvisy-llm" -version = "0.1.0" -dependencies = [ - "async-trait", - "base64", - "derive_builder", - "derive_more", - "humantime-serde", - "minijinja", - "nvisy-core", - "reqwest-middleware", - "reqwest-retry", - "reqwest-tracing", - "rig 0.38.1", - "schemars", - "serde", - "serde_json", - "thiserror", - "tokio", - "toml", - "tracing", - "unicode-normalization", -] - -[[package]] -name = "nvisy-ner" -version = "0.1.0" -dependencies = [ - "async-trait", - "bentoml", - "derive_builder", - "lingua", - "nvisy-core", 
- "serde", - "tokio", - "tracing", - "type-map", - "uuid", -] - -[[package]] -name = "nvisy-ocr" -version = "0.1.0" -dependencies = [ - "async-trait", - "bentoml", - "bytes", - "futures", - "nvisy-core", - "tokio", - "tracing", - "uuid", -] - -[[package]] -name = "nvisy-pattern" -version = "0.1.0" -dependencies = [ - "aho-corasick", - "async-trait", - "bs58", - "csv", - "derive_builder", - "derive_more", - "iban_validate", - "nvisy-context", - "nvisy-core", - "phonenumber", - "regex", - "serde", - "tokio", - "toml", -] - -[[package]] -name = "nvisy-server" +name = "nvisy-server" version = "0.1.0" dependencies = [ "aide", @@ -3324,45 +2946,6 @@ dependencies = [ "uuid", ] -[[package]] -name = "nvisy-stt" -version = "0.1.0" -dependencies = [ - "async-trait", - "nvisy-core", - "tokio", - "tracing", - "uuid", -] - -[[package]] -name = "nvisy-toolkit" -version = "0.1.0" -dependencies = [ - "aes-gcm", - "async-trait", - "base64", - "nvisy-codec", - "nvisy-context", - "nvisy-core", - "nvisy-fake", - "nvisy-llm", - "nvisy-ner", - "nvisy-ocr", - "nvisy-pattern", - "nvisy-stt", - "regex", - "schemars", - "serde", - "serde_json", - "sha2 0.11.0", - "tokio", - "tracing", - "type-map", - "unicode-normalization", - "uuid", -] - [[package]] name = "once_cell" version = "1.21.4" @@ -3455,32 +3038,6 @@ version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "35fb2e5f958ec131621fdd531e9fc186ed768cbe395337403ae56c17a74c68ec" -[[package]] -name = "pdfium-render" -version = "0.9.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "076dd8f3a6c7da9298ddffbcc0d5a109f89caf967fa4871c9a172d5b3498b35b" -dependencies = [ - "bitflags", - "bytemuck", - "bytes", - "chrono", - "console_error_panic_hook", - "console_log", - "image", - "itertools", - "js-sys", - "libloading", - "log", - "maybe-owned", - "once_cell", - "utf16string", - "vecmath", - "wasm-bindgen", - "wasm-bindgen-futures", - "web-sys", -] - [[package]] name = 
"percent-encoding" version = "2.3.2" @@ -3592,12 +3149,6 @@ version = "0.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" -[[package]] -name = "piston-float" -version = "1.0.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ad78bf43dcf80e8f950c92b84f938a0fc7590b7f6866fbcbeca781609c115590" - [[package]] name = "pkg-config" version = "0.3.33" @@ -3666,12 +3217,6 @@ dependencies = [ "zerovec", ] -[[package]] -name = "powerfmt" -version = "0.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "439ee305def115ba05938db6eb1644ff94165c5ab5e9420d1c1bcedbba909391" - [[package]] name = "ppv-lite86" version = "0.2.21" @@ -3987,12 +3532,6 @@ dependencies = [ "rand 0.10.1", ] -[[package]] -name = "rangemap" -version = "1.7.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "973443cf09a9c8656b574a866ab68dfa19f0867d0340648c7d2f6a71b8a8ea68" - [[package]] name = "rav1e" version = "0.8.1" @@ -4262,57 +3801,13 @@ version = "0.8.53" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "47b34b781b31e5d73e9fbc8689c70551fd1ade9a19e3e28cfec8580a79290cc4" -[[package]] -name = "rig" -version = "0.38.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f5510d91a645a97d37ea03ddfd9c1c7f55d5eafbc7661b71bb210eba078b6966" -dependencies = [ - "rig-core 0.38.1", -] - [[package]] name = "rig" version = "0.39.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e98e2e8f01c4c5bc23f577983634fa4d5244ffb070ea14c23b1ea5bd406e5cac" dependencies = [ - "rig-core 0.39.0", -] - -[[package]] -name = "rig-core" -version = "0.38.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2d9bae0f2b910cbd4ca2d53513f18aba3b6f6cb6cb0bf295e390d5396f56359d" -dependencies = [ - "as-any", - "async-stream", - "base64", - "bytes", - 
"eventsource-stream", - "fastrand", - "futures", - "futures-timer", - "glob", - "http", - "mime", - "mime_guess", - "nanoid", - "ordered-float", - "pin-project-lite", - "reqwest", - "reqwest-middleware", - "rig-derive 0.38.1", - "schemars", - "serde", - "serde_json", - "thiserror", - "tokio", - "tokio-tungstenite", - "tracing", - "tracing-futures", - "url", + "rig-core", ] [[package]] @@ -4339,7 +3834,7 @@ dependencies = [ "pin-project-lite", "reqwest", "reqwest-middleware", - "rig-derive 0.39.0", + "rig-derive", "schemars", "serde", "serde_json", @@ -4351,22 +3846,6 @@ dependencies = [ "url", ] -[[package]] -name = "rig-derive" -version = "0.38.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9551846a623524ffddfab42cac15f03d0547b6d5f81a66549341126205e4b721" -dependencies = [ - "convert_case 0.10.0", - "deluxe", - "indoc", - "proc-macro-crate 3.5.0", - "proc-macro2", - "quote", - "serde_json", - "syn", -] - [[package]] name = "rig-derive" version = "0.39.0" @@ -4691,17 +4170,6 @@ dependencies = [ "serde_derive", ] -[[package]] -name = "serde-wasm-bindgen" -version = "0.6.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8302e169f0eddcc139c70f139d19d6467353af16f9fce27e8c30158036a1e16b" -dependencies = [ - "js-sys", - "serde", - "wasm-bindgen", -] - [[package]] name = "serde_core" version = "1.0.228" @@ -4985,17 +4453,6 @@ dependencies = [ "quote", ] -[[package]] -name = "stringprep" -version = "0.1.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7b4df3d392d81bd458a8a621b8bffbd2302a12ffe288a9d931670948749463b1" -dependencies = [ - "unicode-bidi", - "unicode-normalization", - "unicode-properties", -] - [[package]] name = "strsim" version = "0.10.0" @@ -5227,37 +4684,6 @@ dependencies = [ "zune-jpeg", ] -[[package]] -name = "time" -version = "0.3.47" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"743bd48c283afc0388f9b8827b976905fb217ad9e647fae3a379a9283c4def2c" -dependencies = [ - "deranged", - "itoa", - "num-conv", - "powerfmt", - "serde_core", - "time-core", - "time-macros", -] - -[[package]] -name = "time-core" -version = "0.1.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7694e1cfe791f8d31026952abf09c69ca6f6fa4e1a1229e18988f06a04a12dca" - -[[package]] -name = "time-macros" -version = "0.2.27" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2e70e4c5a0e0a8a4823ad65dfe1a6930e4f4d756dcd9dd7939022b5e8c501215" -dependencies = [ - "num-conv", - "time-core", -] - [[package]] name = "tinystr" version = "0.8.3" @@ -5631,12 +5057,6 @@ version = "2.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "dbc4bc3a9f746d862c45cb89d705aa10f187bb96c76001afab07a0d35ce60142" -[[package]] -name = "unicode-bidi" -version = "0.3.18" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5c1cb5db39152898a79168971543b1cb5020dff7fe43c8dc468b0885f5e29df5" - [[package]] name = "unicode-ident" version = "1.0.24" @@ -5652,12 +5072,6 @@ dependencies = [ "tinyvec", ] -[[package]] -name = "unicode-properties" -version = "0.1.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7df058c713841ad818f1dc5d3fd88063241cc61f49f5fbea4b951e8cf5a8d71d" - [[package]] name = "unicode-segmentation" version = "1.13.3" @@ -5710,15 +5124,6 @@ version = "0.7.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "09cc8ee72d2a9becf2f2febe0205bbed8fc6615b7cb429ad062dc7b7ddd036a9" -[[package]] -name = "utf16string" -version = "0.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0b62a1e85e12d5d712bf47a85f426b73d303e2d00a90de5f3004df3596e9d216" -dependencies = [ - "byteorder", -] - [[package]] name = "utf8_iter" version = "1.0.4" @@ -5796,15 +5201,6 @@ version = "2.2.0" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "8f54a172d0620933a27a4360d3db3e2ae0dd6cceae9730751a036bbf182c4b23" -[[package]] -name = "vecmath" -version = "1.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "956ae1e0d85bca567dee1dcf87fb1ca2e792792f66f87dced8381f99cd91156a" -dependencies = [ - "piston-float", -] - [[package]] name = "version_check" version = "0.9.5" @@ -6054,41 +5450,6 @@ dependencies = [ "windows-sys 0.61.2", ] -[[package]] -name = "windows-core" -version = "0.62.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b8e83a14d34d0623b51dce9581199302a221863196a1dde71a7663a4c2be9deb" -dependencies = [ - "windows-implement", - "windows-interface", - "windows-link", - "windows-result", - "windows-strings", -] - -[[package]] -name = "windows-implement" -version = "0.60.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "053e2e040ab57b9dc951b72c264860db7eb3b0200ba345b4e4c3b14f67855ddf" -dependencies = [ - "proc-macro2", - "quote", - "syn", -] - -[[package]] -name = "windows-interface" -version = "0.59.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3f316c4a2570ba26bbec722032c4099d8c8bc095efccdc15688708623367e358" -dependencies = [ - "proc-macro2", - "quote", - "syn", -] - [[package]] name = "windows-link" version = "0.2.1" @@ -6104,15 +5465,6 @@ dependencies = [ "windows-link", ] -[[package]] -name = "windows-strings" -version = "0.5.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7837d08f69c77cf6b07689544538e017c1bfcf57e34b4c0ff58e6c2cd3b37091" -dependencies = [ - "windows-link", -] - [[package]] name = "windows-sys" version = "0.52.0" diff --git a/Cargo.toml b/Cargo.toml index 8e86cdb8..7b0232b4 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -3,19 +3,12 @@ [workspace] resolver = "3" members = [ + "./crates/elide-bento", + "./crates/elide-fake", "./crates/nvisy-cli", - 
"./crates/nvisy-codec", - "./crates/nvisy-context", "./crates/nvisy-core", "./crates/nvisy-engine", - "./crates/nvisy-fake", - "./crates/nvisy-llm", - "./crates/nvisy-ner", - "./crates/nvisy-ocr", - "./crates/nvisy-pattern", "./crates/nvisy-server", - "./crates/nvisy-stt", - "./crates/nvisy-toolkit", ] [workspace.package] @@ -41,23 +34,18 @@ elide = { git = "https://github.com/nvisycom/elide", branch = "main", default-fe elide-core = { git = "https://github.com/nvisycom/elide", branch = "main" } elide-llm = { git = "https://github.com/nvisycom/elide", branch = "main", default-features = false } +# Runtime-owned elide extensions +elide-bento = { path = "./crates/elide-bento", version = "0.1.0" } +elide-fake = { path = "./crates/elide-fake", version = "0.1.0" } + # Internal crates nvisy-engine = { path = "./crates/nvisy-engine", version = "0.1.0" } -nvisy-fake = { path = "./crates/nvisy-fake", version = "0.1.0" } -nvisy-ocr = { path = "./crates/nvisy-ocr", version = "0.1.0" } nvisy-server = { path = "./crates/nvisy-server", version = "0.1.0" } -nvisy-stt = { path = "./crates/nvisy-stt", version = "0.1.0" } -# Internal crates being deleted: kept here as path-deps so consumer -# manifests (nvisy-server/cli/fake/ocr/stt/toolkit) parse during the -# migration. Each entry leaves with its consumer's migration task. -nvisy-codec = { path = "./crates/nvisy-codec", version = "0.1.0", default-features = false } -nvisy-context = { path = "./crates/nvisy-context", version = "0.1.0" } +# Internal crate being deleted: kept here as a path-dep so consumer +# manifests (nvisy-engine/server/cli) parse during the migration. +# Leaves the workspace once engine's source no longer imports it. 
nvisy-core = { path = "./crates/nvisy-core", version = "0.1.0" } -nvisy-llm = { path = "./crates/nvisy-llm", version = "0.1.0" } -nvisy-ner = { path = "./crates/nvisy-ner", version = "0.1.0" } -nvisy-pattern = { path = "./crates/nvisy-pattern", version = "0.1.0" } -nvisy-toolkit = { path = "./crates/nvisy-toolkit", version = "0.1.0" } # Serialization serde = { version = "1.0", features = ["derive"] } diff --git a/crates/nvisy-stt/Cargo.toml b/crates/elide-bento/Cargo.toml similarity index 53% rename from crates/nvisy-stt/Cargo.toml rename to crates/elide-bento/Cargo.toml index 6890e789..6dc43a46 100644 --- a/crates/nvisy-stt/Cargo.toml +++ b/crates/elide-bento/Cargo.toml @@ -1,10 +1,10 @@ # https://doc.rust-lang.org/cargo/reference/manifest.html [package] -name = "nvisy-stt" -description = "Speech-to-text extractor (transcription + timestamps + diarization) for the Nvisy runtime" -keywords = ["nvisy", "stt", "transcription", "diarization"] -categories = ["multimedia::audio", "api-bindings"] +name = "elide-bento" +description = "Shared BentoML HTTP client for elide backends (NER, OCR)" +keywords = ["elide", "bento", "bentoml", "ner", "ocr"] +categories = ["api-bindings"] readme = "README.md" version = { workspace = true } @@ -18,25 +18,28 @@ repository = { workspace = true } homepage = { workspace = true } documentation = { workspace = true } -[features] -default = [] - [package.metadata.docs.rs] all-features = true rustdoc-args = ["--cfg", "docsrs"] [dependencies] -# Internal crates -nvisy-core = { workspace = true, features = [] } - -# Primitive datatypes -uuid = { workspace = true, features = [] } +# Elide toolkit (upstream) +elide-core = { workspace = true, features = [] } -# Async runtime and parallelism +# Async runtime async-trait = { workspace = true, features = [] } # Observability tracing = { workspace = true, features = [] } +# Error handling +thiserror = { workspace = true, features = [] } + +# URL parsing +url = "2.5" + +# BentoML client +bentoml = { 
workspace = true, default-features = false, features = ["rustls-tls", "tracing"] } + [dev-dependencies] -tokio = { workspace = true, features = ["macros", "rt"] } +tokio = { workspace = true, features = ["rt", "macros"] } diff --git a/crates/elide-bento/README.md b/crates/elide-bento/README.md new file mode 100644 index 00000000..49b8e23e --- /dev/null +++ b/crates/elide-bento/README.md @@ -0,0 +1,7 @@ +# elide-bento + +Shared BentoML HTTP client wrapper for elide backends. + +Per-modality backends (NER, OCR, …) live in their consuming crates +(`elide-ner`, `elide-ocr`) and pull this crate for the common HTTP +client, params validation, and error translation. diff --git a/crates/elide-bento/src/client.rs b/crates/elide-bento/src/client.rs new file mode 100644 index 00000000..196c0985 --- /dev/null +++ b/crates/elide-bento/src/client.rs @@ -0,0 +1,49 @@ +//! Shared BentoML HTTP client wrapper. + +use bentoml::Client; + +use crate::error::BentoError; + +/// Connection params common to every BentoML backend. +/// +/// Per-modality backends (NER, OCR, …) typically wrap this with their +/// own per-endpoint config. +#[derive(Debug, Clone)] +pub struct BentoParams { + /// Base URL of the BentoML service (e.g. `http://inference:3000`). + pub base_url: String, +} + +impl BentoParams { + /// Construct, validating the URL parses. + pub fn new(base_url: impl Into<String>) -> Result<Self, BentoError> { + let base_url = base_url.into(); + url::Url::parse(&base_url).map_err(|e| BentoError::Config(format!("base_url: {e}")))?; + Ok(Self { base_url }) + } +} + +/// Thin wrapper over the upstream [`bentoml::Client`] for use by +/// per-modality backends. Owns the HTTP transport + base URL; the +/// modality-specific request/response wire shape lives in the +/// consuming crate. +#[derive(Debug, Clone)] +pub struct BentoClient { + inner: Client, +} + +impl BentoClient { + /// Build from params. Returns a transport-ready client. 
+ pub fn new(params: &BentoParams) -> Result<Self, BentoError> { + let inner = Client::builder() + .with_base_url(&params.base_url) + .build() + .map_err(BentoError::Transport)?; + Ok(Self { inner }) + } + + /// Borrow the underlying client for endpoint-specific calls. + pub fn inner(&self) -> &Client { + &self.inner + } +} diff --git a/crates/elide-bento/src/error.rs b/crates/elide-bento/src/error.rs new file mode 100644 index 00000000..a54d6576 --- /dev/null +++ b/crates/elide-bento/src/error.rs @@ -0,0 +1,31 @@ +//! Error translation: `bentoml` errors → [`elide_core::Error`]. + +use elide_core::{Error, ErrorKind}; + +/// Errors surfaced by [`BentoClient`](crate::BentoClient) operations. +/// +/// Wraps the upstream [`bentoml::Error`] with a structural classification +/// the consuming crate can map onto the right [`ErrorKind`] when bubbling +/// up to elide. +#[derive(Debug, thiserror::Error)] +pub enum BentoError { + /// HTTP / transport failure talking to the BentoML service. + #[error("bento transport error: {0}")] + Transport(#[from] bentoml::Error), + /// Configuration failure (bad URL, missing required field, …). + #[error("bento config error: {0}")] + Config(String), +} + +impl From<BentoError> for Error { + /// Map a transport failure to [`ErrorKind::Transport`] and a config + /// failure to [`ErrorKind::Validation`], carrying the original error + /// as the source cause. 
+ fn from(err: BentoError) -> Self { + let kind = match err { + BentoError::Transport(_) => ErrorKind::Transport, + BentoError::Config(_) => ErrorKind::Validation, + }; + Error::new(kind, err) + } +} diff --git a/crates/elide-bento/src/lib.rs b/crates/elide-bento/src/lib.rs new file mode 100644 index 00000000..7df1d119 --- /dev/null +++ b/crates/elide-bento/src/lib.rs @@ -0,0 +1,9 @@ +#![forbid(unsafe_code)] +#![cfg_attr(docsrs, feature(doc_cfg))] +#![doc = include_str!("../README.md")] + +pub mod client; +pub mod error; + +pub use self::client::{BentoClient, BentoParams}; +pub use self::error::BentoError; diff --git a/crates/nvisy-fake/Cargo.toml b/crates/elide-fake/Cargo.toml similarity index 72% rename from crates/nvisy-fake/Cargo.toml rename to crates/elide-fake/Cargo.toml index d2bdba91..8a5d787c 100644 --- a/crates/nvisy-fake/Cargo.toml +++ b/crates/elide-fake/Cargo.toml @@ -1,9 +1,9 @@ # https://doc.rust-lang.org/cargo/reference/manifest.html [package] -name = "nvisy-fake" -description = "Locale-aware fake-data Anonymizer for Nvisy entities" -keywords = ["nvisy", "fake", "anonymizer", "pii"] +name = "elide-fake" +description = "Locale-aware fake-data Anonymizer for elide entities" +keywords = ["elide", "fake", "anonymizer", "pii"] categories = ["text-processing"] readme = "README.md" @@ -23,8 +23,8 @@ all-features = true rustdoc-args = ["--cfg", "docsrs"] [dependencies] -# Internal crates -nvisy-core = { workspace = true, features = [] } +# Elide toolkit (upstream) +elide-core = { workspace = true, features = [] } # Primitive datatypes (UUIDv4 for fake `DeviceId`) uuid = { workspace = true, features = ["v4"] } @@ -36,6 +36,4 @@ async-trait = { workspace = true, features = [] } fake = { workspace = true, features = [] } [dev-dependencies] -nvisy-core = { workspace = true, features = ["test-utils"] } -nvisy-toolkit = { workspace = true, features = [] } tokio = { workspace = true, features = ["rt", "macros"] } diff --git a/crates/nvisy-fake/README.md 
b/crates/elide-fake/README.md similarity index 100% rename from crates/nvisy-fake/README.md rename to crates/elide-fake/README.md diff --git a/crates/nvisy-fake/src/anonymizer/mod.rs b/crates/elide-fake/src/anonymizer/mod.rs similarity index 100% rename from crates/nvisy-fake/src/anonymizer/mod.rs rename to crates/elide-fake/src/anonymizer/mod.rs diff --git a/crates/nvisy-fake/src/generator/case_id.rs b/crates/elide-fake/src/generator/case_id.rs similarity index 100% rename from crates/nvisy-fake/src/generator/case_id.rs rename to crates/elide-fake/src/generator/case_id.rs diff --git a/crates/nvisy-fake/src/generator/contact.rs b/crates/elide-fake/src/generator/contact.rs similarity index 100% rename from crates/nvisy-fake/src/generator/contact.rs rename to crates/elide-fake/src/generator/contact.rs diff --git a/crates/nvisy-fake/src/generator/device.rs b/crates/elide-fake/src/generator/device.rs similarity index 100% rename from crates/nvisy-fake/src/generator/device.rs rename to crates/elide-fake/src/generator/device.rs diff --git a/crates/nvisy-fake/src/generator/dispatch.rs b/crates/elide-fake/src/generator/dispatch.rs similarity index 100% rename from crates/nvisy-fake/src/generator/dispatch.rs rename to crates/elide-fake/src/generator/dispatch.rs diff --git a/crates/nvisy-fake/src/generator/finance.rs b/crates/elide-fake/src/generator/finance.rs similarity index 100% rename from crates/nvisy-fake/src/generator/finance.rs rename to crates/elide-fake/src/generator/finance.rs diff --git a/crates/nvisy-fake/src/generator/identity.rs b/crates/elide-fake/src/generator/identity.rs similarity index 100% rename from crates/nvisy-fake/src/generator/identity.rs rename to crates/elide-fake/src/generator/identity.rs diff --git a/crates/nvisy-fake/src/generator/mod.rs b/crates/elide-fake/src/generator/mod.rs similarity index 100% rename from crates/nvisy-fake/src/generator/mod.rs rename to crates/elide-fake/src/generator/mod.rs diff --git 
a/crates/nvisy-fake/src/generator/pattern.rs b/crates/elide-fake/src/generator/pattern.rs similarity index 100% rename from crates/nvisy-fake/src/generator/pattern.rs rename to crates/elide-fake/src/generator/pattern.rs diff --git a/crates/nvisy-fake/src/generator/temporal.rs b/crates/elide-fake/src/generator/temporal.rs similarity index 100% rename from crates/nvisy-fake/src/generator/temporal.rs rename to crates/elide-fake/src/generator/temporal.rs diff --git a/crates/nvisy-fake/src/lib.rs b/crates/elide-fake/src/lib.rs similarity index 100% rename from crates/nvisy-fake/src/lib.rs rename to crates/elide-fake/src/lib.rs diff --git a/crates/nvisy-fake/src/locale/mod.rs b/crates/elide-fake/src/locale/mod.rs similarity index 100% rename from crates/nvisy-fake/src/locale/mod.rs rename to crates/elide-fake/src/locale/mod.rs diff --git a/crates/nvisy-codec/Cargo.toml b/crates/nvisy-codec/Cargo.toml deleted file mode 100644 index ff975296..00000000 --- a/crates/nvisy-codec/Cargo.toml +++ /dev/null @@ -1,148 +0,0 @@ -# https://doc.rust-lang.org/cargo/reference/manifest.html - -[package] -name = "nvisy-codec" -description = "Codec traits + format implementations (TXT, JSON, HTML, CSV, XLSX, PNG, JPEG, TIFF, WAV, MP3, PDF, DOCX) for the Nvisy multimodal redaction platform" -keywords = ["nvisy", "codec", "loader", "pdf", "redaction"] -categories = ["parser-implementations", "encoding"] -readme = "README.md" - -version = { workspace = true } -rust-version = { workspace = true } -edition = { workspace = true } -license = { workspace = true } -publish = { workspace = true } - -authors = { workspace = true } -repository = { workspace = true } -homepage = { workspace = true } -documentation = { workspace = true } - -[features] -default = ["text", "tabular"] - -## Plain-text (`.txt`, `.log`) loader and handler. -txt = ["internal_text"] -## JSON loader and handler. -json = ["internal_text"] -## Markdown loader. 
-markdown = ["internal_text"] -## HTML loader and handler via `scraper`. -html = ["internal_text", "dep:scraper", "dep:ego-tree"] - -## CSV loader and handler via the `csv` crate. Tabular cells reuse the -## text redact helper, so this pulls `internal_text`. -csv = ["internal_tabular", "internal_text", "dep:csv"] -## Excel (`.xlsx`) loader and handler. Pulls `internal_text` for the -## same reason as `csv`. The handler is a stub today (decode returns -## empty); a parser dep will be wired back in when real extraction -## lands. -xlsx = ["internal_tabular", "internal_text"] - -## PNG loader and handler. -png = ["internal_image"] -## JPEG loader and handler. -jpeg = ["internal_image"] -## TIFF loader and handler. -tiff = ["internal_image"] - -## WAV audio loader and handler. `hound` covers sample read + write; -## `symphonia` probes container metadata for the clip duration. -wav = ["internal_audio", "dep:hound", "dep:symphonia"] -## MP3 audio loader and handler. `symphonia` covers the duration -## probe and PCM decode; `mp3lame-encoder` (LGPL-3.0 via libmp3lame — -## requires a C toolchain + autoconf/automake at build time) handles -## re-encoding for the redaction round-trip. -mp3 = ["internal_audio", "dep:symphonia", "dep:mp3lame-encoder"] - -## PDF loader, handler, and page-to-image rendering via `lopdf` + -## `pdfium-render`. Pulls `internal_text` (page-text redact) + -## `internal_image` (PngHandler for rendered pages + extracted -## figures) + `png` (the concrete image format wrapping rendered -## pages). -pdf = ["internal_rich", "internal_text", "internal_image", "png", "dep:lopdf", "dep:pdfium-render", "dep:rayon"] -## DOCX loader. Pulls `internal_text` only — DOCX text extraction is -## stubbed today, no image rendering needed. Parser deps (`zip` + -## `quick-xml`) will be wired back in when real extraction lands. -docx = ["internal_rich", "internal_text"] - -## All text formats: `txt` + `json` + `markdown` + `html`. 
-text = ["txt", "json", "markdown", "html"] -## All tabular formats: `csv` + `xlsx`. -tabular = ["csv", "xlsx"] -## All image formats: `png` + `jpeg` + `tiff`. -image = ["png", "jpeg", "tiff"] -## All audio formats: `wav` + `mp3`. -audio = ["wav", "mp3"] -## All rich-document formats: `pdf` + `docx`. -rich = ["pdf", "docx"] - -## In-memory decode helpers for tests in this and downstream crates. -## Re-exported as `nvisy_codec::test_utils`. -test-utils = [] - -# Internal helpers — set automatically by any format feature above. -# Library code uses `#[cfg(feature = "internal_text")]` to mean -# "any text format is enabled", the right gate for shared -# infrastructure like the modality's `impl Codable` block and -# top-level `crate::text` module. -internal_text = [] -internal_tabular = [] -internal_image = [] -internal_audio = [] -internal_rich = [] - -[package.metadata.docs.rs] -all-features = true -rustdoc-args = ["--cfg", "docsrs"] - -[dependencies] -# Internal crates -nvisy-core = { workspace = true, features = [] } - -# Serialization -serde = { workspace = true, features = [] } -serde_json = { workspace = true, features = [] } -schemars = { workspace = true, features = [] } - -# Derive macros and error handling -derive_more = { workspace = true, features = ["as_ref", "deref", "from"] } - -# Primitive datatypes -bytes = { workspace = true, features = [] } - -# Encoding and hashing -hex = { workspace = true, features = [] } -sha2 = { workspace = true, features = [] } - -# Async runtime and parallelism -async-trait = { workspace = true, features = [] } -tokio = { workspace = true, features = ["sync"] } -rayon = { workspace = true, optional = true, features = [] } - -# Observability -tracing = { workspace = true, features = [] } - -# Tabular document parsing (feature-gated) -csv = { workspace = true, optional = true, features = [] } - -# Rich-document parsing (feature-gated: HTML + PDF) -scraper = { workspace = true, optional = true, features = [] } -ego-tree = { 
workspace = true, optional = true, features = [] } -lopdf = { workspace = true, optional = true, features = [] } -pdfium-render = { workspace = true, optional = true, features = [] } - -# Image processing -image = { workspace = true, features = [] } -imageproc = { workspace = true, features = [] } - -# Audio processing (feature-gated) -hound = { workspace = true, optional = true, features = [] } -symphonia = { workspace = true, optional = true, features = [] } -mp3lame-encoder = { workspace = true, optional = true, features = [] } - -# Storage and file-type detection -infer = { workspace = true, features = [] } - -[dev-dependencies] -tokio = { workspace = true, features = ["macros", "rt"] } diff --git a/crates/nvisy-codec/README.md b/crates/nvisy-codec/README.md deleted file mode 100644 index 693709c1..00000000 --- a/crates/nvisy-codec/README.md +++ /dev/null @@ -1,48 +0,0 @@ -# nvisy-codec - -[![Build](https://img.shields.io/github/actions/workflow/status/nvisycom/runtime/build.yml?branch=main&label=build%20%26%20test&style=flat-square)](https://github.com/nvisycom/runtime/actions/workflows/build.yml) - -Format handlers, the `Handler` trait, and the `CodecRegistry` -that drives ingest in the Nvisy runtime. - -## Overview - -Built-in handlers cover TXT, JSON, Markdown, HTML, CSV, XLSX, PNG, -JPEG, TIFF, WAV, MP3, PDF, and DOCX. Each implements `Handler` -(streaming `next_chunk`, random-access `read` / `redact`, -`lift_chunk` for offset translation) and pairs with a `Loader` -that decodes raw bytes into the handler. A `Format` descriptor -built via `Format::new::(id, loader)` plus chained -`.with_extensions(...)` / `.with_content_types(...)` registers the -pair into `CodecRegistry`. - -Consumers resolve a `Format` by extension, content-type, or id and -get back an `UntypedDocumentHandle` they commit to a modality via -`into_text` / `into_tabular` / `into_image` / `into_audio`. 
The -typed `DocumentHandle` implements `nvisy-core`'s `TextAt` / -`DataAt` / `RedactAt` directly, so pipeline components read from -and write to codec-backed sources through the same traits the -engine bounds on. The `content` module (`Content`, `ContentData`, -`ContentDescriptor`, `ContentDigest`, `ContentRecord`, -`ContentSource`, `TextEncoding`) carries the raw-bytes side of the -import surface. Each format is feature-gated (`txt`, `csv`, -`png`, …) with umbrella features `text`, `tabular`, `image`, -`audio`, `rich`. Depends only on `nvisy-core`. - -## Documentation - -See [`docs/`](../../docs/) for architecture, security, and API documentation. - -## Changelog - -See [CHANGELOG.md](../../CHANGELOG.md) for release notes and version history. - -## License - -Apache 2.0 License, see [LICENSE.txt](../../LICENSE.txt) - -## Support - -- **Documentation**: [docs.nvisy.com](https://docs.nvisy.com) -- **Issues**: [GitHub Issues](https://github.com/nvisycom/runtime/issues) -- **Email**: [support@nvisy.com](mailto:support@nvisy.com) diff --git a/crates/nvisy-codec/src/content/bundle.rs b/crates/nvisy-codec/src/content/bundle.rs deleted file mode 100644 index cc524605..00000000 --- a/crates/nvisy-codec/src/content/bundle.rs +++ /dev/null @@ -1,114 +0,0 @@ -//! [`Content`]: data bytes optionally paired with a caller-supplied -//! [`ContentDescriptor`]. - -use std::path::Path; - -use derive_more::{AsRef, Deref}; -use nvisy_core::Result; -use serde::{Deserialize, Serialize}; - -use super::{ContentData, ContentDescriptor, ContentSource}; - -/// Upload-shape carrier: raw bytes plus the caller's descriptive -/// metadata. -/// -/// [`ContentData`] holds the bytes and source identity. -/// [`ContentDescriptor`] holds filename, MIME hint, and extras when -/// the caller has them. The descriptor is optional because some -/// import paths (raw byte uploads, generated content) have nothing -/// to attach. 
-/// -/// After `Registry::register_content` consumes a `Content`, the -/// stored shape is a `ContentRecord` (descriptor + byte-derived -/// digest), which is what registry reads return. -#[derive(Debug, Clone, PartialEq)] -#[derive(AsRef, Deref, Serialize, Deserialize)] -pub struct Content { - /// Raw content bytes. - #[deref] - #[as_ref] - data: ContentData, - /// Caller-supplied descriptive metadata. - descriptor: Option, -} - -impl From for Content { - fn from(data: ContentData) -> Self { - Self::new(data) - } -} - -impl Content { - /// Create content from data without a descriptor. - pub fn new(data: ContentData) -> Self { - Self { - data, - descriptor: None, - } - } - - /// Create content with a caller-supplied descriptor. - pub fn with_descriptor(data: ContentData, descriptor: ContentDescriptor) -> Self { - Self { - data, - descriptor: Some(descriptor), - } - } - - /// Returns the raw content data. - pub fn data(&self) -> &ContentData { - &self.data - } - - /// Returns the caller-supplied descriptor, if present. - pub fn descriptor(&self) -> Option<&ContentDescriptor> { - self.descriptor.as_ref() - } - - /// Returns the content source identifier. - pub fn content_source(&self) -> ContentSource { - self.data.content_source - } - - /// Returns the raw bytes. - pub fn as_bytes(&self) -> &[u8] { - self.data.as_bytes() - } - - /// Returns `true` if the content appears to be text. - pub fn is_likely_text(&self) -> bool { - self.data.is_likely_text() - } - - /// Try to get the content as a string slice. - /// - /// # Errors - /// - /// Returns an error if the content is not valid UTF-8. - pub fn as_str(&self) -> Result<&str> { - self.data.as_str() - } - - /// Caller-supplied MIME type, if any. Detected MIME isn't - /// available pre-registration (the registry computes it). - pub fn content_type(&self) -> Option<&str> { - self.descriptor - .as_ref() - .and_then(|d| d.content_type.as_deref()) - } - - /// Original filename from the descriptor. 
- pub fn filename(&self) -> Option<&Path> { - self.descriptor.as_ref().and_then(|d| d.filename.as_deref()) - } - - /// File extension from the descriptor's source path. - pub fn file_extension(&self) -> Option<&str> { - self.descriptor.as_ref().and_then(|d| d.file_extension()) - } - - /// Consume and return both data and descriptor. - pub fn into_parts(self) -> (ContentData, Option) { - (self.data, self.descriptor) - } -} diff --git a/crates/nvisy-codec/src/content/content_data.rs b/crates/nvisy-codec/src/content/content_data.rs deleted file mode 100644 index a31f5158..00000000 --- a/crates/nvisy-codec/src/content/content_data.rs +++ /dev/null @@ -1,268 +0,0 @@ -//! Raw content bytes with source identity. -//! -//! [`ContentData`] is the pure data half of the content model. It holds -//! the raw bytes and a [`ContentSource`] identifier. All descriptive -//! attributes (MIME type, filename, arbitrary metadata) live on -//! [`ContentMetadata`]. -//! -//! [`ContentMetadata`]: super::ContentMetadata - -use std::{fmt, str}; - -use bytes::Bytes; -use nvisy_core::{Error, ErrorKind, Result}; -use serde::{Deserialize, Serialize}; -use sha2::{Digest, Sha256}; - -use super::ContentSource; - -/// Raw content bytes with source identity. -/// -/// This is the data-only half of the content model — it does not -/// carry MIME type, filename, or other descriptive metadata. -/// Pair with [`ContentDescriptor`] via [`Content`] for a complete -/// representation. -/// -/// [`ContentDescriptor`]: super::ContentDescriptor -/// [`Content`]: super::Content -#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] -pub struct ContentData { - /// Unique identifier for the content source. - pub content_source: ContentSource, - /// The actual content bytes. - data: Bytes, -} - -impl ContentData { - /// Creates content data from raw bytes. 
- pub fn new(content_source: ContentSource, data: Bytes) -> Self { - Self { - content_source, - data, - } - } - - /// Creates content data from a text string. - pub fn from_text(content_source: ContentSource, text: impl Into) -> Self { - Self { - content_source, - data: Bytes::from(text.into().into_bytes()), - } - } - - /// Returns the size of the content in bytes. - #[must_use] - pub fn size(&self) -> usize { - self.data.len() - } - - /// Returns the content data as a byte slice. - #[must_use] - pub fn as_bytes(&self) -> &[u8] { - &self.data - } - - /// Converts the content data to `Bytes`. - #[must_use] - pub fn to_bytes(&self) -> Bytes { - self.data.clone() - } - - /// Consumes and converts into `Bytes`. - #[must_use] - pub fn into_bytes(self) -> Bytes { - self.data - } - - /// Returns `true` if the content appears to be text. - /// - /// Checks that the content is valid UTF-8 and contains no control - /// characters other than common whitespace (tab, newline, carriage - /// return). - #[must_use] - pub fn is_likely_text(&self) -> bool { - let Ok(s) = str::from_utf8(&self.data) else { - return false; - }; - s.chars() - .all(|c| !c.is_control() || matches!(c, '\t' | '\n' | '\r')) - } - - /// Tries to convert the content data to a UTF-8 string slice. - /// - /// # Errors - /// - /// Returns an error if the content data contains invalid UTF-8 sequences. - pub fn as_str(&self) -> Result<&str> { - str::from_utf8(&self.data) - .map_err(|e| Error::new(ErrorKind::Serialization, format!("Invalid UTF-8: {e}"))) - } - - /// Computes and returns the SHA256 hash of the content. - #[must_use] - pub fn sha256(&self) -> Bytes { - let mut hasher = Sha256::new(); - hasher.update(&self.data); - Bytes::from(hasher.finalize().to_vec()) - } - - /// Returns the SHA256 hash as a hex string. - #[must_use] - pub fn sha256_hex(&self) -> String { - hex::encode(self.sha256()) - } - - /// Verifies the content against a provided SHA256 hash. 
- /// - /// # Errors - /// - /// Returns an error if the computed hash does not match the expected hash. - pub fn verify_sha256(&self, expected_hash: impl AsRef<[u8]>) -> Result<()> { - let actual_hash = self.sha256(); - let expected = expected_hash.as_ref(); - - if actual_hash.as_ref() == expected { - Ok(()) - } else { - Err(Error::new( - ErrorKind::Validation, - format!( - "Hash mismatch: expected {}, got {}", - hex::encode(expected), - hex::encode(actual_hash) - ), - )) - } - } - - /// Returns a slice of the content data. - /// - /// # Errors - /// - /// Returns an error if the end index is beyond the content length - /// or if start is greater than end. - pub fn slice(&self, start: usize, end: usize) -> Result { - if end > self.data.len() { - return Err(Error::new( - ErrorKind::Validation, - format!( - "Slice end {} exceeds content length {}", - end, - self.data.len() - ), - )); - } - if start > end { - return Err(Error::new( - ErrorKind::Validation, - format!("Slice start {start} is greater than end {end}"), - )); - } - Ok(Bytes::copy_from_slice(&self.data[start..end])) - } - - /// Returns `true` if the content is empty. - #[must_use] - pub fn is_empty(&self) -> bool { - self.data.is_empty() - } - - /// Detect MIME type from the raw bytes using magic-byte signatures. - /// - /// Returns `None` for content with no recognizable magic bytes - /// (e.g. plain text). Pairing the result with - /// `ContentMetadata::detected_content_type` is a caller-side - /// convention — no type-level link exists between the two. 
- #[must_use] - pub fn detect_mime(&self) -> Option { - infer::get(&self.data).map(|t| t.mime_type().to_owned()) - } -} - -impl From<&str> for ContentData { - fn from(s: &str) -> Self { - Self::from_text(ContentSource::new(), s) - } -} - -impl From for ContentData { - fn from(s: String) -> Self { - Self::from_text(ContentSource::new(), s) - } -} - -impl From<&[u8]> for ContentData { - fn from(bytes: &[u8]) -> Self { - Self::new(ContentSource::new(), Bytes::copy_from_slice(bytes)) - } -} - -impl From> for ContentData { - fn from(vec: Vec) -> Self { - Self::new(ContentSource::new(), Bytes::from(vec)) - } -} - -impl From for ContentData { - fn from(bytes: Bytes) -> Self { - Self::new(ContentSource::new(), bytes) - } -} - -impl fmt::Display for ContentData { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - if let Ok(text) = self.as_str() { - write!(f, "{text}") - } else { - write!(f, "[Binary data: {} bytes]", self.size()) - } - } -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn sha256_computation() { - let content = ContentData::from("Hello, world!"); - let hash = content.sha256(); - assert_eq!(hash.len(), 32); - assert_eq!(hash, content.sha256()); - } - - #[test] - fn sha256_verification() { - let content = ContentData::from("Hello, world!"); - let hash = content.sha256().clone(); - assert!(content.verify_sha256(&hash).is_ok()); - assert!(content.verify_sha256([0u8; 32]).is_err()); - } - - #[test] - fn slice_operations() { - let content = ContentData::from("Hello, world!"); - assert_eq!(content.slice(0, 5).unwrap(), Bytes::from("Hello")); - assert_eq!(content.slice(7, 12).unwrap(), Bytes::from("world")); - assert!(content.slice(0, 100).is_err()); - assert!(content.slice(10, 5).is_err()); - } - - #[test] - fn detect_mime_png() { - let png = vec![ - 0x89, 0x50, 0x4E, 0x47, 0x0D, 0x0A, 0x1A, 0x0A, 0x00, 0x00, 0x00, 0x0D, 0x49, 0x48, - 0x44, 0x52, - ]; - let data = ContentData::from(png); - assert_eq!(data.detect_mime().as_deref(), 
Some("image/png")); - } - - #[test] - fn is_likely_text() { - assert!(ContentData::from("ascii text").is_likely_text()); - assert!(ContentData::from("").is_likely_text()); - assert!(ContentData::from("café").is_likely_text()); - assert!(!ContentData::from(vec![0x00]).is_likely_text()); - } -} diff --git a/crates/nvisy-codec/src/content/content_metadata.rs b/crates/nvisy-codec/src/content/content_metadata.rs deleted file mode 100644 index ca586515..00000000 --- a/crates/nvisy-codec/src/content/content_metadata.rs +++ /dev/null @@ -1,176 +0,0 @@ -//! Two-layer content metadata: -//! -//! - [`ContentDescriptor`] holds caller-supplied descriptive bits -//! (filename, MIME hint, source path, arbitrary extras). All -//! optional — the caller might or might not have any of them. -//! - [`ContentDigest`] holds facts the registry computes by looking -//! at the bytes (size, sha256, sniffed MIME). Required fields are -//! actually required. -//! - [`ContentRecord`] bundles a descriptor with a digest. This is -//! what the registry persists and what read sites get back. - -use std::path::{Path, PathBuf}; - -use schemars::JsonSchema; -use serde::{Deserialize, Serialize}; - -/// Caller-supplied descriptive metadata for an upload. -/// -/// Built before the bytes have been written to the registry, so -/// every field is optional — the caller knows whatever they know. -/// The registry's `register_content` consumes this alongside the -/// bytes to produce a [`ContentRecord`]. -#[derive(Debug, Clone, Default, PartialEq, Serialize, Deserialize, JsonSchema)] -#[serde(rename_all = "camelCase")] -pub struct ContentDescriptor { - /// Optional path to the source file. - #[serde(default, skip_serializing_if = "Option::is_none")] - #[schemars(with = "Option")] - pub source_path: Option, - /// MIME type supplied by the caller (e.g. `"text/plain"` from - /// an HTTP `Content-Type` header or an explicit API call). 
- #[serde(default, skip_serializing_if = "Option::is_none")] - pub content_type: Option, - /// Original filename, if known. Used by `CodecRegistry` for - /// extension-based format resolution. - #[serde(default, skip_serializing_if = "Option::is_none")] - #[schemars(with = "Option")] - pub filename: Option, - /// Caller-supplied key-value pairs that policy conditions - /// (`Condition::Metadata { key, value }`) match against. - #[serde(default, skip_serializing_if = "Option::is_none")] - pub policy_metadata: Option>, -} - -impl ContentDescriptor { - /// Create an empty descriptor. - #[must_use] - pub fn new() -> Self { - Self::default() - } - - /// Create a descriptor with a source file path. - pub fn with_path(path: impl Into) -> Self { - Self { - source_path: Some(path.into()), - ..Self::default() - } - } - - /// Set the caller-supplied MIME type (builder pattern). - #[must_use] - pub fn with_content_type(mut self, mime: impl Into) -> Self { - self.content_type = Some(mime.into()); - self - } - - /// Set the original filename (builder pattern). - #[must_use] - pub fn with_filename(mut self, name: impl Into) -> Self { - self.filename = Some(name.into()); - self - } - - /// Get the file extension from the source path, if available. - #[must_use] - pub fn file_extension(&self) -> Option<&str> { - self.source_path - .as_ref() - .and_then(|path| path.extension()) - .and_then(|ext| ext.to_str()) - } - - /// Get the full path if available. - #[must_use] - pub fn path(&self) -> Option<&Path> { - self.source_path.as_deref() - } - - /// Get a single value from the policy metadata map. - #[must_use] - pub fn get_policy_metadata(&self, key: &str) -> Option<&serde_json::Value> { - self.policy_metadata.as_ref().and_then(|m| m.get(key)) - } - - /// Insert a key-value pair into the policy metadata map, - /// creating the map if it doesn't exist yet. 
- pub fn set_policy_metadata(&mut self, key: impl Into, value: serde_json::Value) { - self.policy_metadata - .get_or_insert_with(serde_json::Map::new) - .insert(key.into(), value); - } - - /// Remove a key from the policy metadata map. Returns the - /// removed value if the key existed. - pub fn remove_policy_metadata(&mut self, key: &str) -> Option { - self.policy_metadata.as_mut().and_then(|m| m.remove(key)) - } -} - -/// Byte-derived facts about a piece of content. -/// -/// Computed by `Registry::register_content` after the bytes are in -/// hand. Required fields (`size`, `sha256`) are unconditional; -/// `detected_content_type` is `Option` because magic-byte sniffing -/// may legitimately fail (e.g. plain text). -#[derive(Debug, Clone, PartialEq, Serialize, Deserialize, JsonSchema)] -#[serde(rename_all = "camelCase")] -pub struct ContentDigest { - /// Size in bytes. - pub size: u64, - /// SHA-256 hex digest of the raw bytes. - pub sha256: String, - /// MIME type sniffed from the bytes, if magic-byte detection - /// produced a result. - #[serde(default, skip_serializing_if = "Option::is_none")] - pub detected_content_type: Option, -} - -/// Persisted, post-registration view of a piece of content. -/// -/// Returned by registry read endpoints (`list_content_with_record`, -/// `read_content`). The [`ContentDescriptor`] half is whatever the -/// caller supplied at upload; the [`ContentDigest`] half is what -/// the registry computed. -#[derive(Debug, Clone, PartialEq, Serialize, Deserialize, JsonSchema)] -#[serde(rename_all = "camelCase")] -pub struct ContentRecord { - /// Caller-supplied descriptor (filename, MIME hint, policy metadata). - pub descriptor: ContentDescriptor, - /// Registry-computed digest (size, sha256, detected MIME). - pub digest: ContentDigest, -} - -impl ContentRecord { - /// Best-available MIME type: caller-supplied takes priority - /// over sniffed. 
- #[must_use] - pub fn content_type(&self) -> Option<&str> { - self.descriptor - .content_type - .as_deref() - .or(self.digest.detected_content_type.as_deref()) - } - - /// Original filename from the descriptor. - #[must_use] - pub fn filename(&self) -> Option<&Path> { - self.descriptor.filename.as_deref() - } - - /// Original filename rendered as a UTF-8 string. Non-UTF-8 byte - /// sequences in the path are replaced with U+FFFD (lossy - /// conversion). Use [`filename`] when you need the raw `&Path`. - /// - /// [`filename`]: Self::filename - #[must_use] - pub fn filename_lossy(&self) -> Option { - self.filename().map(|p| p.to_string_lossy().into_owned()) - } - - /// File extension from the descriptor's source path. - #[must_use] - pub fn file_extension(&self) -> Option<&str> { - self.descriptor.file_extension() - } -} diff --git a/crates/nvisy-codec/src/content/encoding.rs b/crates/nvisy-codec/src/content/encoding.rs deleted file mode 100644 index 27f8d8d0..00000000 --- a/crates/nvisy-codec/src/content/encoding.rs +++ /dev/null @@ -1,23 +0,0 @@ -//! Character encoding for text-based loaders. - -use nvisy_core::{Error, Result}; -/// Character encoding used to decode raw bytes before parsing. -#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)] -pub enum TextEncoding { - /// UTF-8 (the default and by far the most common encoding). - #[default] - Utf8, -} - -impl TextEncoding { - /// Decode raw bytes to a UTF-8 string. - /// - /// `origin` identifies the caller for error messages - /// (e.g. `"json-loader"`). - pub fn decode_bytes(self, bytes: &[u8], origin: &str) -> Result { - match self { - Self::Utf8 => String::from_utf8(bytes.to_vec()) - .map_err(|e| Error::validation(format!("Invalid UTF-8: {e}"), origin.to_owned())), - } - } -} diff --git a/crates/nvisy-codec/src/content/mod.rs b/crates/nvisy-codec/src/content/mod.rs deleted file mode 100644 index a0e4e4f0..00000000 --- a/crates/nvisy-codec/src/content/mod.rs +++ /dev/null @@ -1,29 +0,0 @@ -//! 
Content data containers, metadata, and source identity. -//! -//! - [`ContentData`]: raw content bytes with source identity. -//! - [`ContentDescriptor`]: caller-supplied descriptive bits -//! (filename, MIME hint, extras) — built before bytes are -//! persisted. -//! - [`ContentDigest`]: byte-derived facts (size, sha256, sniffed -//! MIME) — computed at registration time. -//! - [`ContentRecord`]: persisted view (descriptor + digest), what -//! registry reads return. -//! - [`Content`]: [`ContentData`] paired with an optional -//! [`ContentDescriptor`] — the upload-shape carrier. -//! - [`ContentSource`]: UUIDv7-based content identity and lineage. -//! -//! Top-level format classification lives on [`FormatId`]. -//! -//! [`FormatId`]: crate::FormatId - -mod bundle; -mod content_data; -mod content_metadata; -mod encoding; - -pub use nvisy_core::entity::ContentSource; - -pub use self::bundle::Content; -pub use self::content_data::ContentData; -pub use self::content_metadata::{ContentDescriptor, ContentDigest, ContentRecord}; -pub use self::encoding::TextEncoding; diff --git a/crates/nvisy-codec/src/core/format.rs b/crates/nvisy-codec/src/core/format.rs deleted file mode 100644 index fe4015e9..00000000 --- a/crates/nvisy-codec/src/core/format.rs +++ /dev/null @@ -1,179 +0,0 @@ -//! Format identity: what kind of thing a registered codec is. -//! -//! - [`FormatId`] — stable string identifier (e.g. -//! `"nvisy.text.txt"`). Open namespace, no central enum. -//! - [`Format`] — descriptor [`CodecRegistry`] indexes by id / -//! extension / content type. Bundles a `FormatId`, its -//! [`ModalityKind`], lookup keys, and an erased loader that -//! decodes bytes into a typed handle. -//! -//! [`CodecRegistry`]: super::CodecRegistry -//! 
[`ModalityKind`]: nvisy_core::modality::ModalityKind - -use std::borrow::Cow; -use std::fmt; -use std::sync::Arc; - -use nvisy_core::modality::{Modality, ModalityKind}; - -use super::{ErasedLoader, Loader, erase}; -use crate::document::{DocumentHandle, UntypedDocumentHandle}; - -/// Stable identifier for a registered codec format. Open string -/// namespace — downstream crates ship their own formats by -/// registering a [`Format`] with a unique [`FormatId`]. -/// -/// Convention: dot-separated namespace. Built-in formats use the -/// `nvisy.` prefix (e.g. `"nvisy.text.txt"`, `"nvisy.rich.pdf"`). -/// Third-party formats use their own (e.g. `"acme.parquet.v2"`). -#[derive(Debug, Clone, PartialEq, Eq, Hash)] -pub struct FormatId(Cow<'static, str>); - -impl FormatId { - /// Construct from a static string literal — no allocation. - pub const fn from_static(id: &'static str) -> Self { - Self(Cow::Borrowed(id)) - } - - /// Construct from an owned [`String`]. - pub fn from_owned(id: String) -> Self { - Self(Cow::Owned(id)) - } - - /// Borrow as `&str`. - pub fn as_str(&self) -> &str { - &self.0 - } -} - -impl fmt::Display for FormatId { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - f.write_str(&self.0) - } -} - -impl AsRef for FormatId { - fn as_ref(&self) -> &str { - &self.0 - } -} - -/// Descriptor for one registered codec format. Indexed by -/// [`CodecRegistry`] under its [`FormatId`], every extension in -/// `extensions`, and every MIME in `content_types`. -/// -/// Construct via [`Format::new`]; read the parts via the accessor -/// methods. The fields are crate-private so the constructor stays -/// the only path that produces a `Format` — that way the -/// [`ModalityKind`] tag is always derived from the loader's -/// modality and never hand-set. 
-/// -/// [`CodecRegistry`]: super::CodecRegistry -#[derive(Clone)] -pub struct Format { - pub(crate) id: FormatId, - pub(crate) modality: ModalityKind, - pub(crate) extensions: Vec>, - pub(crate) content_types: Vec>, - pub(crate) loader: Arc, -} - -impl Format { - /// Build a [`Format`] for modality `M`. The runtime - /// [`ModalityKind`] tag is taken from `M::KIND` and the loader - /// is erased internally — neither needs to be named at the call - /// site. - /// - /// Extensions and content types default to empty; chain - /// [`with_extensions`] / [`with_content_types`] to declare the - /// lookup keys the [`CodecRegistry`] indexes this format under. - /// - /// [`with_extensions`]: Self::with_extensions - /// [`with_content_types`]: Self::with_content_types - /// [`CodecRegistry`]: super::CodecRegistry - pub fn new(id: FormatId, loader: L) -> Self - where - M: Modality, - L: Loader, - DocumentHandle: Into, - { - Self { - id, - modality: M::KIND, - extensions: Vec::new(), - content_types: Vec::new(), - loader: erase(loader), - } - } - - /// Declare the file extensions (lowercased, no leading dot) that - /// resolve to this format. Extends any previously-declared list. - #[must_use] - pub fn with_extensions(mut self, extensions: I) -> Self - where - I: IntoIterator, - S: Into>, - { - self.extensions - .extend(extensions.into_iter().map(Into::into)); - self - } - - /// Declare the MIME content types (lowercased) that resolve to - /// this format. Extends any previously-declared list. - #[must_use] - pub fn with_content_types(mut self, content_types: I) -> Self - where - I: IntoIterator, - S: Into>, - { - self.content_types - .extend(content_types.into_iter().map(Into::into)); - self - } - - /// Stable identifier of this format. - pub fn id(&self) -> &FormatId { - &self.id - } - - /// Modality this format produces. - pub fn modality(&self) -> ModalityKind { - self.modality - } - - /// File extensions (lowercased, no leading dot) that resolve to - /// this format. 
- pub fn extensions(&self) -> &[Cow<'static, str>] { - &self.extensions - } - - /// MIME content types (lowercased) that resolve to this format. - pub fn content_types(&self) -> &[Cow<'static, str>] { - &self.content_types - } - - /// Decode raw content through this format's loader, returning - /// the runtime-tagged handle. Equivalent to calling - /// [`CodecRegistry::decode`] after resolving the - /// format yourself. - /// - /// [`CodecRegistry::decode`]: super::CodecRegistry::decode - pub async fn decode( - &self, - content: crate::content::ContentData, - ) -> Result { - self.loader.decode(content).await - } -} - -impl fmt::Debug for Format { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - f.debug_struct("Format") - .field("id", &self.id) - .field("modality", &self.modality) - .field("extensions", &self.extensions) - .field("content_types", &self.content_types) - .finish_non_exhaustive() - } -} diff --git a/crates/nvisy-codec/src/core/handler.rs b/crates/nvisy-codec/src/core/handler.rs deleted file mode 100644 index 9ecd97b6..00000000 --- a/crates/nvisy-codec/src/core/handler.rs +++ /dev/null @@ -1,127 +0,0 @@ -//! What a codec handler exposes — the trait surface every shipped -//! format handler implements: -//! -//! - [`Handler`] — per-modality capability trait. Identifies and -//! serialises the handler ([`format`], [`source`], [`encode`]), -//! streams chunks ([`next_chunk`]), supports random-access reads -//! and redactions ([`read`], [`redact`]), and lifts recognizer -//! offsets back to source coordinates ([`lift_chunk`]). -//! - [`Chunk`] — one decoded unit yielded by `next_chunk`. -//! -//! [`format`]: Handler::format -//! [`source`]: Handler::source -//! [`encode`]: Handler::encode -//! [`next_chunk`]: Handler::next_chunk -//! [`read`]: Handler::read -//! [`redact`]: Handler::redact -//! 
[`lift_chunk`]: Handler::lift_chunk - -use std::ops::Range; - -use nvisy_core::Error; -use nvisy_core::modality::Modality; -use nvisy_core::redaction::Redactions; - -use super::FormatId; -use crate::content::{ContentData, ContentSource}; - -/// One decoded unit yielded by [`Handler::next_chunk`]. -/// -/// `data` is the per-modality wire payload; `location` is the -/// coordinate the handler will accept in [`Handler::read`] / -/// [`Handler::redact`] to address the same chunk again. `hints` -/// carries out-of-band context strings the chunk's structural -/// neighbours surface — CSV/XLSX column headers, JSON object -/// keys, HTML parent-element text — for downstream context-aware -/// recognizers; handlers without such metadata leave it empty. -#[derive(Debug, Clone, PartialEq)] -pub struct Chunk { - /// Coordinate addressing this chunk inside the handler. - pub location: M::Location, - /// Wire payload at the chunk's location. - pub data: M::Data, - /// Out-of-band context strings recognizers should treat as - /// in-context (column headers, parent element text, …). - /// Empty when the handler has no such metadata to surface. - pub hints: Vec, -} - -/// Per-modality capability trait every format handler implements. -/// -/// Identifies and serialises the handler ([`format`], [`source`], -/// [`encode`]), streams chunks ([`next_chunk`]), supports -/// random-access reads and redactions ([`read`], [`redact`]), and -/// lifts recognizer offsets back to source coordinates -/// ([`lift_chunk`]). -/// -/// The handler owns the streaming cursor — concurrent iteration -/// of the same handle is not supported (only one `&mut self`). 
-/// -/// [`format`]: Handler::format -/// [`source`]: Handler::source -/// [`encode`]: Handler::encode -/// [`next_chunk`]: Handler::next_chunk -/// [`read`]: Handler::read -/// [`redact`]: Handler::redact -/// [`lift_chunk`]: Handler::lift_chunk -#[async_trait::async_trait] -pub trait Handler: Send + Sync + 'static { - /// Stable id of the format this handler represents (e.g. - /// `"nvisy.text.txt"`). Cheap to clone. - fn format(&self) -> FormatId; - - /// Content source identity and lineage for this handler. - fn source(&self) -> ContentSource; - - /// Serialize the current handler content back to [`ContentData`]. - fn encode(&self) -> Result; - - /// Advance the cursor and yield the next chunk, or `None` at - /// end-of-stream. - async fn next_chunk(&mut self) -> Result>, Error>; - - /// Read the wire payload at the given location. Used by - /// [`TextAt`] resolvers to fetch bytes for a coordinate already - /// known from somewhere else (an entity audit record, an - /// annotation). Extraction itself does not call this — it - /// drives [`next_chunk`] which returns `(location, data)` - /// together. - /// - /// [`next_chunk`]: Handler::next_chunk - /// [`TextAt`]: nvisy_core::extraction::TextAt - async fn read(&self, location: &M::Location) -> Result, Error>; - - /// Apply a batch of `(location, replacement)` pairs in whatever - /// order is correct for this format. Engine guarantees no two - /// locations overlap; handler decides ordering (right-to-left - /// for text/audio so deletions don't shift later indices, batch - /// per page for PDF, …). The first error aborts the batch. - /// - /// Use [`Redactions::single`] when only one replacement is needed. - async fn redact(&mut self, redactions: Redactions) -> Result<(), Error>; - - /// Translate a `value_range` expressed inside `chunk.data`'s - /// coordinate system into a source-coordinate `M::Location`. - /// - /// Recognizers see the unescaped, decoded chunk payload and - /// emit offsets into that. 
Downstream stages — dedup, redact, - /// audit — need locations that address the handler's source - /// bytes. `lift_chunk` is the bridge. - /// - /// For text-shaped handlers where `chunk.data` is byte-for-byte - /// a slice of source (TXT lines, HTML text nodes, PDF page - /// text, CSV cells, DOCX text runs), the mapping is the - /// identity offset add against `chunk.location.start`. Handlers - /// whose chunks decode escapes or otherwise transform the - /// payload (JSON `\"` / `\\`, future HTML entity refs) override - /// to walk their per-chunk escape map. - /// - /// Returns `None` when the range has no source pre-image — out - /// of bounds, lands inside an escape pair, or the modality - /// doesn't have a meaningful `usize` value-range concept (image - /// bounding boxes, audio time spans, tabular cell coords). - /// Non-text impls leave the default `None`. - fn lift_chunk(&self, _chunk: &Chunk, _value_range: Range) -> Option { - None - } -} diff --git a/crates/nvisy-codec/src/core/loader.rs b/crates/nvisy-codec/src/core/loader.rs deleted file mode 100644 index f0dc5120..00000000 --- a/crates/nvisy-codec/src/core/loader.rs +++ /dev/null @@ -1,129 +0,0 @@ -//! Decoding raw bytes into a typed handle. -//! -//! - [`Loader`] — per-modality decoder format implementations -//! write. Returns a concrete handler that implements -//! [`Handler`]. -//! - [`ErasedLoader`] — object-safe loader surface the -//! [`CodecRegistry`] stores behind `Arc`. Adapts a per-modality -//! `Loader` into a uniform `decode` signature that returns -//! [`UntypedDocumentHandle`]. -//! - [`erase`] — bridge from typed `Loader` to -//! `Arc` every shipped format uses when -//! populating [`Format::loader`]. -//! -//! The handler's own [`Handler::format`] supplies the -//! [`FormatId`] inside [`ErasedLoader::decode`]; [`erase`] only -//! erases `M`. -//! -//! [`Handler`]: super::Handler -//! [`Handler::format`]: super::Handler::format -//! [`CodecRegistry`]: super::CodecRegistry -//! 
[`UntypedDocumentHandle`]: crate::document::UntypedDocumentHandle -//! [`Format::loader`]: super::Format::loader -//! [`FormatId`]: super::FormatId - -use std::marker::PhantomData; -use std::sync::Arc; - -use nvisy_core::Error; -use nvisy_core::modality::Modality; - -use super::Handler; -use crate::content::ContentData; -use crate::document::{DocumentHandle, UntypedDocumentHandle}; - -/// Per-modality format loader. -/// -/// A loader validates and parses raw content for modality `M`, -/// producing a handler that implements [`Handler`]. Loaders are -/// the leaves the [`CodecRegistry`] composes — registering a -/// format means registering its loader. -/// -/// # Implementing a third-party format -/// -/// 1. Implement [`Handler`] for the per-format handler type that -/// owns the parsed in-memory representation. -/// 2. Implement `Loader` for a stateless type whose [`decode`] -/// validates raw [`ContentData`] and returns the handler. -/// 3. Build a [`Format`] with [`Format::new`], chain -/// [`with_extensions`] / [`with_content_types`] as needed, and -/// register it on a [`CodecRegistry`] via -/// [`CodecRegistry::add_format`]. -/// -/// The registry erases `M` internally; third-party callers never -/// touch the object-safe loader surface. -/// -/// [`Handler`]: super::Handler -/// [`CodecRegistry`]: super::CodecRegistry -/// [`CodecRegistry::add_format`]: super::CodecRegistry::add_format -/// [`Format`]: super::Format -/// [`Format::new`]: super::Format::new -/// [`with_extensions`]: super::Format::with_extensions -/// [`with_content_types`]: super::Format::with_content_types -/// [`decode`]: Loader::decode -#[async_trait::async_trait] -pub trait Loader: Send + Sync + 'static { - /// The handler type this loader produces. - type Handler: Handler; - - /// Validate and parse the content, returning the loaded handler. - async fn decode(&self, content: ContentData) -> Result; -} - -/// Object-safe loader the [`CodecRegistry`] holds behind `Arc`. 
-/// Adapts a per-modality [`Loader`] into a uniform `decode` -/// signature returning an [`UntypedDocumentHandle`]. -/// -/// Crate-internal: every consumer goes through [`Format::decode`] -/// or [`CodecRegistry::decode`] instead of touching -/// this trait directly. -/// -/// [`CodecRegistry`]: super::CodecRegistry -/// [`Format::decode`]: super::Format::decode -/// [`CodecRegistry::decode`]: super::CodecRegistry::decode -#[async_trait::async_trait] -pub(crate) trait ErasedLoader: Send + Sync + 'static { - /// Decode raw content into an [`UntypedDocumentHandle`]. - async fn decode(&self, content: ContentData) -> Result; -} - -/// Erase a typed [`Loader`] into an `Arc` the -/// [`CodecRegistry`] can store. Called only by [`Format::new`] — -/// not part of the public API. -/// -/// [`CodecRegistry`]: super::CodecRegistry -/// [`Format::new`]: super::Format::new -pub(crate) fn erase(loader: L) -> Arc -where - M: Modality, - L: Loader, - DocumentHandle: Into, -{ - Arc::new(LoaderAdapter { - loader, - _phantom: PhantomData, - }) -} - -/// Private wrapper that holds a typed [`Loader`] and implements -/// the object-safe [`ErasedLoader`] surface. Constructed only via -/// [`erase`]; not part of the public API. -struct LoaderAdapter> { - loader: L, - _phantom: PhantomData M>, -} - -#[async_trait::async_trait] -impl ErasedLoader for LoaderAdapter -where - M: Modality, - L: Loader, - DocumentHandle: Into, -{ - async fn decode(&self, content: ContentData) -> Result { - let handler = self.loader.decode(content).await?; - let format = handler.format(); - let handle: Box> = Box::new(handler); - Ok(DocumentHandle::new(format, handle).into()) - } -} diff --git a/crates/nvisy-codec/src/core/mod.rs b/crates/nvisy-codec/src/core/mod.rs deleted file mode 100644 index f013507d..00000000 --- a/crates/nvisy-codec/src/core/mod.rs +++ /dev/null @@ -1,27 +0,0 @@ -//! Codec core contracts, grouped by concern: -//! -//! - `format` — *what kind of thing a codec is*. [`FormatId`], -//! 
[`Format`] descriptor. -//! - `handler` — *what a handler exposes*. [`Handler`] -//! (per-modality capability surface — identify, encode, stream, -//! read, redact, lift), [`Chunk`] payload. -//! - `loader` — *how raw bytes become a handle*. [`Loader`] -//! (per-modality decoder). The registry-side erasure machinery -//! (`ErasedLoader` trait, `erase` helper) is crate-internal and -//! wired through [`Format::new`] / [`Format::decode`]. -//! - `registry` — *the lookup engine*. [`CodecRegistry`] indexes -//! [`Format`]s by id, extension, and content type, and decodes -//! bytes through the matching loader. -//! -//! Concrete format implementations live in `crate::handler::*`. - -mod format; -mod handler; -mod loader; -mod registry; - -pub use self::format::{Format, FormatId}; -pub use self::handler::{Chunk, Handler}; -pub use self::loader::Loader; -pub(crate) use self::loader::{ErasedLoader, erase}; -pub use self::registry::CodecRegistry; diff --git a/crates/nvisy-codec/src/core/registry.rs b/crates/nvisy-codec/src/core/registry.rs deleted file mode 100644 index 9f65d12a..00000000 --- a/crates/nvisy-codec/src/core/registry.rs +++ /dev/null @@ -1,173 +0,0 @@ -//! [`CodecRegistry`]: resolves an extension or content type to a -//! registered [`Format`] and decodes content through its loader. -//! -//! Downstream crates register their own formats by calling -//! [`CodecRegistry::add_format`] — there is no central enum to -//! extend. - -use std::collections::HashMap; - -use nvisy_core::Error; - -use super::{Format, FormatId}; -use crate::content::ContentData; -use crate::document::UntypedDocumentHandle; - -/// Codec registry — owns the set of registered [`Format`]s and -/// resolves them by extension, content type, or id. -#[derive(Debug, Default)] -pub struct CodecRegistry { - formats: Vec, - by_id: HashMap, - by_extension: HashMap, - by_content_type: HashMap, -} - -impl CodecRegistry { - /// Empty registry. 
Use [`with_format`] / [`add_format`] to add - /// custom formats, or [`with_builtin`] to start from a pre- - /// populated set of every built-in format the active feature - /// set enables. - /// - /// [`with_format`]: Self::with_format - /// [`add_format`]: Self::add_format - /// [`with_builtin`]: Self::with_builtin - pub fn new() -> Self { - Self::default() - } - - /// Pre-populated registry containing every built-in format the - /// active feature set enables (TXT, JSON, HTML, CSV, PNG, JPEG, - /// WAV, PDF, …). Equivalent to [`new`] followed by registering - /// each built-in format. - /// - /// Add custom formats afterward with [`with_format`] (chainable) - /// or [`add_format`] (in-place); they take precedence on - /// extension / content-type collisions (last registration wins). - /// - /// [`new`]: Self::new - /// [`with_format`]: Self::with_format - /// [`add_format`]: Self::add_format - pub fn with_builtin() -> Self { - let mut registry = Self::new(); - #[cfg(feature = "txt")] - registry.add_format(crate::handler::text::txt_format()); - #[cfg(feature = "json")] - registry.add_format(crate::handler::text::json_format()); - #[cfg(feature = "markdown")] - registry.add_format(crate::handler::text::markdown_format()); - #[cfg(feature = "html")] - registry.add_format(crate::handler::text::html_format()); - #[cfg(feature = "csv")] - registry.add_format(crate::handler::tabular::csv_format()); - #[cfg(feature = "xlsx")] - registry.add_format(crate::handler::tabular::xlsx_format()); - #[cfg(feature = "png")] - registry.add_format(crate::handler::image::png_format()); - #[cfg(feature = "jpeg")] - registry.add_format(crate::handler::image::jpeg_format()); - #[cfg(feature = "tiff")] - registry.add_format(crate::handler::image::tiff_format()); - #[cfg(feature = "wav")] - registry.add_format(crate::handler::audio::wav_format()); - #[cfg(feature = "mp3")] - registry.add_format(crate::handler::audio::mp3_format()); - #[cfg(feature = "pdf")] - 
registry.add_format(crate::handler::rich::pdf_format()); - #[cfg(feature = "docx")] - registry.add_format(crate::handler::rich::docx_format()); - registry - } - - /// Register a [`Format`] and return `self` for chained builder - /// calls. Delegates to [`add_format`] for the indexing body. - /// - /// # Panics - /// - /// Panics if the format's id is already registered. Extensions - /// and content types that conflict with an existing format are - /// overwritten (last registration wins) — register custom - /// formats *after* [`with_builtin`] if you want them to take - /// precedence. - /// - /// [`with_builtin`]: Self::with_builtin - /// [`add_format`]: Self::add_format - #[must_use] - pub fn with_format(mut self, format: Format) -> Self { - self.add_format(format); - self - } - - /// In-place equivalent of [`with_format`]. Useful with an - /// already-mut binding (e.g. inside a cfg-stanza in - /// [`with_builtin`]) where the `let registry = registry.with_format(...)` - /// dance is just noise. - /// - /// # Panics - /// - /// Same conditions as [`with_format`]. - /// - /// [`with_format`]: Self::with_format - /// [`with_builtin`]: Self::with_builtin - pub fn add_format(&mut self, format: Format) -> &mut Self { - assert!( - !self.by_id.contains_key(&format.id), - "format id already registered: {}", - format.id - ); - let index = self.formats.len(); - for ext in &format.extensions { - self.by_extension.insert(ext.to_ascii_lowercase(), index); - } - for ct in &format.content_types { - self.by_content_type.insert(ct.to_ascii_lowercase(), index); - } - self.by_id.insert(format.id.clone(), index); - self.formats.push(format); - self - } - - /// Look up a registered format by id. - pub fn by_id(&self, id: &FormatId) -> Option<&Format> { - self.by_id.get(id).map(|&i| &self.formats[i]) - } - - /// Look up a registered format by file extension - /// (case-insensitive, no leading dot). 
- pub fn by_extension(&self, ext: &str) -> Option<&Format> { - self.by_extension - .get(&ext.to_ascii_lowercase()) - .map(|&i| &self.formats[i]) - } - - /// Look up a registered format by MIME content type - /// (case-insensitive). - pub fn by_content_type(&self, mime: &str) -> Option<&Format> { - self.by_content_type - .get(&mime.to_ascii_lowercase()) - .map(|&i| &self.formats[i]) - } - - /// Iterate over every registered format in registration order. - pub fn iter(&self) -> impl Iterator { - self.formats.iter() - } - - /// Decode raw content using the format resolved from the - /// extension hint. Accepts anything convertible into - /// [`ContentData`] — `&str`, `&[u8]`, `Vec`, `Bytes`, - /// `String`. - pub async fn decode( - &self, - content: impl Into, - extension: &str, - ) -> Result { - let format = self.by_extension(extension).ok_or_else(|| { - Error::validation( - format!("no codec registered for extension `{extension}`"), - "nvisy_codec::handler::registry::decode", - ) - })?; - format.loader.decode(content.into()).await - } -} diff --git a/crates/nvisy-codec/src/document/audio.rs b/crates/nvisy-codec/src/document/audio.rs deleted file mode 100644 index 14b86c59..00000000 --- a/crates/nvisy-codec/src/document/audio.rs +++ /dev/null @@ -1,36 +0,0 @@ -//! [`DocumentHandle