diff --git a/Cargo.lock b/Cargo.lock index b3f88a420..8f85ebe17 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -24,41 +24,6 @@ version = "2.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "320119579fcad9c21884f5c4861d16174d0e06250625266f50fe6898340abefa" -[[package]] -name = "aead" -version = "0.5.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d122413f284cf2d62fb1b7db97e02edb8cda96d769b16e443a4f6195e35662b0" -dependencies = [ - "crypto-common 0.1.7", - "generic-array", -] - -[[package]] -name = "aes" -version = "0.8.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b169f7a6d4742236a0a00c541b845991d0ac43e546831af1249753ab4c3aa3a0" -dependencies = [ - "cfg-if", - "cipher", - "cpufeatures 0.2.17", -] - -[[package]] -name = "aes-gcm" -version = "0.10.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "831010a0f742e1209b3bcea8fab6a8e149051ba6099432c8cb2cc117dec3ead1" -dependencies = [ - "aead", - "aes", - "cipher", - "ctr", - "ghash", - "subtle", -] - [[package]] name = "aho-corasick" version = "1.1.4" @@ -128,22 +93,13 @@ checksum = "cc7bb162ec39d46ab1ca8c77bf72e890535becd1751bb45f64c597edb4c8c6b3" [[package]] name = "alloc-stdlib" -version = "0.2.2" +version = "0.2.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "94fb8275041c72129eb51b7d0322c29b8387a0386127718b096429201a5d6ece" +checksum = "0e76a019e91224d279006ff972f1e984179a6e9feb050adba6ce8274aef23195" dependencies = [ "alloc-no-stdlib", ] -[[package]] -name = "android_system_properties" -version = "0.1.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "819e7219dbd41043ac279b19830f2efc897156490d7fd6ea916720117ee66311" -dependencies = [ - "libc", -] - [[package]] name = "anstream" version = "1.0.0" @@ -180,7 +136,7 @@ version = "1.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "40c48f72fd53cd289104fc64099abca73db4166ad86ea0b4341abe65af83dadc" dependencies = [ - "windows-sys 0.60.2", + "windows-sys 0.61.2", ] [[package]] @@ -191,14 +147,14 @@ checksum = "291e6a250ff86cd4a820112fb8898808a366d8f9f58ce16d1f538353ad55747d" dependencies = [ "anstyle", "once_cell_polyfill", - "windows-sys 0.60.2", + "windows-sys 0.61.2", ] [[package]] name = "anyhow" -version = "1.0.102" +version = "1.0.103" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7f202df86484c868dbad7eaa557ef785d5c66295e41b460ef922eca0723b842c" +checksum = "2a4385e2e34eb35d6b3efe798b9eb88096925d87726c0798709bf56d9ed84af3" [[package]] name = "approx" @@ -228,15 +184,9 @@ dependencies = [ [[package]] name = "arrayvec" -version = "0.7.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7c02d123df017efcdfbd739ef81735b36c5ba83ec3c59c80a9d7ecc718f92e50" - -[[package]] -name = "as-any" -version = "0.3.2" +version = "0.7.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b0f477b951e452a0b6b4a10b53ccd569042d1d01729b519e02074a9c0958a063" +checksum = "f02882884d3e1bc524fb12c79f107f6ad0e1cfd498c536ffb494301740995dfe" [[package]] name = "as-slice" @@ -259,28 +209,6 @@ dependencies = [ "tokio", ] -[[package]] -name = "async-stream" -version = "0.3.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0b5a71a6f37880a80d1d7f19efd781e4b5de42c88f0722cc13bcb6cc2cfe8476" -dependencies = [ - "async-stream-impl", - "futures-core", - "pin-project-lite", -] - -[[package]] -name = "async-stream-impl" -version = "0.3.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c7c24de15d275a1ecfd47a380fb4d5ec9bfe0933f309ed5e705b775596a3574d" -dependencies = [ - "proc-macro2", - "quote", - "syn", -] - [[package]] name = "async-trait" version = "0.1.89" @@ -472,9 +400,15 @@ checksum = "1e4b40c7323adcfc0a41c4b88143ed58346ff65a288fc144329c5c45e05d70c6" [[package]] name = "bitflags" -version = "2.12.1" +version = "1.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" + +[[package]] +name = "bitflags" +version = "2.13.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "84d7ced0ae9557296835c32bf1b1e02b44c746701f898460fb000d7eaa84f00a" +checksum = "b4388bee8683e3d04af747c73422af53102d2bd24d9eadb6cbc100baef4b43f8" [[package]] name = "bitstream-io" @@ -496,27 +430,18 @@ dependencies = [ [[package]] name = "block-buffer" -version = "0.12.0" +version = "0.12.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cdd35008169921d80bc60d3d0ab416eecb028c4cd653352907921d95084790be" +checksum = "d2f6c7dbe95a6ed67ad9f18e57daf93a2f034c524b99fd2b76d18fdfeb6660aa" dependencies = [ "hybrid-array", ] -[[package]] -name = "block-padding" -version = "0.3.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a8894febbff9f758034a5b8e12d87918f56dfc64a8e1fe757d65e29041538d93" -dependencies = [ - "generic-array", -] - [[package]] name = "brotli" -version = "8.0.3" +version = "8.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8119e4516436f5708bbc474a9d395bf12f1b5395e93a92a56e647ac3388c8610" +checksum = "5cc91aac060a7a1e25823bdccbfb6af1875b88f17c6daac97894eed8207166b3" dependencies = [ "alloc-no-stdlib", "alloc-stdlib", @@ -525,9 +450,9 @@ dependencies = [ [[package]] name = "brotli-decompressor" -version = "5.0.1" +version = "5.0.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5962523e1b92ce1b5e793d9169b9943eece10d39f62550bc04bb605d75b94924" +checksum = "3a32acac15fe1967bc3986b2a6347dffc965602354ea6f450ad07e8bfd253583" dependencies = [ "alloc-no-stdlib", "alloc-stdlib", @@ -575,9 +500,9 @@ checksum = "8f1fe948ff07f4bd06c30984e69f5b4899c516a3ef74f34df92a2df2ab535495" [[package]] name = "bytes" -version = "1.11.1" +version = "1.12.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1e748733b7cbc798e1434b6ac524f0c1ff2ab456fe201501e6497c8417a4fc33" +checksum = "8ae3f5d315924270530207e2a68396c3cc547f6dca3fbdca317cfb1a51edb593" dependencies = [ "serde", ] @@ -588,20 +513,11 @@ version = "0.10.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1c53ba0f290bfc610084c05582d9c5d421662128fc69f4bf236707af6fd321b9" -[[package]] -name = "cbc" -version = "0.1.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "26b52a9543ae338f279b96b0b9fed9c8093744685043739079ce85cd58f289a6" -dependencies = [ - "cipher", -] - [[package]] name = "cc" -version = "1.2.63" +version = "1.2.65" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "556e016178bb5662a08681bbe0f00f8e17631781a4dfc8c45e466e4b185ec27f" +checksum = "e228eec9be7c17ccb640b59b36a5cd805ea2a564a4c5e162c2f659fea30d3b96" dependencies = [ "find-msvc-tools", "jobserver", @@ -619,17 +535,6 @@ dependencies = [ "serde", ] -[[package]] -name = "cfb" -version = "0.7.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d38f2da7a0a2c4ccf0065be06397cc26a81f4e528be095826eee9d4adbb8c60f" -dependencies = [ - "byteorder", - "fnv", - "uuid", -] - [[package]] name = "cfg-if" version = "1.0.4" @@ -644,38 +549,15 @@ checksum = "613afe47fcd5fac7ccf1db93babcb082c5994d996f20b8b159f2ad1658eb5724" [[package]] name = "chacha20" -version = "0.10.0" +version = "0.10.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6f8d983286843e49675a4b7a2d174efe136dc93a18d69130dd18198a6c167601" +checksum = "d524456ba66e72eb8b115ff89e01e497f8e6d11d78b70b1aa13c0fbd97540a81" dependencies = [ "cfg-if", "cpufeatures 0.3.0", "rand_core 0.10.1", ] -[[package]] -name = "chrono" -version = "0.4.45" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1aa79e62e7697b8e29b513a68abacf485adcd1fe8284a4316c5ae868e6633327" -dependencies = [ - "iana-time-zone", - "js-sys", - "num-traits", - "wasm-bindgen", - "windows-link", -] - -[[package]] -name = "cipher" -version = "0.4.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "773f3b9af64447d2ce9850330c473515014aa235e6a783b02db81ff39e4a3dad" -dependencies = [ - "crypto-common 0.1.7", - "inout", -] - [[package]] name = "clap" version = "4.6.1" @@ -695,7 +577,7 @@ dependencies = [ "anstream", "anstyle", "clap_lex", - "strsim 0.11.1", + "strsim", ] [[package]] @@ -704,7 +586,7 @@ version = "4.6.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f2ce8604710f6733aa641a2b3731eaa1e8b3d9973d5e3565da11800813f997a9" dependencies = [ - "heck 0.5.0", + "heck", "proc-macro2", "quote", "syn", @@ -782,26 +664,6 @@ version = "0.4.32" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cc14f565cf027a105f7a44ccf9e5b424348421a1d8952a8fc9d499d313107789" -[[package]] -name = "console_error_panic_hook" -version = "0.1.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a06aeb73f470f66dcdbf7223caeebb85984942f22f1adb2a088cf9668146bbbc" -dependencies = [ - "cfg-if", - "wasm-bindgen", -] - -[[package]] -name = "console_log" -version = "1.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "be8aed40e4edbf4d3b4431ab260b63fdc40f5780a4766824329ea0f1eefe3c0f" -dependencies = [ - "log", - "web-sys", -] - [[package]] name = "const-oid" version = "0.10.2" @@ -817,15 +679,6 @@ dependencies = [ "unicode-segmentation", ] -[[package]] -name = "convert_case" -version = "0.11.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "affbf0190ed2caf063e3def54ff444b449371d55c58e513a95ab98eca50adb49" -dependencies = [ - "unicode-segmentation", -] - [[package]] name = "core-foundation" version = "0.10.1" @@ -842,15 +695,6 @@ version = "0.8.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "773648b94d0e5d620f64f280777445740e61fe701025087ec8b57f45c791888b" -[[package]] -name = "counter" -version = "0.7.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "66e8e052be91f1c8aae2c1d81307d9f6e67f5f37001e3ddee419e971e73f03bc" -dependencies = [ - "num-traits", -] - [[package]] name = "cpufeatures" version = "0.2.17" @@ -932,7 +776,6 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "78c8292055d1c1df0cce5d180393dc8cce0abec0a7102adb6c7b1eef6016d60a" dependencies = [ "generic-array", - "rand_core 0.6.4", "typenum", ] @@ -989,15 +832,6 @@ dependencies = [ "memchr", ] -[[package]] -name = "ctr" -version = "0.9.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0369ee1ad671834580515889b80f2ea915f23b8be8d0daa4bbaf2ac5c7590835" -dependencies = [ - "cipher", -] - [[package]] name = "darling" version = "0.20.11" @@ -1028,7 +862,7 @@ dependencies = [ "ident_case", "proc-macro2", "quote", - "strsim 0.11.1", + "strsim", "syn", ] @@ -1041,7 +875,7 @@ dependencies = [ "ident_case", "proc-macro2", "quote", - "strsim 0.11.1", + "strsim", "syn", ] @@ -1082,59 +916,35 @@ dependencies = [ ] [[package]] -name = "data-encoding" -version = "2.11.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a4ae5f15dda3c708c0ade84bfee31ccab44a3da4f88015ed22f63732abe300c8" - -[[package]] -name = "deluxe" -version = "0.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8ed332aaf752b459088acf3dd4eca323e3ef4b83c70a84ca48fb0ec5305f1488" -dependencies = [ - "deluxe-core", - "deluxe-macros", - "once_cell", - "proc-macro2", - "syn", -] - -[[package]] -name = "deluxe-core" -version = "0.5.0" +name = "defmt" +version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eddada51c8576df9d6a8450c351ff63042b092c9458b8ac7d20f89cbd0ffd313" +checksum = "a6e524506490a1953d237cb87b1cfc1e46f88c18f10a22dfe0f507dc6bfc7f7f" dependencies = [ - "arrayvec", - "proc-macro2", - "quote", - "strsim 0.10.0", - "syn", + "bitflags 1.3.2", + "defmt-macros", ] [[package]] -name = "deluxe-macros" -version = "0.5.0" +name = "defmt-macros" +version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f87546d9c837f0b7557e47b8bd6eae52c3c223141b76aa233c345c9ab41d9117" +checksum = "f0a27770e9c8f719a79d8b638281f4d828f77d8fd61e0bd94451b9b85e576a0b" dependencies = [ - "deluxe-core", - "heck 0.4.1", - "if_chain", - "proc-macro-crate 1.3.1", + "defmt-parser", + "proc-macro-error2", "proc-macro2", "quote", "syn", ] [[package]] -name = "deranged" -version = "0.5.8" +name = "defmt-parser" +version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7cd812cc2bc1d69d4764bd80df88b4317eaef9e773c75226407d9bc0876b211c" +checksum = "10d60334b3b2e7c9d91ef8150abfb6fa4c1c39ebbcf4a81c2e346aad939fee3e" dependencies = [ - "powerfmt", + "thiserror", ] [[package]] @@ -1183,7 +993,7 @@ version = "2.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "799a97264921d8623a957f6c3b9011f3b5492f557bbb7a5a19b7fa6d06ba8dcb" dependencies = [ - "convert_case 0.10.0", + "convert_case", "proc-macro2", "quote", "rustc_version", @@ -1191,12 +1001,6 @@ dependencies = [ "unicode-xid", ] -[[package]] -name = "deunicode" -version = "1.6.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "abd57806937c9cc163efc8ea3910e00a62e2aeb0b8119f1793a978088f8f6b04" - [[package]] name = "digest" version = "0.10.7" @@ -1213,7 +1017,7 @@ version = "0.11.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f1dd6dbb5841937940781866fa1281a1ff7bd3bf827091440879f9994983d5c2" dependencies = [ - "block-buffer 0.12.0", + "block-buffer 0.12.1", "const-oid", "crypto-common 0.2.2", ] @@ -1256,15 +1060,6 @@ version = "1.0.20" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d0881ea181b1df73ff77ffaaf9c7544ecc11e82fba9b5f27b262a3c73a332555" -[[package]] -name = "ecb" -version = "0.1.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1a8bfa975b1aec2145850fcaa1c6fe269a16578c44705a532ae3edc92b8881c7" -dependencies = [ - "cipher", -] - [[package]] name = "ego-tree" version = "0.11.0" @@ -1278,83 +1073,269 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "91622ff5e7162018101f2fea40d6ebf4a78bbe5a49736a2020649edf9693679e" [[package]] -name = "embedded-io" -version = "0.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ef1a6892d9eef45c8fa6b9e0086428a2cca8491aca8f787c534a3d6d0bcb3ced" - -[[package]] -name = "embedded-io" -version = "0.6.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "edd0f118536f44f5ccd48bcb8b111bdc3de888b58c74639dfb034a357d0f206d" +name = "elide" +version = "0.1.0" +source = "git+https://github.com/nvisycom/elide?branch=main#70e6e8bff19a64cef7653fe4a1b1b7975f95c249" +dependencies = [ + "elide-codec", + "elide-context", + "elide-core", + "elide-detection", + "elide-llm", + "elide-ner", + "elide-ocr", + "elide-orchestration", + "elide-pattern", + "elide-redaction", + "elide-stt", +] [[package]] -name = "encoding_rs" -version = "0.8.35" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "75030f3c4f45dafd7586dd6780965a8c7e8e285a5ecb86713e63a79c5b2766f3" +name = "elide-bento" +version = "0.1.0" dependencies = [ - "cfg-if", + "async-trait", + "base64", + "bentoml", + "elide-core", + "elide-ner", + "elide-ocr", + "hipstr", + "serde", + "thiserror", + "tokio", ] [[package]] -name = "enum_dispatch" -version = "0.3.13" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "aa18ce2bc66555b3218614519ac839ddb759a7d6720732f979ef8d13be147ecd" +name = "elide-codec" +version = "0.1.0" +source = "git+https://github.com/nvisycom/elide?branch=main#70e6e8bff19a64cef7653fe4a1b1b7975f95c249" dependencies = [ - "once_cell", - "proc-macro2", - "quote", - "syn", + "bytes", + "csv", + "ego-tree", + "elide-core", + "hex", + "hound", + "image", + "imageproc", + "mp3lame-encoder", + "quick-xml 0.40.1", + "scraper", + "serde_json", + "sha2 0.11.0", + "symphonia", + "tracing", ] [[package]] -name = "equator" -version = "0.4.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4711b213838dfee0117e3be6ac926007d7f433d7bbe33595975d4190cb07e6fc" +name = "elide-context" +version = "0.1.0" +source = "git+https://github.com/nvisycom/elide?branch=main#70e6e8bff19a64cef7653fe4a1b1b7975f95c249" dependencies = [ - "equator-macro", + "elide-core", + "hipstr", + "unicode-segmentation", ] [[package]] -name = "equator-macro" -version = "0.4.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "44f23cf4b44bfce11a86ace86f8a73ffdec849c9fd00a386a53d278bd9e81fb3" +name = "elide-core" +version = "0.1.0" +source = "git+https://github.com/nvisycom/elide?branch=main#70e6e8bff19a64cef7653fe4a1b1b7975f95c249" dependencies = [ - "proc-macro2", - "quote", - "syn", + "bytes", + "celes", + "derive_builder", + "hipstr", + "jiff", + "oxilangtag", + "serde", + "type-map", + "uuid", ] [[package]] -name = "equivalent" -version = "1.0.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f" +name = "elide-detection" +version = "0.1.0" +source = "git+https://github.com/nvisycom/elide?branch=main#70e6e8bff19a64cef7653fe4a1b1b7975f95c249" +dependencies = [ + "elide-core", + "futures", + "tracing", +] [[package]] -name = "errno" -version = "0.3.14" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "39cab71617ae0d63f51a36d69f866391735b51691dbda63cf6f96d042b63efeb" +name = "elide-llm" +version = "0.1.0" +source = "git+https://github.com/nvisycom/elide?branch=main#70e6e8bff19a64cef7653fe4a1b1b7975f95c249" dependencies = [ - "libc", - "windows-sys 0.52.0", + "async-trait", + "derive_builder", + "elide-core", + "schemars", + "serde", + "tracing", + "unicode-normalization", ] [[package]] -name = "eventsource-stream" -version = "0.2.3" +name = "elide-ner" +version = "0.1.0" +source = "git+https://github.com/nvisycom/elide?branch=main#70e6e8bff19a64cef7653fe4a1b1b7975f95c249" +dependencies = [ + "async-trait", + "derive_builder", + "elide-core", + "hipstr", + "serde", + "uuid", +] + +[[package]] +name = "elide-ocr" +version = "0.1.0" +source = "git+https://github.com/nvisycom/elide?branch=main#70e6e8bff19a64cef7653fe4a1b1b7975f95c249" +dependencies = [ + "async-trait", + "elide-core", + "uuid", +] + +[[package]] +name = "elide-orchestration" +version = "0.1.0" +source = "git+https://github.com/nvisycom/elide?branch=main#70e6e8bff19a64cef7653fe4a1b1b7975f95c249" +dependencies = [ + "bytes", + "elide-codec", + "elide-core", + "elide-detection", + "elide-redaction", + "erased-serde", + "serde", +] + +[[package]] +name = "elide-pattern" +version = "0.1.0" +source = "git+https://github.com/nvisycom/elide?branch=main#70e6e8bff19a64cef7653fe4a1b1b7975f95c249" +dependencies = [ + "aho-corasick", + "bs58", + "csv", + "derive_builder", + "derive_more", + "elide-context", + "elide-core", + "iban_validate", + "phonenumber", + "regex", + "serde", + "toml", +] + +[[package]] +name = "elide-redaction" +version = "0.1.0" +source = "git+https://github.com/nvisycom/elide?branch=main#70e6e8bff19a64cef7653fe4a1b1b7975f95c249" +dependencies = [ + "bytes", + "elide-core", + "hex", + "hipstr", + "sha2 0.11.0", + "tracing", + "uuid", +] + +[[package]] +name = "elide-stt" +version = "0.1.0" +source = "git+https://github.com/nvisycom/elide?branch=main#70e6e8bff19a64cef7653fe4a1b1b7975f95c249" +dependencies = [ + "async-trait", + "elide-core", + "uuid", +] + +[[package]] +name = "embedded-io" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ef1a6892d9eef45c8fa6b9e0086428a2cca8491aca8f787c534a3d6d0bcb3ced" + +[[package]] +name = "embedded-io" +version = "0.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "edd0f118536f44f5ccd48bcb8b111bdc3de888b58c74639dfb034a357d0f206d" + +[[package]] +name = "encoding_rs" +version = "0.8.35" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "74fef4569247a5f429d9156b9d0a2599914385dd189c539334c625d8099d90ab" +checksum = "75030f3c4f45dafd7586dd6780965a8c7e8e285a5ecb86713e63a79c5b2766f3" dependencies = [ - "futures-core", - "nom 7.1.3", - "pin-project-lite", + "cfg-if", +] + +[[package]] +name = "enum_dispatch" +version = "0.3.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "aa18ce2bc66555b3218614519ac839ddb759a7d6720732f979ef8d13be147ecd" +dependencies = [ + "once_cell", + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "equator" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4711b213838dfee0117e3be6ac926007d7f433d7bbe33595975d4190cb07e6fc" +dependencies = [ + "equator-macro", +] + +[[package]] +name = "equator-macro" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "44f23cf4b44bfce11a86ace86f8a73ffdec849c9fd00a386a53d278bd9e81fb3" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "equivalent" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f" + +[[package]] +name = "erased-serde" +version = "0.4.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d2add8a07dd6a8d93ff627029c51de145e12686fbc36ecb298ac22e74cf02dec" +dependencies = [ + "serde", + "serde_core", + "typeid", +] + +[[package]] +name = "errno" +version = "0.3.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "39cab71617ae0d63f51a36d69f866391735b51691dbda63cf6f96d042b63efeb" +dependencies = [ + "libc", + "windows-sys 0.61.2", ] [[package]] @@ -1378,25 +1359,11 @@ version = "0.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "af9673d8203fcb076b19dfd17e38b3d4ae9f44959416ea532ce72415a6020365" -[[package]] -name = "fake" -version = "5.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ea6be833b323a56361118a747470a45a1bcd5c52a2ec9b1e40c83dafe687e453" -dependencies = [ - "deunicode", - "either", - "rand 0.10.1", -] - [[package]] name = "fastrand" version = "2.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9f1f227452a390804cdb637b74a86990f2a7d7ba4b7d5693aac9b4dd6defd8d6" -dependencies = [ - "getrandom 0.3.4", -] [[package]] name = "fax" @@ -1461,12 +1428,6 @@ version = "1.0.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" -[[package]] -name = "foldhash" -version = "0.1.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d9c4f5dac5e15c24eb999c26181a6ca40b39fe946cbe4c263c7209467bc83af2" - [[package]] name = "form_urlencoded" version = "1.2.2" @@ -1482,12 +1443,6 @@ version = "1.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "42703706b716c37f96a77aea830392ad231f44c9e9a67872fa5548707e11b11c" -[[package]] -name = "fst" -version = "0.4.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7ab85b9b05e3978cc9a9cf8fea7f01b494e1a09ed3037e16ba39edc7a29eb61a" - [[package]] name = "futures" version = "0.3.32" @@ -1559,12 +1514,6 @@ version = "0.3.32" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "037711b3d59c33004d3856fbdc83b99d4ff37a24768fa1be9ce3538a1cde4393" -[[package]] -name = "futures-timer" -version = "3.0.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "af43fadb8a98512d547e37b4e92e0ced13e205c061b87b4623eff01d918d6968" - [[package]] name = "futures-util" version = "0.3.32" @@ -1645,30 +1594,18 @@ dependencies = [ [[package]] name = "getrandom" -version = "0.4.2" +version = "0.4.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0de51e6874e94e7bf76d726fc5d13ba782deca734ff60d5bb2fb2607c7406555" +checksum = "300e883d756b2e4ec94e02791f39b04b522276138852cfc41d9fb7e904106099" dependencies = [ "cfg-if", "js-sys", "libc", "r-efi 6.0.0", "rand_core 0.10.1", - "wasip2", - "wasip3", "wasm-bindgen", ] -[[package]] -name = "ghash" -version = "0.5.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f0d8a4362ccb29cb0b265253fb0a2728f592895ee6854fd9bc13f2ffda266ff1" -dependencies = [ - "opaque-debug", - "polyval", -] - [[package]] name = "gif" version = "0.14.2" @@ -1703,31 +1640,6 @@ version = "0.33.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "898f5a568a84989b6c0f8caa50a93074b97dbdc58fc6d9543157bb4562758933" -[[package]] -name = "glob" -version = "0.3.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0cc23270f6e1808e30a928bdc84dea0b9b4136a8bc82338574f23baf47bbd280" - -[[package]] -name = "h2" -version = "0.4.14" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "171fefbc92fe4a4de27e0698d6a5b392d6a0e333506bc49133760b3bcf948733" -dependencies = [ - "atomic-waker", - "bytes", - "fnv", - "futures-core", - "futures-sink", - "http", - "indexmap", - "slab", - "tokio", - "tokio-util", - "tracing", -] - [[package]] name = "half" version = "2.7.1" @@ -1754,15 +1666,6 @@ version = "0.14.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e5274423e17b7c9fc20b6e7e208532f9b19825d82dfd615708b70edd83df41f1" -[[package]] -name = "hashbrown" -version = "0.15.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9229cfe53dfd69f0609a49f65461bd93001ea1ef889cd5529dd176593f5338a1" -dependencies = [ - "foldhash", -] - [[package]] name = "hashbrown" version = "0.16.1" @@ -1789,12 +1692,6 @@ dependencies = [ "stable_deref_trait", ] -[[package]] -name = "heck" -version = "0.4.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8" - [[package]] name = "heck" version = "0.5.0" @@ -1835,9 +1732,9 @@ dependencies = [ [[package]] name = "http" -version = "1.4.1" +version = "1.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8be7462df143984c4598a256ef469b251d7d7f9e271135073e78fc535414f3d0" +checksum = "6970f50e31d6fc17d3fa27329444bfa74e196cf62e95052a3f6fee181dba6425" dependencies = [ "bytes", "itoa", @@ -1913,7 +1810,6 @@ dependencies = [ "bytes", "futures-channel", "futures-core", - "h2", "http", "http-body", "httparse", @@ -1963,30 +1859,6 @@ dependencies = [ "tracing", ] -[[package]] -name = "iana-time-zone" -version = "0.1.65" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e31bc9ad994ba00e440a8aa5c9ef0ec67d5cb5e5cb0cc7f8b744a35b389cc470" -dependencies = [ - "android_system_properties", - "core-foundation-sys", - "iana-time-zone-haiku", - "js-sys", - "log", - "wasm-bindgen", - "windows-core", -] - -[[package]] -name = "iana-time-zone-haiku" -version = "0.1.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f31827a206f56af32e590ba56d5d2d085f558508192593743f16b2306495269f" -dependencies = [ - "cc", -] - [[package]] name = "iban_validate" version = "5.0.1" @@ -2078,12 +1950,6 @@ dependencies = [ "zerovec", ] -[[package]] -name = "id-arena" -version = "2.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3d3067d79b975e8844ca9eb072e16b31c3c1c36928edf9c6789548c524d0d954" - [[package]] name = "ident_case" version = "1.0.1" @@ -2111,12 +1977,6 @@ dependencies = [ "icu_properties", ] -[[package]] -name = "if_chain" -version = "1.0.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cd62e6b5e86ea8eeeb8db1de02880a6abc01a397b2ebb64b5d74ac255318f5cb" - [[package]] name = "image" version = "0.25.10" @@ -2159,7 +2019,7 @@ checksum = "c7b27bc0867dc40df08deb53d6e96342db6e0702e7ae33ed09a4eba33e594b05" dependencies = [ "ab_glyph", "approx", - "getrandom 0.4.2", + "getrandom 0.4.3", "image", "itertools", "nalgebra", @@ -2172,28 +2032,9 @@ dependencies = [ [[package]] name = "imgref" -version = "1.12.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "40fac9d56ed6437b198fddba683305e8e2d651aa42647f00f5ae542e7f5c94a2" - -[[package]] -name = "include_dir" -version = "0.7.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "923d117408f1e49d914f1a379a309cffe4f18c05cf4e3d12e613a15fc81bd0dd" -dependencies = [ - "include_dir_macros", -] - -[[package]] -name = "include_dir_macros" -version = "0.7.4" +version = "1.12.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7cab85a7ed0bd5f0e76d93846e0147172bed2e2d3f859bcc33a8d9699cad1a75" -dependencies = [ - "proc-macro2", - "quote", -] +checksum = "89194689a993ab15268672e99e7b0e19da2da3268ac682e8f02d29d4d1434cd7" [[package]] name = "indexmap" @@ -2207,34 +2048,6 @@ dependencies = [ "serde_core", ] -[[package]] -name = "indoc" -version = "2.0.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "79cf5c93f93228cf8efb3ba362535fb11199ac548a09ce117c9b1adc3030d706" -dependencies = [ - "rustversion", -] - -[[package]] -name = "infer" -version = "0.19.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a588916bfdfd92e71cacef98a63d9b1f0d74d6599980d11894290e7ddefffcf7" -dependencies = [ - "cfb", -] - -[[package]] -name = "inout" -version = "0.1.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "879f10e63c20629ecabbb64a8010319738c66a5cd0c29b02d63d272b03751d01" -dependencies = [ - "block-padding", - "generic-array", -] - [[package]] name = "interpolate_name" version = "0.2.4" @@ -2284,10 +2097,11 @@ checksum = "8f42a60cbdf9a97f5d2305f08a87dc4e09308d1276d28c869c684d7777685682" [[package]] name = "jiff" -version = "0.2.28" +version = "0.2.29" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4603d3033e49e2b0e31229fcab20a5d40089c607d975cd9c80551dc69eed9102" +checksum = "34f877a98676d2fb664698d74cc6a51ce6c484ce8c770f05d0108ec9090aeb46" dependencies = [ + "defmt", "jiff-static", "jiff-tzdb-platform", "log", @@ -2299,9 +2113,9 @@ dependencies = [ [[package]] name = "jiff-static" -version = "0.2.28" +version = "0.2.29" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "782d32378dddf207193ac91cefb848ad41abb58195c95168e1291227a0832b47" +checksum = "0666b5ab5ecaca213fc2a85b8c0083d9004e84ee2d5f9a7e0017aaf50986f25f" dependencies = [ "proc-macro2", "quote", @@ -2384,13 +2198,12 @@ dependencies = [ [[package]] name = "js-sys" -version = "0.3.99" +version = "0.3.103" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "142bc4740e452c1e57ade0cbc129f139c9093e354346f0872ef985f4f5cf5f11" +checksum = "53b44bfcdb3f8d5837a46dae1ca9660a837176eee74a28b229bc626816589102" dependencies = [ "cfg-if", "futures-util", - "once_cell", "wasm-bindgen", ] @@ -2400,12 +2213,6 @@ version = "1.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe" -[[package]] -name = "leb128fmt" -version = "0.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "09edd9e8b54e49e587e4f6295a7d29c3ea94d469cb40ab8ca70b288248a81db2" - [[package]] name = "lebe" version = "0.5.3" @@ -2428,54 +2235,12 @@ dependencies = [ "cc", ] -[[package]] -name = "libloading" -version = "0.9.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "754ca22de805bb5744484a5b151a9e1a8e837d5dc232c2d7d8c2e3492edc8b60" -dependencies = [ - "cfg-if", - "windows-link", -] - [[package]] name = "libm" version = "0.2.16" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b6d2cec3eae94f9f509c767b45932f1ada8350c4bdb85af2fcab4a3c14807981" -[[package]] -name = "lingua" -version = "1.8.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f40d9129bb9fe42c95d1bd420d6891607eaff17df16ee15674aed2d05b0ec8f4" -dependencies = [ - "counter", - "dashmap", - "fastrand", - "fst", - "include_dir", - "itertools", - "lingua-english-language-model", - "maplit", - "rayon", - "regex", - "serde", - "serde-wasm-bindgen", - "strum 0.27.2", - "strum_macros 0.27.2", - "wasm-bindgen", -] - -[[package]] -name = "lingua-english-language-model" -version = "1.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "97102de08b134a49f1cce05a1b6f5bf08ef21fe858074ae2b794e7892c43dd4b" -dependencies = [ - "include_dir", -] - [[package]] name = "linked-hash-map" version = "0.5.6" @@ -2505,9 +2270,9 @@ dependencies = [ [[package]] name = "log" -version = "0.4.32" +version = "0.4.33" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "953f07c43838f8e6f9758cab68bf5bed85465e7587ebe0b823f1bcd81978ad3a" +checksum = "0ceec5bc11778974d1bcb055b18002eba7f4b3518b6a0081b3af5f21666da9ad" [[package]] name = "loom" @@ -2531,37 +2296,6 @@ dependencies = [ "imgref", ] -[[package]] -name = "lopdf" -version = "0.41.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "67513274c50a2b51e5f75d9e682fcf4ab064a8a9c9ae2c3c59309084882bb24d" -dependencies = [ - "aes", - "bitflags", - "cbc", - "chrono", - "ecb", - "encoding_rs", - "flate2", - "getrandom 0.4.2", - "indexmap", - "itoa", - "jiff", - "log", - "md-5", - "nom 8.0.0", - "rand 0.10.1", - "rangemap", - "rayon", - "sha2 0.10.9", - "stringprep", - "thiserror", - "time", - "ttf-parser", - "weezl", -] - [[package]] name = "lru-cache" version = "0.1.2" @@ -2608,12 +2342,6 @@ dependencies = [ "twox-hash", ] -[[package]] -name = "maplit" -version = "1.0.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3e2e65a1a2e43cfcb47a895c4c8b10d1f4a61097f9f254f183aee60cad9c651d" - [[package]] name = "markup5ever" version = "0.39.0" @@ -2650,12 +2378,6 @@ dependencies = [ "rawpointer", ] -[[package]] -name = "maybe-owned" -version = "0.3.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4facc753ae494aeb6e3c22f839b158aebd4f9270f55cd3c79906c45476c47ab4" - [[package]] name = "maybe-rayon" version = "0.1.1" @@ -2666,27 +2388,11 @@ dependencies = [ "rayon", ] -[[package]] -name = "md-5" -version = "0.10.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d89e7ee0cfbedfc4da3340218492196241d89eefb6dab27de5df917a6d2e78cf" -dependencies = [ - "cfg-if", - "digest 0.10.7", -] - [[package]] name = "memchr" -version = "2.8.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6b947ae49db0d222b1dbc6b113ce7248a3fc3a6ca21b696717bfc000ba4484d8" - -[[package]] -name = "memo-map" -version = "0.3.3" +version = "2.8.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "38d1115007560874e373613744c6fba374c17688327a71c1476d1a5954cc857b" +checksum = "88904434abc2901f197fe8cc55f0445e7ded921dba5911dad2e2b39b48e663c4" [[package]] name = "mime" @@ -2704,16 +2410,6 @@ dependencies = [ "unicase", ] -[[package]] -name = "minijinja" -version = "2.20.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2929e494b2280e1e18959bb2e121da03347ae896896fdfaceaab43c88a02803f" -dependencies = [ - "memo-map", - "serde", -] - [[package]] name = "minimal-lexical" version = "0.2.1" @@ -2807,15 +2503,6 @@ dependencies = [ "typenum", ] -[[package]] -name = "nanoid" -version = "0.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3ffa00dec017b5b1a8b7cf5e2c008bfda1aa7e0697ac1508b491fdf2622fb4d8" -dependencies = [ - "rand 0.8.6", -] - [[package]] name = "new_debug_unreachable" version = "1.0.6" @@ -2862,7 +2549,7 @@ version = "0.50.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7957b9740744892f114936ab4a57b3f487491bbeafaf8083688b16841a4240e5" dependencies = [ - "windows-sys 0.60.2", + "windows-sys 0.61.2", ] [[package]] @@ -2897,12 +2584,6 @@ dependencies = [ "num-traits", ] -[[package]] -name = "num-conv" -version = "0.2.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "521739c6d2bac4aa25192232afe6841231376b2b26d4d9fae5ecf8ca5772e441" - [[package]] name = "num-derive" version = "0.4.2" @@ -2964,7 +2645,7 @@ dependencies = [ "clap", "humantime", "humantime-serde", - "nvisy-engine", + "nvisy-core", "nvisy-server", "serde", "tokio", @@ -2973,62 +2654,23 @@ dependencies = [ "tracing-subscriber", ] -[[package]] -name = "nvisy-codec" -version = "0.1.0" -dependencies = [ - "async-trait", - "bytes", - "csv", - "derive_more", - "ego-tree", - "hex", - "hound", - "image", - "imageproc", - "infer", - "lopdf", - "mp3lame-encoder", - "nvisy-core", - "pdfium-render", - "rayon", - "schemars", - "scraper", - "serde", - "serde_json", - "sha2 0.11.0", - "symphonia", - "tokio", - "tracing", -] - -[[package]] -name = "nvisy-context" -version = "0.1.0" -dependencies = [ - "async-trait", - "hipstr", - "nvisy-core", - "unicode-segmentation", -] - [[package]] name = "nvisy-core" version = "0.1.0" dependencies = [ "async-trait", - "bytes", - "celes", "derive_builder", "derive_more", + "elide", + "elide-core", "hipstr", - "oxilangtag", + "jiff", "schemars", + "semver", "serde", "serde_json", "strum 0.28.0", "thiserror", - "type-map", "uuid", ] @@ -3036,129 +2678,26 @@ dependencies = [ name = "nvisy-engine" version = "0.1.0" dependencies = [ - "aes-gcm", - "anyhow", - "async-trait", - "base64", "bytes", - "derive_builder", "derive_more", + "elide", + "elide-bento", + "elide-core", + "elide-ocr", + "elide-stt", "fjall", "futures", "hipstr", - "humantime-serde", "jiff", - "nvisy-codec", "nvisy-core", - "nvisy-engine", - "nvisy-llm", - "nvisy-ner", - "nvisy-ocr", - "nvisy-pattern", - "nvisy-stt", - "nvisy-toolkit", - "rand 0.10.1", - "schemars", "semver", "serde", "serde_json", - "strum 0.28.0", - "tempfile", - "tokio", - "tokio-util", - "toml", - "tracing", - "uuid", - "validator", -] - -[[package]] -name = "nvisy-fake" -version = "0.1.0" -dependencies = [ - "async-trait", - "fake", - "nvisy-core", - "nvisy-toolkit", - "tokio", - "uuid", -] - -[[package]] -name = "nvisy-llm" -version = "0.1.0" -dependencies = [ - "async-trait", - "base64", - "derive_builder", - "derive_more", - "humantime-serde", - "minijinja", - "nvisy-core", - "reqwest-middleware", - "reqwest-retry", - "reqwest-tracing", - "rig", - "schemars", - "serde", - "serde_json", - "thiserror", - "tokio", - "toml", - "tracing", - "unicode-normalization", -] - -[[package]] -name = "nvisy-ner" -version = "0.1.0" -dependencies = [ - "async-trait", - "bentoml", - "derive_builder", - "lingua", - "nvisy-core", - "serde", - "tokio", - "tracing", - "type-map", - "uuid", -] - -[[package]] -name = "nvisy-ocr" -version = "0.1.0" -dependencies = [ - "async-trait", - "bentoml", - "bytes", - "futures", - "nvisy-core", + "sha2 0.11.0", "tokio", - "tracing", "uuid", ] -[[package]] -name = "nvisy-pattern" -version = "0.1.0" -dependencies = [ - "aho-corasick", - "async-trait", - "bs58", - "csv", - "derive_builder", - "derive_more", - "iban_validate", - "nvisy-context", - "nvisy-core", - "phonenumber", - "regex", - "serde", - "tokio", - "toml", -] - [[package]] name = "nvisy-server" version = "0.1.0" @@ -3166,12 +2705,14 @@ dependencies = [ "aide", "axum", "derive_more", + "elide-core", "futures", "humantime-serde", "jiff", "nvisy-core", "nvisy-engine", "schemars", + "semver", "serde", "tokio", "tower", @@ -3180,45 +2721,6 @@ dependencies = [ "uuid", ] -[[package]] -name = "nvisy-stt" -version = "0.1.0" -dependencies = [ - "async-trait", - "nvisy-core", - "tokio", - "tracing", - "uuid", -] - -[[package]] -name = "nvisy-toolkit" -version = "0.1.0" -dependencies = [ - "aes-gcm", - "async-trait", - "base64", - "nvisy-codec", - "nvisy-context", - "nvisy-core", - "nvisy-fake", - "nvisy-llm", - "nvisy-ner", - "nvisy-ocr", - "nvisy-pattern", - "nvisy-stt", - "regex", - "schemars", - "serde", - "serde_json", - "sha2 0.11.0", - "tokio", - "tracing", - "type-map", - "unicode-normalization", - "uuid", -] - [[package]] name = "once_cell" version = "1.21.4" @@ -3237,27 +2739,12 @@ version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "44d11de466f4a3006fe8a5e7ec84e93b79c70cb992ae0aa0eb631ad2df8abfe2" -[[package]] -name = "opaque-debug" -version = "0.3.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c08d65885ee38876c4f86fa503fb49d7b507c2b62552df7c70b2fce627e06381" - [[package]] name = "openssl-probe" version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7c87def4c32ab89d880effc9e097653c8da5d6ef28e6b539d313baaacfbafcbe" -[[package]] -name = "ordered-float" -version = "5.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b7d950ca161dc355eaf28f82b11345ed76c6e1f6eb1f4f4479e0323b9e2fbd0e" -dependencies = [ - "num-traits", -] - [[package]] name = "owned_ttf_parser" version = "0.25.1" @@ -3311,32 +2798,6 @@ version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "35fb2e5f958ec131621fdd531e9fc186ed768cbe395337403ae56c17a74c68ec" -[[package]] -name = "pdfium-render" -version = "0.9.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e06f0df3ca17554c1b8f31eb17bc77eedbbafa120d35b90b3096fa46b2fdc94c" -dependencies = [ - "bitflags", - "bytemuck", - "bytes", - "chrono", - "console_error_panic_hook", - "console_log", - "image", - "itertools", - "js-sys", - "libloading", - "log", - "maybe-owned", - "once_cell", - "utf16string", - "vecmath", - "wasm-bindgen", - "wasm-bindgen-futures", - "web-sys", -] - [[package]] name = "percent-encoding" version = "2.3.2" @@ -3407,7 +2868,7 @@ dependencies = [ "nom 7.1.3", "once_cell", "postcard", - "quick-xml", + "quick-xml 0.38.4", "regex", "regex-cache", "serde", @@ -3416,26 +2877,6 @@ dependencies = [ "thiserror", ] -[[package]] -name = "pin-project" -version = "1.1.13" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2466b2336ed02bcdca6b294417127b90ec92038d1d5c4fbeac971a922e0e0924" -dependencies = [ - "pin-project-internal", -] - -[[package]] -name = "pin-project-internal" -version = "1.1.13" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c96395f0a926bc13b1c17622aaddda1ecb55d49c8f1bf9777e4d877800a43f8b" -dependencies = [ - "proc-macro2", - "quote", - "syn", -] - [[package]] name = "pin-project-lite" version = "0.2.17" @@ -3448,12 +2889,6 @@ version = "0.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" -[[package]] -name = "piston-float" -version = "1.0.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ad78bf43dcf80e8f950c92b84f938a0fc7590b7f6866fbcbeca781609c115590" - [[package]] name = "pkg-config" version = "0.3.33" @@ -3466,25 +2901,13 @@ version = "0.18.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "60769b8b31b2a9f263dae2776c37b1b28ae246943cf719eb6946a1db05128a61" dependencies = [ - "bitflags", + "bitflags 2.13.0", "crc32fast", "fdeflate", "flate2", "miniz_oxide", ] -[[package]] -name = "polyval" -version = "0.6.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9d1fe60d06143b2430aa532c94cfe9e29783047f06c0d7fd359a9a51b729fa25" -dependencies = [ - "cfg-if", - "cpufeatures 0.2.17", - "opaque-debug", - "universal-hash", -] - [[package]] name = "portable-atomic" version = "1.13.1" @@ -3522,12 +2945,6 @@ dependencies = [ "zerovec", ] -[[package]] -name = "powerfmt" -version = "0.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "439ee305def115ba05938db6eb1644ff94165c5ab5e9420d1c1bcedbba909391" - [[package]] name = "ppv-lite86" version = "0.2.21" @@ -3543,16 +2960,6 @@ version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "925383efa346730478fb4838dbe9137d2a47675ad789c546d150a6e1dd4ab31c" -[[package]] -name = "prettyplease" -version = "0.2.37" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "479ca8adacdd7ce8f1fb39ce9ecccbfe93a3f1344b3d0d97f20bc0196208f62b" -dependencies = [ - "proc-macro2", - "syn", -] - [[package]] name = "primal-check" version = "0.3.4" @@ -3562,25 +2969,6 @@ dependencies = [ "num-integer", ] -[[package]] -name = "proc-macro-crate" -version = "1.3.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7f4c021e1093a56626774e81216a4ce732a735e5bad4868a03f3ed65ca0c3919" -dependencies = [ - "once_cell", - "toml_edit 0.19.15", -] - -[[package]] -name = "proc-macro-crate" -version = "3.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e67ba7e9b2b56446f1d419b1d807906278ffa1a658a8a5d8a39dcb1f5a78614f" -dependencies = [ - "toml_edit 0.25.12+spec-1.1.0", -] - [[package]] name = "proc-macro-error-attr2" version = "2.0.0" @@ -3661,11 +3049,20 @@ dependencies = [ "memchr", ] +[[package]] +name = "quick-xml" +version = "0.40.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2474bd2e5029e7ccb6abb2ba48cf2383a333851dedf495901544281590c7da7f" +dependencies = [ + "memchr", +] + [[package]] name = "quick_cache" -version = "0.6.23" +version = "0.6.24" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3a3db184a8b66cfe87f0263a1de147a6b554c864d1767c6f7fa4eb0e5497b565" +checksum = "b9c6658afe513a3b484e3abfdaa0d03ef3c0bbf017542c178dd55f94eb3051f9" dependencies = [ "equivalent", "hashbrown 0.16.1", @@ -3673,9 +3070,9 @@ dependencies = [ [[package]] name = "quinn" -version = "0.11.9" +version = "0.11.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b9e20a958963c291dc322d98411f541009df2ced7b5a4f2bd52337638cfccf20" +checksum = "0c1a41e437b6bbd489372cd4971de128e85c855f56c57f283d20ff016cf7c0a8" dependencies = [ "bytes", "cfg_aliases", @@ -3693,9 +3090,9 @@ dependencies = [ [[package]] name = "quinn-proto" -version = "0.11.14" +version = "0.11.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "434b42fec591c96ef50e21e886936e66d3cc3f737104fdb9b737c40ffb94c098" +checksum = "4fcb935c5bec503c2f0e306bdd3e58bb9029dcb14fa8d9ac76e3a5256ac0763e" dependencies = [ "aws-lc-rs", "bytes", @@ -3729,9 +3126,9 @@ dependencies = [ [[package]] name = "quote" -version = "1.0.45" +version = "1.0.46" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "41f2619966050689382d2b44f664f4bc593e129785a36d6ee376ddf37259b924" +checksum = "dfbc457d0c7a0759a614551b11a6409e5951f6c7537be1f1b7682b9ae9230368" dependencies = [ "proc-macro2", ] @@ -3748,24 +3145,13 @@ version = "6.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f8dcc9c7d52a811697d2151c701e0d08956f92b0e24136cf4cf27b57a6a0d9bf" -[[package]] -name = "rand" -version = "0.8.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5ca0ecfa931c29007047d1bc58e623ab12e5590e8c7cc53200d5202b69266d8a" -dependencies = [ - "libc", - "rand_chacha 0.3.1", - "rand_core 0.6.4", -] - [[package]] name = "rand" version = "0.9.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "44c5af06bb1b7d3216d91932aed5265164bf384dc89cd6ba05cf59a35f5f76ea" dependencies = [ - "rand_chacha 0.9.0", + "rand_chacha", "rand_core 0.9.5", ] @@ -3776,20 +3162,10 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d2e8e8bcc7961af1fdac401278c6a831614941f6164ee3bf4ce61b7edb162207" dependencies = [ "chacha20", - "getrandom 0.4.2", + "getrandom 0.4.3", "rand_core 0.10.1", ] -[[package]] -name = "rand_chacha" -version = "0.3.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88" -dependencies = [ - "ppv-lite86", - "rand_core 0.6.4", -] - [[package]] name = "rand_chacha" version = "0.9.0" @@ -3800,15 +3176,6 @@ dependencies = [ "rand_core 0.9.5", ] -[[package]] -name = "rand_core" -version = "0.6.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c" -dependencies = [ - "getrandom 0.2.17", -] - [[package]] name = "rand_core" version = "0.9.5" @@ -3834,12 +3201,6 @@ dependencies = [ "rand 0.10.1", ] -[[package]] -name = "rangemap" -version = "1.7.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "973443cf09a9c8656b574a866ab68dfa19f0867d0340648c7d2f6a71b8a8ea68" - [[package]] name = "rav1e" version = "0.8.1" @@ -3868,7 +3229,7 @@ dependencies = [ "paste", "profiling", "rand 0.9.4", - "rand_chacha 0.9.0", + "rand_chacha", "simd_helpers", "thiserror", "v_frame", @@ -3922,7 +3283,7 @@ version = "0.5.18" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ed2bf2547551a7053d6fdfafda3f938979645c44812fbfcda098faae3f1a362d" dependencies = [ - "bitflags", + "bitflags 2.13.0", ] [[package]] @@ -4006,10 +3367,8 @@ checksum = "219c5811de6525e5416c7d5d53bb656d3afdbc6c5af816e0802bcfa42dbdc1c3" dependencies = [ "base64", "bytes", - "encoding_rs", "futures-core", "futures-util", - "h2", "http", "http-body", "http-body-util", @@ -4018,7 +3377,6 @@ dependencies = [ "hyper-util", "js-sys", "log", - "mime", "mime_guess", "percent-encoding", "pin-project-lite", @@ -4031,14 +3389,12 @@ dependencies = [ "sync_wrapper", "tokio", "tokio-rustls", - "tokio-util", "tower", "tower-http", "tower-service", "url", "wasm-bindgen", "wasm-bindgen-futures", - "wasm-streams", "web-sys", ] @@ -4078,22 +3434,6 @@ dependencies = [ "wasmtimer", ] -[[package]] -name = "reqwest-tracing" -version = "0.7.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b5e5af0cd6fc3d3c8f703d597af70b6e4e62432c63157b49419fa1ffaf481702" -dependencies = [ - "anyhow", - "async-trait", - "getrandom 0.2.17", - "http", - "matchit", - "reqwest", - "reqwest-middleware", - "tracing", -] - [[package]] name = "retry-policies" version = "0.5.2" @@ -4109,66 +3449,6 @@ version = "0.8.53" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "47b34b781b31e5d73e9fbc8689c70551fd1ade9a19e3e28cfec8580a79290cc4" -[[package]] -name = "rig" -version = "0.38.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "95947817936a8a51866a8b7d1599b721cd7d1567dd49257dfaf55e13d59110bf" -dependencies = [ - "rig-core", -] - -[[package]] -name = "rig-core" -version = "0.38.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "557f11c26c2c2ea61d9cb843ce5adff138cc5785f58ff4b87d779de8acaa32ae" -dependencies = [ - "as-any", - "async-stream", - "base64", - "bytes", - "eventsource-stream", - "fastrand", - "futures", - "futures-timer", - "glob", - "http", - "mime", - "mime_guess", - "nanoid", - "ordered-float", - "pin-project-lite", - "reqwest", - "reqwest-middleware", - "rig-derive", - "schemars", - "serde", - "serde_json", - "thiserror", - "tokio", - "tokio-tungstenite", - "tracing", - "tracing-futures", - "url", -] - -[[package]] -name = "rig-derive" -version = "0.38.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "643eb495acbd4bd5976164cd068f186d534f741def8d5d052f860266b98d07f8" -dependencies = [ - "convert_case 0.11.0", - "deluxe", - "indoc", - "proc-macro-crate 3.5.0", - "proc-macro2", - "quote", - "serde_json", - "syn", -] - [[package]] name = "ring" version = "0.17.14" @@ -4227,18 +3507,18 @@ version = "1.1.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b6fe4565b9518b83ef4f91bb47ce29620ca828bd32cb7e408f0062e9930ba190" dependencies = [ - "bitflags", + "bitflags 2.13.0", "errno", "libc", "linux-raw-sys", - "windows-sys 0.52.0", + "windows-sys 0.61.2", ] [[package]] name = "rustls" -version = "0.23.40" +version = "0.23.41" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ef86cd5876211988985292b91c96a8f2d298df24e75989a43a3c73f2d4d8168b" +checksum = "6b92b125634d9b795e7beca796cc790df15a7fb38323bf3196fda83292d06b1f" dependencies = [ "aws-lc-rs", "once_cell", @@ -4288,7 +3568,7 @@ dependencies = [ "security-framework", "security-framework-sys", "webpki-root-certs", - "windows-sys 0.52.0", + "windows-sys 0.61.2", ] [[package]] @@ -4409,7 +3689,7 @@ version = "3.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b7f4bc775c73d9a02cde8bf7b2ec4c9d12743edf609006c7facc23998404cd1d" dependencies = [ - "bitflags", + "bitflags 2.13.0", "core-foundation", "core-foundation-sys", "libc", @@ -4432,7 +3712,7 @@ version = "0.38.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8adfa1c298912827b8a28b223b3b874357397ae706e6190acd9bf28cee99114d" dependencies = [ - "bitflags", + "bitflags 2.13.0", "cssparser", "derive_more", "log", @@ -4471,17 +3751,6 @@ dependencies = [ "serde_derive", ] -[[package]] -name = "serde-wasm-bindgen" -version = "0.6.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8302e169f0eddcc139c70f139d19d6467353af16f9fce27e8c30158036a1e16b" -dependencies = [ - "js-sys", - "serde", - "wasm-bindgen", -] - [[package]] name = "serde_core" version = "1.0.228" @@ -4591,17 +3860,6 @@ dependencies = [ "xxhash-rust", ] -[[package]] -name = "sha1" -version = "0.10.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e3bf829a2d51ab4a5ddf1352d8470c140cadc8301b2ae1789db023f01cedd6ba" -dependencies = [ - "cfg-if", - "cpufeatures 0.2.17", - "digest 0.10.7", -] - [[package]] name = "sha2" version = "0.10.9" @@ -4706,9 +3964,9 @@ checksum = "0c790de23124f9ab44544d7ac05d60440adc586479ce501c1d6d7da3cd8c9cf5" [[package]] name = "smallvec" -version = "1.15.1" +version = "1.15.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "67b1b7a3b5fe4f1376887184045fcf45c69e92af734b7aaddc05fb777b6fbd03" +checksum = "8ed6a63f02c8539c91a8685a86f4099661ba3da017932f6ebbea6de3f0fa7c90" [[package]] name = "socket2" @@ -4717,7 +3975,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "52d1cfed4120b4d927bf7c0f86d2087a4a7d6027c906d9f9d525a80573b9be51" dependencies = [ "libc", - "windows-sys 0.60.2", + "windows-sys 0.61.2", ] [[package]] @@ -4765,23 +4023,6 @@ dependencies = [ "quote", ] -[[package]] -name = "stringprep" -version = "0.1.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7b4df3d392d81bd458a8a621b8bffbd2302a12ffe288a9d931670948749463b1" -dependencies = [ - "unicode-bidi", - "unicode-normalization", - "unicode-properties", -] - -[[package]] -name = "strsim" -version = "0.10.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "73473c0e59e6d5812c5dfe2a064a6444949f089e20eec9a2e5506596494e4623" - [[package]] name = "strsim" version = "0.11.1" @@ -4812,7 +4053,7 @@ version = "0.27.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7695ce3845ea4b33927c055a39dc438a45b059f7c1b3d91d38d10355fb8cbca7" dependencies = [ - "heck 0.5.0", + "heck", "proc-macro2", "quote", "syn", @@ -4824,7 +4065,7 @@ version = "0.28.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ab85eea0270ee17587ed4156089e10b9e6880ee688791d45a905f5b1ca36f664" dependencies = [ - "heck 0.5.0", + "heck", "proc-macro2", "quote", "syn", @@ -4877,7 +4118,7 @@ version = "0.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "95ec293b5f288383b72a7bffcade6b2860b642cf66f28b3bd5967349a49938b1" dependencies = [ - "bitflags", + "bitflags 2.13.0", "bytemuck", "lazy_static", "log", @@ -4912,9 +4153,9 @@ dependencies = [ [[package]] name = "syn" -version = "2.0.117" +version = "2.0.118" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e665b8803e7b1d2a727f4023456bbbbe74da67099c585258af0ad9c5013b9b99" +checksum = "1b9ae57f904213ebb649ce6895b8a66c66f0203b9319718f69a5612a065b1422" dependencies = [ "proc-macro2", "quote", @@ -4948,10 +4189,10 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "32497e9a4c7b38532efcdebeef879707aa9f794296a4f0244f6f69e9bc8574bd" dependencies = [ "fastrand", - "getrandom 0.4.2", + "getrandom 0.4.3", "once_cell", "rustix", - "windows-sys 0.52.0", + "windows-sys 0.61.2", ] [[package]] @@ -5004,38 +4245,7 @@ dependencies = [ "half", "quick-error", "weezl", - "zune-jpeg", -] - -[[package]] -name = "time" -version = "0.3.47" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "743bd48c283afc0388f9b8827b976905fb217ad9e647fae3a379a9283c4def2c" -dependencies = [ - "deranged", - "itoa", - "num-conv", - "powerfmt", - "serde_core", - "time-core", - "time-macros", -] - -[[package]] -name = "time-core" -version = "0.1.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7694e1cfe791f8d31026952abf09c69ca6f6fa4e1a1229e18988f06a04a12dca" - -[[package]] -name = "time-macros" -version = "0.2.27" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2e70e4c5a0e0a8a4823ad65dfe1a6930e4f4d756dcd9dd7939022b5e8c501215" -dependencies = [ - "num-conv", - "time-core", + "zune-jpeg", ] [[package]] @@ -5100,22 +4310,6 @@ dependencies = [ "tokio", ] -[[package]] -name = "tokio-tungstenite" -version = "0.28.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d25a406cddcc431a75d3d9afc6a7c0f7428d4891dd973e4d54c56b46127bf857" -dependencies = [ - "futures-util", - "log", - "rustls", - "rustls-pki-types", - "tokio", - "tokio-rustls", - "tungstenite", - "webpki-roots 0.26.11", -] - [[package]] name = "tokio-util" version = "0.7.18" @@ -5138,18 +4332,12 @@ dependencies = [ "indexmap", "serde_core", "serde_spanned", - "toml_datetime 1.1.1+spec-1.1.0", + "toml_datetime", "toml_parser", "toml_writer", - "winnow 1.0.3", + "winnow", ] -[[package]] -name = "toml_datetime" -version = "0.6.11" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "22cddaf88f4fbc13c51aebbf5f8eceb5c7c5a9da2ac40a13519eb5b0a0e8f11c" - [[package]] name = "toml_datetime" version = "1.1.1+spec-1.1.0" @@ -5159,36 +4347,13 @@ dependencies = [ "serde_core", ] -[[package]] -name = "toml_edit" -version = "0.19.15" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1b5bb770da30e5cbfde35a2d7b9b8a2c4b8ef89548a7a6aeab5c9a576e3e7421" -dependencies = [ - "indexmap", - "toml_datetime 0.6.11", - "winnow 0.5.40", -] - -[[package]] -name = "toml_edit" -version = "0.25.12+spec-1.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d2153edc6955a6c354fad8f5efd38b6a8769bdccf9fe50f8e1329f81b0baa5d7" -dependencies = [ - "indexmap", - "toml_datetime 1.1.1+spec-1.1.0", - "toml_parser", - "winnow 1.0.3", -] - [[package]] name = "toml_parser" version = "1.1.2+spec-1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a2abe9b86193656635d2411dc43050282ca48aa31c2451210f4202550afb7526" dependencies = [ - "winnow 1.0.3", + "winnow", ] [[package]] @@ -5220,7 +4385,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4cfcf7e2740e6fc6d4d688b4ef00650406bb94adf4731e43c096c3a19fe40840" dependencies = [ "async-compression", - "bitflags", + "bitflags 2.13.0", "bytes", "futures-core", "futures-util", @@ -5283,18 +4448,6 @@ dependencies = [ "valuable", ] -[[package]] -name = "tracing-futures" -version = "0.2.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "97d095ae15e245a057c8e8451bab9b3ee1e1f68e9ba2b4fbc18d0ac5237835f2" -dependencies = [ - "futures", - "futures-task", - "pin-project", - "tracing", -] - [[package]] name = "tracing-log" version = "0.2.0" @@ -5359,25 +4512,6 @@ version = "0.25.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d2df906b07856748fa3f6e0ad0cbaa047052d4a7dd609e231c4f72cee8c36f31" -[[package]] -name = "tungstenite" -version = "0.28.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8628dcc84e5a09eb3d8423d6cb682965dea9133204e8fb3efee74c2a0c259442" -dependencies = [ - "bytes", - "data-encoding", - "http", - "httparse", - "log", - "rand 0.9.4", - "rustls", - "rustls-pki-types", - "sha1", - "thiserror", - "utf-8", -] - [[package]] name = "twox-hash" version = "2.1.2" @@ -5393,6 +4527,12 @@ dependencies = [ "rustc-hash", ] +[[package]] +name = "typeid" +version = "1.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bc7d623258602320d5c55d1bc22793b57daff0ec7efc270ea7d55ce1d5f5471c" + [[package]] name = "typenum" version = "1.20.1" @@ -5405,12 +4545,6 @@ version = "2.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "dbc4bc3a9f746d862c45cb89d705aa10f187bb96c76001afab07a0d35ce60142" -[[package]] -name = "unicode-bidi" -version = "0.3.18" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5c1cb5db39152898a79168971543b1cb5020dff7fe43c8dc468b0885f5e29df5" - [[package]] name = "unicode-ident" version = "1.0.24" @@ -5426,12 +4560,6 @@ dependencies = [ "tinyvec", ] -[[package]] -name = "unicode-properties" -version = "0.1.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7df058c713841ad818f1dc5d3fd88063241cc61f49f5fbea4b951e8cf5a8d71d" - [[package]] name = "unicode-segmentation" version = "1.13.3" @@ -5450,16 +4578,6 @@ version = "0.2.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ebc1c04c71510c7f702b52b7c350734c9ff1295c464a03335b00bb84fc54f853" -[[package]] -name = "universal-hash" -version = "0.5.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fc1de2c688dc15305988b563c3854064043356019f97a4b46276fe734c4f07ea" -dependencies = [ - "crypto-common 0.1.7", - "subtle", -] - [[package]] name = "untrusted" version = "0.9.0" @@ -5484,15 +4602,6 @@ version = "0.7.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "09cc8ee72d2a9becf2f2febe0205bbed8fc6615b7cb429ad062dc7b7ddd036a9" -[[package]] -name = "utf16string" -version = "0.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0b62a1e85e12d5d712bf47a85f426b73d303e2d00a90de5f3004df3596e9d216" -dependencies = [ - "byteorder", -] - [[package]] name = "utf8_iter" version = "1.0.4" @@ -5507,11 +4616,11 @@ checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821" [[package]] name = "uuid" -version = "1.23.3" +version = "1.23.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "144d6b123cef80b301b8f72a9e2ca4370ddec21950d0a103dd22c437006d2db7" +checksum = "bf80a72845275afea99e7f2b434723d3bc7e38470fcd1c7ed39a599c73319a53" dependencies = [ - "getrandom 0.4.2", + "getrandom 0.4.3", "js-sys", "serde_core", "wasm-bindgen", @@ -5528,36 +4637,6 @@ dependencies = [ "wasm-bindgen", ] -[[package]] -name = "validator" -version = "0.20.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "43fb22e1a008ece370ce08a3e9e4447a910e92621bb49b85d6e48a45397e7cfa" -dependencies = [ - "idna", - "once_cell", - "regex", - "serde", - "serde_derive", - "serde_json", - "url", - "validator_derive", -] - -[[package]] -name = "validator_derive" -version = "0.20.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b7df16e474ef958526d1205f6dda359fdfab79d9aa6d54bafcb92dcd07673dca" -dependencies = [ - "darling 0.20.11", - "once_cell", - "proc-macro-error2", - "proc-macro2", - "quote", - "syn", -] - [[package]] name = "valuable" version = "0.1.1" @@ -5566,18 +4645,9 @@ checksum = "ba73ea9cf16a25df0c8caa16c51acb937d5712a8429db78a3ee29d5dcacd3a65" [[package]] name = "varint-rs" -version = "2.2.0" +version = "2.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8f54a172d0620933a27a4360d3db3e2ae0dd6cceae9730751a036bbf182c4b23" - -[[package]] -name = "vecmath" -version = "1.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "956ae1e0d85bca567dee1dcf87fb1ca2e792792f66f87dced8381f99cd91156a" -dependencies = [ - "piston-float", -] +checksum = "bfa6c38708f6257f1ec2ca7e5a11f9bbf58a27d7060078b6b333624968183d96" [[package]] name = "version_check" @@ -5612,27 +4682,18 @@ checksum = "ccf3ec651a847eb01de73ccad15eb7d99f80485de043efb2f370cd654f4ea44b" [[package]] name = "wasip2" -version = "1.0.3+wasi-0.2.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "20064672db26d7cdc89c7798c48a0fdfac8213434a1186e5ef29fd560ae223d6" -dependencies = [ - "wit-bindgen 0.57.1", -] - -[[package]] -name = "wasip3" -version = "0.4.0+wasi-0.3.0-rc-2026-01-06" +version = "1.0.4+wasi-0.2.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5428f8bf88ea5ddc08faddef2ac4a67e390b88186c703ce6dbd955e1c145aca5" +checksum = "b67efb37e106e55ce722a510d6b5f9c17f083e5fc79afc2badeb12cc313d9487" dependencies = [ - "wit-bindgen 0.51.0", + "wit-bindgen", ] [[package]] name = "wasm-bindgen" -version = "0.2.122" +version = "0.2.126" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3ed04576f974d2b2fba0f38c51dbc5518011e38c36bf1143164be765528fd409" +checksum = "4b067c0c11094aef6b7a801c1e34a26affafdf3d051dba08456b868789aaf9a4" dependencies = [ "cfg-if", "once_cell", @@ -5643,9 +4704,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-futures" -version = "0.4.72" +version = "0.4.76" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9473dbd2991ae90b6291c3c32c30c6187ac49aa32f9905d1cce280ec1e110b0f" +checksum = "c62df1340f32221cb9c54d6a27b030e3dba64361d4a95bed55f9aacb44da291d" dependencies = [ "js-sys", "wasm-bindgen", @@ -5653,9 +4714,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro" -version = "0.2.122" +version = "0.2.126" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "916151b09da36bd82f6615cbf3a419e2f0ba23a03c6160e8e92eb6bd4aa1dec6" +checksum = "167ce5e579f6bcf889c4f7175a8a5a585de84e8ff93976ce393efa5f2837aab1" dependencies = [ "quote", "wasm-bindgen-macro-support", @@ -5663,9 +4724,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro-support" -version = "0.2.122" +version = "0.2.126" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "299047362ccbfce148b67ab7e73349f77748e00c8296f9542adfad2ad82c5c5e" +checksum = "f3997c7839262f4ef12cf90b818d6340c18e80f263f1a94bf157d0ec4420380e" dependencies = [ "bumpalo", "proc-macro2", @@ -5676,60 +4737,13 @@ dependencies = [ [[package]] name = "wasm-bindgen-shared" -version = "0.2.122" +version = "0.2.126" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9a929b2c61f11ba3e9bc35b50c1f25cb38e0e892c0c231ae2b8cf78d5dad4437" +checksum = "dc1b4cb0cc549fcf58d7dfc081778139b3d283a081644e833e84682ad71cea24" dependencies = [ "unicode-ident", ] -[[package]] -name = "wasm-encoder" -version = "0.244.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "990065f2fe63003fe337b932cfb5e3b80e0b4d0f5ff650e6985b1048f62c8319" -dependencies = [ - "leb128fmt", - "wasmparser", -] - -[[package]] -name = "wasm-metadata" -version = "0.244.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bb0e353e6a2fbdc176932bbaab493762eb1255a7900fe0fea1a2f96c296cc909" -dependencies = [ - "anyhow", - "indexmap", - "wasm-encoder", - "wasmparser", -] - -[[package]] -name = "wasm-streams" -version = "0.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9d1ec4f6517c9e11ae630e200b2b65d193279042e28edd4a2cda233e46670bbb" -dependencies = [ - "futures-util", - "js-sys", - "wasm-bindgen", - "wasm-bindgen-futures", - "web-sys", -] - -[[package]] -name = "wasmparser" -version = "0.244.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "47b807c72e1bac69382b3a6fb3dbe8ea4c0ed87ff5629b8685ae6b9a611028fe" -dependencies = [ - "bitflags", - "hashbrown 0.15.5", - "indexmap", - "semver", -] - [[package]] name = "wasmtimer" version = "0.4.3" @@ -5746,9 +4760,9 @@ dependencies = [ [[package]] name = "web-sys" -version = "0.3.99" +version = "0.3.103" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6d621441cfc37b84979402712047321980c178f299193a3589d05b99e8763436" +checksum = "8622dcb61c0bcc9fffa6938bed81210af2da9a7e4a1a834b2e37a59b6dfb6141" dependencies = [ "js-sys", "wasm-bindgen", @@ -5766,9 +4780,9 @@ dependencies = [ [[package]] name = "web_atoms" -version = "0.2.4" +version = "0.2.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d7cff6eef815df1834fd250e3a2ff436044d82a9f1bc1980ca1dbdf07effc538" +checksum = "075474b12bcb3d2e3d4546580e9de478eeeead668a1761e2a8860c836b7ef297" dependencies = [ "phf", "phf_codegen", @@ -5778,27 +4792,9 @@ dependencies = [ [[package]] name = "webpki-root-certs" -version = "1.0.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f31141ce3fc3e300ae89b78c0dd67f9708061d1d2eda54b8209346fd6be9a92c" -dependencies = [ - "rustls-pki-types", -] - -[[package]] -name = "webpki-roots" -version = "0.26.11" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "521bc38abb08001b01866da9f51eb7c5d647a19260e00054a8c7fd5f9e57f7a9" -dependencies = [ - "webpki-roots 1.0.7", -] - -[[package]] -name = "webpki-roots" -version = "1.0.7" +version = "1.0.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "52f5ee44c96cf55f1b349600768e3ece3a8f26010c05265ab73f945bb1a2eb9d" +checksum = "0d46a5a140e6f7afeccd8eae97eff335163939eac8b929834875168b29b3d267" dependencies = [ "rustls-pki-types", ] @@ -5825,42 +4821,7 @@ version = "0.1.11" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c2a7b1c03c876122aa43f3020e6c3c3ee5c05081c9a00739faf7503aeba10d22" dependencies = [ - "windows-sys 0.52.0", -] - -[[package]] -name = "windows-core" -version = "0.62.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b8e83a14d34d0623b51dce9581199302a221863196a1dde71a7663a4c2be9deb" -dependencies = [ - "windows-implement", - "windows-interface", - "windows-link", - "windows-result", - "windows-strings", -] - -[[package]] -name = "windows-implement" -version = "0.60.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "053e2e040ab57b9dc951b72c264860db7eb3b0200ba345b4e4c3b14f67855ddf" -dependencies = [ - "proc-macro2", - "quote", - "syn", -] - -[[package]] -name = "windows-interface" -version = "0.59.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3f316c4a2570ba26bbec722032c4099d8c8bc095efccdc15688708623367e358" -dependencies = [ - "proc-macro2", - "quote", - "syn", + "windows-sys 0.61.2", ] [[package]] @@ -5878,15 +4839,6 @@ dependencies = [ "windows-link", ] -[[package]] -name = "windows-strings" -version = "0.5.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7837d08f69c77cf6b07689544538e017c1bfcf57e34b4c0ff58e6c2cd3b37091" -dependencies = [ - "windows-link", -] - [[package]] name = "windows-sys" version = "0.52.0" @@ -6043,32 +4995,11 @@ version = "0.53.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d6bbff5f0aada427a1e5a6da5f1f98158182f26556f345ac9e04d36d0ebed650" -[[package]] -name = "winnow" -version = "0.5.40" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f593a95398737aeed53e489c785df13f3618e41dbcd6718c6addbf1395aa6876" -dependencies = [ - "memchr", -] - [[package]] name = "winnow" version = "1.0.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0592e1c9d151f854e6fd382574c3a0855250e1d9b2f99d9281c6e6391af352f1" -dependencies = [ - "memchr", -] - -[[package]] -name = "wit-bindgen" -version = "0.51.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d7249219f66ced02969388cf2bb044a09756a083d0fab1e566056b04d9fbcaa5" -dependencies = [ - "wit-bindgen-rust-macro", -] [[package]] name = "wit-bindgen" @@ -6076,85 +5007,6 @@ version = "0.57.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1ebf944e87a7c253233ad6766e082e3cd714b5d03812acc24c318f549614536e" -[[package]] -name = "wit-bindgen-core" -version = "0.51.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ea61de684c3ea68cb082b7a88508a8b27fcc8b797d738bfc99a82facf1d752dc" -dependencies = [ - "anyhow", - "heck 0.5.0", - "wit-parser", -] - -[[package]] -name = "wit-bindgen-rust" -version = "0.51.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b7c566e0f4b284dd6561c786d9cb0142da491f46a9fbed79ea69cdad5db17f21" -dependencies = [ - "anyhow", - "heck 0.5.0", - "indexmap", - "prettyplease", - "syn", - "wasm-metadata", - "wit-bindgen-core", - "wit-component", -] - -[[package]] -name = "wit-bindgen-rust-macro" -version = "0.51.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0c0f9bfd77e6a48eccf51359e3ae77140a7f50b1e2ebfe62422d8afdaffab17a" -dependencies = [ - "anyhow", - "prettyplease", - "proc-macro2", - "quote", - "syn", - "wit-bindgen-core", - "wit-bindgen-rust", -] - -[[package]] -name = "wit-component" -version = "0.244.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9d66ea20e9553b30172b5e831994e35fbde2d165325bec84fc43dbf6f4eb9cb2" -dependencies = [ - "anyhow", - "bitflags", - "indexmap", - "log", - "serde", - "serde_derive", - "serde_json", - "wasm-encoder", - "wasm-metadata", - "wasmparser", - "wit-parser", -] - -[[package]] -name = "wit-parser" -version = "0.244.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ecc8ac4bc1dc3381b7f59c34f00b67e18f910c2c0f50015669dde7def656a736" -dependencies = [ - "anyhow", - "id-arena", - "indexmap", - "log", - "semver", - "serde", - "serde_derive", - "serde_json", - "unicode-xid", - "wasmparser", -] - [[package]] name = "writeable" version = "0.6.3" @@ -6198,18 +5050,18 @@ dependencies = [ [[package]] name = "zerocopy" -version = "0.8.50" +version = "0.8.52" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3b065d4f0e55f82fae73202e189638116a87c55ab6b8e6c2721e13dd9d854ad1" +checksum = "ce1022995ff5ff5d841ad7d994facc23098cd40152f2c1d11cd607c6f530653f" dependencies = [ "zerocopy-derive", ] [[package]] name = "zerocopy-derive" -version = "0.8.50" +version = "0.8.52" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0b631b19d36a892ab55420c92dbc83ccd79274f25be714855d3074aa71cab639" +checksum = "1ae7f38b72ec2a254e2b87ef277cf2cd4fb97cbebf944faa6f33354da0867930" dependencies = [ "proc-macro2", "quote", @@ -6239,9 +5091,9 @@ dependencies = [ [[package]] name = "zeroize" -version = "1.8.2" +version = "1.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b97154e67e32c85465826e8bcc1c59429aaaf107c1e4a9e53c8d8ccd5eff88d0" +checksum = "e13c156562582aa81c60cb29407084cdb54c4164760106ab78e6c5b0858cf64e" [[package]] name = "zerotrie" diff --git a/Cargo.toml b/Cargo.toml index 8a6555cfc..2b0bd49e4 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -3,19 +3,11 @@ [workspace] resolver = "3" members = [ + "./crates/elide-bento", "./crates/nvisy-cli", - "./crates/nvisy-codec", - "./crates/nvisy-context", "./crates/nvisy-core", "./crates/nvisy-engine", - "./crates/nvisy-fake", - "./crates/nvisy-llm", - "./crates/nvisy-ner", - "./crates/nvisy-ocr", - "./crates/nvisy-pattern", "./crates/nvisy-server", - "./crates/nvisy-stt", - "./crates/nvisy-toolkit", ] [workspace.package] @@ -36,19 +28,27 @@ documentation = "https://docs.rs/nvisy-runtime" # # See for more details: https://github.com/rust-lang/cargo/issues/11329 +# Elide toolkit (upstream) +elide = { git = "https://github.com/nvisycom/elide", branch = "main", default-features = false } +elide-core = { git = "https://github.com/nvisycom/elide", branch = "main" } +# `elide-ner` + `elide-ocr` are pulled directly only by `elide-bento`, which +# implements the per-backend traits these crates export. Engine and other +# consumers reach the same types through `elide::recognition::{ner, ocr}`. +elide-ner = { git = "https://github.com/nvisycom/elide", branch = "main", default-features = false } +elide-ocr = { git = "https://github.com/nvisycom/elide", branch = "main", default-features = false } +elide-stt = { git = "https://github.com/nvisycom/elide", branch = "main", default-features = false } + +# Runtime-owned elide extensions +elide-bento = { path = "./crates/elide-bento", version = "0.1.0" } + # Internal crates -nvisy-codec = { path = "./crates/nvisy-codec", version = "0.1.0", default-features = false } -nvisy-context = { path = "./crates/nvisy-context", version = "0.1.0" } -nvisy-core = { path = "./crates/nvisy-core", version = "0.1.0" } nvisy-engine = { path = "./crates/nvisy-engine", version = "0.1.0" } -nvisy-fake = { path = "./crates/nvisy-fake", version = "0.1.0" } -nvisy-llm = { path = "./crates/nvisy-llm", version = "0.1.0" } -nvisy-ner = { path = "./crates/nvisy-ner", version = "0.1.0" } -nvisy-ocr = { path = "./crates/nvisy-ocr", version = "0.1.0" } -nvisy-pattern = { path = "./crates/nvisy-pattern", version = "0.1.0" } nvisy-server = { path = "./crates/nvisy-server", version = "0.1.0" } -nvisy-stt = { path = "./crates/nvisy-stt", version = "0.1.0" } -nvisy-toolkit = { path = "./crates/nvisy-toolkit", version = "0.1.0" } + +# Internal crate being deleted: kept here as a path-dep so consumer +# manifests (nvisy-engine/server/cli) parse during the migration. +# Leaves the workspace once engine's source no longer imports it. +nvisy-core = { path = "./crates/nvisy-core", version = "0.1.0" } # Serialization serde = { version = "1.0", features = ["derive"] } diff --git a/Nvisy.example.toml b/Nvisy.example.toml index 372b40332..423fe3613 100644 --- a/Nvisy.example.toml +++ b/Nvisy.example.toml @@ -1,13 +1,11 @@ # Nvisy configuration # -# Copy to Nvisy.toml (or run `make generate-config`), then edit as needed. -# The server loads Nvisy.toml by default; override with --config or NVISY_CONFIG. +# Copy to Nvisy.toml (or run `make generate-config`), then edit as +# needed. The server loads Nvisy.toml by default; override with +# --config or NVISY_CONFIG. # -# All sections are optional: omitted sections use runtime defaults. +# All sections are optional: omitted sections use server defaults. -version = "0.1.0" - -# Server binding and storage. # CLI flags (--host, --port, --data-dir) override these. [server] host = "0.0.0.0" @@ -24,107 +22,68 @@ body_limit_mb = 4 request_timeout = "5m" cors = { allowed_origins = ["*"], max_age = "1h" } -# Engine-level resource limits and shared infrastructure. -[engine] - -# Run-level resource limits. -[engine.limits] -concurrency = 4 # max parallel documents in flight -run_timeout = "60s" - -# Shared HTTP client for all downstream provider calls. -[engine.http] -max_retries = 3 -timeout = "120s" -connect_timeout = "10s" -idle_timeout = "90s" - -# Extraction registry. -# -# Each opted-in `[extraction.*]` section is built once at server -# startup. Set `enabled = false` to keep the config but skip -# construction and dispatch. The pipeline `Extraction` step carries -# per-call flags only. - -# Extraction: OCR (optical character recognition, images only). -[extraction.ocr] -enabled = true - -[extraction.ocr.backend] -kind = "bento" -base_url = "http://localhost:3001" - -# Extraction: STT (speech-to-text, audio only). -[extraction.stt] -enabled = true - -[extraction.stt.backend] -kind = "noop" - -# Detection: pattern-based (regex + dictionary + checksum). -# Optional section — pattern detection runs by default. Include this -# block to narrow it (named patterns, tag filters) or to disable it -# entirely with `enabled = false`. -[detection.pattern] -enabled = true -patterns = [] # empty = all built-in patterns - -# Detection: NER (named entity recognition). -[detection.ner] -enabled = true - -[detection.ner.backend] -kind = "bento" -base_url = "http://localhost:3000" - -# Detection: LLM (large language model). -[detection.llm] -enabled = true -unresolved_policy = "drop" # drop | first-match - -[detection.llm.provider] -kind = "open-ai" -api_key = "sk-example-replace-me" -model = "gpt-4o" - -[detection.llm.detect] -enabled = true -temperature = 0.1 -max_tokens = 4096 -max_retries = 3 -# context_window = { ... } # optional, for chunking large inputs -# preamble = "..." # optional, overrides the agent's default system prompt - -[detection.llm.verify] -enabled = true -temperature = 0.1 -max_tokens = 4096 -max_retries = 3 -# context_window = { ... } -# preamble = "..." - -# Detection: VLM (vision-language model, images only). -[detection.vlm] -enabled = true - -[detection.vlm.provider] -kind = "open-ai" -api_key = "sk-example-replace-me" -model = "gpt-4o" - -[detection.vlm.detect] -enabled = true -temperature = 0.1 -max_tokens = 4096 -max_retries = 3 - -[detection.vlm.verify] -enabled = true -temperature = 0.1 -max_tokens = 4096 -max_retries = 3 - -# Redaction params. -[redaction] -confidence_threshold = 0.5 -process_metadata = false +# The deployment default for AnalyzerParams. Requests carry +# per-field `analyzer` overrides on top of this default (inherit +# the slot, replace it, remove it from an optional slot, or patch +# a list with extend + remove selectors). Omit the whole +# [analyzer] section and the server falls back to the type-level +# default — no recognizers, no enrichers, default dedup + scope, +# empty catalog — useful for tests, not useful in production. + +# Caller-asserted scope threaded into every recognizer's context. +# Add languages and country codes the analyzer should consider +# (empty = no restriction). +[analyzer.scope] +languages = [] +jurisdictions = [] + +# Deduplication pipeline applied after recognition: +# calibrate → fuse → resolve → filter. +[analyzer.deduplication] +fusion = "max_confidence" # max_confidence | mean | noisy_or +resolution = "highest_confidence" # highest_confidence | longest_span +min_confidence = 0.7 # filter threshold, 0.0..=1.0 + +# Per-request label catalog. Entries are LabelSchema records: +# { name, description?, tags?, metadata? }. The server unions +# every submitted policy's labels with this catalog at request +# time; entries here are the baseline. +# [[analyzer.label_catalog]] +# name = "email_address" +# description = "RFC 5322 email" +# tags = ["pii", "contact"] + +# Pattern recognizer: at-most-one per analyzer. Loads every +# shipped pattern + dictionary and wraps the recognizer in +# elide's context-boost layer. +[analyzer.recognizers.pattern] +builtins = true +context_enhanced = true + +# NER recognizer list. Each entry needs a unique `name`; multiple +# entries run in parallel (e.g. one English model, one Spanish). +# [[analyzer.recognizers.ner]] +# name = "default_ner" +# backend = { kind = "mock" } +# # backend = { kind = "bento", base_url = "http://localhost:3000", model = "..." } + +# LLM recognizer list. Same shape; backend chooses provider. +# [[analyzer.recognizers.llm]] +# name = "default_llm" +# backend = { kind = "mock" } + +# Language enricher (writes the document's detected languages +# into the recognizer context, driving jurisdiction-aware +# dispatch). At-most-one per analyzer. +# [analyzer.enrichers.language] +# min_confidence = 0.6 # 0.0..=1.0; None lets the engine choose + +# OCR enricher (image modality only). At-most-one per analyzer. +# [analyzer.enrichers.ocr] +# backend = { kind = "mock" } +# # backend = { kind = "bento", base_url = "http://localhost:3001", model = "..." } + +# STT enricher (audio modality only). At-most-one per analyzer. +# [analyzer.enrichers.stt] +# backend = { kind = "mock" } +# # backend = { kind = "bento", base_url = "http://localhost:3002", model = "..." } diff --git a/crates/elide-bento/Cargo.toml b/crates/elide-bento/Cargo.toml new file mode 100644 index 000000000..f7b5d2cbd --- /dev/null +++ b/crates/elide-bento/Cargo.toml @@ -0,0 +1,59 @@ +# https://doc.rust-lang.org/cargo/reference/manifest.html + +[package] +name = "elide-bento" +description = "BentoML-backed NER and OCR backends for the elide toolkit" +keywords = ["elide", "bento", "bentoml", "ner", "ocr"] +categories = ["api-bindings"] +readme = "README.md" + +version = { workspace = true } +rust-version = { workspace = true } +edition = { workspace = true } +license = { workspace = true } +publish = { workspace = true } + +authors = { workspace = true } +repository = { workspace = true } +homepage = { workspace = true } +documentation = { workspace = true } + +[package.metadata.docs.rs] +all-features = true +rustdoc-args = ["--cfg", "docsrs"] + +[features] +default = ["ner", "ocr"] +## Ship the NER backend (implements `elide_ner::backend::NerBackend` +## against the `nvisy-inference-ner` BentoML service). +ner = ["dep:elide-ner"] +## Ship the OCR backend (implements `elide_ocr::OcrBackend` +## against the `nvisy-inference-ocr` BentoML service). +ocr = ["dep:elide-ocr", "dep:base64", "elide-core/image"] + +[dependencies] +# Elide toolkit (upstream) +elide-core = { workspace = true, features = [] } +elide-ner = { workspace = true, features = [], optional = true } +elide-ocr = { workspace = true, features = [], optional = true } + +# Serialization +serde = { workspace = true, features = ["derive"] } + +# Primitive datatypes +hipstr = { workspace = true, features = [] } + +# Async runtime +async-trait = { workspace = true, features = [] } + +# Error handling +thiserror = { workspace = true, features = [] } + +# Image bytes → base64 (OCR only) +base64 = { workspace = true, features = [], optional = true } + +# BentoML client +bentoml = { workspace = true, default-features = false, features = ["rustls-tls", "tracing"] } + +[dev-dependencies] +tokio = { workspace = true, features = ["rt", "macros"] } diff --git a/crates/elide-bento/README.md b/crates/elide-bento/README.md new file mode 100644 index 000000000..49b8e23e3 --- /dev/null +++ b/crates/elide-bento/README.md @@ -0,0 +1,7 @@ +# elide-bento + +Shared BentoML HTTP client wrapper for elide backends. + +Per-modality backends (NER, OCR, …) live in their consuming crates +(`elide-ner`, `elide-ocr`) and pull this crate for the common HTTP +client, params validation, and error translation. diff --git a/crates/elide-bento/src/error.rs b/crates/elide-bento/src/error.rs new file mode 100644 index 000000000..fcdd5caeb --- /dev/null +++ b/crates/elide-bento/src/error.rs @@ -0,0 +1,39 @@ +//! Error translation: `bentoml` errors → [`elide_core::Error`]. +//! +//! Crate-private — the public API of every backend reports +//! [`elide_core::Error`]; this enum is the internal seam the +//! per-route helpers use before bubbling up. + +use elide_core::{Error, ErrorKind}; + +/// Errors surfaced internally by the bento backends. +/// +/// Two structural categories the consuming crate maps onto +/// [`ErrorKind`] when bubbling up: transport (HTTP / network / +/// client construction) and protocol (service answered but the +/// body did not match the contract — decode error, batch length +/// mismatch, …). +#[derive(Debug, thiserror::Error)] +pub(crate) enum BentoError { + /// HTTP / transport failure — client construction, network + /// I/O, status-code rejections. + #[error("bento transport error: {0}")] + Transport(#[from] bentoml::Error), + /// Protocol failure — the service answered but the body did not + /// match the contract. + #[error("bento protocol error: {0}")] + Protocol(String), +} + +impl From for Error { + /// Map transport to [`ErrorKind::Transport`] and protocol to + /// [`ErrorKind::Validation`], carrying the original error as the + /// source cause. + fn from(err: BentoError) -> Self { + let kind = match err { + BentoError::Transport(_) => ErrorKind::Transport, + BentoError::Protocol(_) => ErrorKind::Validation, + }; + Error::new(kind, err) + } +} diff --git a/crates/elide-bento/src/lib.rs b/crates/elide-bento/src/lib.rs new file mode 100644 index 000000000..ad4151ba3 --- /dev/null +++ b/crates/elide-bento/src/lib.rs @@ -0,0 +1,18 @@ +#![forbid(unsafe_code)] +#![cfg_attr(docsrs, feature(doc_cfg))] +#![doc = include_str!("../README.md")] + +mod error; + +#[cfg(feature = "ner")] +#[cfg_attr(docsrs, doc(cfg(feature = "ner")))] +pub mod ner; + +#[cfg(feature = "ocr")] +#[cfg_attr(docsrs, doc(cfg(feature = "ocr")))] +pub mod ocr; + +#[cfg(feature = "ner")] +pub use self::ner::BentoNer; +#[cfg(feature = "ocr")] +pub use self::ocr::BentoOcr; diff --git a/crates/elide-bento/src/ner/mod.rs b/crates/elide-bento/src/ner/mod.rs new file mode 100644 index 000000000..f7d8545b5 --- /dev/null +++ b/crates/elide-bento/src/ner/mod.rs @@ -0,0 +1,140 @@ +//! [`BentoNer`]: an [`elide_ner::backend::NerBackend`] backed by the +//! `nvisy-inference-ner` BentoML service. +//! +//! Wire contract: `POST /recognize` accepts a batched list of +//! requests (one schema-driven entity-extraction call per item) +//! and returns the matching list of responses. Each request carries +//! a schema (entities + optional classifications + structures); this +//! backend uses entities only and ignores the rest. Per-call +//! correlation IDs propagate as `x-request-id` headers when set. +//! +//! Wire types live in the private `request` (outgoing) and +//! `response` (incoming) submodules; only the public +//! [`BentoNer`] backend is part of this crate's API. + +mod request; +mod response; + +use bentoml::{Client, Endpoint}; +use elide_core::Result; +use elide_core::entity::provenance::ModelEvent; +use elide_ner::backend::{NerBackend, NerRequest, NerResponse}; +use hipstr::HipStr; + +use self::request::WireNerRequest; +use self::response::WireNerResponse; +use crate::error::BentoError; + +const ROUTE: &str = "recognize"; + +/// BentoML NER backend. +/// +/// Owns a cached [`Endpoint`] pointing at the `nvisy-inference-ner` +/// `/recognize` route, plus the per-deployment model id (echoed +/// into [`NerBackend::provenance`]) and a default per-label +/// confidence threshold the service applies when a schema entry +/// does not pin its own. +#[derive(Debug, Clone)] +pub struct BentoNer { + /// Pre-built endpoint at the `/recognize` route. Cloned per + /// call so per-request headers (`x-request-id`) layer onto a + /// fresh instance without rebuilding the route. + endpoint: Endpoint, + /// Service-side model identifier echoed in provenance. + model_id: HipStr<'static>, + /// Default per-label confidence cutoff sent on every request. + /// Per-label thresholds in the schema override it. + default_threshold: f32, +} + +impl BentoNer { + /// Build from a service URL + the deployment's model id. The + /// default per-label threshold starts at `0.5` (matches the + /// service's own default); use [`with_default_threshold`] to + /// override. + /// + /// [`with_default_threshold`]: Self::with_default_threshold + pub fn new(base_url: impl Into, model_id: impl Into>) -> Result { + let client = Client::builder() + .with_base_url(base_url) + .build() + .map_err(BentoError::Transport)?; + Ok(Self { + endpoint: client.endpoint(ROUTE), + model_id: model_id.into(), + default_threshold: 0.5, + }) + } + + /// Override the per-request default confidence threshold (the + /// service applies it when a schema entity has no per-label + /// `threshold` of its own). + #[must_use] + pub fn with_default_threshold(mut self, threshold: f32) -> Self { + self.default_threshold = threshold; + self + } + + /// Send one batched `/recognize` POST and parse the response + /// body. Clones the cached endpoint so per-request headers + /// layer on without touching the original. + async fn post_recognize( + &self, + requests: &[NerRequest<'_>], + ) -> Result, BentoError> { + let body: Vec = requests + .iter() + .map(|r| WireNerRequest::from_request(r, self.default_threshold)) + .collect(); + let mut endpoint = self.endpoint.clone(); + if let Some(id) = requests.iter().find_map(|r| r.correlation_id) { + endpoint = endpoint.with_request_id(id.to_string()); + } + endpoint + .invoke::<_, Vec>(&body) + .await + .map_err(BentoError::Transport) + } +} + +#[async_trait::async_trait] +impl NerBackend for BentoNer { + fn provenance(&self) -> ModelEvent { + ModelEvent { + name: self.model_id.clone(), + version: None, + contextual: false, + } + } + + async fn recognize(&self, request: NerRequest<'_>) -> Result { + let responses = self.post_recognize(&[request]).await?; + let mut iter = responses.into_iter(); + let response = iter + .next() + .ok_or_else(|| BentoError::Protocol("bento ner returned an empty batch".into()))?; + if iter.next().is_some() { + return Err(BentoError::Protocol( + "bento ner returned more responses than requests".into(), + ) + .into()); + } + Ok(response.decode()) + } + + async fn recognize_batch(&self, requests: &[NerRequest<'_>]) -> Result> { + if requests.is_empty() { + return Ok(Vec::new()); + } + let responses = self.post_recognize(requests).await?; + if responses.len() != requests.len() { + return Err(BentoError::Protocol(format!( + "bento ner returned {} responses for {} requests", + responses.len(), + requests.len(), + )) + .into()); + } + Ok(responses.into_iter().map(WireNerResponse::decode).collect()) + } +} diff --git a/crates/elide-bento/src/ner/request.rs b/crates/elide-bento/src/ner/request.rs new file mode 100644 index 000000000..4fb6f86b9 --- /dev/null +++ b/crates/elide-bento/src/ner/request.rs @@ -0,0 +1,72 @@ +//! Outgoing wire types for the NER `/recognize` endpoint. +//! +//! Mirrors `nvisy_core.ner.v1.NerRequest` from the inference +//! repository: a `(text, schema, threshold)` triple where the +//! schema lists the entities to extract. Classifications and +//! structured records (also part of the upstream schema) are +//! omitted — this backend surfaces entity extraction only. + +use elide_ner::backend::NerRequest; +use serde::Serialize; + +/// Outgoing per-call request body element. +#[derive(Debug, Serialize)] +#[serde(rename_all = "camelCase")] +pub(super) struct WireNerRequest { + /// Source text to scan. + pub text: String, + /// Schema describing what to extract. + pub schema: WireSchema, + /// Default per-label confidence cutoff. Per-entity `threshold` + /// overrides this when present. + pub threshold: f32, +} + +impl WireNerRequest { + /// Translate an elide [`NerRequest`] into the wire shape, + /// pinning the service-default threshold when the request has + /// no per-label thresholds of its own. + pub(super) fn from_request(request: &NerRequest<'_>, default_threshold: f32) -> Self { + let entities = request + .labels + .map(|labels| { + labels + .iter() + .map(|label| WireEntitySpec { + label: label.name().to_owned(), + description: label.description().map(str::to_owned), + threshold: None, + }) + .collect() + }) + .unwrap_or_default(); + Self { + text: request.text.to_owned(), + schema: WireSchema { entities }, + threshold: default_threshold, + } + } +} + +/// `Schema` group: the entities the call extracts. +#[derive(Debug, Serialize)] +#[serde(rename_all = "camelCase")] +pub(super) struct WireSchema { + pub entities: Vec, +} + +/// One entity to extract. +#[derive(Debug, Serialize)] +#[serde(rename_all = "camelCase")] +pub(super) struct WireEntitySpec { + /// Stable label identifier (e.g. `"email_address"`). + pub label: String, + /// Optional natural-language description that steers zero-shot + /// extraction. + #[serde(skip_serializing_if = "Option::is_none")] + pub description: Option, + /// Per-label confidence cutoff. `None` falls through to the + /// request-level [`WireNerRequest::threshold`]. + #[serde(skip_serializing_if = "Option::is_none")] + pub threshold: Option, +} diff --git a/crates/elide-bento/src/ner/response.rs b/crates/elide-bento/src/ner/response.rs new file mode 100644 index 000000000..6250da111 --- /dev/null +++ b/crates/elide-bento/src/ner/response.rs @@ -0,0 +1,53 @@ +//! Incoming wire types for the NER `/recognize` endpoint. +//! +//! Mirrors `nvisy_core.ner.v1.NerResponse` from the inference +//! repository. Classifications, structures, and the response-level +//! `modelId` are deserialised-and-discarded — this backend surfaces +//! entity-extraction results only. + +use elide_ner::backend::{NerResponse, NerSpan}; +use serde::Deserialize; + +/// Incoming per-call response body element. +#[derive(Debug, Deserialize)] +#[serde(rename_all = "camelCase")] +pub(super) struct WireNerResponse { + /// Extracted entities, in backend order. + #[serde(default)] + pub entities: Vec, + // `classifications`, `structures`, `modelId` ignored. +} + +/// One extracted entity span. +#[derive(Debug, Deserialize)] +#[serde(rename_all = "camelCase")] +pub(super) struct WireEntity { + /// Model-native label string. + pub label: String, + /// Confidence in `[0, 1]`. + pub score: f32, + /// Byte offset, inclusive. + pub start: usize, + /// Byte offset, exclusive. + pub end: usize, +} + +impl WireNerResponse { + /// Translate into the elide [`NerResponse`] the backend trait + /// expects. Drops malformed (`end <= start`) spans defensively + /// — the wire validator already rejects them, but the guard + /// keeps a misbehaving service from poisoning the recognizer. + pub(super) fn decode(self) -> NerResponse { + let spans = self + .entities + .into_iter() + .filter_map(|e| { + if e.end <= e.start { + return None; + } + Some(NerSpan::new(e.label, e.score, e.start..e.end)) + }) + .collect(); + NerResponse::new(spans) + } +} diff --git a/crates/elide-bento/src/ocr/mod.rs b/crates/elide-bento/src/ocr/mod.rs new file mode 100644 index 000000000..641f37ef3 --- /dev/null +++ b/crates/elide-bento/src/ocr/mod.rs @@ -0,0 +1,119 @@ +//! [`BentoOcr`]: an [`elide_ocr::OcrBackend`] backed by the +//! `nvisy-inference-ocr` BentoML service. +//! +//! Wire contract: `POST /recognize` accepts a batched list of +//! requests (each carrying base64-encoded image bytes + a +//! confidence threshold) and returns the matching list of +//! responses. Each response is a `Page -> Block -> Line -> Word` +//! tree; this backend flattens it to elide's +//! [`LayoutBlock`]/[`LayoutWord`] vocabulary: one [`LayoutBlock`] +//! per inference `Block`, every per-block word lifted into a +//! [`LayoutWord`] regardless of its parent `Line`. Per-call +//! correlation IDs propagate as `x-request-id` headers when set. +//! +//! Wire types live in the private `request` (outgoing) and +//! `response` (incoming) submodules; only the public +//! [`BentoOcr`] backend is part of this crate's API. +//! +//! [`LayoutBlock`]: elide_core::modality::image::LayoutBlock +//! [`LayoutWord`]: elide_core::modality::image::LayoutWord + +mod request; +mod response; + +use bentoml::{Client, Endpoint}; +use elide_core::Result; +use elide_core::entity::provenance::ModelEvent; +use elide_ocr::{OcrBackend, OcrRequest, OcrResponse}; +use hipstr::HipStr; + +use self::request::WireOcrRequest; +use self::response::WireOcrResponse; +use crate::error::BentoError; + +const ROUTE: &str = "recognize"; + +/// BentoML OCR backend. +/// +/// Owns a cached [`Endpoint`] pointing at the `nvisy-inference-ocr` +/// `/recognize` route, plus the per-deployment model id (echoed +/// into [`OcrBackend::provenance`]) and a default per-word +/// confidence threshold (the service drops anything weaker before +/// returning). +#[derive(Debug, Clone)] +pub struct BentoOcr { + /// Pre-built endpoint at the `/recognize` route. Cloned per + /// call so per-request headers (`x-request-id`) layer onto a + /// fresh instance without rebuilding the route. + endpoint: Endpoint, + /// Service-side model identifier echoed in provenance. + model_id: HipStr<'static>, + /// Default confidence floor sent on every request; the service + /// drops weaker per-word recognitions before responding. + default_threshold: f32, +} + +impl BentoOcr { + /// Build from a service URL + the deployment's model id. + /// Default per-word confidence threshold is `0.0` (no + /// filtering, matches the service's own default); use + /// [`with_default_threshold`] to override. + /// + /// [`with_default_threshold`]: Self::with_default_threshold + pub fn new(base_url: impl Into, model_id: impl Into>) -> Result { + let client = Client::builder() + .with_base_url(base_url) + .build() + .map_err(BentoError::Transport)?; + Ok(Self { + endpoint: client.endpoint(ROUTE), + model_id: model_id.into(), + default_threshold: 0.0, + }) + } + + /// Override the per-request default per-word confidence + /// threshold. + #[must_use] + pub fn with_default_threshold(mut self, threshold: f32) -> Self { + self.default_threshold = threshold; + self + } +} + +#[async_trait::async_trait] +impl OcrBackend for BentoOcr { + fn provenance(&self) -> ModelEvent { + ModelEvent { + name: self.model_id.clone(), + version: None, + contextual: false, + } + } + + async fn recognize(&self, request: OcrRequest<'_>) -> Result { + let body = vec![WireOcrRequest::from_request( + &request, + self.default_threshold, + )]; + let mut endpoint = self.endpoint.clone(); + if let Some(id) = request.correlation_id { + endpoint = endpoint.with_request_id(id.to_string()); + } + let responses: Vec = endpoint + .invoke(&body) + .await + .map_err(BentoError::Transport)?; + let mut iter = responses.into_iter(); + let response = iter + .next() + .ok_or_else(|| BentoError::Protocol("bento ocr returned an empty batch".into()))?; + if iter.next().is_some() { + return Err(BentoError::Protocol( + "bento ocr returned more responses than requests".into(), + ) + .into()); + } + Ok(response.decode()) + } +} diff --git a/crates/elide-bento/src/ocr/request.rs b/crates/elide-bento/src/ocr/request.rs new file mode 100644 index 000000000..a15725762 --- /dev/null +++ b/crates/elide-bento/src/ocr/request.rs @@ -0,0 +1,29 @@ +//! Outgoing wire types for the OCR `/recognize` endpoint. +//! +//! Mirrors `nvisy_core.ocr.v1.OcrRequest` from the inference +//! repository: base64-encoded image bytes plus a per-word +//! confidence floor the service applies before returning. + +use base64::Engine; +use base64::engine::general_purpose::STANDARD as BASE64; +use elide_ocr::OcrRequest; +use serde::Serialize; + +/// Outgoing per-call request body element. +#[derive(Debug, Serialize)] +#[serde(rename_all = "camelCase")] +pub(super) struct WireOcrRequest { + /// Base64-encoded image bytes. + pub image: String, + /// Drop per-word recognitions weaker than this; `0.0` keeps all. + pub confidence_threshold: f32, +} + +impl WireOcrRequest { + pub(super) fn from_request(request: &OcrRequest<'_>, default_threshold: f32) -> Self { + Self { + image: BASE64.encode(request.image), + confidence_threshold: default_threshold, + } + } +} diff --git a/crates/elide-bento/src/ocr/response.rs b/crates/elide-bento/src/ocr/response.rs new file mode 100644 index 000000000..110fa6174 --- /dev/null +++ b/crates/elide-bento/src/ocr/response.rs @@ -0,0 +1,138 @@ +//! Incoming wire types for the OCR `/recognize` endpoint. +//! +//! Mirrors `nvisy_core.ocr.v1.OcrResponse` from the inference +//! repository. The full upstream tree is +//! `Page -> Block -> Line -> Word`; elide's vocabulary collapses +//! lines into the parent block — [`WireOcrResponse::decode`] +//! flattens every word under its grandparent block. The +//! response-level `modelId`, +//! per-page `width`/`height`, per-block `kind`, and any rotated +//! polygons are deserialised-and-discarded for now. + +use elide_core::modality::image::{ImageLocation, LayoutBlock, LayoutWord}; +use elide_core::primitive::{BoundingBox, Confidence, Point}; +use elide_ocr::OcrResponse; +use serde::Deserialize; + +/// Incoming per-call response body element. +#[derive(Debug, Deserialize)] +#[serde(rename_all = "camelCase")] +pub(super) struct WireOcrResponse { + #[serde(default)] + pub pages: Vec, + // `modelId` ignored: provenance comes from `BentoOcr::model_id` + // (the deployment-level id the operator wired at construction). +} + +#[derive(Debug, Deserialize)] +#[serde(rename_all = "camelCase")] +pub(super) struct WirePage { + /// 1-based page index; flows straight onto [`ImageLocation::page`]. + pub page_number: Option, + #[serde(default)] + pub blocks: Vec, + // `width`, `height` ignored: elide's layout does not carry page + // dimensions today. +} + +#[derive(Debug, Deserialize)] +#[serde(rename_all = "camelCase")] +pub(super) struct WireBlock { + pub text: String, + pub bbox: WireBoundingBox, + #[serde(default)] + pub lines: Vec, + // `kind` (text / table / figure / other) ignored: elide's + // `LayoutBlock` does not yet model block kind. When upstream + // grows it, surface here. +} + +#[derive(Debug, Deserialize)] +#[serde(rename_all = "camelCase")] +pub(super) struct WireLine { + #[serde(default)] + pub words: Vec, + // Per-line text + bbox are subsumed by the block's text + the + // per-word geometry; elide does not model the intermediate line + // layer today. +} + +#[derive(Debug, Deserialize)] +#[serde(rename_all = "camelCase")] +pub(super) struct WireWord { + pub text: String, + pub confidence: Option, + pub bbox: WireBoundingBox, + // `polygon` ignored: rotated regions land on a future + // `LayoutWord::polygon` field once elide grows one. +} + +#[derive(Debug, Deserialize)] +#[serde(rename_all = "camelCase")] +pub(super) struct WireBoundingBox { + pub x: f64, + pub y: f64, + pub width: f64, + pub height: f64, +} + +impl From for BoundingBox { + fn from(b: WireBoundingBox) -> Self { + BoundingBox::new( + Point::new(b.x, b.y), + Point::new(b.x + b.width, b.y + b.height), + ) + } +} + +impl WireOcrResponse { + /// Translate into the elide [`OcrResponse`] the backend trait + /// expects. Flattens pages → blocks → words; every per-block + /// word becomes a [`LayoutWord`] on the resulting + /// [`LayoutBlock`]. + pub(super) fn decode(self) -> OcrResponse { + let blocks = self + .pages + .into_iter() + .flat_map(|page| { + let page_number = page.page_number; + page.blocks + .into_iter() + .map(move |block| block.decode(page_number)) + }) + .collect(); + OcrResponse::new(blocks) + } +} + +impl WireBlock { + fn decode(self, page_number: Option) -> LayoutBlock { + let region = ImageLocation { + bounding_box: self.bbox.into(), + polygon: None, + page: page_number, + }; + let words: Vec = self + .lines + .into_iter() + .flat_map(|line| line.words.into_iter()) + .map(|word| word.decode(page_number)) + .collect(); + LayoutBlock::new(region, self.text).with_words(words) + } +} + +impl WireWord { + fn decode(self, page_number: Option) -> LayoutWord { + let region = ImageLocation { + bounding_box: self.bbox.into(), + polygon: None, + page: page_number, + }; + let mut layout = LayoutWord::new(region, self.text); + if let Some(c) = self.confidence { + layout = layout.with_confidence(Confidence::clamped(c)); + } + layout + } +} diff --git a/crates/nvisy-cli/Cargo.toml b/crates/nvisy-cli/Cargo.toml index c8fa9aec5..1bb5bd235 100644 --- a/crates/nvisy-cli/Cargo.toml +++ b/crates/nvisy-cli/Cargo.toml @@ -19,11 +19,7 @@ homepage = { workspace = true } documentation = { workspace = true } [features] -default = [ - "tabular", "image", "audio", "rich", - "openai", "anthropic", "google", - "bento", -] +default = ["tabular", "image", "audio"] ## Tabular modality — csv, xlsx. tabular = ["nvisy-server/tabular"] @@ -31,19 +27,6 @@ tabular = ["nvisy-server/tabular"] image = ["nvisy-server/image"] ## Audio modality — wav, mp3. audio = ["nvisy-server/audio"] -## Rich-document modality — pdf, docx. -rich = ["nvisy-server/rich"] - -## Enable all OpenAI providers (GPT, Whisper STT). -openai = ["nvisy-server/openai"] -## Enable Anthropic Claude completion provider. -anthropic = ["nvisy-server/anthropic"] -## Enable Google Gemini. -google = ["nvisy-server/google"] - -## Enable the externalized BentoML backends (NER + OCR) — forwards -## to `nvisy-server/bento`. -bento = ["nvisy-server/bento"] [package.metadata.docs.rs] all-features = true @@ -55,7 +38,7 @@ path = "src/main.rs" [dependencies] # Internal crates -nvisy-engine = { workspace = true, features = [] } +nvisy-core = { workspace = true, features = [] } nvisy-server = { workspace = true, features = [] } # Serialization diff --git a/crates/nvisy-cli/src/config/mod.rs b/crates/nvisy-cli/src/config/mod.rs index c2fa93b92..dacf13d7e 100644 --- a/crates/nvisy-cli/src/config/mod.rs +++ b/crates/nvisy-cli/src/config/mod.rs @@ -1,8 +1,8 @@ //! CLI configuration management. //! -//! TOML file is the source of truth; CLI flags override a small set -//! of network / lifecycle fields. The resolved [`AppConfig`] is what -//! the rest of the binary consumes. +//! TOML file is the source of truth; CLI flags override a small +//! set of network / lifecycle fields. The resolved [`AppConfig`] +//! is what the rest of the binary consumes. //! //! # Architecture //! @@ -11,12 +11,20 @@ //! └── Overrides thin CLI overlay applied to ServerConfig //! //! AppConfig (TOML) -//! ├── server: ServerConfig [server] + nested .observability / .middleware -//! └── runtime: RuntimeConfig [engine], [extraction.*], [detection.*], [redaction] +//! └── server: ServerConfig [server] + nested .observability / .middleware //! ``` //! -//! [`Cli::load`] reads the file, merges CLI overrides, and returns -//! the resolved [`AppConfig`]. +//! There is no engine config section. Recognizer / analyzer / +//! anonymizer settings travel per-request inside +//! [`AnalyzerParams`] on `POST /detections`; the server just +//! orchestrates and persists. The only server-side +//! engine-shaped setting is the data directory, which lives on +//! [`ServerConfig`]. +//! +//! [`Cli::load`] reads the file, merges CLI overrides, and +//! returns the resolved [`AppConfig`]. +//! +//! [`AnalyzerParams`]: nvisy_core::plan::AnalyzerParams pub mod middleware; pub mod observability; @@ -29,7 +37,7 @@ use std::time::Duration; use anyhow::Context; use clap::{Args, Parser}; -use nvisy_engine::core::RuntimeConfig; +use nvisy_core::plan::AnalyzerParams; use serde::Deserialize; pub use self::server::ServerConfig; @@ -91,39 +99,32 @@ impl Overrides { } } -/// Resolved top-level configuration: server settings + engine -/// subsystem settings, all merged from TOML + CLI overrides. -/// -/// `deny_unknown_fields` catches typos at the top level (e.g. -/// `[serer]` instead of `[server]`). Each nested struct denies -/// unknown fields too so typos inside a section also fail loudly. +/// Resolved top-level configuration: server settings + the +/// deployment's default [`AnalyzerParams`], merged from TOML + +/// CLI overrides. #[derive(Debug, Clone, Default, Deserialize)] pub struct AppConfig { /// Server, observability, and middleware configuration. #[serde(default)] pub server: ServerConfig, - /// Engine and provider subsystem settings. Flattened so its - /// sections (`[engine]`, `[extraction.*]`, ...) sit at the - /// TOML root alongside `[server]`. The top-level `version` - /// key the TOML carries is consumed here by `RuntimeConfig`. - #[serde(flatten)] - pub runtime: RuntimeConfig, + /// Default analyzer spec the server applies to every + /// request that doesn't override its analyzer fields. + /// Empty when the `[analyzer]` section is absent — + /// requests then have to ship a complete spec or get + /// "nothing detects" semantics. + #[serde(default)] + pub analyzer: AnalyzerParams, } impl Cli { - /// Read the TOML file, apply CLI overrides, run runtime - /// validation, and return the resolved [`AppConfig`]. + /// Read the TOML file, apply CLI overrides, return the + /// resolved [`AppConfig`]. /// - /// Missing TOML file resolves to defaults (everything from CLI - /// + built-ins). + /// Missing TOML file resolves to defaults (everything from + /// CLI + built-ins). pub fn load(self) -> anyhow::Result { let mut config = read_toml(&self.config)?; self.overrides.merge_into(&mut config.server); - config.runtime.resolve_env(); - config - .runtime - .validate() - .map_err(|e| anyhow::anyhow!("invalid configuration: {}", e.message()))?; Ok(config) } } @@ -145,8 +146,8 @@ mod tests { use super::*; - /// `Nvisy.example.toml` is the source of truth for the documented - /// schema. If it stops parsing, the docs lie. + /// `Nvisy.example.toml` is the source of truth for the + /// documented schema. If it stops parsing, the docs lie. #[test] fn example_toml_parses() { let path = PathBuf::from(env!("CARGO_MANIFEST_DIR")) @@ -154,20 +155,7 @@ mod tests { .join("Nvisy.example.toml"); let contents = fs::read_to_string(&path).expect("Nvisy.example.toml exists"); let config: AppConfig = toml::from_str(&contents).expect("Nvisy.example.toml parses"); - - assert!(config.runtime.engine.is_some(), "[engine] should be set"); - assert!( - config.runtime.extraction.is_some(), - "[extraction.*] should be set" - ); - assert!( - config.runtime.detection.is_some(), - "[detection.*] should be set" - ); - assert!( - config.runtime.redaction.is_some(), - "[redaction] should be set" - ); + assert_eq!(config.server.port, 8080); } #[test] diff --git a/crates/nvisy-cli/src/main.rs b/crates/nvisy-cli/src/main.rs index 606b53c4c..2d914b545 100644 --- a/crates/nvisy-cli/src/main.rs +++ b/crates/nvisy-cli/src/main.rs @@ -39,13 +39,10 @@ async fn run() -> anyhow::Result<()> { tracing::info!( target: TARGET, binary = env!("CARGO_PKG_VERSION"), - config = %config.runtime.version, "starting nvisy", ); - let AppConfig { - server, runtime, .. - } = config; - let state = ServiceState::new(runtime, server.data_dir.clone()).await?; + let AppConfig { server, analyzer } = config; + let state = ServiceState::new(server.data_dir.clone(), analyzer).await?; let router = create_router(&server, state); server::run(&server, router).await } diff --git a/crates/nvisy-codec/Cargo.toml b/crates/nvisy-codec/Cargo.toml deleted file mode 100644 index ff9752965..000000000 --- a/crates/nvisy-codec/Cargo.toml +++ /dev/null @@ -1,148 +0,0 @@ -# https://doc.rust-lang.org/cargo/reference/manifest.html - -[package] -name = "nvisy-codec" -description = "Codec traits + format implementations (TXT, JSON, HTML, CSV, XLSX, PNG, JPEG, TIFF, WAV, MP3, PDF, DOCX) for the Nvisy multimodal redaction platform" -keywords = ["nvisy", "codec", "loader", "pdf", "redaction"] -categories = ["parser-implementations", "encoding"] -readme = "README.md" - -version = { workspace = true } -rust-version = { workspace = true } -edition = { workspace = true } -license = { workspace = true } -publish = { workspace = true } - -authors = { workspace = true } -repository = { workspace = true } -homepage = { workspace = true } -documentation = { workspace = true } - -[features] -default = ["text", "tabular"] - -## Plain-text (`.txt`, `.log`) loader and handler. -txt = ["internal_text"] -## JSON loader and handler. -json = ["internal_text"] -## Markdown loader. -markdown = ["internal_text"] -## HTML loader and handler via `scraper`. -html = ["internal_text", "dep:scraper", "dep:ego-tree"] - -## CSV loader and handler via the `csv` crate. Tabular cells reuse the -## text redact helper, so this pulls `internal_text`. -csv = ["internal_tabular", "internal_text", "dep:csv"] -## Excel (`.xlsx`) loader and handler. Pulls `internal_text` for the -## same reason as `csv`. The handler is a stub today (decode returns -## empty); a parser dep will be wired back in when real extraction -## lands. -xlsx = ["internal_tabular", "internal_text"] - -## PNG loader and handler. -png = ["internal_image"] -## JPEG loader and handler. -jpeg = ["internal_image"] -## TIFF loader and handler. -tiff = ["internal_image"] - -## WAV audio loader and handler. `hound` covers sample read + write; -## `symphonia` probes container metadata for the clip duration. -wav = ["internal_audio", "dep:hound", "dep:symphonia"] -## MP3 audio loader and handler. `symphonia` covers the duration -## probe and PCM decode; `mp3lame-encoder` (LGPL-3.0 via libmp3lame — -## requires a C toolchain + autoconf/automake at build time) handles -## re-encoding for the redaction round-trip. -mp3 = ["internal_audio", "dep:symphonia", "dep:mp3lame-encoder"] - -## PDF loader, handler, and page-to-image rendering via `lopdf` + -## `pdfium-render`. Pulls `internal_text` (page-text redact) + -## `internal_image` (PngHandler for rendered pages + extracted -## figures) + `png` (the concrete image format wrapping rendered -## pages). -pdf = ["internal_rich", "internal_text", "internal_image", "png", "dep:lopdf", "dep:pdfium-render", "dep:rayon"] -## DOCX loader. Pulls `internal_text` only — DOCX text extraction is -## stubbed today, no image rendering needed. Parser deps (`zip` + -## `quick-xml`) will be wired back in when real extraction lands. -docx = ["internal_rich", "internal_text"] - -## All text formats: `txt` + `json` + `markdown` + `html`. -text = ["txt", "json", "markdown", "html"] -## All tabular formats: `csv` + `xlsx`. -tabular = ["csv", "xlsx"] -## All image formats: `png` + `jpeg` + `tiff`. -image = ["png", "jpeg", "tiff"] -## All audio formats: `wav` + `mp3`. -audio = ["wav", "mp3"] -## All rich-document formats: `pdf` + `docx`. -rich = ["pdf", "docx"] - -## In-memory decode helpers for tests in this and downstream crates. -## Re-exported as `nvisy_codec::test_utils`. -test-utils = [] - -# Internal helpers — set automatically by any format feature above. -# Library code uses `#[cfg(feature = "internal_text")]` to mean -# "any text format is enabled", the right gate for shared -# infrastructure like the modality's `impl Codable` block and -# top-level `crate::text` module. -internal_text = [] -internal_tabular = [] -internal_image = [] -internal_audio = [] -internal_rich = [] - -[package.metadata.docs.rs] -all-features = true -rustdoc-args = ["--cfg", "docsrs"] - -[dependencies] -# Internal crates -nvisy-core = { workspace = true, features = [] } - -# Serialization -serde = { workspace = true, features = [] } -serde_json = { workspace = true, features = [] } -schemars = { workspace = true, features = [] } - -# Derive macros and error handling -derive_more = { workspace = true, features = ["as_ref", "deref", "from"] } - -# Primitive datatypes -bytes = { workspace = true, features = [] } - -# Encoding and hashing -hex = { workspace = true, features = [] } -sha2 = { workspace = true, features = [] } - -# Async runtime and parallelism -async-trait = { workspace = true, features = [] } -tokio = { workspace = true, features = ["sync"] } -rayon = { workspace = true, optional = true, features = [] } - -# Observability -tracing = { workspace = true, features = [] } - -# Tabular document parsing (feature-gated) -csv = { workspace = true, optional = true, features = [] } - -# Rich-document parsing (feature-gated: HTML + PDF) -scraper = { workspace = true, optional = true, features = [] } -ego-tree = { workspace = true, optional = true, features = [] } -lopdf = { workspace = true, optional = true, features = [] } -pdfium-render = { workspace = true, optional = true, features = [] } - -# Image processing -image = { workspace = true, features = [] } -imageproc = { workspace = true, features = [] } - -# Audio processing (feature-gated) -hound = { workspace = true, optional = true, features = [] } -symphonia = { workspace = true, optional = true, features = [] } -mp3lame-encoder = { workspace = true, optional = true, features = [] } - -# Storage and file-type detection -infer = { workspace = true, features = [] } - -[dev-dependencies] -tokio = { workspace = true, features = ["macros", "rt"] } diff --git a/crates/nvisy-codec/README.md b/crates/nvisy-codec/README.md deleted file mode 100644 index 693709c18..000000000 --- a/crates/nvisy-codec/README.md +++ /dev/null @@ -1,48 +0,0 @@ -# nvisy-codec - -[![Build](https://img.shields.io/github/actions/workflow/status/nvisycom/runtime/build.yml?branch=main&label=build%20%26%20test&style=flat-square)](https://github.com/nvisycom/runtime/actions/workflows/build.yml) - -Format handlers, the `Handler` trait, and the `CodecRegistry` -that drives ingest in the Nvisy runtime. - -## Overview - -Built-in handlers cover TXT, JSON, Markdown, HTML, CSV, XLSX, PNG, -JPEG, TIFF, WAV, MP3, PDF, and DOCX. Each implements `Handler` -(streaming `next_chunk`, random-access `read` / `redact`, -`lift_chunk` for offset translation) and pairs with a `Loader` -that decodes raw bytes into the handler. A `Format` descriptor -built via `Format::new::(id, loader)` plus chained -`.with_extensions(...)` / `.with_content_types(...)` registers the -pair into `CodecRegistry`. - -Consumers resolve a `Format` by extension, content-type, or id and -get back an `UntypedDocumentHandle` they commit to a modality via -`into_text` / `into_tabular` / `into_image` / `into_audio`. The -typed `DocumentHandle` implements `nvisy-core`'s `TextAt` / -`DataAt` / `RedactAt` directly, so pipeline components read from -and write to codec-backed sources through the same traits the -engine bounds on. The `content` module (`Content`, `ContentData`, -`ContentDescriptor`, `ContentDigest`, `ContentRecord`, -`ContentSource`, `TextEncoding`) carries the raw-bytes side of the -import surface. Each format is feature-gated (`txt`, `csv`, -`png`, …) with umbrella features `text`, `tabular`, `image`, -`audio`, `rich`. Depends only on `nvisy-core`. - -## Documentation - -See [`docs/`](../../docs/) for architecture, security, and API documentation. - -## Changelog - -See [CHANGELOG.md](../../CHANGELOG.md) for release notes and version history. - -## License - -Apache 2.0 License, see [LICENSE.txt](../../LICENSE.txt) - -## Support - -- **Documentation**: [docs.nvisy.com](https://docs.nvisy.com) -- **Issues**: [GitHub Issues](https://github.com/nvisycom/runtime/issues) -- **Email**: [support@nvisy.com](mailto:support@nvisy.com) diff --git a/crates/nvisy-codec/src/content/bundle.rs b/crates/nvisy-codec/src/content/bundle.rs deleted file mode 100644 index cc5246059..000000000 --- a/crates/nvisy-codec/src/content/bundle.rs +++ /dev/null @@ -1,114 +0,0 @@ -//! [`Content`]: data bytes optionally paired with a caller-supplied -//! [`ContentDescriptor`]. - -use std::path::Path; - -use derive_more::{AsRef, Deref}; -use nvisy_core::Result; -use serde::{Deserialize, Serialize}; - -use super::{ContentData, ContentDescriptor, ContentSource}; - -/// Upload-shape carrier: raw bytes plus the caller's descriptive -/// metadata. -/// -/// [`ContentData`] holds the bytes and source identity. -/// [`ContentDescriptor`] holds filename, MIME hint, and extras when -/// the caller has them. The descriptor is optional because some -/// import paths (raw byte uploads, generated content) have nothing -/// to attach. -/// -/// After `Registry::register_content` consumes a `Content`, the -/// stored shape is a `ContentRecord` (descriptor + byte-derived -/// digest), which is what registry reads return. -#[derive(Debug, Clone, PartialEq)] -#[derive(AsRef, Deref, Serialize, Deserialize)] -pub struct Content { - /// Raw content bytes. - #[deref] - #[as_ref] - data: ContentData, - /// Caller-supplied descriptive metadata. - descriptor: Option, -} - -impl From for Content { - fn from(data: ContentData) -> Self { - Self::new(data) - } -} - -impl Content { - /// Create content from data without a descriptor. - pub fn new(data: ContentData) -> Self { - Self { - data, - descriptor: None, - } - } - - /// Create content with a caller-supplied descriptor. - pub fn with_descriptor(data: ContentData, descriptor: ContentDescriptor) -> Self { - Self { - data, - descriptor: Some(descriptor), - } - } - - /// Returns the raw content data. - pub fn data(&self) -> &ContentData { - &self.data - } - - /// Returns the caller-supplied descriptor, if present. - pub fn descriptor(&self) -> Option<&ContentDescriptor> { - self.descriptor.as_ref() - } - - /// Returns the content source identifier. - pub fn content_source(&self) -> ContentSource { - self.data.content_source - } - - /// Returns the raw bytes. - pub fn as_bytes(&self) -> &[u8] { - self.data.as_bytes() - } - - /// Returns `true` if the content appears to be text. - pub fn is_likely_text(&self) -> bool { - self.data.is_likely_text() - } - - /// Try to get the content as a string slice. - /// - /// # Errors - /// - /// Returns an error if the content is not valid UTF-8. - pub fn as_str(&self) -> Result<&str> { - self.data.as_str() - } - - /// Caller-supplied MIME type, if any. Detected MIME isn't - /// available pre-registration (the registry computes it). - pub fn content_type(&self) -> Option<&str> { - self.descriptor - .as_ref() - .and_then(|d| d.content_type.as_deref()) - } - - /// Original filename from the descriptor. - pub fn filename(&self) -> Option<&Path> { - self.descriptor.as_ref().and_then(|d| d.filename.as_deref()) - } - - /// File extension from the descriptor's source path. - pub fn file_extension(&self) -> Option<&str> { - self.descriptor.as_ref().and_then(|d| d.file_extension()) - } - - /// Consume and return both data and descriptor. - pub fn into_parts(self) -> (ContentData, Option) { - (self.data, self.descriptor) - } -} diff --git a/crates/nvisy-codec/src/content/content_data.rs b/crates/nvisy-codec/src/content/content_data.rs deleted file mode 100644 index a31f5158e..000000000 --- a/crates/nvisy-codec/src/content/content_data.rs +++ /dev/null @@ -1,268 +0,0 @@ -//! Raw content bytes with source identity. -//! -//! [`ContentData`] is the pure data half of the content model. It holds -//! the raw bytes and a [`ContentSource`] identifier. All descriptive -//! attributes (MIME type, filename, arbitrary metadata) live on -//! [`ContentMetadata`]. -//! -//! [`ContentMetadata`]: super::ContentMetadata - -use std::{fmt, str}; - -use bytes::Bytes; -use nvisy_core::{Error, ErrorKind, Result}; -use serde::{Deserialize, Serialize}; -use sha2::{Digest, Sha256}; - -use super::ContentSource; - -/// Raw content bytes with source identity. -/// -/// This is the data-only half of the content model — it does not -/// carry MIME type, filename, or other descriptive metadata. -/// Pair with [`ContentDescriptor`] via [`Content`] for a complete -/// representation. -/// -/// [`ContentDescriptor`]: super::ContentDescriptor -/// [`Content`]: super::Content -#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] -pub struct ContentData { - /// Unique identifier for the content source. - pub content_source: ContentSource, - /// The actual content bytes. - data: Bytes, -} - -impl ContentData { - /// Creates content data from raw bytes. - pub fn new(content_source: ContentSource, data: Bytes) -> Self { - Self { - content_source, - data, - } - } - - /// Creates content data from a text string. - pub fn from_text(content_source: ContentSource, text: impl Into) -> Self { - Self { - content_source, - data: Bytes::from(text.into().into_bytes()), - } - } - - /// Returns the size of the content in bytes. - #[must_use] - pub fn size(&self) -> usize { - self.data.len() - } - - /// Returns the content data as a byte slice. - #[must_use] - pub fn as_bytes(&self) -> &[u8] { - &self.data - } - - /// Converts the content data to `Bytes`. - #[must_use] - pub fn to_bytes(&self) -> Bytes { - self.data.clone() - } - - /// Consumes and converts into `Bytes`. - #[must_use] - pub fn into_bytes(self) -> Bytes { - self.data - } - - /// Returns `true` if the content appears to be text. - /// - /// Checks that the content is valid UTF-8 and contains no control - /// characters other than common whitespace (tab, newline, carriage - /// return). - #[must_use] - pub fn is_likely_text(&self) -> bool { - let Ok(s) = str::from_utf8(&self.data) else { - return false; - }; - s.chars() - .all(|c| !c.is_control() || matches!(c, '\t' | '\n' | '\r')) - } - - /// Tries to convert the content data to a UTF-8 string slice. - /// - /// # Errors - /// - /// Returns an error if the content data contains invalid UTF-8 sequences. - pub fn as_str(&self) -> Result<&str> { - str::from_utf8(&self.data) - .map_err(|e| Error::new(ErrorKind::Serialization, format!("Invalid UTF-8: {e}"))) - } - - /// Computes and returns the SHA256 hash of the content. - #[must_use] - pub fn sha256(&self) -> Bytes { - let mut hasher = Sha256::new(); - hasher.update(&self.data); - Bytes::from(hasher.finalize().to_vec()) - } - - /// Returns the SHA256 hash as a hex string. - #[must_use] - pub fn sha256_hex(&self) -> String { - hex::encode(self.sha256()) - } - - /// Verifies the content against a provided SHA256 hash. - /// - /// # Errors - /// - /// Returns an error if the computed hash does not match the expected hash. - pub fn verify_sha256(&self, expected_hash: impl AsRef<[u8]>) -> Result<()> { - let actual_hash = self.sha256(); - let expected = expected_hash.as_ref(); - - if actual_hash.as_ref() == expected { - Ok(()) - } else { - Err(Error::new( - ErrorKind::Validation, - format!( - "Hash mismatch: expected {}, got {}", - hex::encode(expected), - hex::encode(actual_hash) - ), - )) - } - } - - /// Returns a slice of the content data. - /// - /// # Errors - /// - /// Returns an error if the end index is beyond the content length - /// or if start is greater than end. - pub fn slice(&self, start: usize, end: usize) -> Result { - if end > self.data.len() { - return Err(Error::new( - ErrorKind::Validation, - format!( - "Slice end {} exceeds content length {}", - end, - self.data.len() - ), - )); - } - if start > end { - return Err(Error::new( - ErrorKind::Validation, - format!("Slice start {start} is greater than end {end}"), - )); - } - Ok(Bytes::copy_from_slice(&self.data[start..end])) - } - - /// Returns `true` if the content is empty. - #[must_use] - pub fn is_empty(&self) -> bool { - self.data.is_empty() - } - - /// Detect MIME type from the raw bytes using magic-byte signatures. - /// - /// Returns `None` for content with no recognizable magic bytes - /// (e.g. plain text). Pairing the result with - /// `ContentMetadata::detected_content_type` is a caller-side - /// convention — no type-level link exists between the two. - #[must_use] - pub fn detect_mime(&self) -> Option { - infer::get(&self.data).map(|t| t.mime_type().to_owned()) - } -} - -impl From<&str> for ContentData { - fn from(s: &str) -> Self { - Self::from_text(ContentSource::new(), s) - } -} - -impl From for ContentData { - fn from(s: String) -> Self { - Self::from_text(ContentSource::new(), s) - } -} - -impl From<&[u8]> for ContentData { - fn from(bytes: &[u8]) -> Self { - Self::new(ContentSource::new(), Bytes::copy_from_slice(bytes)) - } -} - -impl From> for ContentData { - fn from(vec: Vec) -> Self { - Self::new(ContentSource::new(), Bytes::from(vec)) - } -} - -impl From for ContentData { - fn from(bytes: Bytes) -> Self { - Self::new(ContentSource::new(), bytes) - } -} - -impl fmt::Display for ContentData { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - if let Ok(text) = self.as_str() { - write!(f, "{text}") - } else { - write!(f, "[Binary data: {} bytes]", self.size()) - } - } -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn sha256_computation() { - let content = ContentData::from("Hello, world!"); - let hash = content.sha256(); - assert_eq!(hash.len(), 32); - assert_eq!(hash, content.sha256()); - } - - #[test] - fn sha256_verification() { - let content = ContentData::from("Hello, world!"); - let hash = content.sha256().clone(); - assert!(content.verify_sha256(&hash).is_ok()); - assert!(content.verify_sha256([0u8; 32]).is_err()); - } - - #[test] - fn slice_operations() { - let content = ContentData::from("Hello, world!"); - assert_eq!(content.slice(0, 5).unwrap(), Bytes::from("Hello")); - assert_eq!(content.slice(7, 12).unwrap(), Bytes::from("world")); - assert!(content.slice(0, 100).is_err()); - assert!(content.slice(10, 5).is_err()); - } - - #[test] - fn detect_mime_png() { - let png = vec![ - 0x89, 0x50, 0x4E, 0x47, 0x0D, 0x0A, 0x1A, 0x0A, 0x00, 0x00, 0x00, 0x0D, 0x49, 0x48, - 0x44, 0x52, - ]; - let data = ContentData::from(png); - assert_eq!(data.detect_mime().as_deref(), Some("image/png")); - } - - #[test] - fn is_likely_text() { - assert!(ContentData::from("ascii text").is_likely_text()); - assert!(ContentData::from("").is_likely_text()); - assert!(ContentData::from("café").is_likely_text()); - assert!(!ContentData::from(vec![0x00]).is_likely_text()); - } -} diff --git a/crates/nvisy-codec/src/content/content_metadata.rs b/crates/nvisy-codec/src/content/content_metadata.rs deleted file mode 100644 index ca586515c..000000000 --- a/crates/nvisy-codec/src/content/content_metadata.rs +++ /dev/null @@ -1,176 +0,0 @@ -//! Two-layer content metadata: -//! -//! - [`ContentDescriptor`] holds caller-supplied descriptive bits -//! (filename, MIME hint, source path, arbitrary extras). All -//! optional — the caller might or might not have any of them. -//! - [`ContentDigest`] holds facts the registry computes by looking -//! at the bytes (size, sha256, sniffed MIME). Required fields are -//! actually required. -//! - [`ContentRecord`] bundles a descriptor with a digest. This is -//! what the registry persists and what read sites get back. - -use std::path::{Path, PathBuf}; - -use schemars::JsonSchema; -use serde::{Deserialize, Serialize}; - -/// Caller-supplied descriptive metadata for an upload. -/// -/// Built before the bytes have been written to the registry, so -/// every field is optional — the caller knows whatever they know. -/// The registry's `register_content` consumes this alongside the -/// bytes to produce a [`ContentRecord`]. -#[derive(Debug, Clone, Default, PartialEq, Serialize, Deserialize, JsonSchema)] -#[serde(rename_all = "camelCase")] -pub struct ContentDescriptor { - /// Optional path to the source file. - #[serde(default, skip_serializing_if = "Option::is_none")] - #[schemars(with = "Option")] - pub source_path: Option, - /// MIME type supplied by the caller (e.g. `"text/plain"` from - /// an HTTP `Content-Type` header or an explicit API call). - #[serde(default, skip_serializing_if = "Option::is_none")] - pub content_type: Option, - /// Original filename, if known. Used by `CodecRegistry` for - /// extension-based format resolution. - #[serde(default, skip_serializing_if = "Option::is_none")] - #[schemars(with = "Option")] - pub filename: Option, - /// Caller-supplied key-value pairs that policy conditions - /// (`Condition::Metadata { key, value }`) match against. - #[serde(default, skip_serializing_if = "Option::is_none")] - pub policy_metadata: Option>, -} - -impl ContentDescriptor { - /// Create an empty descriptor. - #[must_use] - pub fn new() -> Self { - Self::default() - } - - /// Create a descriptor with a source file path. - pub fn with_path(path: impl Into) -> Self { - Self { - source_path: Some(path.into()), - ..Self::default() - } - } - - /// Set the caller-supplied MIME type (builder pattern). - #[must_use] - pub fn with_content_type(mut self, mime: impl Into) -> Self { - self.content_type = Some(mime.into()); - self - } - - /// Set the original filename (builder pattern). - #[must_use] - pub fn with_filename(mut self, name: impl Into) -> Self { - self.filename = Some(name.into()); - self - } - - /// Get the file extension from the source path, if available. - #[must_use] - pub fn file_extension(&self) -> Option<&str> { - self.source_path - .as_ref() - .and_then(|path| path.extension()) - .and_then(|ext| ext.to_str()) - } - - /// Get the full path if available. - #[must_use] - pub fn path(&self) -> Option<&Path> { - self.source_path.as_deref() - } - - /// Get a single value from the policy metadata map. - #[must_use] - pub fn get_policy_metadata(&self, key: &str) -> Option<&serde_json::Value> { - self.policy_metadata.as_ref().and_then(|m| m.get(key)) - } - - /// Insert a key-value pair into the policy metadata map, - /// creating the map if it doesn't exist yet. - pub fn set_policy_metadata(&mut self, key: impl Into, value: serde_json::Value) { - self.policy_metadata - .get_or_insert_with(serde_json::Map::new) - .insert(key.into(), value); - } - - /// Remove a key from the policy metadata map. Returns the - /// removed value if the key existed. - pub fn remove_policy_metadata(&mut self, key: &str) -> Option { - self.policy_metadata.as_mut().and_then(|m| m.remove(key)) - } -} - -/// Byte-derived facts about a piece of content. -/// -/// Computed by `Registry::register_content` after the bytes are in -/// hand. Required fields (`size`, `sha256`) are unconditional; -/// `detected_content_type` is `Option` because magic-byte sniffing -/// may legitimately fail (e.g. plain text). -#[derive(Debug, Clone, PartialEq, Serialize, Deserialize, JsonSchema)] -#[serde(rename_all = "camelCase")] -pub struct ContentDigest { - /// Size in bytes. - pub size: u64, - /// SHA-256 hex digest of the raw bytes. - pub sha256: String, - /// MIME type sniffed from the bytes, if magic-byte detection - /// produced a result. - #[serde(default, skip_serializing_if = "Option::is_none")] - pub detected_content_type: Option, -} - -/// Persisted, post-registration view of a piece of content. -/// -/// Returned by registry read endpoints (`list_content_with_record`, -/// `read_content`). The [`ContentDescriptor`] half is whatever the -/// caller supplied at upload; the [`ContentDigest`] half is what -/// the registry computed. -#[derive(Debug, Clone, PartialEq, Serialize, Deserialize, JsonSchema)] -#[serde(rename_all = "camelCase")] -pub struct ContentRecord { - /// Caller-supplied descriptor (filename, MIME hint, policy metadata). - pub descriptor: ContentDescriptor, - /// Registry-computed digest (size, sha256, detected MIME). - pub digest: ContentDigest, -} - -impl ContentRecord { - /// Best-available MIME type: caller-supplied takes priority - /// over sniffed. - #[must_use] - pub fn content_type(&self) -> Option<&str> { - self.descriptor - .content_type - .as_deref() - .or(self.digest.detected_content_type.as_deref()) - } - - /// Original filename from the descriptor. - #[must_use] - pub fn filename(&self) -> Option<&Path> { - self.descriptor.filename.as_deref() - } - - /// Original filename rendered as a UTF-8 string. Non-UTF-8 byte - /// sequences in the path are replaced with U+FFFD (lossy - /// conversion). Use [`filename`] when you need the raw `&Path`. - /// - /// [`filename`]: Self::filename - #[must_use] - pub fn filename_lossy(&self) -> Option { - self.filename().map(|p| p.to_string_lossy().into_owned()) - } - - /// File extension from the descriptor's source path. - #[must_use] - pub fn file_extension(&self) -> Option<&str> { - self.descriptor.file_extension() - } -} diff --git a/crates/nvisy-codec/src/content/encoding.rs b/crates/nvisy-codec/src/content/encoding.rs deleted file mode 100644 index 27f8d8d02..000000000 --- a/crates/nvisy-codec/src/content/encoding.rs +++ /dev/null @@ -1,23 +0,0 @@ -//! Character encoding for text-based loaders. - -use nvisy_core::{Error, Result}; -/// Character encoding used to decode raw bytes before parsing. -#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)] -pub enum TextEncoding { - /// UTF-8 (the default and by far the most common encoding). - #[default] - Utf8, -} - -impl TextEncoding { - /// Decode raw bytes to a UTF-8 string. - /// - /// `origin` identifies the caller for error messages - /// (e.g. `"json-loader"`). - pub fn decode_bytes(self, bytes: &[u8], origin: &str) -> Result { - match self { - Self::Utf8 => String::from_utf8(bytes.to_vec()) - .map_err(|e| Error::validation(format!("Invalid UTF-8: {e}"), origin.to_owned())), - } - } -} diff --git a/crates/nvisy-codec/src/content/mod.rs b/crates/nvisy-codec/src/content/mod.rs deleted file mode 100644 index a0e4e4f0f..000000000 --- a/crates/nvisy-codec/src/content/mod.rs +++ /dev/null @@ -1,29 +0,0 @@ -//! Content data containers, metadata, and source identity. -//! -//! - [`ContentData`]: raw content bytes with source identity. -//! - [`ContentDescriptor`]: caller-supplied descriptive bits -//! (filename, MIME hint, extras) — built before bytes are -//! persisted. -//! - [`ContentDigest`]: byte-derived facts (size, sha256, sniffed -//! MIME) — computed at registration time. -//! - [`ContentRecord`]: persisted view (descriptor + digest), what -//! registry reads return. -//! - [`Content`]: [`ContentData`] paired with an optional -//! [`ContentDescriptor`] — the upload-shape carrier. -//! - [`ContentSource`]: UUIDv7-based content identity and lineage. -//! -//! Top-level format classification lives on [`FormatId`]. -//! -//! [`FormatId`]: crate::FormatId - -mod bundle; -mod content_data; -mod content_metadata; -mod encoding; - -pub use nvisy_core::entity::ContentSource; - -pub use self::bundle::Content; -pub use self::content_data::ContentData; -pub use self::content_metadata::{ContentDescriptor, ContentDigest, ContentRecord}; -pub use self::encoding::TextEncoding; diff --git a/crates/nvisy-codec/src/core/format.rs b/crates/nvisy-codec/src/core/format.rs deleted file mode 100644 index fe4015e98..000000000 --- a/crates/nvisy-codec/src/core/format.rs +++ /dev/null @@ -1,179 +0,0 @@ -//! Format identity: what kind of thing a registered codec is. -//! -//! - [`FormatId`] — stable string identifier (e.g. -//! `"nvisy.text.txt"`). Open namespace, no central enum. -//! - [`Format`] — descriptor [`CodecRegistry`] indexes by id / -//! extension / content type. Bundles a `FormatId`, its -//! [`ModalityKind`], lookup keys, and an erased loader that -//! decodes bytes into a typed handle. -//! -//! [`CodecRegistry`]: super::CodecRegistry -//! [`ModalityKind`]: nvisy_core::modality::ModalityKind - -use std::borrow::Cow; -use std::fmt; -use std::sync::Arc; - -use nvisy_core::modality::{Modality, ModalityKind}; - -use super::{ErasedLoader, Loader, erase}; -use crate::document::{DocumentHandle, UntypedDocumentHandle}; - -/// Stable identifier for a registered codec format. Open string -/// namespace — downstream crates ship their own formats by -/// registering a [`Format`] with a unique [`FormatId`]. -/// -/// Convention: dot-separated namespace. Built-in formats use the -/// `nvisy.` prefix (e.g. `"nvisy.text.txt"`, `"nvisy.rich.pdf"`). -/// Third-party formats use their own (e.g. `"acme.parquet.v2"`). -#[derive(Debug, Clone, PartialEq, Eq, Hash)] -pub struct FormatId(Cow<'static, str>); - -impl FormatId { - /// Construct from a static string literal — no allocation. - pub const fn from_static(id: &'static str) -> Self { - Self(Cow::Borrowed(id)) - } - - /// Construct from an owned [`String`]. - pub fn from_owned(id: String) -> Self { - Self(Cow::Owned(id)) - } - - /// Borrow as `&str`. - pub fn as_str(&self) -> &str { - &self.0 - } -} - -impl fmt::Display for FormatId { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - f.write_str(&self.0) - } -} - -impl AsRef for FormatId { - fn as_ref(&self) -> &str { - &self.0 - } -} - -/// Descriptor for one registered codec format. Indexed by -/// [`CodecRegistry`] under its [`FormatId`], every extension in -/// `extensions`, and every MIME in `content_types`. -/// -/// Construct via [`Format::new`]; read the parts via the accessor -/// methods. The fields are crate-private so the constructor stays -/// the only path that produces a `Format` — that way the -/// [`ModalityKind`] tag is always derived from the loader's -/// modality and never hand-set. -/// -/// [`CodecRegistry`]: super::CodecRegistry -#[derive(Clone)] -pub struct Format { - pub(crate) id: FormatId, - pub(crate) modality: ModalityKind, - pub(crate) extensions: Vec>, - pub(crate) content_types: Vec>, - pub(crate) loader: Arc, -} - -impl Format { - /// Build a [`Format`] for modality `M`. The runtime - /// [`ModalityKind`] tag is taken from `M::KIND` and the loader - /// is erased internally — neither needs to be named at the call - /// site. - /// - /// Extensions and content types default to empty; chain - /// [`with_extensions`] / [`with_content_types`] to declare the - /// lookup keys the [`CodecRegistry`] indexes this format under. - /// - /// [`with_extensions`]: Self::with_extensions - /// [`with_content_types`]: Self::with_content_types - /// [`CodecRegistry`]: super::CodecRegistry - pub fn new(id: FormatId, loader: L) -> Self - where - M: Modality, - L: Loader, - DocumentHandle: Into, - { - Self { - id, - modality: M::KIND, - extensions: Vec::new(), - content_types: Vec::new(), - loader: erase(loader), - } - } - - /// Declare the file extensions (lowercased, no leading dot) that - /// resolve to this format. Extends any previously-declared list. - #[must_use] - pub fn with_extensions(mut self, extensions: I) -> Self - where - I: IntoIterator, - S: Into>, - { - self.extensions - .extend(extensions.into_iter().map(Into::into)); - self - } - - /// Declare the MIME content types (lowercased) that resolve to - /// this format. Extends any previously-declared list. - #[must_use] - pub fn with_content_types(mut self, content_types: I) -> Self - where - I: IntoIterator, - S: Into>, - { - self.content_types - .extend(content_types.into_iter().map(Into::into)); - self - } - - /// Stable identifier of this format. - pub fn id(&self) -> &FormatId { - &self.id - } - - /// Modality this format produces. - pub fn modality(&self) -> ModalityKind { - self.modality - } - - /// File extensions (lowercased, no leading dot) that resolve to - /// this format. - pub fn extensions(&self) -> &[Cow<'static, str>] { - &self.extensions - } - - /// MIME content types (lowercased) that resolve to this format. - pub fn content_types(&self) -> &[Cow<'static, str>] { - &self.content_types - } - - /// Decode raw content through this format's loader, returning - /// the runtime-tagged handle. Equivalent to calling - /// [`CodecRegistry::decode`] after resolving the - /// format yourself. - /// - /// [`CodecRegistry::decode`]: super::CodecRegistry::decode - pub async fn decode( - &self, - content: crate::content::ContentData, - ) -> Result { - self.loader.decode(content).await - } -} - -impl fmt::Debug for Format { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - f.debug_struct("Format") - .field("id", &self.id) - .field("modality", &self.modality) - .field("extensions", &self.extensions) - .field("content_types", &self.content_types) - .finish_non_exhaustive() - } -} diff --git a/crates/nvisy-codec/src/core/handler.rs b/crates/nvisy-codec/src/core/handler.rs deleted file mode 100644 index 9ecd97b6c..000000000 --- a/crates/nvisy-codec/src/core/handler.rs +++ /dev/null @@ -1,127 +0,0 @@ -//! What a codec handler exposes — the trait surface every shipped -//! format handler implements: -//! -//! - [`Handler`] — per-modality capability trait. Identifies and -//! serialises the handler ([`format`], [`source`], [`encode`]), -//! streams chunks ([`next_chunk`]), supports random-access reads -//! and redactions ([`read`], [`redact`]), and lifts recognizer -//! offsets back to source coordinates ([`lift_chunk`]). -//! - [`Chunk`] — one decoded unit yielded by `next_chunk`. -//! -//! [`format`]: Handler::format -//! [`source`]: Handler::source -//! [`encode`]: Handler::encode -//! [`next_chunk`]: Handler::next_chunk -//! [`read`]: Handler::read -//! [`redact`]: Handler::redact -//! [`lift_chunk`]: Handler::lift_chunk - -use std::ops::Range; - -use nvisy_core::Error; -use nvisy_core::modality::Modality; -use nvisy_core::redaction::Redactions; - -use super::FormatId; -use crate::content::{ContentData, ContentSource}; - -/// One decoded unit yielded by [`Handler::next_chunk`]. -/// -/// `data` is the per-modality wire payload; `location` is the -/// coordinate the handler will accept in [`Handler::read`] / -/// [`Handler::redact`] to address the same chunk again. `hints` -/// carries out-of-band context strings the chunk's structural -/// neighbours surface — CSV/XLSX column headers, JSON object -/// keys, HTML parent-element text — for downstream context-aware -/// recognizers; handlers without such metadata leave it empty. -#[derive(Debug, Clone, PartialEq)] -pub struct Chunk { - /// Coordinate addressing this chunk inside the handler. - pub location: M::Location, - /// Wire payload at the chunk's location. - pub data: M::Data, - /// Out-of-band context strings recognizers should treat as - /// in-context (column headers, parent element text, …). - /// Empty when the handler has no such metadata to surface. - pub hints: Vec, -} - -/// Per-modality capability trait every format handler implements. -/// -/// Identifies and serialises the handler ([`format`], [`source`], -/// [`encode`]), streams chunks ([`next_chunk`]), supports -/// random-access reads and redactions ([`read`], [`redact`]), and -/// lifts recognizer offsets back to source coordinates -/// ([`lift_chunk`]). -/// -/// The handler owns the streaming cursor — concurrent iteration -/// of the same handle is not supported (only one `&mut self`). -/// -/// [`format`]: Handler::format -/// [`source`]: Handler::source -/// [`encode`]: Handler::encode -/// [`next_chunk`]: Handler::next_chunk -/// [`read`]: Handler::read -/// [`redact`]: Handler::redact -/// [`lift_chunk`]: Handler::lift_chunk -#[async_trait::async_trait] -pub trait Handler: Send + Sync + 'static { - /// Stable id of the format this handler represents (e.g. - /// `"nvisy.text.txt"`). Cheap to clone. - fn format(&self) -> FormatId; - - /// Content source identity and lineage for this handler. - fn source(&self) -> ContentSource; - - /// Serialize the current handler content back to [`ContentData`]. - fn encode(&self) -> Result; - - /// Advance the cursor and yield the next chunk, or `None` at - /// end-of-stream. - async fn next_chunk(&mut self) -> Result>, Error>; - - /// Read the wire payload at the given location. Used by - /// [`TextAt`] resolvers to fetch bytes for a coordinate already - /// known from somewhere else (an entity audit record, an - /// annotation). Extraction itself does not call this — it - /// drives [`next_chunk`] which returns `(location, data)` - /// together. - /// - /// [`next_chunk`]: Handler::next_chunk - /// [`TextAt`]: nvisy_core::extraction::TextAt - async fn read(&self, location: &M::Location) -> Result, Error>; - - /// Apply a batch of `(location, replacement)` pairs in whatever - /// order is correct for this format. Engine guarantees no two - /// locations overlap; handler decides ordering (right-to-left - /// for text/audio so deletions don't shift later indices, batch - /// per page for PDF, …). The first error aborts the batch. - /// - /// Use [`Redactions::single`] when only one replacement is needed. - async fn redact(&mut self, redactions: Redactions) -> Result<(), Error>; - - /// Translate a `value_range` expressed inside `chunk.data`'s - /// coordinate system into a source-coordinate `M::Location`. - /// - /// Recognizers see the unescaped, decoded chunk payload and - /// emit offsets into that. Downstream stages — dedup, redact, - /// audit — need locations that address the handler's source - /// bytes. `lift_chunk` is the bridge. - /// - /// For text-shaped handlers where `chunk.data` is byte-for-byte - /// a slice of source (TXT lines, HTML text nodes, PDF page - /// text, CSV cells, DOCX text runs), the mapping is the - /// identity offset add against `chunk.location.start`. Handlers - /// whose chunks decode escapes or otherwise transform the - /// payload (JSON `\"` / `\\`, future HTML entity refs) override - /// to walk their per-chunk escape map. - /// - /// Returns `None` when the range has no source pre-image — out - /// of bounds, lands inside an escape pair, or the modality - /// doesn't have a meaningful `usize` value-range concept (image - /// bounding boxes, audio time spans, tabular cell coords). - /// Non-text impls leave the default `None`. - fn lift_chunk(&self, _chunk: &Chunk, _value_range: Range) -> Option { - None - } -} diff --git a/crates/nvisy-codec/src/core/loader.rs b/crates/nvisy-codec/src/core/loader.rs deleted file mode 100644 index f0dc51200..000000000 --- a/crates/nvisy-codec/src/core/loader.rs +++ /dev/null @@ -1,129 +0,0 @@ -//! Decoding raw bytes into a typed handle. -//! -//! - [`Loader`] — per-modality decoder format implementations -//! write. Returns a concrete handler that implements -//! [`Handler`]. -//! - [`ErasedLoader`] — object-safe loader surface the -//! [`CodecRegistry`] stores behind `Arc`. Adapts a per-modality -//! `Loader` into a uniform `decode` signature that returns -//! [`UntypedDocumentHandle`]. -//! - [`erase`] — bridge from typed `Loader` to -//! `Arc` every shipped format uses when -//! populating [`Format::loader`]. -//! -//! The handler's own [`Handler::format`] supplies the -//! [`FormatId`] inside [`ErasedLoader::decode`]; [`erase`] only -//! erases `M`. -//! -//! [`Handler`]: super::Handler -//! [`Handler::format`]: super::Handler::format -//! [`CodecRegistry`]: super::CodecRegistry -//! [`UntypedDocumentHandle`]: crate::document::UntypedDocumentHandle -//! [`Format::loader`]: super::Format::loader -//! [`FormatId`]: super::FormatId - -use std::marker::PhantomData; -use std::sync::Arc; - -use nvisy_core::Error; -use nvisy_core::modality::Modality; - -use super::Handler; -use crate::content::ContentData; -use crate::document::{DocumentHandle, UntypedDocumentHandle}; - -/// Per-modality format loader. -/// -/// A loader validates and parses raw content for modality `M`, -/// producing a handler that implements [`Handler`]. Loaders are -/// the leaves the [`CodecRegistry`] composes — registering a -/// format means registering its loader. -/// -/// # Implementing a third-party format -/// -/// 1. Implement [`Handler`] for the per-format handler type that -/// owns the parsed in-memory representation. -/// 2. Implement `Loader` for a stateless type whose [`decode`] -/// validates raw [`ContentData`] and returns the handler. -/// 3. Build a [`Format`] with [`Format::new`], chain -/// [`with_extensions`] / [`with_content_types`] as needed, and -/// register it on a [`CodecRegistry`] via -/// [`CodecRegistry::add_format`]. -/// -/// The registry erases `M` internally; third-party callers never -/// touch the object-safe loader surface. -/// -/// [`Handler`]: super::Handler -/// [`CodecRegistry`]: super::CodecRegistry -/// [`CodecRegistry::add_format`]: super::CodecRegistry::add_format -/// [`Format`]: super::Format -/// [`Format::new`]: super::Format::new -/// [`with_extensions`]: super::Format::with_extensions -/// [`with_content_types`]: super::Format::with_content_types -/// [`decode`]: Loader::decode -#[async_trait::async_trait] -pub trait Loader: Send + Sync + 'static { - /// The handler type this loader produces. - type Handler: Handler; - - /// Validate and parse the content, returning the loaded handler. - async fn decode(&self, content: ContentData) -> Result; -} - -/// Object-safe loader the [`CodecRegistry`] holds behind `Arc`. -/// Adapts a per-modality [`Loader`] into a uniform `decode` -/// signature returning an [`UntypedDocumentHandle`]. -/// -/// Crate-internal: every consumer goes through [`Format::decode`] -/// or [`CodecRegistry::decode`] instead of touching -/// this trait directly. -/// -/// [`CodecRegistry`]: super::CodecRegistry -/// [`Format::decode`]: super::Format::decode -/// [`CodecRegistry::decode`]: super::CodecRegistry::decode -#[async_trait::async_trait] -pub(crate) trait ErasedLoader: Send + Sync + 'static { - /// Decode raw content into an [`UntypedDocumentHandle`]. - async fn decode(&self, content: ContentData) -> Result; -} - -/// Erase a typed [`Loader`] into an `Arc` the -/// [`CodecRegistry`] can store. Called only by [`Format::new`] — -/// not part of the public API. -/// -/// [`CodecRegistry`]: super::CodecRegistry -/// [`Format::new`]: super::Format::new -pub(crate) fn erase(loader: L) -> Arc -where - M: Modality, - L: Loader, - DocumentHandle: Into, -{ - Arc::new(LoaderAdapter { - loader, - _phantom: PhantomData, - }) -} - -/// Private wrapper that holds a typed [`Loader`] and implements -/// the object-safe [`ErasedLoader`] surface. Constructed only via -/// [`erase`]; not part of the public API. -struct LoaderAdapter> { - loader: L, - _phantom: PhantomData M>, -} - -#[async_trait::async_trait] -impl ErasedLoader for LoaderAdapter -where - M: Modality, - L: Loader, - DocumentHandle: Into, -{ - async fn decode(&self, content: ContentData) -> Result { - let handler = self.loader.decode(content).await?; - let format = handler.format(); - let handle: Box> = Box::new(handler); - Ok(DocumentHandle::new(format, handle).into()) - } -} diff --git a/crates/nvisy-codec/src/core/mod.rs b/crates/nvisy-codec/src/core/mod.rs deleted file mode 100644 index f013507d1..000000000 --- a/crates/nvisy-codec/src/core/mod.rs +++ /dev/null @@ -1,27 +0,0 @@ -//! Codec core contracts, grouped by concern: -//! -//! - `format` — *what kind of thing a codec is*. [`FormatId`], -//! [`Format`] descriptor. -//! - `handler` — *what a handler exposes*. [`Handler`] -//! (per-modality capability surface — identify, encode, stream, -//! read, redact, lift), [`Chunk`] payload. -//! - `loader` — *how raw bytes become a handle*. [`Loader`] -//! (per-modality decoder). The registry-side erasure machinery -//! (`ErasedLoader` trait, `erase` helper) is crate-internal and -//! wired through [`Format::new`] / [`Format::decode`]. -//! - `registry` — *the lookup engine*. [`CodecRegistry`] indexes -//! [`Format`]s by id, extension, and content type, and decodes -//! bytes through the matching loader. -//! -//! Concrete format implementations live in `crate::handler::*`. - -mod format; -mod handler; -mod loader; -mod registry; - -pub use self::format::{Format, FormatId}; -pub use self::handler::{Chunk, Handler}; -pub use self::loader::Loader; -pub(crate) use self::loader::{ErasedLoader, erase}; -pub use self::registry::CodecRegistry; diff --git a/crates/nvisy-codec/src/core/registry.rs b/crates/nvisy-codec/src/core/registry.rs deleted file mode 100644 index 9f65d12a6..000000000 --- a/crates/nvisy-codec/src/core/registry.rs +++ /dev/null @@ -1,173 +0,0 @@ -//! [`CodecRegistry`]: resolves an extension or content type to a -//! registered [`Format`] and decodes content through its loader. -//! -//! Downstream crates register their own formats by calling -//! [`CodecRegistry::add_format`] — there is no central enum to -//! extend. - -use std::collections::HashMap; - -use nvisy_core::Error; - -use super::{Format, FormatId}; -use crate::content::ContentData; -use crate::document::UntypedDocumentHandle; - -/// Codec registry — owns the set of registered [`Format`]s and -/// resolves them by extension, content type, or id. -#[derive(Debug, Default)] -pub struct CodecRegistry { - formats: Vec, - by_id: HashMap, - by_extension: HashMap, - by_content_type: HashMap, -} - -impl CodecRegistry { - /// Empty registry. Use [`with_format`] / [`add_format`] to add - /// custom formats, or [`with_builtin`] to start from a pre- - /// populated set of every built-in format the active feature - /// set enables. - /// - /// [`with_format`]: Self::with_format - /// [`add_format`]: Self::add_format - /// [`with_builtin`]: Self::with_builtin - pub fn new() -> Self { - Self::default() - } - - /// Pre-populated registry containing every built-in format the - /// active feature set enables (TXT, JSON, HTML, CSV, PNG, JPEG, - /// WAV, PDF, …). Equivalent to [`new`] followed by registering - /// each built-in format. - /// - /// Add custom formats afterward with [`with_format`] (chainable) - /// or [`add_format`] (in-place); they take precedence on - /// extension / content-type collisions (last registration wins). - /// - /// [`new`]: Self::new - /// [`with_format`]: Self::with_format - /// [`add_format`]: Self::add_format - pub fn with_builtin() -> Self { - let mut registry = Self::new(); - #[cfg(feature = "txt")] - registry.add_format(crate::handler::text::txt_format()); - #[cfg(feature = "json")] - registry.add_format(crate::handler::text::json_format()); - #[cfg(feature = "markdown")] - registry.add_format(crate::handler::text::markdown_format()); - #[cfg(feature = "html")] - registry.add_format(crate::handler::text::html_format()); - #[cfg(feature = "csv")] - registry.add_format(crate::handler::tabular::csv_format()); - #[cfg(feature = "xlsx")] - registry.add_format(crate::handler::tabular::xlsx_format()); - #[cfg(feature = "png")] - registry.add_format(crate::handler::image::png_format()); - #[cfg(feature = "jpeg")] - registry.add_format(crate::handler::image::jpeg_format()); - #[cfg(feature = "tiff")] - registry.add_format(crate::handler::image::tiff_format()); - #[cfg(feature = "wav")] - registry.add_format(crate::handler::audio::wav_format()); - #[cfg(feature = "mp3")] - registry.add_format(crate::handler::audio::mp3_format()); - #[cfg(feature = "pdf")] - registry.add_format(crate::handler::rich::pdf_format()); - #[cfg(feature = "docx")] - registry.add_format(crate::handler::rich::docx_format()); - registry - } - - /// Register a [`Format`] and return `self` for chained builder - /// calls. Delegates to [`add_format`] for the indexing body. - /// - /// # Panics - /// - /// Panics if the format's id is already registered. Extensions - /// and content types that conflict with an existing format are - /// overwritten (last registration wins) — register custom - /// formats *after* [`with_builtin`] if you want them to take - /// precedence. - /// - /// [`with_builtin`]: Self::with_builtin - /// [`add_format`]: Self::add_format - #[must_use] - pub fn with_format(mut self, format: Format) -> Self { - self.add_format(format); - self - } - - /// In-place equivalent of [`with_format`]. Useful with an - /// already-mut binding (e.g. inside a cfg-stanza in - /// [`with_builtin`]) where the `let registry = registry.with_format(...)` - /// dance is just noise. - /// - /// # Panics - /// - /// Same conditions as [`with_format`]. - /// - /// [`with_format`]: Self::with_format - /// [`with_builtin`]: Self::with_builtin - pub fn add_format(&mut self, format: Format) -> &mut Self { - assert!( - !self.by_id.contains_key(&format.id), - "format id already registered: {}", - format.id - ); - let index = self.formats.len(); - for ext in &format.extensions { - self.by_extension.insert(ext.to_ascii_lowercase(), index); - } - for ct in &format.content_types { - self.by_content_type.insert(ct.to_ascii_lowercase(), index); - } - self.by_id.insert(format.id.clone(), index); - self.formats.push(format); - self - } - - /// Look up a registered format by id. - pub fn by_id(&self, id: &FormatId) -> Option<&Format> { - self.by_id.get(id).map(|&i| &self.formats[i]) - } - - /// Look up a registered format by file extension - /// (case-insensitive, no leading dot). - pub fn by_extension(&self, ext: &str) -> Option<&Format> { - self.by_extension - .get(&ext.to_ascii_lowercase()) - .map(|&i| &self.formats[i]) - } - - /// Look up a registered format by MIME content type - /// (case-insensitive). - pub fn by_content_type(&self, mime: &str) -> Option<&Format> { - self.by_content_type - .get(&mime.to_ascii_lowercase()) - .map(|&i| &self.formats[i]) - } - - /// Iterate over every registered format in registration order. - pub fn iter(&self) -> impl Iterator { - self.formats.iter() - } - - /// Decode raw content using the format resolved from the - /// extension hint. Accepts anything convertible into - /// [`ContentData`] — `&str`, `&[u8]`, `Vec`, `Bytes`, - /// `String`. - pub async fn decode( - &self, - content: impl Into, - extension: &str, - ) -> Result { - let format = self.by_extension(extension).ok_or_else(|| { - Error::validation( - format!("no codec registered for extension `{extension}`"), - "nvisy_codec::handler::registry::decode", - ) - })?; - format.loader.decode(content.into()).await - } -} diff --git a/crates/nvisy-codec/src/document/audio.rs b/crates/nvisy-codec/src/document/audio.rs deleted file mode 100644 index 14b86c591..000000000 --- a/crates/nvisy-codec/src/document/audio.rs +++ /dev/null @@ -1,36 +0,0 @@ -//! [`DocumentHandle