From 5f9081cf7eb46e7a47ba5acc67b0705693890a71 Mon Sep 17 00:00:00 2001 From: Tony Arcieri Date: Tue, 12 May 2026 18:21:09 -0600 Subject: [PATCH] bitref: bit-level reference types Adds a crate providing a `&BitSlice`/`&mut BitSlice` type which is constructable from `&[u8]` but provides slicing at the granularity of individual bits. The name of the crate is a play on `bitvec`, which provides a similar type. However, the implementation in this crate is significantly simpler with a much smaller code surface and minimal use of `unsafe` code. The implementation is a generalization of RustCrypto/formats#2300 which sought to implement a similar data structure as a reference type for representing ASN.1 BIT STRINGs. However, using this approach was deferred because the implementation relies on a conversion which is sound under Tree Borrows (as verified by Miri) but unsound under Stacked Borrows as it loses provenance. See rust-lang/unsafe-code-guidelines#134 There are several places such a data structure is potentially useful for RustCrypto projects. Beyond the previously mentioned ASN.1 BIT STRING use case, being able to iterate over bits is useful in many numerical algorithms with applications in cryptography, notably in `crypto-bigint` and for elliptic curves. Elliptic curve scalar multiplication is generally implemented as a loop over the bits of a scalar. Having an iterator type for this purpose avoids problems relating to the endianness of how scalars are serialized when implementing generic scalar multiplication algorithms, e.g. wNAF (see RustCrypto/group#12). Given the current open soundness story, I'm not rushing to use this in `crypto-bigint` until that changes. Where we could use it today though is as an optional dependency to `der`, where it can act as an ASN.1 BIT STRING type, but implement `ToOwned` producing a `der::asn1::BitString` (which, to make `ToOwned` work, needs to impl `Borrow`). 
This would make it optionally possible to use `Cow` for copy-on-write BIT STRINGs today with `BitSlice` as the borrowed form, but leaving the preferred default data structure for that purpose as `der::asn1::BitStringRef`, which is a lifetime-parameterized struct that avoids the open soundness questions around `BitSlice`. From there we can see what develops around the soundness story and SB/TB discrepancy, and beyond that new Rust features like custom DSTs which may make expressing structures like this less of a hack. --- .github/workflows/bitref.yml | 135 +++++++++++ Cargo.lock | 407 ++++++++++++++++++++++++++++++++ Cargo.toml | 1 + bitref/CHANGELOG.md | 5 + bitref/Cargo.toml | 20 ++ bitref/LICENSE-APACHE | 202 ++++++++++++++++ bitref/LICENSE-MIT | 25 ++ bitref/README.md | 84 +++++++ bitref/src/errors.rs | 15 ++ bitref/src/iter.rs | 68 ++++++ bitref/src/lib.rs | 436 +++++++++++++++++++++++++++++++++++ bitref/src/tagged_len.rs | 349 ++++++++++++++++++++++++++++ bitref/tests/bitslice.rs | 178 ++++++++++++++ 13 files changed, 1925 insertions(+) create mode 100644 .github/workflows/bitref.yml create mode 100644 bitref/CHANGELOG.md create mode 100644 bitref/Cargo.toml create mode 100644 bitref/LICENSE-APACHE create mode 100644 bitref/LICENSE-MIT create mode 100644 bitref/README.md create mode 100644 bitref/src/errors.rs create mode 100644 bitref/src/iter.rs create mode 100644 bitref/src/lib.rs create mode 100644 bitref/src/tagged_len.rs create mode 100644 bitref/tests/bitslice.rs diff --git a/.github/workflows/bitref.yml b/.github/workflows/bitref.yml new file mode 100644 index 00000000..d768122f --- /dev/null +++ b/.github/workflows/bitref.yml @@ -0,0 +1,135 @@ +name: bitref + +on: + pull_request: + paths: + - ".github/workflows/bitref.yml" + - "bitref/**" + - "Cargo.*" + push: + branches: master + +permissions: + contents: read + +defaults: + run: + working-directory: bitref + +env: + CARGO_INCREMENTAL: 0 + RUSTFLAGS: "-Dwarnings" + +# Cancels CI jobs when new commits 
are pushed to a PR branch +concurrency: + group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} + cancel-in-progress: true + +jobs: + build-no-std: + strategy: + matrix: + target: + - riscv32i-unknown-none-elf + - thumbv7em-none-eabi + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v6 + - uses: RustCrypto/actions/cargo-cache@master + - uses: dtolnay/rust-toolchain@master + with: + toolchain: stable + targets: ${{ matrix.target }} + - uses: RustCrypto/actions/cargo-hack-install@master + - run: cargo build --target ${{ matrix.target }} + + minimal-versions: + uses: RustCrypto/actions/.github/workflows/minimal-versions.yml@master + with: + working-directory: ${{ github.workflow }} + + test: + strategy: + matrix: + include: + - rust: 1.85.0 # MSRV + - rust: stable + runs-on: "ubuntu-latest" + steps: + - uses: actions/checkout@v6 + - uses: RustCrypto/actions/cargo-cache@master + - uses: dtolnay/rust-toolchain@master + with: + toolchain: ${{ matrix.rust }} + - run: cargo test + + # Test using `cargo careful` + test-careful: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v6 + - uses: dtolnay/rust-toolchain@nightly + - run: cargo install cargo-careful + - run: cargo careful test + + # Cross-compiled tests + test-cross: + strategy: + matrix: + include: + # ARM32 + - target: armv7-unknown-linux-gnueabi + rust: 1.85.0 # MSRV + - target: armv7-unknown-linux-gnueabi + rust: stable + # PPC32 + - target: powerpc-unknown-linux-gnu + rust: 1.85.0 # MSRV + - target: powerpc-unknown-linux-gnu + rust: stable + # RISCV64 + - target: riscv64gc-unknown-linux-gnu + rust: stable + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v6 + - uses: RustCrypto/actions/cargo-cache@master + - uses: dtolnay/rust-toolchain@master + with: + toolchain: ${{ matrix.rust }} + targets: ${{ matrix.target }} + - uses: RustCrypto/actions/cross-install@master + - run: cross test --target ${{ matrix.target }} + + # Test using `cargo miri` + test-miri: + 
runs-on: ubuntu-latest + env: + MIRIFLAGS: "-Zmiri-tree-borrows -Zmiri-strict-provenance -Zmiri-symbolic-alignment-check -Zmiri-backtrace=full" + strategy: + matrix: + target: + - armv7-unknown-linux-gnueabi + - powerpc-unknown-linux-gnu + - riscv64gc-unknown-linux-gnu + - s390x-unknown-linux-gnu + - x86_64-unknown-linux-gnu + steps: + - uses: actions/checkout@v6 + - uses: dtolnay/rust-toolchain@nightly + - run: rustup component add miri && cargo miri setup + - run: cargo miri test --target ${{ matrix.target }} + + # Test WASM using `wasmtime` + test-wasm: + runs-on: ubuntu-latest + env: + CARGO_TARGET_WASM32_WASIP1_RUNNER: "wasmtime" + steps: + - uses: actions/checkout@v6 + - uses: bytecodealliance/actions/wasmtime/setup@v1 + - uses: dtolnay/rust-toolchain@master + with: + toolchain: stable + targets: wasm32-wasip1 + - run: cargo test --target wasm32-wasip1 diff --git a/Cargo.lock b/Cargo.lock index 50508cbd..ac2f5f78 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2,6 +2,27 @@ # It is not intended for manual editing. 
version = 4 +[[package]] +name = "aho-corasick" +version = "1.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ddd31a130427c27518df266943a5308ed92d4b226cc639f5a8f1002816174301" +dependencies = [ + "memchr", +] + +[[package]] +name = "anes" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4b46cbb362ab8752921c97e041f5e366ee6297bd428a31275b9fcf1e380f7299" + +[[package]] +name = "anstyle" +version = "1.0.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "940b3a0ca603d1eade50a4846a2afffd5ef57a9feac2c0e2ec2e14f9ead76000" + [[package]] name = "anyhow" version = "1.0.102" @@ -35,6 +56,14 @@ version = "2.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "843867be96c8daad0d758b57df9392b6d8d271134fce549de6ce169ff98a92af" +[[package]] +name = "bitref" +version = "0.0.0" +dependencies = [ + "criterion", + "proptest", +] + [[package]] name = "blobby" version = "0.4.0" @@ -55,12 +84,76 @@ dependencies = [ "hybrid-array", ] +[[package]] +name = "bumpalo" +version = "3.20.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5d20789868f4b01b2f2caec9f5c4e0213b41e3e5702a50157d699ae31ced2fcb" + +[[package]] +name = "cast" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5" + [[package]] name = "cfg-if" version = "1.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801" +[[package]] +name = "ciborium" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "42e69ffd6f0917f5c029256a24d0161db17cea3997d185db0d35926308770f0e" +dependencies = [ + "ciborium-io", + "ciborium-ll", + "serde", +] + +[[package]] +name = "ciborium-io" +version = "0.2.2" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "05afea1e0a06c9be33d539b876f1ce3692f4afea2cb41f740e7743225ed1c757" + +[[package]] +name = "ciborium-ll" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "57663b653d948a338bfb3eeba9bb2fd5fcfaecb9e199e87e1eda4d9e8b240fd9" +dependencies = [ + "ciborium-io", + "half", +] + +[[package]] +name = "clap" +version = "4.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1ddb117e43bbf7dacf0a4190fef4d345b9bad68dfc649cb349e7d17d28428e51" +dependencies = [ + "clap_builder", +] + +[[package]] +name = "clap_builder" +version = "4.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "714a53001bf66416adb0e2ef5ac857140e7dc3a0c48fb28b2f10762fc4b5069f" +dependencies = [ + "anstyle", + "clap_lex", +] + +[[package]] +name = "clap_lex" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c8d4a3bb8b1e0c1050499d1815f5ab16d04f0959b233085fb31653fbfc9d98f9" + [[package]] name = "cmov" version = "0.5.3" @@ -88,6 +181,70 @@ dependencies = [ "libc", ] +[[package]] +name = "criterion" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e1c047a62b0cc3e145fa84415a3191f628e980b194c2755aa12300a4e6cbd928" +dependencies = [ + "anes", + "cast", + "ciborium", + "clap", + "criterion-plot", + "itertools", + "num-traits", + "oorandom", + "plotters", + "rayon", + "regex", + "serde", + "serde_json", + "tinytemplate", + "walkdir", +] + +[[package]] +name = "criterion-plot" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9b1bcc0dc7dfae599d84ad0b1a55f80cde8af3725da8313b528da95ef783e338" +dependencies = [ + "cast", + "itertools", +] + +[[package]] +name = "crossbeam-deque" +version = "0.8.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"9dd111b7b7f7d55b72c0a6ae361660ee5853c9af73f70c3c2ef6858b950e2e51" +dependencies = [ + "crossbeam-epoch", + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-epoch" +version = "0.9.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5b82ac4a3c2ca9c3460964f020e1402edd5753411d7737aa39c3714ad1b5420e" +dependencies = [ + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-utils" +version = "0.8.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28" + +[[package]] +name = "crunchy" +version = "0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "460fbee9c2c2f33933d720630a6a0bac33ba7053db5344fac858d4b8952d77d5" + [[package]] name = "ctutils" version = "0.4.2" @@ -104,6 +261,12 @@ dependencies = [ "hybrid-array", ] +[[package]] +name = "either" +version = "1.15.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719" + [[package]] name = "equivalent" version = "1.0.2" @@ -138,6 +301,30 @@ version = "0.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d9c4f5dac5e15c24eb999c26181a6ca40b39fe946cbe4c263c7209467bc83af2" +[[package]] +name = "futures-core" +version = "0.3.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7e3450815272ef58cec6d564423f6e755e25379b217b0bc688e295ba24df6b1d" + +[[package]] +name = "futures-task" +version = "0.3.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "037711b3d59c33004d3856fbdc83b99d4ff37a24768fa1be9ce3538a1cde4393" + +[[package]] +name = "futures-util" +version = "0.3.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "389ca41296e6190b48053de0321d02a77f32f8a5d2461dd38762c0593805c6d6" +dependencies = [ + "futures-core", + "futures-task", + "pin-project-lite", + 
"slab", +] + [[package]] name = "getrandom" version = "0.3.4" @@ -163,6 +350,17 @@ dependencies = [ "wasip3", ] +[[package]] +name = "half" +version = "2.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6ea2d84b969582b4b1864a92dc5d27cd2b77b622a8d79306834f1be5ba20d84b" +dependencies = [ + "cfg-if", + "crunchy", + "zerocopy", +] + [[package]] name = "hashbrown" version = "0.15.5" @@ -235,12 +433,33 @@ dependencies = [ "hybrid-array", ] +[[package]] +name = "itertools" +version = "0.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "413ee7dfc52ee1a4949ceeb7dbc8a33f2d6c088194d9f922fb8318faf1f01186" +dependencies = [ + "either", +] + [[package]] name = "itoa" version = "1.0.17" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "92ecc6618181def0457392ccd0ee51198e065e016d1d527a7ac1b6dc7c1f09d2" +[[package]] +name = "js-sys" +version = "0.3.98" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "67df7112613f8bfd9150013a0314e196f4800d3201ae742489d999db2f979f08" +dependencies = [ + "cfg-if", + "futures-util", + "once_cell", + "wasm-bindgen", +] + [[package]] name = "keccak" version = "0.2.0" @@ -296,6 +515,46 @@ version = "1.21.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d" +[[package]] +name = "oorandom" +version = "11.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d6790f58c7ff633d8771f42965289203411a5e5c68388703c06e14f24770b41e" + +[[package]] +name = "pin-project-lite" +version = "0.2.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a89322df9ebe1c1578d689c92318e070967d1042b512afbe49518723f4e6d5cd" + +[[package]] +name = "plotters" +version = "0.3.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"5aeb6f403d7a4911efb1e33402027fc44f29b5bf6def3effcc22d7bb75f2b747" +dependencies = [ + "num-traits", + "plotters-backend", + "plotters-svg", + "wasm-bindgen", + "web-sys", +] + +[[package]] +name = "plotters-backend" +version = "0.3.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "df42e13c12958a16b3f7f4386b9ab1f3e7933914ecea48da7139435263a4172a" + +[[package]] +name = "plotters-svg" +version = "0.3.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "51bae2ac328883f7acdfea3d66a7c35751187f870bc81f94563733a154d7a670" +dependencies = [ + "plotters-backend", +] + [[package]] name = "ppv-lite86" version = "0.2.21" @@ -402,6 +661,49 @@ dependencies = [ "rand_core", ] +[[package]] +name = "rayon" +version = "1.12.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fb39b166781f92d482534ef4b4b1b2568f42613b53e5b6c160e24cfbfa30926d" +dependencies = [ + "either", + "rayon-core", +] + +[[package]] +name = "rayon-core" +version = "1.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "22e18b0f0062d30d4230b2e85ff77fdfe4326feb054b9783a3460d8435c8ab91" +dependencies = [ + "crossbeam-deque", + "crossbeam-utils", +] + +[[package]] +name = "regex" +version = "1.12.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e10754a14b9137dd7b1e3e5b0493cc9171fdd105e0ab477f51b72e7f3ac0e276" +dependencies = [ + "aho-corasick", + "memchr", + "regex-automata", + "regex-syntax", +] + +[[package]] +name = "regex-automata" +version = "0.4.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6e1dd4122fc1595e8162618945476892eefca7b88c52820e74af6262213cae8f" +dependencies = [ + "aho-corasick", + "memchr", + "regex-syntax", +] + [[package]] name = "regex-syntax" version = "0.8.9" @@ -421,6 +723,12 @@ dependencies = [ "windows-sys", ] +[[package]] +name = "rustversion" +version = "1.0.22" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "b39cdef0fa800fc44525c84ccb54a029961a8215f9619753635a9c0d2538d46d" + [[package]] name = "rusty-fork" version = "0.3.1" @@ -433,6 +741,15 @@ dependencies = [ "wait-timeout", ] +[[package]] +name = "same-file" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "93fc1dc3aaa9bfed95e02e6eadabb4baf7e3078b0bd1b4d7b6b0b68378900502" +dependencies = [ + "winapi-util", +] + [[package]] name = "semver" version = "1.0.27" @@ -482,6 +799,12 @@ dependencies = [ "zmij", ] +[[package]] +name = "slab" +version = "0.4.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0c790de23124f9ab44544d7ac05d60440adc586479ce501c1d6d7da3cd8c9cf5" + [[package]] name = "sponge-cursor" version = "0.1.0" @@ -520,6 +843,16 @@ dependencies = [ "windows-sys", ] +[[package]] +name = "tinytemplate" +version = "1.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "be4d6b5f19ff7664e8c98d03e2139cb510db9b0a60b55f8e8709b689d939b6bc" +dependencies = [ + "serde", + "serde_json", +] + [[package]] name = "typenum" version = "1.20.0" @@ -553,6 +886,16 @@ dependencies = [ "libc", ] +[[package]] +name = "walkdir" +version = "2.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "29790946404f91d9c5d06f9874efddea1dc06c5efe94541a7d6863108e3a5e4b" +dependencies = [ + "same-file", + "winapi-util", +] + [[package]] name = "wasip2" version = "1.0.1+wasi-0.2.4" @@ -571,6 +914,51 @@ dependencies = [ "wit-bindgen 0.51.0", ] +[[package]] +name = "wasm-bindgen" +version = "0.2.121" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "49ace1d07c165b0864824eee619580c4689389afa9dc9ed3a4c75040d82e6790" +dependencies = [ + "cfg-if", + "once_cell", + "rustversion", + "wasm-bindgen-macro", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-macro" +version = "0.2.121" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e68e6f4afd367a562002c05637acb8578ff2dea1943df76afb9e83d177c8578" +dependencies = [ + "quote", + "wasm-bindgen-macro-support", +] + +[[package]] +name = "wasm-bindgen-macro-support" +version = "0.2.121" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d95a9ec35c64b2a7cb35d3fead40c4238d0940c86d107136999567a4703259f2" +dependencies = [ + "bumpalo", + "proc-macro2", + "quote", + "syn", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-shared" +version = "0.2.121" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c4e0100b01e9f0d03189a92b96772a1fb998639d981193d7dbab487302513441" +dependencies = [ + "unicode-ident", +] + [[package]] name = "wasm-encoder" version = "0.244.0" @@ -605,6 +993,25 @@ dependencies = [ "semver", ] +[[package]] +name = "web-sys" +version = "0.3.98" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4b572dff8bcf38bad0fa19729c89bb5748b2b9b1d8be70cf90df697e3a8f32aa" +dependencies = [ + "js-sys", + "wasm-bindgen", +] + +[[package]] +name = "winapi-util" +version = "0.1.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c2a7b1c03c876122aa43f3020e6c3c3ee5c05081c9a00739faf7503aeba10d22" +dependencies = [ + "windows-sys", +] + [[package]] name = "windows-link" version = "0.2.1" diff --git a/Cargo.toml b/Cargo.toml index a674e416..7492adc7 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,7 @@ [workspace] resolver = "3" members = [ + "bitref", "blobby", "block-buffer", "block-padding", diff --git a/bitref/CHANGELOG.md b/bitref/CHANGELOG.md new file mode 100644 index 00000000..d6637e04 --- /dev/null +++ b/bitref/CHANGELOG.md @@ -0,0 +1,5 @@ +# Changelog +All notable changes to this project will be documented in this file. 
+ +The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), +and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). diff --git a/bitref/Cargo.toml b/bitref/Cargo.toml new file mode 100644 index 00000000..df08c009 --- /dev/null +++ b/bitref/Cargo.toml @@ -0,0 +1,20 @@ +[package] +name = "bitref" +description = "Reference types for bitstrings with bit-level slicing support" +version = "0.0.0" +authors = ["RustCrypto Developers"] +license = "Apache-2.0 OR MIT" +homepage = "https://github.com/RustCrypto/utils/pull/1481" +repository = "https://github.com/RustCrypto/utils" +categories = ["data-structures"] +keywords = ["bits"] +readme = "README.md" +edition = "2024" +rust-version = "1.85" + +[lints] +workspace = true + +[target.'cfg(any(unix, windows))'.dev-dependencies] +criterion = { version = "0.7", features = ["html_reports"] } +proptest = "1.11" diff --git a/bitref/LICENSE-APACHE b/bitref/LICENSE-APACHE new file mode 100644 index 00000000..d6456956 --- /dev/null +++ b/bitref/LICENSE-APACHE @@ -0,0 +1,202 @@ + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. 
+ + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. 
For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. 
If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. 
You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. 
Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. 
+ + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/bitref/LICENSE-MIT b/bitref/LICENSE-MIT new file mode 100644 index 00000000..4697b3e7 --- /dev/null +++ b/bitref/LICENSE-MIT @@ -0,0 +1,25 @@ +Copyright (c) 2026 The RustCrypto Project Developers + +Permission is hereby granted, free of charge, to any +person obtaining a copy of this software and associated +documentation files (the "Software"), to deal in the +Software without restriction, including without +limitation the rights to use, copy, modify, merge, +publish, distribute, sublicense, and/or sell copies of +the Software, and to permit persons to whom the Software +is furnished to do so, subject to the following +conditions: + +The above copyright notice and this permission notice +shall be included in all copies or substantial portions +of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF +ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED +TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A +PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT +SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR +IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +DEALINGS IN THE SOFTWARE. 
diff --git a/bitref/README.md b/bitref/README.md new file mode 100644 index 00000000..a3f8230e --- /dev/null +++ b/bitref/README.md @@ -0,0 +1,84 @@ +# [RustCrypto]: Bit-Level Reference Types + +[![Crate][crate-image]][crate-link] +[![Docs][docs-image]][docs-link] +[![Build Status][build-image]][build-link] +![Apache 2.0/MIT Licensed][license-image] +![MSRV][msrv-image] +[![Project Chat][chat-image]][chat-link] + +Provides `&BitSlice`/`&mut BitSlice`, a fat pointer-sized reference type which can be initialized +from `&[u8]`/`&mut [u8]` and can be used to implement any reference-based patterns that are possible +with byte slices, e.g. `Borrow`, `Deref`, `Index`/`IndexMut`, and `ToOwned`. + +## About + +The `BitSlice` type in this crate was inspired by a similar type in the [`bitvec`] crate. However, the +implementation approach used in this crate minimizes use of `unsafe` code to just fat pointer +encoding/decoding and simple `repr(transparent)` casts to construct reference newtypes. + +Notably it performs no arithmetic on pointers whatsoever and works entirely within the domain +of slices, largely ensuring any bugs should result in panics rather than memory safety errors, +with a hypothetical caveat noted below. + +(NOTE: in the future, we may experiment with more usage of `unsafe` if the performance gains can +justify its use, but will ensure the resulting code is easily reasoned about) + +## Soundness + +This crate relies on operations which are not yet fully specified in the Rust memory model, and +while sound in all existing supported versions of the Rust compiler, may result in undefined +behavior in future versions. + +Thus this crate is considered EXPERIMENTAL, and while it's been written with the intent of future +use in cryptographic applications by minimizing use of `unsafe` and ensuring an otherwise simple and +minimal implementation, until this situation changes it should not be considered ready for +production use.
+ +CI checks the crate is sound under Miri with `-Zmiri-tree-borrows -Zmiri-strict-provenance` which +checks the code under the Tree Borrows model; however, it is known to fail under Stacked Borrows. +More information can be found in the SAFETY comments in the source code. + +Ensuring this crate will be fully sound in future versions of Rust will require upstream resolution +regarding the operational semantics and this discrepancy between SB/TB. For more information, see: + +<https://github.com/rust-lang/unsafe-code-guidelines/issues/134> + +## Minimum Supported Rust Version (MSRV) Policy + +MSRV increases are not considered breaking changes and can happen in patch releases. + +The crate MSRV accounts for all supported targets and crate feature combinations. + +## License + +Licensed under either of: + +* [Apache License, Version 2.0](http://www.apache.org/licenses/LICENSE-2.0) +* [MIT license](http://opensource.org/licenses/MIT) + +at your option. + +### Contribution + +Unless you explicitly state otherwise, any contribution intentionally submitted +for inclusion in the work by you, as defined in the Apache-2.0 license, shall be +dual licensed as above, without any additional terms or conditions.
+ +[//]: # (badges) + +[crate-image]: https://img.shields.io/crates/v/bitref.svg +[crate-link]: https://crates.io/crates/bitref +[docs-image]: https://docs.rs/bitref/badge.svg +[docs-link]: https://docs.rs/bitref/ +[license-image]: https://img.shields.io/badge/license-Apache2.0/MIT-blue.svg +[msrv-image]: https://img.shields.io/badge/rustc-1.85+-blue.svg +[build-image]: https://github.com/RustCrypto/utils/actions/workflows/bitref.yml/badge.svg +[build-link]: https://github.com/RustCrypto/utils/actions/workflows/bitref.yml +[chat-image]: https://img.shields.io/badge/zulip-join_chat-blue.svg +[chat-link]: https://rustcrypto.zulipchat.com/#narrow/stream/260052-utils + +[//]: # (links) + +[RustCrypto]: https://github.com/RustCrypto +[`bitvec`]: https://docs.rs/bitvec diff --git a/bitref/src/errors.rs b/bitref/src/errors.rs new file mode 100644 index 00000000..b7662e35 --- /dev/null +++ b/bitref/src/errors.rs @@ -0,0 +1,15 @@ +//! Error types. + +use core::fmt::{self, Display}; + +/// Index is out-of-bounds. +#[derive(Clone, Copy, Debug, Eq, PartialEq)] +pub struct IndexOutOfBounds; + +impl core::error::Error for IndexOutOfBounds {} + +impl Display for IndexOutOfBounds { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.write_str("index out of bounds") + } +} diff --git a/bitref/src/iter.rs b/bitref/src/iter.rs new file mode 100644 index 00000000..c281656c --- /dev/null +++ b/bitref/src/iter.rs @@ -0,0 +1,68 @@ +//! Iterator over bits. + +use crate::BitSlice; +use core::iter::{FusedIterator, Iterator}; + +/// Iterator over the bits of a [`BitSlice`]. +#[derive(Clone, Debug)] +pub struct Iter<'a> { + slice: &'a BitSlice, +} + +impl<'a> Iter<'a> { + /// Create a new iterator over a `BitSlice`. 
+ #[must_use] + pub fn new(slice: &'a BitSlice) -> Self { + Self { slice } + } +} + +impl Iterator for Iter<'_> { + type Item = bool; + + fn next(&mut self) -> Option { + let (bit, rest) = self.slice.split_first()?; + self.slice = rest; + Some(bit) + } + + fn size_hint(&self) -> (usize, Option) { + let len = self.slice.len(); + (len, Some(len)) + } + + fn count(self) -> usize { + self.slice.len() + } + + fn last(self) -> Option { + self.slice.last() + } + + fn nth(&mut self, n: usize) -> Option { + self.slice = self.slice.get_slice(n..self.slice.len()).unwrap_or(BitSlice::EMPTY); + self.next() + } +} + +impl DoubleEndedIterator for Iter<'_> { + fn next_back(&mut self) -> Option { + let (bit, rest) = self.slice.split_last()?; + self.slice = rest; + Some(bit) + } + + fn nth_back(&mut self, n: usize) -> Option { + let new_end = self.slice.len().saturating_sub(n); // clamp: out-of-range `n` exhausts + self.slice = self.slice.get_slice(0..new_end).ok()?; + self.next_back() + } +} + +impl ExactSizeIterator for Iter<'_> { + fn len(&self) -> usize { + self.slice.len() + } +} + +impl FusedIterator for Iter<'_> {} diff --git a/bitref/src/lib.rs b/bitref/src/lib.rs new file mode 100644 index 00000000..2e8f080e --- /dev/null +++ b/bitref/src/lib.rs @@ -0,0 +1,436 @@ +#![no_std] +#![doc = include_str!("../README.md")] +#![doc( + html_logo_url = "https://raw.githubusercontent.com/RustCrypto/meta/master/logo.svg", + html_favicon_url = "https://raw.githubusercontent.com/RustCrypto/meta/master/logo.svg" +)] + +mod errors; +mod iter; +mod tagged_len; + +pub use errors::IndexOutOfBounds; +pub use iter::Iter; + +use core::{ + fmt::{self, Debug}, + iter::IntoIterator, + ops::{Index, IndexMut, Range, RangeFrom, RangeFull, RangeTo}, + panic::RefUnwindSafe, + ptr, slice, +}; +use tagged_len::TaggedLen; + +/// Slice of bits: similar to `&[u8]`, but with bit-level granularity.
+#[repr(transparent)] +pub struct BitSlice { + /// Fat pointer type which carries the original pointer to a `&[u8]` and its length, but also + /// carries additional bits for encoding the sub-byte positions of the beginning and ending of + /// the bit slice. + inner: [Inner], +} + +/// Inaccessible placeholder ZST which is sound to construct slices of in any length (since ZST +/// slices occupy no memory regardless of their length). +type Inner = (); + +impl BitSlice { + /// An empty bit slice, the equivalent of `&[]`. + pub const EMPTY: &Self = Self::new(&[]); + + /// Create a new immutable bit slice from an immutable byte slice. + #[must_use] + pub const fn new(bytes: &[u8]) -> &Self { + Self::new_with_offsets(bytes, 0, 0) + } + + /// Create a new immutable bit slice backed by the given byte slice, with the given bit-level + /// offsets in to the first and last byte, which may be the same if `bytes` is 1-byte long. + #[inline] + const fn new_with_offsets(bytes: &[u8], head_offset: usize, tail_offset: usize) -> &Self { + let len = TaggedLen::new(bytes.len(), head_offset, tail_offset).encode(); + + // SAFETY: we are constructing a slice whose elements are `()` a.k.a. `Inner`, which is a + // zero-sized type (ZST). + // + // We can't actually read or write memory via this slice itself since it's a slice of ZSTs, + // which occupies no memory regardless of element count. + // + // Note that under Stacked Borrows, this loses the pointer's provenance, which doesn't + // become an issue until we try to reconstruct the original slice (see SAFETY comment on + // `as_raw_bytes` below). However, the provenance is preserved under Tree Borrows. + let slice = unsafe { slice::from_raw_parts::(bytes.as_ptr().cast(), len) }; + + // SAFETY: `Self` is a `repr(transparent)` newtype for `[()]` a.k.a. `[Inner]`, so the fat + // pointer metadata is preserved and the cast is valid. 
+ unsafe { &*(ptr::from_ref(slice) as *const Self) } + } + + /// Create a new mutable bit slice from a mutable byte slice. + #[must_use] + pub const fn new_mut(bytes: &mut [u8]) -> &mut Self { + Self::new_mut_with_offsets(bytes, 0, 0) + } + + /// Create a new mutable bit slice from a mutable byte slice. + #[must_use] + const fn new_mut_with_offsets( + bytes: &mut [u8], + head_offset: usize, + tail_offset: usize, + ) -> &mut Self { + let len = TaggedLen::new(bytes.len(), head_offset, tail_offset).encode(); + + // SAFETY: we are using the same approach as outlined in `new`, except constructing a + // mutable slice of a ZST which occupies no memory regardless of element count. + // + // The same caveats about soundness under Stacked Borrows vs Tree Borrows also hold. + let slice = unsafe { slice::from_raw_parts_mut::(bytes.as_mut_ptr().cast(), len) }; + + // SAFETY: `Self` is a `repr(transparent)` newtype for `[()]` a.k.a. `[Inner]`, so the fat + // pointer metadata is preserved and the cast is valid. + unsafe { &mut *(ptr::from_mut(slice) as *mut Self) } + } + + /// Raw access to the backing memory for this bit slice. + #[must_use] + const fn as_raw_bytes(&self) -> &[u8] { + let ptr = self.inner.as_ptr(); + let len = self.tagged_len().byte_len(); + + // SAFETY: `len` is the original length of the valid slice this bit slice was constructed + // from, and the lifetime of `ptr` is tied to the lifetime of `&self` which is in turn tied + // to the returned slice's lifetime. + // + // However, this particular conversion is not yet fully specified by the Rust memory model. 
+ // See: rust-lang/unsafe-code-guidelines#134 + // + // Notably, Stacked Borrows loses provenance of the original pointer when it's cast to + // `*const Inner`, so Miri considers this UB, e.g.: + // + // > error: Undefined Behavior: trying to retag from <177556> for SharedReadOnly permission + // > at alloc64685[0x0], but that tag does not exist in the borrow stack for this location + // + // However, Tree Borrows retains the provenance and accepts this code under Miri, i.e. with + // MIRIFLAGS="-Zmiri-tree-borrows". + // + // While reconstructing the original slice using its original length and a pointer cast is + // sound on all existing versions of the Rust compiler, this doesn't necessarily hold for + // future versions of the compiler and is still awaiting a resolution of the discrepancy + // between Stacked Borrows and Tree Borrows. The possibility remains that this may be UB + // in future versions of the Rust compiler. + unsafe { slice::from_raw_parts(ptr.cast(), len) } + } + + /// Raw mutable access to the backing memory for this bit slice. + #[must_use] + const fn as_mut_raw_bytes(&mut self) -> &mut [u8] { + let ptr = self.inner.as_mut_ptr(); + let len = self.tagged_len().byte_len(); + + // SAFETY: we are using the same approach as outlined in `as_raw_bytes`, with the same + // caveats. `len` is the original length of the valid slice this bit slice was constructed + // from, and the lifetime of `ptr` is tied to the lifetime of `&mut self` which is in turn + // tied to the returned slice's lifetime. + unsafe { slice::from_raw_parts_mut(ptr.cast(), len) } + } + + /// Decode the [`TaggedLen`] for this bit slice. + const fn tagged_len(&self) -> TaggedLen { + TaggedLen::decode(self.inner.len()) + } + + /// Get the length of this bit slice in bits. + #[must_use] + pub const fn len(&self) -> usize { + self.tagged_len().bit_len() + } + + /// Is this bit slice empty? 
+ #[must_use] + pub const fn is_empty(&self) -> bool { + self.len() == 0 + } + + /// Get an iterator over the bits in this slice. + #[must_use] + pub fn iter(&self) -> Iter<'_> { + Iter::new(self) + } + + /// Return the first bit in the bit slice, or `None` if it's empty. + #[must_use] + pub const fn first(&self) -> Option { + match self.get_bit(0) { + Ok(bit) => Some(bit), + Err(_) => None, + } + } + + /// Return the last bit in the bit slice, or `None` if it's empty. + #[must_use] + pub const fn last(&self) -> Option { + if let Some(index) = self.len().checked_sub(1) { + if let Ok(bit) = self.get_bit(index) { + return Some(bit); + } + } + + None + } + + /// Get the bit at the given position within the bit slice. + /// + /// # Errors + /// Returns [`IndexOutOfBounds`] if `index` is past the number of bits in the slice. + pub const fn get_bit(&self, index: usize) -> Result { + match self.tagged_len().offset_and_mask(index) { + Ok((offset, mask)) => Ok(self.as_raw_bytes()[offset] & mask != 0), + Err(e) => Err(e), + } + } + + /// Get a subslice of this bit slice. + /// + /// # Errors + /// Returns [`IndexOutOfBounds`] if the given range is out-of-bounds. + pub const fn get_slice(&self, bits: Range) -> Result<&Self, IndexOutOfBounds> { + match self.tagged_len().slice(bits) { + Ok((len, offset)) => { + // Abusing `split_at` as a workaround for `const fn` slicing with dynamic positions + let tail = self.as_raw_bytes().split_at(offset).1; + let bytes = tail.split_at(len.byte_len()).0; + Ok(Self::new_with_offsets( + bytes, + len.head_offset(), + len.tail_offset(), + )) + } + Err(e) => Err(e), + } + } + + /// Get a mutable subslice of this bit slice. + /// + /// # Errors + /// Returns [`IndexOutOfBounds`] if the given range is out-of-bounds. 
+ pub const fn get_mut_slice( + &mut self, + bits: Range, + ) -> Result<&mut Self, IndexOutOfBounds> { + match self.tagged_len().slice(bits) { + Ok((len, offset)) => { + // Abusing `split_at` as a workaround for `const fn` slicing with dynamic positions + let tail = self.as_mut_raw_bytes().split_at_mut(offset).1; + let bytes = tail.split_at_mut(len.byte_len()).0; + Ok(Self::new_mut_with_offsets( + bytes, + len.head_offset(), + len.tail_offset(), + )) + } + Err(e) => Err(e), + } + } + + /// Set the bit at the given position within the bit slice to the given value. + /// + /// # Errors + /// Returns [`IndexOutOfBounds`] if `index` is past the number of bits in the slice. + pub const fn set_bit(&mut self, index: usize, value: bool) -> Result<(), IndexOutOfBounds> { + if let Err(e) = self.replace_bit(index, value) { + return Err(e); + } + + Ok(()) + } + + /// Set the bit at the given position within the bit slice to the given value, returning the + /// original value. + /// + /// # Errors + /// Returns [`IndexOutOfBounds`] if `index` is past the number of bits in the slice. + pub const fn replace_bit( + &mut self, + index: usize, + value: bool, + ) -> Result { + match self.tagged_len().offset_and_mask(index) { + Ok((offset, mask)) => { + let orig = self.as_raw_bytes()[offset] & mask != 0; + + if value { + self.as_mut_raw_bytes()[offset] |= mask; + } else { + self.as_mut_raw_bytes()[offset] &= !mask; + } + + Ok(orig) + } + Err(e) => Err(e), + } + } + + /// Return the first bit and the rest of the elements of the bit slice, or `None` if it's empty. + #[must_use] + pub const fn split_first(&self) -> Option<(bool, &Self)> { + match (self.first(), self.get_slice(1..self.len())) { + (Some(bit), Ok(rest)) => Some((bit, rest)), + _ => None, + } + } + + /// Return the last bit and the rest of the elements of the bit slice, or `None` if it's empty. 
+ #[must_use] + pub const fn split_last(&self) -> Option<(bool, &Self)> { + if let Some(index) = self.len().checked_sub(1) { + match (self.get_bit(index), self.get_slice(0..index)) { + (Ok(bit), Ok(rest)) => Some((bit, rest)), + _ => None, + } + } else { + None + } + } +} + +impl Debug for BitSlice { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "BitSlice([")?; + for bit in self { + write!(f, "{}", u8::from(bit))?; + } + write!(f, "])") + } +} + +impl<'a> From<&'a [u8]> for &'a BitSlice { + fn from(bytes: &'a [u8]) -> Self { + BitSlice::new(bytes) + } +} + +impl<'a> From<&'a mut [u8]> for &'a mut BitSlice { + fn from(bytes: &'a mut [u8]) -> Self { + BitSlice::new_mut(bytes) + } +} + +// NOTE: can't impl `IndexMut` since we can't borrow a single bit from a byte mutably +impl Index for BitSlice { + type Output = bool; + + fn index(&self, index: usize) -> &bool { + if self.get_bit(index).expect("index out of bounds") { + &true + } else { + &false + } + } +} + +impl Index> for BitSlice { + type Output = Self; + + fn index(&self, range: Range) -> &Self { + self.get_slice(range).expect("index out of bounds") + } +} + +impl Index for BitSlice { + type Output = Self; + + fn index(&self, _range: RangeFull) -> &Self { + self + } +} + +impl Index> for BitSlice { + type Output = Self; + + fn index(&self, range: RangeFrom) -> &Self { + self.get_slice(range.start..self.len()) + .expect("index out of bounds") + } +} + +impl Index> for BitSlice { + type Output = Self; + + fn index(&self, range: RangeTo) -> &Self { + self.get_slice(0..range.end).expect("index out of bounds") + } +} + +impl IndexMut> for BitSlice { + fn index_mut(&mut self, range: Range) -> &mut Self { + self.get_mut_slice(range).expect("index out of bounds") + } +} + +impl IndexMut for BitSlice { + fn index_mut(&mut self, _range: RangeFull) -> &mut Self { + self + } +} + +impl IndexMut> for BitSlice { + fn index_mut(&mut self, range: RangeFrom) -> &mut Self { + 
self.get_mut_slice(range.start..self.len()) + .expect("index out of bounds") + } +} + +impl IndexMut> for BitSlice { + fn index_mut(&mut self, range: RangeTo) -> &mut Self { + self.get_mut_slice(0..range.end) + .expect("index out of bounds") + } +} + +impl<'a> IntoIterator for &'a BitSlice { + type Item = bool; + type IntoIter = Iter<'a>; + + fn into_iter(self) -> Iter<'a> { + self.iter() + } +} + +impl Eq for BitSlice {} +impl PartialEq for BitSlice { + fn eq(&self, other: &Self) -> bool { + if self.len() != other.len() { + return false; + } + + for (a, b) in self.iter().zip(other.iter()) { + if a != b { + return false; + } + } + + true + } +} + +impl RefUnwindSafe for BitSlice {} + +/// Tests for private APIs. +#[cfg(test)] +mod tests { + use crate::BitSlice; + const BYTES: [u8; 2] = [0xa0, 0x0a]; + + #[test] + fn as_raw_bytes() { + assert_eq!(BitSlice::new(&BYTES).as_raw_bytes(), BYTES); + } + + #[test] + fn as_mut_raw_bytes() { + let mut bytes = BYTES; + let bits = BitSlice::new_mut(&mut bytes); + assert_eq!(bits.as_mut_raw_bytes(), BYTES); + } +} diff --git a/bitref/src/tagged_len.rs b/bitref/src/tagged_len.rs new file mode 100644 index 00000000..9cd2b95f --- /dev/null +++ b/bitref/src/tagged_len.rs @@ -0,0 +1,349 @@ +//! `TaggedLen` encodes additional position data into the length field of a fat pointer, ala a +//! normal tagged pointer. + +use crate::IndexOutOfBounds; +use core::{ + fmt::{self, Debug}, + ops::Range, +}; + +/// Panic message in the event of overflow. +const OVERFLOW_MSG: &str = "overflow"; + +/// Tagged length which encodes the original length of the byte slice. +/// +/// This includes the length of the original `&[u8]` verbatim, but left-shifted by 6-bits to make +/// room for two 3-bit sub-byte position cursors within the head and tail bytes (which may refer to +/// the same byte in the event the `&[u8]` is only 1-byte in length). 
+/// +/// They are encoded as follows in little endian: +/// +/// ```text +/// | tail offset (3-bits) | head offset (3-bits) | byte len (remaining bits in `usize`) | +/// ``` +/// +/// The tail and head offsets are stored as a special 3-bit `u3` type, and they encode offsets +/// within a particular byte . +#[derive(Clone, Copy, Debug, Eq, PartialEq)] +pub(crate) struct TaggedLen { + /// Length of the underlying slice in bytes. + byte_len: usize, + + /// Bit offset within the head byte. + head_offset: u3, + + /// Bit offset within the tail byte. + tail_offset: u3, +} + +impl TaggedLen { + /// Create a new tagged length from the original length in bytes, with the given bit-level + /// offsets into the first and last bytes, which may be the same byte if `byte_len` is 1. + /// + /// # Panics + /// In debug builds, panics if either of the following are true: + /// - `head_offset` or `tail_offset` overflows `u3::MAX`. + /// - `byte_len` is zero and either `head_offset` or `tail_offset` are nonzero. + #[inline] + pub(crate) const fn new(byte_len: usize, head_offset: usize, tail_offset: usize) -> Self { + debug_assert!(head_offset <= u3::MAX && tail_offset <= u3::MAX); + debug_assert!(byte_len > 0 || (head_offset == 0 && tail_offset == 0)); + + Self { + byte_len, + head_offset: u3::new(head_offset), + tail_offset: u3::new(tail_offset), + } + } + + /// Slice the bit range represented by this `TaggedLen`, returning a new one along with a + /// byte-level offset into the backing slice. + pub(crate) const fn slice(self, bits: Range) -> Result<(Self, usize), IndexOutOfBounds> { + if bits.start > bits.end || bits.end > self.bit_len() { + return Err(IndexOutOfBounds); + } + + let (head_byte, head_offset) = self.offsets_unchecked(bits.start); + let (tail_byte, tail_offset) = self.offsets_unchecked(bits.end); + + if let Some(mut byte_len) = tail_byte.checked_sub(head_byte) { + // The computed `byte_len` won't include this extra byte if `tail_offset` is non-zero. 
+ if !tail_offset.is_zero() { + byte_len += 1; + } + + let sliced_len = Self { + byte_len, + head_offset, + tail_offset, + }; + + return Ok((sliced_len, head_byte)); + } + + Err(IndexOutOfBounds) + } + + /// Get the length of this slice in bytes. + pub(crate) const fn byte_len(self) -> usize { + self.byte_len + } + + /// Get the length of this slice in bits. + pub(crate) const fn bit_len(self) -> usize { + let unsliced_bits = self.byte_len.checked_mul(8).expect(OVERFLOW_MSG); + let sliced_bits = self.head_offset.value() + + if self.tail_offset.is_zero() { + 0 + } else { + 8 - self.tail_offset.value() + }; + + unsliced_bits.checked_sub(sliced_bits).expect(OVERFLOW_MSG) + } + + /// Decode an encoded tagged length back into this type. + pub(crate) const fn decode(encoded: usize) -> Self { + let byte_len = encoded >> (u3::BITS * 2); + let head_offset = (encoded >> u3::BITS) & u3::MAX; + let tail_offset = encoded & u3::MAX; + Self::new(byte_len, head_offset, tail_offset) + } + + /// Encode a tagged length for storage in a fat pointer (panics if `byte_len` overflows). + pub(crate) const fn encode(self) -> usize { + self.byte_len.checked_mul(1usize << (u3::BITS * 2)).expect(OVERFLOW_MSG) + | (self.head_offset.value() << u3::BITS) + | self.tail_offset.value() + } + + /// Get the head offset. + pub(crate) const fn head_offset(self) -> usize { + self.head_offset.value() + } + + /// Get the tail offset. + pub(crate) const fn tail_offset(self) -> usize { + self.tail_offset.value() + } + + /// Compute the byte-level offset and bitmask for a bit at the given `index`. + pub(crate) const fn offset_and_mask( + self, + index: usize, + ) -> Result<(usize, u8), IndexOutOfBounds> { + if index >= self.bit_len() { + return Err(IndexOutOfBounds); + } + + let (byte_offset, bit_offset) = self.offsets_unchecked(index); + Ok((byte_offset, bit_offset.bitmask())) + } + + /// Compute the byte and bit offsets for a given index. + /// + /// Does not ensure that `index` lies within this bit slice.
+ const fn offsets_unchecked(self, index: usize) -> (usize, u3) { + let index = index + .checked_add(self.head_offset.value()) + .expect(OVERFLOW_MSG); + + let byte_offset = index >> u3::BITS; + let bit_offset = u3::new(index); + (byte_offset, bit_offset) + } +} + +/// A 3-bit integer, used to represent a specific bit within a byte. +#[allow(non_camel_case_types)] +#[derive(Clone, Copy, Default, Eq, PartialEq, PartialOrd, Ord)] +#[repr(u8)] +enum u3 { + #[default] + V0 = 0, + V1 = 1, + V2 = 2, + V3 = 3, + V4 = 4, + V5 = 5, + V6 = 6, + V7 = 7, +} + +impl u3 { + /// Size in bits. + const BITS: u32 = 3; + + /// Maximum value for a `u3` (7). + const MAX: usize = 0b111; + + /// Create a new `u3` from the lowest three bits of a `usize`, masking off the rest. + const fn new(n: usize) -> Self { + match n & Self::MAX { + 0 => Self::V0, + 1 => Self::V1, + 2 => Self::V2, + 3 => Self::V3, + 4 => Self::V4, + 5 => Self::V5, + 6 => Self::V6, + 7 => Self::V7, + _ => unreachable!(), + } + } + + /// Convert this `u3` to a `usize` + const fn value(self) -> usize { + self as usize + } + + /// Is this `u3` equal to zero? + const fn is_zero(self) -> bool { + self.value() == 0 + } + + /// Compute a 1-bit byte-width mask to select the bit identified by this `u3`. 
+ const fn bitmask(self) -> u8 { + 1u8 << (Self::MAX - self.value()) + } +} + +impl Debug for u3 { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "{}", self.value()) + } +} + +#[cfg(test)] +mod tests { + use super::{TaggedLen, u3}; + + #[cfg(all(any(unix, windows), not(miri)))] + use {core::ops::Range, proptest::prelude::*}; + + #[test] + fn bit_len() { + assert_eq!(TaggedLen::new(0, 0, 0).bit_len(), 0); + assert_eq!(TaggedLen::new(1, 0, 1).bit_len(), 1); + assert_eq!(TaggedLen::new(1, 0, 0).slice(1..7).unwrap().0.bit_len(), 6); + assert_eq!(TaggedLen::new(1, 0, 0).bit_len(), 8); + assert_eq!(TaggedLen::new(2, 0, 0).bit_len(), 16); + } + + #[test] + fn decode() { + assert_eq!(TaggedLen::decode(0), TaggedLen::new(0, 0, 0)); + assert_eq!(TaggedLen::decode(0b1000001), TaggedLen::new(1, 0, 1)); + assert_eq!(TaggedLen::decode(0b1000000), TaggedLen::new(1, 0, 0)); + assert_eq!(TaggedLen::decode(0b1001000), TaggedLen::new(1, 1, 0)); + assert_eq!(TaggedLen::decode(0b10001011111), TaggedLen::new(17, 3, 7)); + } + + #[test] + fn encode() { + assert_eq!(TaggedLen::new(0, 0, 0).encode(), 0); + assert_eq!(TaggedLen::new(1, 0, 0).encode(), 0b1000000); + assert_eq!(TaggedLen::new(1, 0, 1).encode(), 0b1000001); + assert_eq!(TaggedLen::new(1, 1, 0).encode(), 0b1001000); + assert_eq!(TaggedLen::new(17, 3, 7).encode(), 0b10001011111); + } + + #[test] + fn offset_and_mask() { + let len = TaggedLen::new(1, 0, 0); + assert_eq!(len.offset_and_mask(7).unwrap(), (0, 1)); + assert!(len.offset_and_mask(8).is_err()); + assert!(len.offset_and_mask(9).is_err()); + } + + #[test] + #[allow(clippy::reversed_empty_ranges, reason = "inputs for testing")] + fn slice() { + // len: 0 + let len0 = TaggedLen::new(0, 0, 0); + assert_eq!(len0.slice(0..0).unwrap().0, len0); + assert!(len0.slice(0..1).is_err()); + + let (tagged_len, byte_offset) = len0.slice(0..0).unwrap(); + assert_eq!(tagged_len.byte_len, 0); + assert_eq!(tagged_len.head_offset, u3::V0); + 
assert_eq!(tagged_len.tail_offset, u3::V0); + assert_eq!(byte_offset, 0); + + // len: 1 + let len1 = TaggedLen::new(1, 0, 0); + assert_eq!(len1.slice(0..1).unwrap().0.bit_len(), 1); + assert_eq!(len1.slice(0..8).unwrap().0, len1); + assert!(len1.slice(1..0).is_err()); + assert!(len1.slice(0..9).is_err()); + + assert_eq!(len1.slice(0..0).unwrap().0.byte_len, 0); + assert_eq!(len1.slice(0..7).unwrap().0.byte_len, 1); + assert_eq!(len1.slice(1..7).unwrap().0.byte_len, 1); + + let (tagged_len, byte_offset) = len1.slice(1..8).unwrap(); + assert_eq!(tagged_len.byte_len, 1); + assert_eq!(tagged_len.head_offset, u3::V1); + assert_eq!(tagged_len.tail_offset, u3::V0); + assert_eq!(byte_offset, 0); + + // len: 1 (with sub-bit positioning) + let len1_subbits = TaggedLen::new(1, 1, 6); + assert!(len1_subbits.slice(1..6).is_err()); + + let (tagged_len, byte_offset) = len1_subbits.slice(1..5).unwrap(); + assert_eq!(tagged_len.byte_len, 1); + assert_eq!(tagged_len.head_offset, u3::V2); + assert_eq!(tagged_len.tail_offset, u3::V6); + assert_eq!(byte_offset, 0); + + // len: 2 + let len2 = TaggedLen::new(2, 0, 0); + assert_eq!(len2.slice(0..16).unwrap().0, len2); + assert!(len2.slice(0..17).is_err()); + + let (tagged_len, byte_offset) = len2.slice(3..14).unwrap(); + assert_eq!(tagged_len.byte_len(), 2); + assert_eq!(tagged_len.bit_len(), 11); + assert_eq!(tagged_len.head_offset, u3::V3); + assert_eq!(tagged_len.tail_offset, u3::V6); + assert_eq!(byte_offset, 0); + + let (tagged_len, byte_offset) = len2.slice(9..15).unwrap(); + assert_eq!(tagged_len.byte_len, 1); + assert_eq!(tagged_len.head_offset, u3::V1); + assert_eq!(tagged_len.tail_offset, u3::V7); + assert_eq!(byte_offset, 1); + } + + #[cfg(all(any(unix, windows), not(miri)))] + prop_compose! 
{ + fn tagged_len()(len in any::(), offsets in any::<[u8; 2]>()) -> TaggedLen { + // Reserve 6-bits of space so we don't overflow + let len_mask = !(0b111111 << (usize::BITS as usize - 6)); + let head_offset = usize::from(offsets[0]) & u3::MAX; + let tail_offset = usize::from(offsets[1]) & u3::MAX; + TaggedLen::new(len & len_mask, head_offset, tail_offset) + } + } + + #[cfg(all(any(unix, windows), not(miri)))] + proptest! { + #[test] + fn encode_decode_roundtrip(tagged_len in tagged_len()) { + let encoded = tagged_len.encode(); + prop_assert_eq!(TaggedLen::decode(encoded), tagged_len); + } + + #[test] + fn slice_proptests(tagged_len in tagged_len(), range in any::>()) { + if range.start <= range.end && range.end <= tagged_len.bit_len() { + let expected_len = range.end - range.start; + let (sliced, _offset) = tagged_len.slice(range).unwrap(); + prop_assert_eq!(sliced.bit_len(), expected_len); + } else { + prop_assert!(tagged_len.slice(range).is_err()); + } + } + } +} diff --git a/bitref/tests/bitslice.rs b/bitref/tests/bitslice.rs new file mode 100644 index 00000000..dc4f80f3 --- /dev/null +++ b/bitref/tests/bitslice.rs @@ -0,0 +1,178 @@ +//! `BitSlice` integration tests.
+ +#![expect(clippy::unwrap_used, reason = "tests")] + +use bitref::BitSlice; +use core::ops::Range; + +const BYTES: [u8; 2] = [0xa0, 0x0a]; +const BITS: [bool; 16] = [ + true, false, true, false, false, false, false, false, false, false, false, false, true, false, + true, false, +]; + +#[test] +fn debug() { + let bits = BitSlice::new(&BYTES); + assert_eq!(&format!("{bits:?}"), "BitSlice([1010000000001010])"); + + let bits2 = &bits[1..14]; + assert_eq!(&format!("{bits2:?}"), "BitSlice([0100000000010])"); +} + +#[test] +fn first() { + assert!(BitSlice::EMPTY.first().is_none()); + assert!(!BitSlice::new(&[0]).first().unwrap()); +} + +#[test] +fn last() { + assert!(BitSlice::EMPTY.last().is_none()); + assert!(BitSlice::new(&[0, 1]).last().unwrap()); +} + +#[test] +fn get_bit() { + let bits = BitSlice::new(&BYTES); + + for (i, expected) in BITS.into_iter().enumerate() { + assert_eq!(bits.get_bit(i).unwrap(), expected); + } + + assert!(bits.get_bit(bits.len()).is_err()); +} + +#[test] +fn get_slice() { + let bits = BitSlice::new(&BYTES); + + // Ensure `BitSlice::get_slice` behaves like `&[bool]`. + for i in 0..BITS.len() { + for j in i..BITS.len() { + let bitslice = bits.get_slice(i..j).unwrap(); + verify_against_expected(bitslice, i..j); + } + } +} + +// TODO(tarcieri): test mutations +#[test] +fn get_mut_slice() { + let mut bytes = BYTES; + let bits = BitSlice::new_mut(&mut bytes); + + // Ensure `BitSlice::get_mut_slice` behaves like `&mut [bool]`. + for i in 0..BITS.len() { + for j in i..BITS.len() { + let bitslice = bits.get_mut_slice(i..j).unwrap(); + verify_against_expected(bitslice, i..j); + } + } +} + +#[test] +fn index_range_slicing() { + let bits = BitSlice::new(&BYTES); + + // Ensure `&bitslice[i..j]` behaves like `&[bool]`. 
+ for i in 0..BITS.len() { + for j in i..BITS.len() { + verify_against_expected(&bits[i..j], i..j); + } + } +} + +// TODO(tarcieri): test `IndexMut` +#[test] +fn index_range_from_slicing() { + let bits = BitSlice::new(&BYTES); + + // Ensure `&bitslice[i..]` behaves like `&[bool]`. + for i in 0..BITS.len() { + verify_against_expected(&bits[i..], i..BITS.len()); + } +} + +// TODO(tarcieri): test mutations +#[test] +fn index_mut_range_slicing() { + let mut bytes = BYTES; + let bits = BitSlice::new_mut(&mut bytes); + + // Ensure `&mut bitslice[i..j]` behaves like `&mut [bool]`. + for i in 0..BITS.len() { + for j in i..BITS.len() { + verify_against_expected(&bits[i..j], i..j); + } + } +} + +#[test] +fn is_empty() { + assert!(BitSlice::EMPTY.is_empty()); + assert!(!BitSlice::new(&[0]).is_empty()); + assert!(!BitSlice::new(&BYTES).is_empty()); +} + +#[test] +fn len() { + assert_eq!(BitSlice::EMPTY.len(), 0); + assert_eq!(BitSlice::new(&[0]).len(), 8); + assert_eq!(BitSlice::new(&BYTES).len(), 16); +} + +#[test] +fn set_bit() { + let mut bytes = [0x0, 0x0]; + let bits = BitSlice::new_mut(&mut bytes); + + for (i, bit) in BITS.into_iter().enumerate() { + bits.set_bit(i, bit).unwrap(); + } + + assert!(bits.set_bit(bits.len(), true).is_err()); + assert_eq!(bytes, BYTES); +} + +#[test] +fn split_first() { + assert!(BitSlice::EMPTY.split_first().is_none()); + let bits = BitSlice::new(&[0b10000000]); + let (first, rest) = bits.split_first().unwrap(); + assert!(first); + assert_eq!(rest, &BitSlice::new(&[0])[..7]); +} + +#[test] +fn split_last() { + assert!(BitSlice::EMPTY.split_last().is_none()); + let bits = BitSlice::new(&[1]); + let (last, rest) = bits.split_last().unwrap(); + assert!(last); + assert_eq!(rest, &BitSlice::new(&[0])[..7]); +} + +#[test] +fn replace_bit() { + let mut bytes = [!BYTES[0], !BYTES[1]]; + let bits = BitSlice::new_mut(&mut bytes); + + for (i, bit) in BITS.into_iter().enumerate() { + let old = bits.replace_bit(i, bit).unwrap(); + assert_eq!(old, !bit); 
+ } + + assert_eq!(bytes, BYTES); +} + +/// Check `bitslice` matches the given `range` of `BITS`. +#[track_caller] +fn verify_against_expected(bitslice: &BitSlice, range: Range) { + let coreslice = &BITS[range]; + assert_eq!(bitslice.len(), coreslice.len()); + + for k in 0..bitslice.len() { + assert_eq!(bitslice.get_bit(k).unwrap(), coreslice[k]); + } +}