From 729a434967756db44776395ed398974cfc7e5cf6 Mon Sep 17 00:00:00 2001 From: PeterM <1434309+petesmc@users.noreply.github.com> Date: Sun, 1 Feb 2026 01:50:13 +1100 Subject: [PATCH 01/10] Reorganise hist.rs layout to match hist.c --- lib/compress/hist.rs | 25 +++++++++++++++++++------ 1 file changed, 19 insertions(+), 6 deletions(-) diff --git a/lib/compress/hist.rs b/lib/compress/hist.rs index a89f0327..74f7662e 100644 --- a/lib/compress/hist.rs +++ b/lib/compress/hist.rs @@ -1,19 +1,21 @@ -pub type HIST_checkInput_e = core::ffi::c_uint; -pub const checkMaxSymbolValue: HIST_checkInput_e = 1; -pub const trustInput: HIST_checkInput_e = 0; use core::ptr; use libc::size_t; use crate::lib::common::error_private::{ERR_isError, Error}; use crate::lib::common::mem::MEM_read32; + pub const HIST_WKSP_SIZE_U32: core::ffi::c_int = 1024; + pub const HIST_WKSP_SIZE: size_t = - (HIST_WKSP_SIZE_U32 as size_t).wrapping_mul(::core::mem::size_of::()); + (HIST_WKSP_SIZE_U32 as size_t) * (size_of::()); + pub const HIST_FAST_THRESHOLD: core::ffi::c_int = 1500; + pub unsafe fn HIST_isError(code: size_t) -> core::ffi::c_uint { ERR_isError(code) as _ } + pub unsafe fn HIST_add( count: *mut core::ffi::c_uint, src: *const core::ffi::c_void, @@ -28,6 +30,7 @@ pub unsafe fn HIST_add( *fresh1 = (*fresh1).wrapping_add(1); } } + pub unsafe fn HIST_count_simple( count: *mut core::ffi::c_uint, maxSymbolValuePtr: *mut core::ffi::c_uint, @@ -69,6 +72,12 @@ pub unsafe fn HIST_count_simple( } largestCount } + +enum HIST_checkInput_e { + checkMaxSymbolValue = 1, + trustInput = 0, +} + unsafe fn HIST_count_parallel_wksp( count: *mut core::ffi::c_uint, maxSymbolValuePtr: *mut core::ffi::c_uint, @@ -178,6 +187,7 @@ unsafe fn HIST_count_parallel_wksp( core::ptr::copy(Counting1 as *const u8, count as *mut u8, countSize as usize); max as size_t } + pub unsafe fn HIST_countFast_wksp( count: *mut core::ffi::c_uint, maxSymbolValuePtr: *mut core::ffi::c_uint, @@ -200,10 +210,11 @@ pub unsafe fn HIST_countFast_wksp( maxSymbolValuePtr, source, sourceSize, - trustInput, + HIST_checkInput_e::trustInput, workSpace as *mut u32, ) } + pub unsafe fn HIST_count_wksp( count: *mut core::ffi::c_uint, maxSymbolValuePtr: *mut core::ffi::c_uint, @@ -224,7 +235,7 @@ pub unsafe fn HIST_count_wksp( maxSymbolValuePtr, source, sourceSize, - checkMaxSymbolValue, + HIST_checkInput_e::checkMaxSymbolValue, workSpace as *mut u32, ); } @@ -238,6 +249,7 @@ pub unsafe fn HIST_count_wksp( workSpaceSize, ) } + pub unsafe fn HIST_countFast( count: *mut core::ffi::c_uint, maxSymbolValuePtr: *mut core::ffi::c_uint, @@ -254,6 +266,7 @@ pub unsafe fn HIST_countFast( ::core::mem::size_of::<[core::ffi::c_uint; 1024]>(), ) } + pub unsafe fn HIST_count( count: *mut core::ffi::c_uint, maxSymbolValuePtr: *mut core::ffi::c_uint, From 791fd8b1a3b549bb20b7eb2d50904cd1bbcc7b02 Mon Sep 17 00:00:00 2001 From: PeterM <1434309+petesmc@users.noreply.github.com> Date: Sun, 1 Feb 2026 02:01:29 +1100 Subject: [PATCH 02/10] Add sve2 stubs to hist.rs --- lib/compress/hist.rs | 128 ++++++++++++++++++++++++++++++++++--------- 1 file changed, 103 insertions(+), 25 deletions(-) diff --git a/lib/compress/hist.rs b/lib/compress/hist.rs index 74f7662e..a808b53b 100644 --- a/lib/compress/hist.rs +++ b/lib/compress/hist.rs @@ -5,11 +5,18 @@ use libc::size_t; use crate::lib::common::error_private::{ERR_isError, Error}; use crate::lib::common::mem::MEM_read32; -pub const HIST_WKSP_SIZE_U32: core::ffi::c_int = 1024; +#[cfg(all(target_arch = "aarch64", target_feature = "sve2"))] +pub const HIST_WKSP_SIZE_U32: usize = 0; -pub const HIST_WKSP_SIZE: size_t = - (HIST_WKSP_SIZE_U32 as size_t) * (size_of::()); +#[cfg(not(target_feature = "sve2"))] +pub const HIST_WKSP_SIZE_U32: usize = 1024; +pub const HIST_WKSP_SIZE: usize = HIST_WKSP_SIZE_U32 * size_of::(); + +#[cfg(all(target_arch = "aarch64", target_feature = "sve2"))] +pub const HIST_FAST_THRESHOLD: core::ffi::c_int = 500; + +#[cfg(not(target_feature = "sve2"))] pub const HIST_FAST_THRESHOLD: core::ffi::c_int = 1500; pub unsafe fn HIST_isError(code: size_t) -> core::ffi::c_uint { @@ -78,6 +85,43 @@ enum HIST_checkInput_e { trustInput = 0, } +#[cfg(all(target_arch = "aarch64", target_feature = "sve2"))] +#[inline(always)] +fn min_size(a: usize, b: usize) -> usize { + return if a < b { a } else { b }; +} + +#[cfg(all(target_arch = "aarch64", target_feature = "sve2"))] +use core::arch::aarch64::{svuint16_t, svuint8_t}; + +#[cfg(all(target_arch = "aarch64", target_feature = "sve2"))] +unsafe fn HIST_count_6_sve2( + src: *const BYTE, + size: usize, + dst: *const U32, + c0: svuint8_t, + c1: svuint8_t, + c2: svuint8_t, + c3: svuint8_t, + c4: svuint8_t, + c5: svuint8_t, + histmax: svuint16_t, + maxCount: usize, +) -> svuint16_t { + unimplemented!("SVE2 histogram counting not yet implemented"); +} + +#[cfg(all(target_arch = "aarch64", target_feature = "sve2"))] +unsafe fn HIST_count_sve2( + count: *mut c_uint, + maxSymbolValuePtr: *mut c_uint, + source: *const c_void, + sourceSize: usize, + check: HIST_checkInput_e, +) -> usize { + unimplemented!("SVE2 histogram counting not yet implemented"); +} + unsafe fn HIST_count_parallel_wksp( count: *mut core::ffi::c_uint, maxSymbolValuePtr: *mut core::ffi::c_uint, @@ -199,20 +243,37 @@ pub unsafe fn HIST_countFast_wksp( if sourceSize < HIST_FAST_THRESHOLD as size_t { return HIST_count_simple(count, maxSymbolValuePtr, source, sourceSize) as size_t; } - if workSpace as size_t & 3 != 0 { - return Error::GENERIC.to_error_code(); + + #[cfg(all(target_arch = "aarch64", target_feature = "sve2"))] + { + return HIST_count_sve2( + count, + maxSymbolValuePtr, + source, + sourceSize, + HIST_checkInput_e::trustInput, + ); } - if workSpaceSize < HIST_WKSP_SIZE { - return Error::workSpace_tooSmall.to_error_code(); + + #[cfg(not(target_feature = "sve2"))] + { + if workSpace as size_t & 3 != 0 { + // must be aligned on 4-bytes boundaries + return Error::GENERIC.to_error_code(); + } + + if workSpaceSize < HIST_WKSP_SIZE { + return Error::workSpace_tooSmall.to_error_code(); + } + HIST_count_parallel_wksp( + count, + maxSymbolValuePtr, + source, + sourceSize, + HIST_checkInput_e::trustInput, + workSpace as *mut u32, + ) } - HIST_count_parallel_wksp( - count, - maxSymbolValuePtr, - source, - sourceSize, - HIST_checkInput_e::trustInput, - workSpace as *mut u32, - ) } pub unsafe fn HIST_count_wksp( @@ -223,22 +284,39 @@ pub unsafe fn HIST_count_wksp( workSpace: *mut core::ffi::c_void, workSpaceSize: size_t, ) -> size_t { - if workSpace as size_t & 3 != 0 { - return Error::GENERIC.to_error_code(); - } - if workSpaceSize < HIST_WKSP_SIZE { - return Error::workSpace_tooSmall.to_error_code(); - } - if *maxSymbolValuePtr < 255 { - return HIST_count_parallel_wksp( + #[cfg(all(target_arch = "aarch64", target_feature = "sve2"))] + if (*maxSymbolValuePtr < 255) { + return HIST_count_sve2( count, maxSymbolValuePtr, source, sourceSize, - HIST_checkInput_e::checkMaxSymbolValue, - workSpace as *mut u32, + checkMaxSymbolValue, ); } + + #[cfg(not(target_feature = "sve2"))] + { + if workSpace as size_t & 3 != 0 { + // must be aligned on 4-bytes boundaries + return Error::GENERIC.to_error_code(); + } + if workSpaceSize < HIST_WKSP_SIZE { + return Error::workSpace_tooSmall.to_error_code(); + } + + if *maxSymbolValuePtr < 255 { + return HIST_count_parallel_wksp( + count, + maxSymbolValuePtr, + source, + sourceSize, + HIST_checkInput_e::checkMaxSymbolValue, + workSpace as *mut u32, + ); + } + } + *maxSymbolValuePtr = 255; HIST_countFast_wksp( count, From f3d7babc46e7abab2d58c6ce43e744f87bcf7069 Mon Sep 17 00:00:00 2001 From: PeterM <1434309+petesmc@users.noreply.github.com> Date: Sun, 1 Feb 2026 02:08:41 +1100 Subject: [PATCH 03/10] Add original comments to hist.rs --- lib/compress/hist.rs | 183 +++++++++++++++++++++++++++---------------- 1 file changed, 116 insertions(+), 67 deletions(-) diff --git a/lib/compress/hist.rs b/lib/compress/hist.rs index a808b53b..b7eb40c9 100644 --- a/lib/compress/hist.rs +++ b/lib/compress/hist.rs @@ -23,6 +23,9 @@ pub unsafe fn HIST_isError(code: size_t) -> core::ffi::c_uint { ERR_isError(code) as _ } +/// Lowest level: just add nb of occurrences of characters from `src` into `count`. +/// `count` is not reset. `count` array is presumed large enough (i.e. 1 KB). +/// This function does not need any additional stack memory. pub unsafe fn HIST_add( count: *mut core::ffi::c_uint, src: *const core::ffi::c_void, @@ -38,6 +41,12 @@ pub unsafe fn HIST_add( } } +/// Same as [`HIST_countFast`], this function is unsafe, +/// and will segfault if any value within `src` is `> *maxSymbolValuePtr`. +/// It is also a bit slower for large inputs. +/// However, it does not need any additional memory (not even on stack). +/// @return : count of the most frequent symbol. +/// Note this function doesn't produce any error (i.e. it must succeed). pub unsafe fn HIST_count_simple( count: *mut core::ffi::c_uint, maxSymbolValuePtr: *mut core::ffi::c_uint, @@ -122,6 +131,13 @@ unsafe fn HIST_count_sve2( unimplemented!("SVE2 histogram counting not yet implemented"); } +/// store histogram into 4 intermediate tables, recombined at the end. +/// this design makes better use of OoO cpus, +/// and is noticeably faster when some values are heavily repeated. +/// But it needs some additional workspace for intermediate tables. +/// `workSpace` must be a U32 table of size >= HIST_WKSP_SIZE_U32. +/// @return : largest histogram frequency, +/// or an error code (notably when histogram's alphabet is larger than *maxSymbolValuePtr) unsafe fn HIST_count_parallel_wksp( count: *mut core::ffi::c_uint, maxSymbolValuePtr: *mut core::ffi::c_uint, @@ -139,6 +155,8 @@ unsafe fn HIST_count_parallel_wksp( let Counting2 = Counting1.add(256); let Counting3 = Counting2.add(256); let Counting4 = Counting3.add(256); + + /* safety checks */ if sourceSize == 0 { ptr::write_bytes(count as *mut u8, 0, countSize as libc::size_t); *maxSymbolValuePtr = 0; @@ -151,87 +169,104 @@ unsafe fn HIST_count_parallel_wksp( .wrapping_mul(::core::mem::size_of::() as core::ffi::c_ulong) as libc::size_t, ); - let mut cached = MEM_read32(ip as *const core::ffi::c_void); - ip = ip.add(4); - while ip < iend.sub(15) { - let mut c = cached; - cached = MEM_read32(ip as *const core::ffi::c_void); - ip = ip.add(4); - let fresh4 = &mut (*Counting1.offset(c as u8 as isize)); - *fresh4 = (*fresh4).wrapping_add(1); - let fresh5 = &mut (*Counting2.offset((c >> 8) as u8 as isize)); - *fresh5 = (*fresh5).wrapping_add(1); - let fresh6 = &mut (*Counting3.offset((c >> 16) as u8 as isize)); - *fresh6 = (*fresh6).wrapping_add(1); - let fresh7 = &mut (*Counting4.offset((c >> 24) as isize)); - *fresh7 = (*fresh7).wrapping_add(1); - c = cached; - cached = MEM_read32(ip as *const core::ffi::c_void); - ip = ip.add(4); - let fresh8 = &mut (*Counting1.offset(c as u8 as isize)); - *fresh8 = (*fresh8).wrapping_add(1); - let fresh9 = &mut (*Counting2.offset((c >> 8) as u8 as isize)); - *fresh9 = (*fresh9).wrapping_add(1); - let fresh10 = &mut (*Counting3.offset((c >> 16) as u8 as isize)); - *fresh10 = (*fresh10).wrapping_add(1); - let fresh11 = &mut (*Counting4.offset((c >> 24) as isize)); - *fresh11 = (*fresh11).wrapping_add(1); - c = cached; - cached = MEM_read32(ip as *const core::ffi::c_void); - ip = ip.add(4); - let fresh12 = &mut (*Counting1.offset(c as u8 as isize)); - *fresh12 = (*fresh12).wrapping_add(1); - let fresh13 = &mut (*Counting2.offset((c >> 8) as u8 as isize)); - *fresh13 = (*fresh13).wrapping_add(1); - let fresh14 = &mut (*Counting3.offset((c >> 16) as u8 as isize)); - *fresh14 = (*fresh14).wrapping_add(1); - let fresh15 = &mut (*Counting4.offset((c >> 24) as isize)); - *fresh15 = (*fresh15).wrapping_add(1); - c = cached; - cached = MEM_read32(ip as *const core::ffi::c_void); + + /* by stripes of 16 bytes */ + { + let mut cached = MEM_read32(ip as *const core::ffi::c_void); ip = ip.add(4); - let fresh16 = &mut (*Counting1.offset(c as u8 as isize)); - *fresh16 = (*fresh16).wrapping_add(1); - let fresh17 = &mut (*Counting2.offset((c >> 8) as u8 as isize)); - *fresh17 = (*fresh17).wrapping_add(1); - let fresh18 = &mut (*Counting3.offset((c >> 16) as u8 as isize)); - *fresh18 = (*fresh18).wrapping_add(1); - let fresh19 = &mut (*Counting4.offset((c >> 24) as isize)); - *fresh19 = (*fresh19).wrapping_add(1); + while ip < iend.sub(15) { + let mut c = cached; + cached = MEM_read32(ip as *const core::ffi::c_void); + ip = ip.add(4); + let fresh4 = &mut (*Counting1.offset(c as u8 as isize)); + *fresh4 = (*fresh4).wrapping_add(1); + let fresh5 = &mut (*Counting2.offset((c >> 8) as u8 as isize)); + *fresh5 = (*fresh5).wrapping_add(1); + let fresh6 = &mut (*Counting3.offset((c >> 16) as u8 as isize)); + *fresh6 = (*fresh6).wrapping_add(1); + let fresh7 = &mut (*Counting4.offset((c >> 24) as isize)); + *fresh7 = (*fresh7).wrapping_add(1); + c = cached; + cached = MEM_read32(ip as *const core::ffi::c_void); + ip = ip.add(4); + let fresh8 = &mut (*Counting1.offset(c as u8 as isize)); + *fresh8 = (*fresh8).wrapping_add(1); + let fresh9 = &mut (*Counting2.offset((c >> 8) as u8 as isize)); + *fresh9 = (*fresh9).wrapping_add(1); + let fresh10 = &mut (*Counting3.offset((c >> 16) as u8 as isize)); + *fresh10 = (*fresh10).wrapping_add(1); + let fresh11 = &mut (*Counting4.offset((c >> 24) as isize)); + *fresh11 = (*fresh11).wrapping_add(1); + c = cached; + cached = MEM_read32(ip as *const core::ffi::c_void); + ip = ip.add(4); + let fresh12 = &mut (*Counting1.offset(c as u8 as isize)); + *fresh12 = (*fresh12).wrapping_add(1); + let fresh13 = &mut (*Counting2.offset((c >> 8) as u8 as isize)); + *fresh13 = (*fresh13).wrapping_add(1); + let fresh14 = &mut (*Counting3.offset((c >> 16) as u8 as isize)); + *fresh14 = (*fresh14).wrapping_add(1); + let fresh15 = &mut (*Counting4.offset((c >> 24) as isize)); + *fresh15 = (*fresh15).wrapping_add(1); + c = cached; + cached = MEM_read32(ip as *const core::ffi::c_void); + ip = ip.add(4); + let fresh16 = &mut (*Counting1.offset(c as u8 as isize)); + *fresh16 = (*fresh16).wrapping_add(1); + let fresh17 = &mut (*Counting2.offset((c >> 8) as u8 as isize)); + *fresh17 = (*fresh17).wrapping_add(1); + let fresh18 = &mut (*Counting3.offset((c >> 16) as u8 as isize)); + *fresh18 = (*fresh18).wrapping_add(1); + let fresh19 = &mut (*Counting4.offset((c >> 24) as isize)); + *fresh19 = (*fresh19).wrapping_add(1); + } + ip = ip.sub(4); } - ip = ip.sub(4); + + /* finish last symbols */ while ip < iend { let fresh20 = ip; ip = ip.add(1); let fresh21 = &mut (*Counting1.offset(*fresh20 as isize)); *fresh21 = (*fresh21).wrapping_add(1); } - let mut s: u32 = 0; - s = 0; - while s < 256 { - let fresh22 = &mut (*Counting1.offset(s as isize)); - *fresh22 = (*fresh22).wrapping_add( - (*Counting2.offset(s as isize)) - .wrapping_add(*Counting3.offset(s as isize)) - .wrapping_add(*Counting4.offset(s as isize)), - ); - if *Counting1.offset(s as isize) > max { - max = *Counting1.offset(s as isize); + + { + let mut s: u32 = 0; + s = 0; + while s < 256 { + let fresh22 = &mut (*Counting1.offset(s as isize)); + *fresh22 = (*fresh22).wrapping_add( + (*Counting2.offset(s as isize)) + .wrapping_add(*Counting3.offset(s as isize)) + .wrapping_add(*Counting4.offset(s as isize)), + ); + if *Counting1.offset(s as isize) > max { + max = *Counting1.offset(s as isize); + } + s = s.wrapping_add(1); } - s = s.wrapping_add(1); } - let mut maxSymbolValue = 255 as core::ffi::c_uint; - while *Counting1.offset(maxSymbolValue as isize) == 0 { - maxSymbolValue = maxSymbolValue.wrapping_sub(1); - } - if check as core::ffi::c_uint != 0 && maxSymbolValue > *maxSymbolValuePtr { - return Error::maxSymbolValue_tooSmall.to_error_code(); + + { + let mut maxSymbolValue = 255 as core::ffi::c_uint; + while *Counting1.offset(maxSymbolValue as isize) == 0 { + maxSymbolValue = maxSymbolValue.wrapping_sub(1); + } + if check as core::ffi::c_uint != 0 && maxSymbolValue > *maxSymbolValuePtr { + return Error::maxSymbolValue_tooSmall.to_error_code(); + } + *maxSymbolValuePtr = maxSymbolValue; + + /* in case count & Counting1 are overlapping */ + core::ptr::copy(Counting1 as *const u8, count as *mut u8, countSize as usize); } - *maxSymbolValuePtr = maxSymbolValue; - core::ptr::copy(Counting1 as *const u8, count as *mut u8, countSize as usize); max as size_t } +/// Same as [`HIST_countFast`], but using an externally provided scratch buffer. +/// `workSpace` is a writable buffer which must be 4-bytes aligned, +/// `workSpaceSize` must be >= `HIST_WKSP_SIZE` pub unsafe fn HIST_countFast_wksp( count: *mut core::ffi::c_uint, maxSymbolValuePtr: *mut core::ffi::c_uint, @@ -240,6 +275,7 @@ pub unsafe fn HIST_countFast_wksp( workSpace: *mut core::ffi::c_void, workSpaceSize: size_t, ) -> size_t { + // heuristic threshold if sourceSize < HIST_FAST_THRESHOLD as size_t { return HIST_count_simple(count, maxSymbolValuePtr, source, sourceSize) as size_t; } @@ -276,6 +312,10 @@ pub unsafe fn HIST_countFast_wksp( } } +/// Same as [`HIST_count`], but using an externally provided scratch buffer. +/// Benefit is this function will use very little stack space. +/// `workSpace` is a writable buffer which must be 4-bytes aligned, +/// `workSpaceSize` must be >= HIST_WKSP_SIZE pub unsafe fn HIST_count_wksp( count: *mut core::ffi::c_uint, maxSymbolValuePtr: *mut core::ffi::c_uint, @@ -328,6 +368,9 @@ pub unsafe fn HIST_count_wksp( ) } +/// same as [`HIST_count`], but blindly trusts that all byte values within src are <= *maxSymbolValuePtr. +/// This function is unsafe, and will segfault if any value within `src` is `> *maxSymbolValuePtr` +/// fast variant (unsafe : won't check if src contains values beyond count[] limit) pub unsafe fn HIST_countFast( count: *mut core::ffi::c_uint, maxSymbolValuePtr: *mut core::ffi::c_uint, @@ -345,6 +388,12 @@ pub unsafe fn HIST_countFast( ) } +/// Provides the precise count of each byte within a table 'count'. +/// 'count' is a table of unsigned int, of minimum size (*maxSymbolValuePtr+1). +/// Updates *maxSymbolValuePtr with actual largest symbol value detected. +/// @return : count of the most frequent symbol (which isn't identified). +/// or an error code, which can be tested using HIST_isError(). +/// note : if return == srcSize, there is only one symbol. pub unsafe fn HIST_count( count: *mut core::ffi::c_uint, maxSymbolValuePtr: *mut core::ffi::c_uint, From 296676265e326530ec00de9f8633b9f3696581b2 Mon Sep 17 00:00:00 2001 From: PeterM <1434309+petesmc@users.noreply.github.com> Date: Sun, 1 Feb 2026 02:11:39 +1100 Subject: [PATCH 04/10] Add asserts to hist.rs --- lib/compress/hist.rs | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/lib/compress/hist.rs b/lib/compress/hist.rs index b7eb40c9..feba183b 100644 --- a/lib/compress/hist.rs +++ b/lib/compress/hist.rs @@ -57,6 +57,7 @@ pub unsafe fn HIST_count_simple( let end = ip.add(srcSize); let mut maxSymbolValue = *maxSymbolValuePtr; let mut largestCount = 0; + ptr::write_bytes( count as *mut u8, 0, @@ -68,24 +69,31 @@ pub unsafe fn HIST_count_simple( *maxSymbolValuePtr = 0; return 0; } + while ip < end { + debug_assert!(*ip as u32 <= maxSymbolValue); let fresh2 = ip; ip = ip.add(1); let fresh3 = &mut (*count.offset(*fresh2 as isize)); *fresh3 = (*fresh3).wrapping_add(1); } + while *count.offset(maxSymbolValue as isize) == 0 { maxSymbolValue = maxSymbolValue.wrapping_sub(1); } *maxSymbolValuePtr = maxSymbolValue; - let mut s: u32 = 0; - s = 0; - while s <= maxSymbolValue { - if *count.offset(s as isize) > largestCount { - largestCount = *count.offset(s as isize); + + { + let mut s: u32 = 0; + s = 0; + while s <= maxSymbolValue { + if *count.offset(s as isize) > largestCount { + largestCount = *count.offset(s as isize); + } + s = s.wrapping_add(1); } - s = s.wrapping_add(1); } + largestCount } @@ -157,6 +165,7 @@ unsafe fn HIST_count_parallel_wksp( let Counting4 = Counting3.add(256); /* safety checks */ + debug_assert!(*maxSymbolValuePtr <= 255); if sourceSize == 0 { ptr::write_bytes(count as *mut u8, 0, countSize as libc::size_t); *maxSymbolValuePtr = 0; From 79b13cecea5c7ee8ce34ba51a6e9062a473f7579 Mon Sep 17 00:00:00 2001 From: PeterM <1434309+petesmc@users.noreply.github.com> Date: Sun, 1 Feb 2026 02:13:49 +1100 Subject: [PATCH 05/10] Import core::ffi::c_* types in hist.rs --- lib/compress/hist.rs | 95 +++++++++++++++++------------------- lib/compress/huf_compress.rs | 2 +- 2 files changed, 46 insertions(+), 51 deletions(-) diff --git a/lib/compress/hist.rs b/lib/compress/hist.rs index feba183b..ac91320b 100644 --- a/lib/compress/hist.rs +++ b/lib/compress/hist.rs @@ -1,3 +1,4 @@ +use core::ffi::{c_int, c_uint, c_ulong, c_void}; use core::ptr; use libc::size_t; @@ -11,26 +12,22 @@ pub const HIST_WKSP_SIZE_U32: usize = 0; #[cfg(not(target_feature = "sve2"))] pub const HIST_WKSP_SIZE_U32: usize = 1024; -pub const HIST_WKSP_SIZE: usize = HIST_WKSP_SIZE_U32 * size_of::(); +pub const HIST_WKSP_SIZE: usize = HIST_WKSP_SIZE_U32 * size_of::(); #[cfg(all(target_arch = "aarch64", target_feature = "sve2"))] -pub const HIST_FAST_THRESHOLD: core::ffi::c_int = 500; +pub const HIST_FAST_THRESHOLD: c_int = 500; #[cfg(not(target_feature = "sve2"))] -pub const HIST_FAST_THRESHOLD: core::ffi::c_int = 1500; +pub const HIST_FAST_THRESHOLD: c_int = 1500; -pub unsafe fn HIST_isError(code: size_t) -> core::ffi::c_uint { +pub unsafe fn HIST_isError(code: size_t) -> c_uint { ERR_isError(code) as _ } /// Lowest level: just add nb of occurrences of characters from `src` into `count`. /// `count` is not reset. `count` array is presumed large enough (i.e. 1 KB). /// This function does not need any additional stack memory. -pub unsafe fn HIST_add( - count: *mut core::ffi::c_uint, - src: *const core::ffi::c_void, - srcSize: size_t, -) { +pub unsafe fn HIST_add(count: *mut c_uint, src: *const c_void, srcSize: size_t) { let mut ip = src as *const u8; let end = ip.add(srcSize); while ip < end { @@ -48,11 +45,11 @@ pub unsafe fn HIST_add( /// @return : count of the most frequent symbol. /// Note this function doesn't produce any error (i.e. it must succeed). pub unsafe fn HIST_count_simple( - count: *mut core::ffi::c_uint, - maxSymbolValuePtr: *mut core::ffi::c_uint, - src: *const core::ffi::c_void, + count: *mut c_uint, + maxSymbolValuePtr: *mut c_uint, + src: *const c_void, srcSize: size_t, -) -> core::ffi::c_uint { +) -> c_uint { let mut ip = src as *const u8; let end = ip.add(srcSize); let mut maxSymbolValue = *maxSymbolValuePtr; @@ -61,9 +58,8 @@ pub unsafe fn HIST_count_simple( ptr::write_bytes( count as *mut u8, 0, - (maxSymbolValue.wrapping_add(1) as core::ffi::c_ulong) - .wrapping_mul(::core::mem::size_of::() as core::ffi::c_ulong) - as libc::size_t, + (maxSymbolValue.wrapping_add(1) as c_ulong) + .wrapping_mul(::core::mem::size_of::() as c_ulong) as libc::size_t, ); if srcSize == 0 { *maxSymbolValuePtr = 0; @@ -147,17 +143,17 @@ unsafe fn HIST_count_sve2( /// @return : largest histogram frequency, /// or an error code (notably when histogram's alphabet is larger than *maxSymbolValuePtr) unsafe fn HIST_count_parallel_wksp( - count: *mut core::ffi::c_uint, - maxSymbolValuePtr: *mut core::ffi::c_uint, - source: *const core::ffi::c_void, + count: *mut c_uint, + maxSymbolValuePtr: *mut c_uint, + source: *const c_void, sourceSize: size_t, check: HIST_checkInput_e, workSpace: *mut u32, ) -> size_t { let mut ip = source as *const u8; let iend = ip.add(sourceSize); - let countSize = ((*maxSymbolValuePtr).wrapping_add(1) as core::ffi::c_ulong) - .wrapping_mul(::core::mem::size_of::() as core::ffi::c_ulong); + let countSize = ((*maxSymbolValuePtr).wrapping_add(1) as c_ulong) + .wrapping_mul(::core::mem::size_of::() as c_ulong); let mut max = 0; let Counting1 = workSpace; let Counting2 = Counting1.add(256); @@ -174,18 +170,17 @@ unsafe fn HIST_count_parallel_wksp( ptr::write_bytes( workSpace as *mut u8, 0, - ((4 * 256) as core::ffi::c_ulong) - .wrapping_mul(::core::mem::size_of::() as core::ffi::c_ulong) + ((4 * 256) as c_ulong).wrapping_mul(::core::mem::size_of::() as c_ulong) as libc::size_t, ); /* by stripes of 16 bytes */ { - let mut cached = MEM_read32(ip as *const core::ffi::c_void); + let mut cached = MEM_read32(ip as *const c_void); ip = ip.add(4); while ip < iend.sub(15) { let mut c = cached; - cached = MEM_read32(ip as *const core::ffi::c_void); + cached = MEM_read32(ip as *const c_void); ip = ip.add(4); let fresh4 = &mut (*Counting1.offset(c as u8 as isize)); *fresh4 = (*fresh4).wrapping_add(1); @@ -196,7 +191,7 @@ unsafe fn HIST_count_parallel_wksp( let fresh7 = &mut (*Counting4.offset((c >> 24) as isize)); *fresh7 = (*fresh7).wrapping_add(1); c = cached; - cached = MEM_read32(ip as *const core::ffi::c_void); + cached = MEM_read32(ip as *const c_void); ip = ip.add(4); let fresh8 = &mut (*Counting1.offset(c as u8 as isize)); *fresh8 = (*fresh8).wrapping_add(1); @@ -207,7 +202,7 @@ unsafe fn HIST_count_parallel_wksp( let fresh11 = &mut (*Counting4.offset((c >> 24) as isize)); *fresh11 = (*fresh11).wrapping_add(1); c = cached; - cached = MEM_read32(ip as *const core::ffi::c_void); + cached = MEM_read32(ip as *const c_void); ip = ip.add(4); let fresh12 = &mut (*Counting1.offset(c as u8 as isize)); *fresh12 = (*fresh12).wrapping_add(1); @@ -218,7 +213,7 @@ unsafe fn HIST_count_parallel_wksp( let fresh15 = &mut (*Counting4.offset((c >> 24) as isize)); *fresh15 = (*fresh15).wrapping_add(1); c = cached; - cached = MEM_read32(ip as *const core::ffi::c_void); + cached = MEM_read32(ip as *const c_void); ip = ip.add(4); let fresh16 = &mut (*Counting1.offset(c as u8 as isize)); *fresh16 = (*fresh16).wrapping_add(1); @@ -258,11 +253,11 @@ unsafe fn HIST_count_parallel_wksp( } { - let mut maxSymbolValue = 255 as core::ffi::c_uint; + let mut maxSymbolValue = 255 as c_uint; while *Counting1.offset(maxSymbolValue as isize) == 0 { maxSymbolValue = maxSymbolValue.wrapping_sub(1); } - if check as core::ffi::c_uint != 0 && maxSymbolValue > *maxSymbolValuePtr { + if check as c_uint != 0 && maxSymbolValue > *maxSymbolValuePtr { return Error::maxSymbolValue_tooSmall.to_error_code(); } *maxSymbolValuePtr = maxSymbolValue; @@ -277,11 +272,11 @@ unsafe fn HIST_count_parallel_wksp( /// `workSpace` is a writable buffer which must be 4-bytes aligned, /// `workSpaceSize` must be >= `HIST_WKSP_SIZE` pub unsafe fn HIST_countFast_wksp( - count: *mut core::ffi::c_uint, - maxSymbolValuePtr: *mut core::ffi::c_uint, - source: *const core::ffi::c_void, + count: *mut c_uint, + maxSymbolValuePtr: *mut c_uint, + source: *const c_void, sourceSize: size_t, - workSpace: *mut core::ffi::c_void, + workSpace: *mut c_void, workSpaceSize: size_t, ) -> size_t { // heuristic threshold @@ -326,11 +321,11 @@ pub unsafe fn HIST_countFast_wksp( /// `workSpace` is a writable buffer which must be 4-bytes aligned, /// `workSpaceSize` must be >= HIST_WKSP_SIZE pub unsafe fn HIST_count_wksp( - count: *mut core::ffi::c_uint, - maxSymbolValuePtr: *mut core::ffi::c_uint, - source: *const core::ffi::c_void, + count: *mut c_uint, + maxSymbolValuePtr: *mut c_uint, + source: *const c_void, sourceSize: size_t, - workSpace: *mut core::ffi::c_void, + workSpace: *mut c_void, workSpaceSize: size_t, ) -> size_t { #[cfg(all(target_arch = "aarch64", target_feature = "sve2"))] @@ -381,19 +376,19 @@ pub unsafe fn HIST_count_wksp( /// This function is unsafe, and will segfault if any value within `src` is `> *maxSymbolValuePtr` /// fast variant (unsafe : won't check if src contains values beyond count[] limit) pub unsafe fn HIST_countFast( - count: *mut core::ffi::c_uint, - maxSymbolValuePtr: *mut core::ffi::c_uint, - source: *const core::ffi::c_void, + count: *mut c_uint, + maxSymbolValuePtr: *mut c_uint, + source: *const c_void, sourceSize: size_t, ) -> size_t { - let mut tmpCounters: [core::ffi::c_uint; 1024] = [0; 1024]; + let mut tmpCounters: [c_uint; 1024] = [0; 1024]; HIST_countFast_wksp( count, maxSymbolValuePtr, source, sourceSize, - tmpCounters.as_mut_ptr() as *mut core::ffi::c_void, - ::core::mem::size_of::<[core::ffi::c_uint; 1024]>(), + tmpCounters.as_mut_ptr() as *mut c_void, + ::core::mem::size_of::<[c_uint; 1024]>(), ) } @@ -404,18 +399,18 @@ pub unsafe fn HIST_countFast( /// or an error code, which can be tested using HIST_isError(). /// note : if return == srcSize, there is only one symbol. pub unsafe fn HIST_count( - count: *mut core::ffi::c_uint, - maxSymbolValuePtr: *mut core::ffi::c_uint, - src: *const core::ffi::c_void, + count: *mut c_uint, + maxSymbolValuePtr: *mut c_uint, + src: *const c_void, srcSize: size_t, ) -> size_t { - let mut tmpCounters: [core::ffi::c_uint; 1024] = [0; 1024]; + let mut tmpCounters: [c_uint; 1024] = [0; 1024]; HIST_count_wksp( count, maxSymbolValuePtr, src, srcSize, - tmpCounters.as_mut_ptr() as *mut core::ffi::c_void, - ::core::mem::size_of::<[core::ffi::c_uint; 1024]>(), + tmpCounters.as_mut_ptr() as *mut c_void, + ::core::mem::size_of::<[c_uint; 1024]>(), ) } diff --git a/lib/compress/huf_compress.rs b/lib/compress/huf_compress.rs index 4b53743c..82a9b6ae 100644 --- a/lib/compress/huf_compress.rs +++ b/lib/compress/huf_compress.rs @@ -1704,7 +1704,7 @@ unsafe fn HUF_compressCTable_internal( pub union workspace_union { pub buildCTable_wksp: HUF_buildCTable_wksp_tables, pub writeCTable_wksp: HUF_WriteCTableWksp, - pub hist_wksp: [u32; HIST_WKSP_SIZE_U32 as usize], + pub hist_wksp: [u32; HIST_WKSP_SIZE_U32], } #[derive(Copy, Clone)] From a48593c21a7dd351ad97b9af3bbfda1d09f99bcc Mon Sep 17 00:00:00 2001 From: PeterM <1434309+petesmc@users.noreply.github.com> Date: Sun, 1 Feb 2026 02:54:17 +1100 Subject: [PATCH 06/10] Refactor HIST_* workspace functions to take a slice instead of raw pointer --- lib/compress/hist.rs | 93 +++++++++++++----------- lib/compress/huf_compress.rs | 2 +- lib/compress/zstd_compress.rs | 42 +++++++++-- lib/compress/zstd_compress_superblock.rs | 13 +++- 4 files changed, 97 insertions(+), 53 deletions(-) diff --git a/lib/compress/hist.rs b/lib/compress/hist.rs index ac91320b..2f38855c 100644 --- a/lib/compress/hist.rs +++ b/lib/compress/hist.rs @@ -148,17 +148,15 @@ unsafe fn HIST_count_parallel_wksp( source: *const c_void, sourceSize: size_t, check: HIST_checkInput_e, - workSpace: *mut u32, + workSpace: &mut [u32], ) -> size_t { let mut ip = source as *const u8; let iend = ip.add(sourceSize); let countSize = ((*maxSymbolValuePtr).wrapping_add(1) as c_ulong) .wrapping_mul(::core::mem::size_of::() as c_ulong); let mut max = 0; - let Counting1 = workSpace; - let Counting2 = Counting1.add(256); - let Counting3 = Counting2.add(256); - let Counting4 = Counting3.add(256); + + debug_assert!(workSpace.len() >= HIST_WKSP_SIZE_U32); /* safety checks */ debug_assert!(*maxSymbolValuePtr <= 255); @@ -167,12 +165,13 @@ unsafe fn HIST_count_parallel_wksp( *maxSymbolValuePtr = 0; return 0; } - ptr::write_bytes( - workSpace as *mut u8, - 0, - ((4 * 256) as c_ulong).wrapping_mul(::core::mem::size_of::() as c_ulong) - as libc::size_t, - ); + + workSpace[..1024].fill(0); + + // Split workspace into 4 counting tables of 256 u32 each + let (Counting1, remainder) = workSpace.split_at_mut(256); + let (Counting2, remainder) = remainder.split_at_mut(256); + let (Counting3, Counting4) = remainder.split_at_mut(256); /* by stripes of 16 bytes */ { @@ -182,46 +181,46 @@ unsafe fn HIST_count_parallel_wksp( let mut c = cached; cached = MEM_read32(ip as *const c_void); ip = ip.add(4); - let fresh4 = &mut (*Counting1.offset(c as u8 as isize)); + let fresh4 = &mut Counting1[c as u8 as usize]; *fresh4 = (*fresh4).wrapping_add(1); - let fresh5 = &mut (*Counting2.offset((c >> 8) as u8 as isize)); + let fresh5 = &mut Counting2[(c >> 8) as u8 as usize]; *fresh5 = (*fresh5).wrapping_add(1); - let fresh6 = &mut (*Counting3.offset((c >> 16) as u8 as isize)); + let fresh6 = &mut Counting3[(c >> 16) as u8 as usize]; *fresh6 = (*fresh6).wrapping_add(1); - let fresh7 = &mut (*Counting4.offset((c >> 24) as isize)); + let fresh7 = &mut Counting4[(c >> 24) as usize]; *fresh7 = (*fresh7).wrapping_add(1); c = cached; cached = MEM_read32(ip as *const c_void); ip = ip.add(4); - let fresh8 = &mut (*Counting1.offset(c as u8 as isize)); + let fresh8 = &mut Counting1[c as u8 as usize]; *fresh8 = (*fresh8).wrapping_add(1); - let fresh9 = &mut (*Counting2.offset((c >> 8) as u8 as isize)); + let fresh9 = &mut Counting2[(c >> 8) as u8 as usize]; *fresh9 = (*fresh9).wrapping_add(1); - let fresh10 = &mut (*Counting3.offset((c >> 16) as u8 as isize)); + let fresh10 = &mut Counting3[(c >> 16) as u8 as usize]; *fresh10 = (*fresh10).wrapping_add(1); - let fresh11 = &mut (*Counting4.offset((c >> 24) as isize)); + let fresh11 = &mut Counting4[(c >> 24) as usize]; *fresh11 = (*fresh11).wrapping_add(1); c = cached; cached = MEM_read32(ip as *const c_void); ip = ip.add(4); - let fresh12 = &mut (*Counting1.offset(c as u8 as isize)); + let fresh12 = &mut Counting1[c as u8 as usize]; *fresh12 = (*fresh12).wrapping_add(1); - let fresh13 = &mut (*Counting2.offset((c >> 8) as u8 as isize)); + let fresh13 = &mut Counting2[(c >> 8) as u8 as usize]; *fresh13 = (*fresh13).wrapping_add(1); - let fresh14 = &mut (*Counting3.offset((c >> 16) as u8 as isize)); + let fresh14 = &mut Counting3[(c >> 16) as u8 as usize]; *fresh14 = (*fresh14).wrapping_add(1); - let fresh15 = &mut (*Counting4.offset((c >> 24) as isize)); + let fresh15 = &mut Counting4[(c >> 24) as usize]; *fresh15 = (*fresh15).wrapping_add(1); c = cached; cached = MEM_read32(ip as *const c_void); ip = ip.add(4); - let fresh16 = &mut (*Counting1.offset(c as u8 as isize)); + let fresh16 = &mut Counting1[c as u8 as usize]; *fresh16 = (*fresh16).wrapping_add(1); - let fresh17 = &mut (*Counting2.offset((c >> 8) as u8 as isize)); + let fresh17 = &mut Counting2[(c >> 8) as u8 as usize]; *fresh17 = (*fresh17).wrapping_add(1); - let fresh18 = &mut (*Counting3.offset((c >> 16) as u8 as isize)); + let fresh18 = &mut Counting3[(c >> 16) as u8 as usize]; *fresh18 = (*fresh18).wrapping_add(1); - let fresh19 = &mut (*Counting4.offset((c >> 24) as isize)); + let fresh19 = &mut Counting4[(c >> 24) as usize]; *fresh19 = (*fresh19).wrapping_add(1); } ip = ip.sub(4); @@ -231,7 +230,7 @@ unsafe fn HIST_count_parallel_wksp( while ip < iend { let fresh20 = ip; ip = ip.add(1); - let fresh21 = &mut (*Counting1.offset(*fresh20 as isize)); + let fresh21 = &mut Counting1[*fresh20 as usize]; *fresh21 = (*fresh21).wrapping_add(1); } @@ -239,14 +238,14 @@ unsafe fn HIST_count_parallel_wksp( let mut s: u32 = 0; s = 0; while s < 256 { - let fresh22 = &mut (*Counting1.offset(s as isize)); + let fresh22 = &mut Counting1[s as usize]; *fresh22 = (*fresh22).wrapping_add( - (*Counting2.offset(s as isize)) - .wrapping_add(*Counting3.offset(s as isize)) - .wrapping_add(*Counting4.offset(s as isize)), + (Counting2[s as usize]) + .wrapping_add(Counting3[s as usize]) + .wrapping_add(Counting4[s as usize]), ); - if *Counting1.offset(s as isize) > max { - max = *Counting1.offset(s as isize); + if Counting1[s as usize] > max { + max = Counting1[s as usize]; } s = s.wrapping_add(1); } @@ -254,7 +253,7 @@ unsafe fn HIST_count_parallel_wksp( { let mut maxSymbolValue = 255 as c_uint; - while *Counting1.offset(maxSymbolValue as isize) == 0 { + while Counting1[maxSymbolValue as usize] == 0 { maxSymbolValue = maxSymbolValue.wrapping_sub(1); } if check as c_uint != 0 && maxSymbolValue > *maxSymbolValuePtr { @@ -263,7 +262,11 @@ unsafe fn HIST_count_parallel_wksp( *maxSymbolValuePtr = maxSymbolValue; /* in case count & Counting1 are overlapping */ - core::ptr::copy(Counting1 as *const u8, count as *mut u8, countSize as usize); + core::ptr::copy( + Counting1.as_ptr() as *const u8, + count as *mut u8, + countSize as usize, + ); } max as size_t } @@ -276,7 +279,7 @@ pub unsafe fn HIST_countFast_wksp( maxSymbolValuePtr: *mut c_uint, source: *const c_void, sourceSize: size_t, - workSpace: *mut c_void, + workSpace: &mut [u32], workSpaceSize: size_t, ) -> size_t { // heuristic threshold @@ -297,7 +300,7 @@ pub unsafe fn HIST_countFast_wksp( #[cfg(not(target_feature = "sve2"))] { - if workSpace as size_t & 3 != 0 { + if workSpace.as_ptr() as size_t & 3 != 0 { // must be aligned on 4-bytes boundaries return Error::GENERIC.to_error_code(); } @@ -311,7 +314,7 @@ pub unsafe fn HIST_countFast_wksp( source, sourceSize, HIST_checkInput_e::trustInput, - workSpace as *mut u32, + workSpace, ) } } @@ -325,7 +328,7 @@ pub unsafe fn HIST_count_wksp( maxSymbolValuePtr: *mut c_uint, source: *const c_void, sourceSize: size_t, - workSpace: *mut c_void, + workSpace: &mut [u32], workSpaceSize: size_t, ) -> size_t { #[cfg(all(target_arch = "aarch64", target_feature = "sve2"))] @@ -341,10 +344,11 @@ pub unsafe fn HIST_count_wksp( #[cfg(not(target_feature = "sve2"))] { - if workSpace as size_t & 3 != 0 { + if workSpace.as_ptr() as size_t & 3 != 0 { // must be aligned on 4-bytes boundaries return Error::GENERIC.to_error_code(); } + if workSpaceSize < HIST_WKSP_SIZE { return Error::workSpace_tooSmall.to_error_code(); } @@ -356,7 +360,7 @@ pub unsafe fn HIST_count_wksp( source, sourceSize, HIST_checkInput_e::checkMaxSymbolValue, - workSpace as *mut u32, + workSpace, ); } } @@ -382,12 +386,13 @@ pub unsafe fn HIST_countFast( sourceSize: size_t, ) -> size_t { let mut tmpCounters: [c_uint; 1024] = [0; 1024]; + HIST_countFast_wksp( count, maxSymbolValuePtr, source, sourceSize, - tmpCounters.as_mut_ptr() as *mut c_void, + &mut tmpCounters, ::core::mem::size_of::<[c_uint; 1024]>(), ) } @@ -410,7 +415,7 @@ pub unsafe fn HIST_count( maxSymbolValuePtr, src, srcSize, - tmpCounters.as_mut_ptr() as *mut c_void, + &mut tmpCounters, ::core::mem::size_of::<[c_uint; 1024]>(), ) } diff --git a/lib/compress/huf_compress.rs b/lib/compress/huf_compress.rs index 82a9b6ae..f42e5c84 100644 --- a/lib/compress/huf_compress.rs +++ b/lib/compress/huf_compress.rs @@ -1920,7 +1920,7 @@ unsafe fn HUF_compress_internal( &mut maxSymbolValue, src as *const u8 as *const c_void, srcSize, - ((*table).wksps.hist_wksp).as_mut_ptr() as *mut c_void, + &mut ((*table).wksps.hist_wksp), size_of::<[u32; 1024]>(), ); if ERR_isError(largest) { diff --git a/lib/compress/zstd_compress.rs b/lib/compress/zstd_compress.rs index 77816248..04fe1890 100644 --- a/lib/compress/zstd_compress.rs +++ b/lib/compress/zstd_compress.rs @@ -4669,12 +4669,19 @@ unsafe fn ZSTD_buildSequencesStatistics( stats.lastCountSize = 0; stats.longOffsets = ZSTD_seqToCodes(seqStorePtr); let mut max = MaxLL; + + //SAFETY: workspace must be 4 bytes aligned and >= HIST_WKSP_SIZE in bytes + let entropyWorkspace_slice: &mut [u32] = core::slice::from_raw_parts_mut( + entropyWorkspace as *mut u32, + entropyWkspSize / size_of::(), + ); + let mostFrequent = HIST_countFast_wksp( countWorkspace, &mut max, llCodeTable as *const core::ffi::c_void, nbSeq, - entropyWorkspace, + entropyWorkspace_slice, entropyWkspSize, ); (*nextEntropy).litlength_repeatMode = (*prevEntropy).litlength_repeatMode; @@ -4718,12 +4725,18 @@ unsafe fn ZSTD_buildSequencesStatistics( } op = op.add(countSize); let mut max_0 = MaxOff; + + //SAFETY: workspace must be 4 bytes aligned and >= HIST_WKSP_SIZE in bytes + let entropyWorkspace_slice: &mut [u32] = core::slice::from_raw_parts_mut( + entropyWorkspace as *mut u32, + entropyWkspSize / size_of::(), + ); let mostFrequent_0 = HIST_countFast_wksp( countWorkspace, &mut max_0, ofCodeTable as *const core::ffi::c_void, nbSeq, - entropyWorkspace, + entropyWorkspace_slice, entropyWkspSize, ); let defaultPolicy = (if max_0 <= DefaultMaxOff { @@ -4772,12 +4785,18 @@ unsafe fn ZSTD_buildSequencesStatistics( } op = op.add(countSize_0); let mut max_1 = MaxML; + + //SAFETY: workspace must be 4 bytes aligned and >= HIST_WKSP_SIZE in bytes + let entropyWorkspace_slice: &mut [u32] = core::slice::from_raw_parts_mut( + entropyWorkspace as *mut u32, + entropyWkspSize / size_of::(), + ); let mostFrequent_1 = HIST_countFast_wksp( countWorkspace, &mut max_1, mlCodeTable as *const core::ffi::c_void, nbSeq, - entropyWorkspace, + entropyWorkspace_slice, entropyWkspSize, ); (*nextEntropy).matchlength_repeatMode = (*prevEntropy).matchlength_repeatMode; @@ -5694,12 +5713,16 @@ unsafe fn ZSTD_buildBlockEntropyStats_literals( (*hufMetadata).hType = set_basic; return 0; } + + //SAFETY: workspace must be 4 bytes aligned and >= HIST_WKSP_SIZE in bytes + let workspace_slice: &mut [u32] = + core::slice::from_raw_parts_mut(workspace as *mut u32, wkspSize / size_of::()); let largest = HIST_count_wksp( countWksp, &mut maxSymbolValue, src as *const u8 as *const core::ffi::c_void, srcSize, - workspace, + workspace_slice, wkspSize, ); let err_code = largest; @@ -5935,12 +5958,15 @@ unsafe fn ZSTD_estimateBlockSize_literal( || (*hufMetadata).hType as core::ffi::c_uint == set_repeat as core::ffi::c_int as core::ffi::c_uint { + //SAFETY: workspace must be 4 bytes aligned and >= HIST_WKSP_SIZE in bytes + let workspace_slice: &mut [u32] = + core::slice::from_raw_parts_mut(workspace as *mut u32, wkspSize / size_of::()); let largest = HIST_count_wksp( countWksp, &mut maxSymbolValue, literals as *const core::ffi::c_void, litSize, - workspace, + workspace_slice, wkspSize, ); if ERR_isError(largest) { @@ -5977,12 +6003,16 @@ unsafe fn ZSTD_estimateBlockSize_symbolType( let ctEnd = ctStart.add(nbSeq); let mut cSymbolTypeSizeEstimateInBits = 0; let mut max = maxCode; + + //SAFETY: workspace must be 4 bytes aligned and >= HIST_WKSP_SIZE in bytes + let workspace_slice: &mut [u32] = + core::slice::from_raw_parts_mut(workspace as *mut u32, wkspSize / size_of::()); HIST_countFast_wksp( countWksp, &mut max, codeTable as *const core::ffi::c_void, nbSeq, - workspace, + workspace_slice, wkspSize, ); if type_0 as core::ffi::c_uint == set_basic as core::ffi::c_int as core::ffi::c_uint { diff --git a/lib/compress/zstd_compress_superblock.rs b/lib/compress/zstd_compress_superblock.rs index 2caaf9f4..8b319a71 100644 --- a/lib/compress/zstd_compress_superblock.rs +++ b/lib/compress/zstd_compress_superblock.rs @@ -642,12 +642,16 @@ unsafe fn ZSTD_estimateSubBlockSize_literal( || (*hufMetadata).hType as core::ffi::c_uint == set_repeat as core::ffi::c_int as core::ffi::c_uint { + //SAFETY: workspace must be 4 bytes aligned and >= HIST_WKSP_SIZE in bytes + let workspace_slice: &mut [u32] = + core::slice::from_raw_parts_mut(workspace as *mut u32, wkspSize / size_of::()); + let largest = HIST_count_wksp( countWksp, &mut maxSymbolValue, literals as *const core::ffi::c_void, litSize, - workspace, + workspace_slice, wkspSize, ); if ERR_isError(largest) { @@ -681,12 +685,17 @@ unsafe fn ZSTD_estimateSubBlockSize_symbolType( let ctEnd = ctStart.add(nbSeq); let mut cSymbolTypeSizeEstimateInBits = 0; let mut max = maxCode; + + //SAFETY: workspace must be 4 bytes aligned and >= HIST_WKSP_SIZE in bytes + let workspace_slice: &mut [u32] = + core::slice::from_raw_parts_mut(workspace as *mut u32, wkspSize / size_of::()); + HIST_countFast_wksp( countWksp, &mut max, codeTable as *const core::ffi::c_void, nbSeq, - workspace, + workspace_slice, wkspSize, ); if type_0 as core::ffi::c_uint == set_basic as core::ffi::c_int as core::ffi::c_uint { From d1f10606bddde72fd8dc5752fdb85a4d853e155d Mon Sep 17 00:00:00 2001 From: PeterM <1434309+petesmc@users.noreply.github.com> Date: Sun, 1 Feb 2026 03:03:15 +1100 Subject: [PATCH 07/10] Cleanup HIST_add function --- lib/compress/hist.rs | 21 +++++++++------------ lib/compress/zstd_preSplit.rs | 6 +++--- 2 files changed, 12 insertions(+), 15 deletions(-) diff --git a/lib/compress/hist.rs b/lib/compress/hist.rs index 2f38855c..e0616647 100644 --- a/lib/compress/hist.rs +++ b/lib/compress/hist.rs @@ -27,14 +27,11 @@ pub unsafe fn HIST_isError(code: size_t) -> c_uint { /// Lowest level: just add nb of occurrences of characters from `src` into `count`. /// `count` is not reset. `count` array is presumed large enough (i.e. 1 KB). /// This function does not need any additional stack memory. -pub unsafe fn HIST_add(count: *mut c_uint, src: *const c_void, srcSize: size_t) { - let mut ip = src as *const u8; - let end = ip.add(srcSize); - while ip < end { - let fresh0 = ip; - ip = ip.add(1); - let fresh1 = &mut (*count.offset(*fresh0 as isize)); - *fresh1 = (*fresh1).wrapping_add(1); +pub unsafe fn HIST_add(count: &mut [c_uint], src: *const c_void, srcSize: size_t) { + let ip = core::slice::from_raw_parts(src as *const u8, srcSize); + + for item in ip.iter() { + count[*item as usize] += 1; } } @@ -59,7 +56,7 @@ pub unsafe fn HIST_count_simple( count as *mut u8, 0, (maxSymbolValue.wrapping_add(1) as c_ulong) - .wrapping_mul(::core::mem::size_of::() as c_ulong) as libc::size_t, + .wrapping_mul(size_of::() as c_ulong) as libc::size_t, ); if srcSize == 0 { *maxSymbolValuePtr = 0; @@ -153,7 +150,7 @@ unsafe fn HIST_count_parallel_wksp( let mut ip = source as *const u8; let iend = ip.add(sourceSize); let countSize = ((*maxSymbolValuePtr).wrapping_add(1) as c_ulong) - .wrapping_mul(::core::mem::size_of::() as c_ulong); + .wrapping_mul(size_of::() as c_ulong); let mut max = 0; debug_assert!(workSpace.len() >= HIST_WKSP_SIZE_U32); @@ -393,7 +390,7 @@ pub unsafe fn HIST_countFast( source, sourceSize, &mut tmpCounters, - ::core::mem::size_of::<[c_uint; 1024]>(), + size_of::<[c_uint; 1024]>(), ) } @@ -416,6 +413,6 @@ pub unsafe fn HIST_count( src, srcSize, &mut tmpCounters, - ::core::mem::size_of::<[c_uint; 1024]>(), + size_of::<[c_uint; 1024]>(), ) } diff --git a/lib/compress/zstd_preSplit.rs b/lib/compress/zstd_preSplit.rs index e8f0bcf1..735fc309 100644 --- a/lib/compress/zstd_preSplit.rs +++ b/lib/compress/zstd_preSplit.rs @@ -232,12 +232,12 @@ unsafe fn ZSTD_splitBlock_fromBorders( initStats(fpstats); HIST_add( - ((*fpstats).pastEvents.events).as_mut_ptr(), + &mut (*fpstats).pastEvents.events, blockStart, SEGMENT_SIZE as size_t, ); HIST_add( - ((*fpstats).newEvents.events).as_mut_ptr(), + &mut (*fpstats).newEvents.events, (blockStart as *const core::ffi::c_char) .add(blockSize) .offset(-(SEGMENT_SIZE as isize)) as *const core::ffi::c_void, @@ -249,7 +249,7 @@ unsafe fn ZSTD_splitBlock_fromBorders( return blockSize; } HIST_add( - ((*middleEvents).events).as_mut_ptr(), + &mut (*middleEvents).events, (blockStart as *const core::ffi::c_char) .add(blockSize / 2) .offset(-((SEGMENT_SIZE / 2) as isize)) as *const core::ffi::c_void, From 88afa74ef66c88d9ba41de5d52d35a617a15830b Mon Sep 17 00:00:00 2001 From: PeterM <1434309+petesmc@users.noreply.github.com> Date: Sun, 1 Feb 2026 03:10:17 +1100 Subject: [PATCH 08/10] Cleanup HIST_count_parallel_wksp --- lib/compress/hist.rs | 72 ++++++++++++++------------------------------ 1 file changed, 23 insertions(+), 49 deletions(-) diff --git a/lib/compress/hist.rs b/lib/compress/hist.rs index e0616647..5d5a8192 100644 --- a/lib/compress/hist.rs +++ b/lib/compress/hist.rs @@ -55,8 +55,8 @@ pub unsafe fn HIST_count_simple( ptr::write_bytes( count as *mut u8, 0, - (maxSymbolValue.wrapping_add(1) as c_ulong) - .wrapping_mul(size_of::() as c_ulong) as libc::size_t, + (maxSymbolValue.wrapping_add(1) as c_ulong).wrapping_mul(size_of::() as c_ulong) + as libc::size_t, ); if srcSize == 0 { *maxSymbolValuePtr = 0; @@ -178,73 +178,47 @@ unsafe fn HIST_count_parallel_wksp( let mut c = cached; cached = MEM_read32(ip as *const c_void); ip = ip.add(4); - let fresh4 = &mut Counting1[c as u8 as usize]; - *fresh4 = (*fresh4).wrapping_add(1); - let fresh5 = &mut Counting2[(c >> 8) as u8 as usize]; - *fresh5 = (*fresh5).wrapping_add(1); - let fresh6 = &mut Counting3[(c >> 16) as u8 as usize]; - *fresh6 = (*fresh6).wrapping_add(1); - let fresh7 = &mut Counting4[(c >> 24) as usize]; - *fresh7 = (*fresh7).wrapping_add(1); + Counting1[c as u8 as usize] += 1; + Counting2[(c >> 8) as u8 as usize] += 1; + Counting3[(c >> 16) as u8 as usize] += 1; + Counting4[(c >> 24) as usize] += 1; c = cached; cached = MEM_read32(ip as *const c_void); ip = ip.add(4); - let fresh8 = &mut Counting1[c as u8 as usize]; - *fresh8 = (*fresh8).wrapping_add(1); - let fresh9 = &mut Counting2[(c >> 8) as u8 as usize]; - *fresh9 = (*fresh9).wrapping_add(1); - let fresh10 = &mut Counting3[(c >> 16) as u8 as usize]; - *fresh10 = (*fresh10).wrapping_add(1); - let fresh11 = &mut Counting4[(c >> 24) as usize]; - *fresh11 = (*fresh11).wrapping_add(1); + Counting1[c as u8 as usize] += 1; + Counting2[(c >> 8) as u8 as usize] += 1; + Counting3[(c >> 16) as u8 as usize] += 1; + Counting4[(c >> 24) as usize] += 1; c = cached; cached = MEM_read32(ip as *const c_void); ip = ip.add(4); - let fresh12 = &mut Counting1[c as u8 as usize]; - *fresh12 = (*fresh12).wrapping_add(1); - let fresh13 = &mut Counting2[(c >> 8) as u8 as usize]; - *fresh13 = (*fresh13).wrapping_add(1); - let fresh14 = &mut Counting3[(c >> 16) as u8 as usize]; - *fresh14 = (*fresh14).wrapping_add(1); - let fresh15 = &mut Counting4[(c >> 24) as usize]; - *fresh15 = (*fresh15).wrapping_add(1); + Counting1[c as u8 as usize] += 1; + Counting2[(c >> 8) as u8 as usize] += 1; + Counting3[(c >> 16) as u8 as usize] += 1; + Counting4[(c >> 24) as usize] += 1; c = cached; cached = MEM_read32(ip as *const c_void); ip = ip.add(4); - let fresh16 = &mut Counting1[c as u8 as usize]; - *fresh16 = (*fresh16).wrapping_add(1); - let fresh17 = &mut Counting2[(c >> 8) as u8 as usize]; - *fresh17 = (*fresh17).wrapping_add(1); - let fresh18 = &mut Counting3[(c >> 16) as u8 as usize]; - *fresh18 = (*fresh18).wrapping_add(1); - let fresh19 = &mut Counting4[(c >> 24) as usize]; - *fresh19 = (*fresh19).wrapping_add(1); + Counting1[c as u8 as usize] += 1; + Counting2[(c >> 8) as u8 as usize] += 1; + Counting3[(c >> 16) as u8 as usize] += 1; + Counting4[(c >> 24) as usize] += 1; } ip = ip.sub(4); } /* finish last symbols */ while ip < iend { - let fresh20 = ip; + Counting1[*ip as usize] += 1; ip = ip.add(1); - let fresh21 = &mut Counting1[*fresh20 as usize]; - *fresh21 = (*fresh21).wrapping_add(1); } { - let mut s: u32 = 0; - s = 0; - while s < 256 { - let fresh22 = &mut Counting1[s as usize]; - *fresh22 = (*fresh22).wrapping_add( - (Counting2[s as usize]) - .wrapping_add(Counting3[s as usize]) - .wrapping_add(Counting4[s as usize]), - ); - if Counting1[s as usize] > max { - max = Counting1[s as usize]; + for s in 0..256 { + Counting1[s] += Counting2[s] + Counting3[s] + Counting4[s]; + if (Counting1[s] > max) { + max = Counting1[s]; } - s = s.wrapping_add(1); } } From 343a0c62b93bcafe96bbe148fd38538f6d89543a Mon Sep 17 00:00:00 2001 From: PeterM <1434309+petesmc@users.noreply.github.com> Date: Sun, 1 Feb 2026 03:17:02 +1100 Subject: [PATCH 09/10] Clippy --- lib/compress/hist.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/compress/hist.rs b/lib/compress/hist.rs index 5d5a8192..47611a32 100644 --- a/lib/compress/hist.rs +++ b/lib/compress/hist.rs @@ -216,7 +216,7 @@ unsafe fn HIST_count_parallel_wksp( { for s in 0..256 { Counting1[s] += Counting2[s] + Counting3[s] + Counting4[s]; - if (Counting1[s] > max) { + if Counting1[s] > max { max = Counting1[s]; } } From fde145cd150694a5f8e2b1aceb1b2f4e8fc53056 Mon Sep 17 00:00:00 2001 From: PeterM <1434309+petesmc@users.noreply.github.com> Date: Sun, 1 Feb 2026 14:36:45 +1100 Subject: [PATCH 10/10] Tidy up hist.rs, clarify notes on slices --- lib/compress/hist.rs | 60 ++++++++++++++---------- lib/compress/zstd_compress.rs | 12 ++--- lib/compress/zstd_compress_superblock.rs | 4 +- 3 files changed, 43 insertions(+), 33 deletions(-) diff --git a/lib/compress/hist.rs b/lib/compress/hist.rs index 47611a32..70d475db 100644 --- a/lib/compress/hist.rs +++ b/lib/compress/hist.rs @@ -1,4 +1,4 @@ -use core::ffi::{c_int, c_uint, c_ulong, c_void}; +use core::ffi::{c_uint, c_ulong, c_void}; use core::ptr; use libc::size_t; @@ -14,11 +14,13 @@ pub const HIST_WKSP_SIZE_U32: usize = 1024; pub const HIST_WKSP_SIZE: usize = HIST_WKSP_SIZE_U32 * size_of::(); -#[cfg(all(target_arch = "aarch64", target_feature = "sve2"))] -pub const HIST_FAST_THRESHOLD: c_int = 500; - -#[cfg(not(target_feature = "sve2"))] -pub const HIST_FAST_THRESHOLD: c_int = 1500; +pub const HIST_FAST_THRESHOLD: core::ffi::c_int = { + if cfg!(all(target_arch = "aarch64", target_feature = "sve2")) { + 500 + } else { + 1500 + } +}; pub unsafe fn HIST_isError(code: size_t) -> c_uint { ERR_isError(code) as _ @@ -166,9 +168,10 @@ unsafe fn HIST_count_parallel_wksp( workSpace[..1024].fill(0); // Split workspace into 4 counting tables of 256 u32 each - let (Counting1, remainder) = workSpace.split_at_mut(256); - let (Counting2, remainder) = remainder.split_at_mut(256); - let (Counting3, Counting4) = remainder.split_at_mut(256); + let ([Counting1, Counting2, Counting3, Counting4], &mut []) = workSpace.as_chunks_mut::<256>() + else { + unreachable!(); + }; /* by stripes of 16 bytes */ { @@ -178,31 +181,38 @@ unsafe fn HIST_count_parallel_wksp( let mut c = cached; cached = MEM_read32(ip as *const c_void); ip = ip.add(4); - Counting1[c as u8 as usize] += 1; - Counting2[(c >> 8) as u8 as usize] += 1; - Counting3[(c >> 16) as u8 as usize] += 1; - Counting4[(c >> 24) as usize] += 1; + let indices: [u8; 4] = c.to_le_bytes(); + Counting1[indices[0] as usize] += 1; + Counting2[indices[1] as usize] += 1; + Counting3[indices[2] as usize] += 1; + Counting4[indices[3] as usize] += 1; + c = cached; cached = MEM_read32(ip as *const c_void); ip = ip.add(4); - Counting1[c as u8 as usize] += 1; - Counting2[(c >> 8) as u8 as usize] += 1; - Counting3[(c >> 16) as u8 as usize] += 1; - Counting4[(c >> 24) as usize] += 1; + let indices: [u8; 4] = c.to_le_bytes(); + Counting1[indices[0] as usize] += 1; + Counting2[indices[1] as usize] += 1; + Counting3[indices[2] as usize] += 1; + Counting4[indices[3] as usize] += 1; + c = cached; cached = MEM_read32(ip as *const c_void); ip = ip.add(4); - Counting1[c as u8 as usize] += 1; - Counting2[(c >> 8) as u8 as usize] += 1; - Counting3[(c >> 16) as u8 as usize] += 1; - Counting4[(c >> 24) as usize] += 1; + let indices: [u8; 4] = c.to_le_bytes(); + Counting1[indices[0] as usize] += 1; + Counting2[indices[1] as usize] += 1; + Counting3[indices[2] as usize] += 1; + Counting4[indices[3] as usize] += 1; + c = cached; cached = MEM_read32(ip as *const c_void); ip = ip.add(4); - Counting1[c as u8 as usize] += 1; - Counting2[(c >> 8) as u8 as usize] += 1; - Counting3[(c >> 16) as u8 as usize] += 1; - Counting4[(c >> 24) as usize] += 1; + let indices: [u8; 4] = c.to_le_bytes(); + Counting1[indices[0] as usize] += 1; + Counting2[indices[1] as usize] += 1; + Counting3[indices[2] as usize] += 1; + Counting4[indices[3] as usize] += 1; } ip = ip.sub(4); } diff --git a/lib/compress/zstd_compress.rs b/lib/compress/zstd_compress.rs index 04fe1890..a26cd48a 100644 --- a/lib/compress/zstd_compress.rs +++ b/lib/compress/zstd_compress.rs @@ -4670,7 +4670,7 @@ unsafe fn ZSTD_buildSequencesStatistics( stats.longOffsets = ZSTD_seqToCodes(seqStorePtr); let mut max = MaxLL; - //SAFETY: workspace must be 4 bytes aligned and >= HIST_WKSP_SIZE in bytes + //NOTE: existing requirement of implementation is that workspace must be 4 bytes aligned and >= HIST_WKSP_SIZE in bytes let entropyWorkspace_slice: &mut [u32] = core::slice::from_raw_parts_mut( entropyWorkspace as *mut u32, entropyWkspSize / size_of::(), @@ -4726,7 +4726,7 @@ unsafe fn ZSTD_buildSequencesStatistics( op = op.add(countSize); let mut max_0 = MaxOff; - //SAFETY: workspace must be 4 bytes aligned and >= HIST_WKSP_SIZE in bytes + //NOTE: existing requirement of implementation is that workspace must be 4 bytes aligned and >= HIST_WKSP_SIZE in bytes let entropyWorkspace_slice: &mut [u32] = core::slice::from_raw_parts_mut( entropyWorkspace as *mut u32, entropyWkspSize / size_of::(), @@ -4786,7 +4786,7 @@ unsafe fn ZSTD_buildSequencesStatistics( op = op.add(countSize_0); let mut max_1 = MaxML; - //SAFETY: workspace must be 4 bytes aligned and >= HIST_WKSP_SIZE in bytes + //NOTE: existing requirement of implementation is that workspace must be 4 bytes aligned and >= HIST_WKSP_SIZE in bytes let entropyWorkspace_slice: &mut [u32] = core::slice::from_raw_parts_mut( entropyWorkspace as *mut u32, entropyWkspSize / size_of::(), @@ -5714,7 +5714,7 @@ unsafe fn ZSTD_buildBlockEntropyStats_literals( return 0; } - //SAFETY: workspace must be 4 bytes aligned and >= HIST_WKSP_SIZE in bytes + //NOTE: existing requirement of implementation is that workspace must be 4 bytes aligned and >= HIST_WKSP_SIZE in bytes let workspace_slice: &mut [u32] = core::slice::from_raw_parts_mut(workspace as *mut u32, wkspSize / size_of::()); let largest = HIST_count_wksp( @@ -5958,7 +5958,7 @@ unsafe fn ZSTD_estimateBlockSize_literal( || (*hufMetadata).hType as core::ffi::c_uint == set_repeat as core::ffi::c_int as core::ffi::c_uint { - //SAFETY: workspace must be 4 bytes aligned and >= HIST_WKSP_SIZE in bytes + //NOTE: existing requirement of implementation is that workspace must be 4 bytes aligned and >= HIST_WKSP_SIZE in bytes let workspace_slice: &mut [u32] = core::slice::from_raw_parts_mut(workspace as *mut u32, wkspSize / size_of::()); let largest = HIST_count_wksp( @@ -6004,7 +6004,7 @@ unsafe fn ZSTD_estimateBlockSize_symbolType( let mut cSymbolTypeSizeEstimateInBits = 0; let mut max = maxCode; - //SAFETY: workspace must be 4 bytes aligned and >= HIST_WKSP_SIZE in bytes + //NOTE: existing requirement of implementation is that workspace must be 4 bytes aligned and >= HIST_WKSP_SIZE in bytes let workspace_slice: &mut [u32] = core::slice::from_raw_parts_mut(workspace as *mut u32, wkspSize / size_of::()); HIST_countFast_wksp( diff --git a/lib/compress/zstd_compress_superblock.rs b/lib/compress/zstd_compress_superblock.rs index 8b319a71..873370c5 100644 --- a/lib/compress/zstd_compress_superblock.rs +++ b/lib/compress/zstd_compress_superblock.rs @@ -642,7 +642,7 @@ unsafe fn ZSTD_estimateSubBlockSize_literal( || (*hufMetadata).hType as core::ffi::c_uint == set_repeat as core::ffi::c_int as core::ffi::c_uint { - //SAFETY: workspace must be 4 bytes aligned and >= HIST_WKSP_SIZE in bytes + //NOTE: existing requirement of implementation is that workspace must be 4 bytes aligned and >= HIST_WKSP_SIZE in bytes let workspace_slice: &mut [u32] = core::slice::from_raw_parts_mut(workspace as *mut u32, wkspSize / size_of::()); @@ -686,7 +686,7 @@ unsafe fn ZSTD_estimateSubBlockSize_symbolType( let mut cSymbolTypeSizeEstimateInBits = 0; let mut max = maxCode; - //SAFETY: workspace must be 4 bytes aligned and >= HIST_WKSP_SIZE in bytes + //NOTE: existing requirement of implementation is that workspace must be 4 bytes aligned and >= HIST_WKSP_SIZE in bytes let workspace_slice: &mut [u32] = core::slice::from_raw_parts_mut(workspace as *mut u32, wkspSize / size_of::());