Skip to content

Commit 966b07a

Browse files
committed
fix(hpc): address #218 review — fidelity length-guard + edge_codec examples & decode asserts
- reliability: `FidelityReport::compute` returns a DEGENERATE report (all coefficients 0.0, `rel_l2 = 1.0`) on length mismatch instead of silently truncating to the shorter prefix (codex P2 + coderabbit Major). This is consistent with the module's return-degenerate-not-panic convention; a dropped tail can't read as high fidelity.
- edge_codec: added runnable `///` examples to the 6 remaining public methods (`Codebook::assign`/`centroid`, `CoarseResidueCodec::encode`/`reconstruct`, `ProductQuantizer::encode`/`reconstruct`) per all-public-APIs-need-examples.
- edge_codec: both `reconstruct()` decoders now assert the packed-code length (`dim/2`, `m/2`) so malformed input fails with a clear message instead of an out-of-bounds panic deep in unpack (coderabbit Major).

edge_codec doctests 4→10; reliability 5; unit tests green; clippy -D warnings clean.

https://claude.ai/code/session_01D2WSmezQBNC3bUdHuGfGmo
1 parent aa235b7 commit 966b07a

2 files changed

Lines changed: 83 additions & 5 deletions

File tree

src/hpc/edge_codec.rs

Lines changed: 57 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -130,6 +130,14 @@ impl Codebook {
130130

131131
/// Index of the nearest centroid to `v` (scalar; identical result to the
132132
/// AMX `matmul_i8_to_i32` assignment path). Panics if `v.len() != dim`.
133+
///
134+
/// # Examples
135+
/// ```
136+
/// use ndarray::hpc::edge_codec::Codebook;
137+
/// let data = [0.0, 0.0, 9.0, 9.0]; // 2 points × dim 2
138+
/// let cb = Codebook::train(&data, 2, 2, 2, 5, 1);
139+
/// assert!(cb.assign(&[9.0, 9.0]) < cb.k as u32);
140+
/// ```
133141
#[inline]
134142
pub fn assign(&self, v: &[f32]) -> u32 {
135143
assert_eq!(v.len(), self.dim);
@@ -146,6 +154,14 @@ impl Codebook {
146154
}
147155

148156
/// Borrow centroid `c` as a slice.
157+
///
158+
/// # Examples
159+
/// ```
160+
/// use ndarray::hpc::edge_codec::Codebook;
161+
/// let data = [0.0, 0.0, 9.0, 9.0];
162+
/// let cb = Codebook::train(&data, 2, 2, 2, 5, 1);
163+
/// assert_eq!(cb.centroid(0).len(), 2);
164+
/// ```
149165
#[inline]
150166
pub fn centroid(&self, c: usize) -> &[f32] {
151167
&self.centroids[c * self.dim..(c + 1) * self.dim]
@@ -245,6 +261,15 @@ impl CoarseResidueCodec {
245261
}
246262

247263
/// Encode one vector to coarse index + packed residue nibbles.
264+
///
265+
/// # Examples
266+
/// ```
267+
/// use ndarray::hpc::edge_codec::CoarseResidueCodec;
268+
/// let data: Vec<f32> = (0..8 * 4).map(|i| (i % 5) as f32 - 2.0).collect();
269+
/// let codec = CoarseResidueCodec::fit(&data, 8, 4, 4, 6, 1);
270+
/// let code = codec.encode(&data[0..4]);
271+
/// assert_eq!(code.residue.len(), 2); // dim/2 packed bytes
272+
/// ```
248273
pub fn encode(&self, v: &[f32]) -> CoarseResidueCode {
249274
let dim = self.cb.dim;
250275
let index = self.cb.assign(v);
@@ -261,9 +286,20 @@ impl CoarseResidueCodec {
261286
}
262287
}
263288

264-
/// Reconstruct `centroid + dequantized residue`.
289+
/// Reconstruct `centroid + dequantized residue`. Panics if `code.residue` is
290+
/// not the expected `dim/2` packed bytes (guards malformed decode input).
291+
///
292+
/// # Examples
293+
/// ```
294+
/// use ndarray::hpc::edge_codec::CoarseResidueCodec;
295+
/// let data: Vec<f32> = (0..8 * 4).map(|i| (i % 5) as f32 - 2.0).collect();
296+
/// let codec = CoarseResidueCodec::fit(&data, 8, 4, 4, 6, 1);
297+
/// let v = codec.reconstruct(&codec.encode(&data[0..4]));
298+
/// assert_eq!(v.len(), 4);
299+
/// ```
265300
pub fn reconstruct(&self, code: &CoarseResidueCode) -> Vec<f32> {
266301
let dim = self.cb.dim;
302+
assert_eq!(code.residue.len(), dim / 2, "residue must be dim/2 packed bytes");
267303
let c = self.cb.centroid(code.index as usize);
268304
let res = unpack_nibbles_signed(&code.residue, dim);
269305
(0..dim)
@@ -320,6 +356,14 @@ impl ProductQuantizer {
320356
}
321357

322358
/// Encode one vector to `m/2` packed nibble bytes (16 bytes when `m = 32`).
359+
///
360+
/// # Examples
361+
/// ```
362+
/// use ndarray::hpc::edge_codec::ProductQuantizer;
363+
/// let data: Vec<f32> = (0..32 * 8).map(|i| ((i * 7 % 11) as f32) - 5.0).collect();
364+
/// let pq = ProductQuantizer::fit(&data, 32, 8, 4, 6, 1);
365+
/// assert_eq!(pq.encode(&data[0..8]).len(), 2); // m/2 bytes
366+
/// ```
323367
pub fn encode(&self, v: &[f32]) -> Vec<u8> {
324368
let codes: Vec<i8> = (0..self.m)
325369
.map(|s| {
@@ -334,8 +378,19 @@ impl ProductQuantizer {
334378
.collect()
335379
}
336380

337-
/// Reconstruct by concatenating the selected sub-centroids.
381+
/// Reconstruct by concatenating the selected sub-centroids. Panics if `code`
382+
/// is not the expected `m/2` packed bytes (guards malformed decode input).
383+
///
384+
/// # Examples
385+
/// ```
386+
/// use ndarray::hpc::edge_codec::ProductQuantizer;
387+
/// let data: Vec<f32> = (0..32 * 8).map(|i| ((i * 7 % 11) as f32) - 5.0).collect();
388+
/// let pq = ProductQuantizer::fit(&data, 32, 8, 4, 6, 1);
389+
/// let v = pq.reconstruct(&pq.encode(&data[0..8]));
390+
/// assert_eq!(v.len(), 8);
391+
/// ```
338392
pub fn reconstruct(&self, code: &[u8]) -> Vec<f32> {
393+
assert_eq!(code.len(), self.m / 2, "code must be m/2 packed bytes");
339394
let mut out = vec![0.0f32; self.m * self.sub_dim];
340395
for s in 0..self.m {
341396
let byte = code[s / 2];

src/hpc/reliability.rs

Lines changed: 26 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -232,7 +232,13 @@ pub struct FidelityReport {
232232
}
233233

234234
impl FidelityReport {
235-
/// Compute every coefficient for `(truth, estimate)` (equal-length samples).
235+
/// Compute every coefficient for `(truth, estimate)`.
236+
///
237+
/// Mismatched lengths are a caller error and yield a **degenerate** report
238+
/// (all coefficients `0.0`, `rel_l2 = 1.0`) rather than silently truncating to
239+
/// the shorter prefix — consistent with the module's return-degenerate (never
240+
/// panic, never hide) convention, so a dropped tail cannot masquerade as high
241+
/// fidelity.
236242
///
237243
/// # Examples
238244
/// ```
@@ -242,10 +248,27 @@ impl FidelityReport {
242248
/// let r = FidelityReport::compute(&truth, &est);
243249
/// assert!(r.pearson > 0.99 && r.spearman > 0.99);
244250
/// assert!(r.rel_l2 < 0.1);
251+
/// // Mismatched lengths → degenerate (does NOT truncate-then-score):
252+
/// let bad = FidelityReport::compute(&[1.0, 2.0, 100.0], &[1.0, 2.0]);
253+
/// assert_eq!(bad.pearson, 0.0);
254+
/// assert_eq!(bad.rel_l2, 1.0);
245255
/// ```
246256
pub fn compute(truth: &[f64], estimate: &[f64]) -> Self {
247-
let n = truth.len().min(estimate.len());
248-
let (t, e) = (&truth[..n], &estimate[..n]);
257+
// Length mismatch is a caller bug: return a degenerate (all-zero,
258+
// max-error) report rather than truncating to the shorter prefix, which
259+
// could hide a dropped tail behind a falsely-high score.
260+
if truth.len() != estimate.len() {
261+
return FidelityReport {
262+
pearson: 0.0,
263+
spearman: 0.0,
264+
icc: 0.0,
265+
cronbach: 0.0,
266+
rel_l2: 1.0,
267+
cosine: 0.0,
268+
};
269+
}
270+
let n = truth.len();
271+
let (t, e) = (truth, estimate);
249272
let mut se = 0.0;
250273
let mut st = 0.0;
251274
let mut dot = 0.0;

0 commit comments

Comments
 (0)