From ad2bfeea4c3a714bd5ed2f8718f2e477883fe4ab Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Tue, 12 May 2026 23:14:09 +0000 Subject: [PATCH 1/3] Initial plan From b81465bc2352ff51a6b80c2b9207b5d6e5f66b83 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Tue, 12 May 2026 23:22:24 +0000 Subject: [PATCH 2/3] Add int8 data type support to diskann-garnet and vectorset - Add SB8 variant to VectorValueType for signed 8-bit input - Handle SB8 in interpret_vector (direct storage and f32 conversion) - Handle Q8 quant type in create_index to create index with i8 VectorRepr - Handle FP32 + Q8 path for converting f32 input to i8 storage - Add Int8 variant to vectorset DataType enum - Implement Element trait for i8 and wire up dispatch - Use SB8/Q8 protocol values in ingest and query commands - Update README to document SB8 and Q8 extensions Agent-Logs-Url: https://github.com/microsoft/DiskANN/sessions/7664c19b-7cdd-4a84-8249-951b2e2e040b Co-authored-by: harsha-simhadri <5590673+harsha-simhadri@users.noreply.github.com> --- diskann-garnet/README.md | 9 +++++-- diskann-garnet/src/lib.rs | 52 ++++++++++++++++++++++++++++++++++++++- vectorset/src/main.rs | 10 ++++++++ 3 files changed, 68 insertions(+), 3 deletions(-) diff --git a/diskann-garnet/README.md b/diskann-garnet/README.md index cc3f347cf..6cf9f564e 100644 --- a/diskann-garnet/README.md +++ b/diskann-garnet/README.md @@ -18,11 +18,16 @@ added: - `XB8`: When specifying vector input type, you can use `XB8` instead of `FP32` to specify binary data in uint8 format, one byte per dimension. +- `SB8`: When specifying vector input type, you can use `SB8` instead of `FP32` + to specify binary data in int8 (signed) format, one byte per dimension. - `XPREQ8`: This is a pseudo-quantizer that specifies the vector data will be stored as full precision data in uint8 format. +- `Q8`: This is a pseudo-quantizer that specifies the vector data will be + stored as full precision data in int8 (signed) format. -Generally you will use `XB8` with `XPREQ8` to input and store uint8 vectors and -`FP32` with `NOQUANT` to input and store f32 vectors. +Generally you will use `XB8` with `XPREQ8` to input and store uint8 vectors, +`SB8` with `Q8` to input and store int8 vectors, and `FP32` with `NOQUANT` to +input and store f32 vectors. Support for binary and scalar quantization is coming, along with support for customizing the distance metric. diff --git a/diskann-garnet/src/lib.rs b/diskann-garnet/src/lib.rs index a66cd7ab0..3e7112656 100644 --- a/diskann-garnet/src/lib.rs +++ b/diskann-garnet/src/lib.rs @@ -83,6 +83,7 @@ pub enum VectorValueType { Invalid = 0, FP32, XB8, + SB8, } #[derive(Copy, Clone, Debug, PartialEq, Eq)] @@ -245,6 +246,21 @@ pub unsafe extern "C" fn create_index( ptr::null() } } + VectorQuantType::Q8 => { + if let Ok(index) = create_index_impl::( + quant_type, + config, + dim as usize, + metric_type, + max_degree as usize, + callbacks, + context, + ) { + Arc::into_raw(index).cast::() + } else { + ptr::null() + } + } VectorQuantType::NoQuant => { if let Ok(index) = create_index_impl::( quant_type, @@ -309,7 +325,7 @@ fn interpret_vector<'a>( ) -> Option> { let vector_len_bytes = match vector_value_type { VectorValueType::FP32 => vector_len * 4, - VectorValueType::XB8 => vector_len, + VectorValueType::XB8 | VectorValueType::SB8 => vector_len, VectorValueType::Invalid => return None, }; @@ -332,6 +348,20 @@ fn interpret_vector<'a>( } PolyCow::from(bp) } + VectorQuantType::Q8 => { + let mut bp = if let Ok(bp) = Poly::broadcast(0u8, vector_len, AlignToEight) { + bp + } else { + return None; + }; + for (idx, e) in bp.iter_mut().enumerate() { + let el_size = mem::size_of::(); + *e = f32::from_le_bytes( + v[idx * el_size..(idx + 1) * el_size].try_into().unwrap(), + ) as i8 as u8; + } + PolyCow::from(bp) + } VectorQuantType::NoQuant if v.as_ptr().align_offset(4) == 0 => { // pointer is correctly aligned to interpret as f32 PolyCow::from(v) @@ -370,6 +400,26 @@ fn interpret_vector<'a>( } _ => return None, }, + VectorValueType::SB8 => match quant_type { + VectorQuantType::Q8 => PolyCow::from(v), + VectorQuantType::NoQuant => { + let mut fp = if let Ok(p) = + Poly::broadcast(0u8, vector_len_bytes * mem::size_of::(), AlignToEight) + { + p + } else { + return None; + }; + for (fe, be) in bytemuck::cast_slice_mut::(&mut fp) + .iter_mut() + .zip(v) + { + *fe = (*be as i8) as f32; + } + PolyCow::from(fp) + } + _ => return None, + }, }; Some(v) diff --git a/vectorset/src/main.rs b/vectorset/src/main.rs index 4ba864d56..e331d0dc8 100644 --- a/vectorset/src/main.rs +++ b/vectorset/src/main.rs @@ -32,6 +32,7 @@ const DEFAULT_PORT: u16 = 6379; trait Element: bytemuck::Pod + std::fmt::Debug + Send + Sync + 'static {} impl Element for u8 {} +impl Element for i8 {} impl Element for f32 {} #[derive(Deserialize)] @@ -158,6 +159,7 @@ struct QueryArgs { #[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord, ValueEnum)] enum DataType { Uint8, + Int8, Float32, } @@ -287,6 +289,7 @@ async fn async_main(opts: Options) -> Result<()> { match opts.data_type { DataType::Uint8 => dispatch::(&opts.command, &opts, infos, cred).await, + DataType::Int8 => dispatch::(&opts.command, &opts, infos, cred).await, DataType::Float32 => dispatch::(&opts.command, &opts, infos, cred).await, } } @@ -393,6 +396,9 @@ async fn ingest( DataType::Uint8 => { pipeline.arg(b"XB8"); } + DataType::Int8 => { + pipeline.arg(b"SB8"); + } DataType::Float32 => { pipeline.arg(b"FP32"); } @@ -406,6 +412,9 @@ async fn ingest( DataType::Uint8 => { pipeline.arg(b"XPREQ8"); } + DataType::Int8 => { + pipeline.arg(b"Q8"); + } DataType::Float32 => { pipeline.arg(b"NOQUANT"); } @@ -533,6 +542,7 @@ async fn query( match data_type { DataType::Uint8 => pipeline.arg(b"XB8"), + DataType::Int8 => pipeline.arg(b"SB8"), DataType::Float32 => pipeline.arg(b"FP32"), }; From 8339830d22074dbc9df5277d732858ba365891aa Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Wed, 13 May 2026 00:34:34 +0000 Subject: [PATCH 3/3] Add Garnet C# companion patch instructions for SB8/Q8 support Agent-Logs-Url: https://github.com/microsoft/DiskANN/sessions/9adc0f60-1eec-419c-ae00-3fbc815d2e5b Co-authored-by: harsha-simhadri <5590673+harsha-simhadri@users.noreply.github.com> --- garnet-sb8-support.patch.md | 271 ++++++++++++++++++++++++++++++++++++ 1 file changed, 271 insertions(+) create mode 100644 garnet-sb8-support.patch.md diff --git a/garnet-sb8-support.patch.md b/garnet-sb8-support.patch.md new file mode 100644 index 000000000..102253de3 --- /dev/null +++ b/garnet-sb8-support.patch.md @@ -0,0 +1,271 @@ +# Garnet SB8 (Signed Int8) Support — Manual Patch Instructions +# +# These changes add VectorValueType.SB8 and wire it through the Garnet server +# to match the DiskANN Rust FFI changes (VectorValueType::SB8 = 3, VectorQuantType::Q8). +# +# 4 files to edit: +# +# ══════════════════════════════════════════════════════════════════════ +# 1. libs/server/Storage/Session/MainStore/VectorStoreOps.cs +# ══════════════════════════════════════════════════════════════════════ +# +# In the VectorValueType enum, after the XB8 member, add SB8: +# +# FIND: +# /// +# /// Bytes (8 bit). +# /// +# XB8, +# } +# +# REPLACE WITH: +# /// +# /// Bytes (8 bit). +# /// +# XB8, +# +# /// +# /// Signed bytes (int8, [-128, 127]). +# /// +# SB8, +# } +# +# +# ══════════════════════════════════════════════════════════════════════ +# 2. libs/server/Resp/Vector/RespServerSessionVectors.cs +# ══════════════════════════════════════════════════════════════════════ +# +# --- 2a. In NetworkVADD, after the "XB8" parsing block, add an "SB8" block. +# +# FIND (the XB8 block in VADD, around line 115): +# else if (parseState.GetArgSliceByRef(curIx).Span.EqualsUpperCaseSpanIgnoringCase("XB8"u8)) +# { +# curIx++; +# if (curIx >= parseState.Count) +# { +# return AbortWithWrongNumberOfArguments("VADD"); +# } +# +# var asBytes = parseState.GetArgSliceByRef(curIx).Span; +# curIx++; +# +# valueType = VectorValueType.XB8; +# values = asBytes; +# } +# +# REPLACE WITH: +# else if (parseState.GetArgSliceByRef(curIx).Span.EqualsUpperCaseSpanIgnoringCase("XB8"u8)) +# { +# curIx++; +# if (curIx >= parseState.Count) +# { +# return AbortWithWrongNumberOfArguments("VADD"); +# } +# +# var asBytes = parseState.GetArgSliceByRef(curIx).Span; +# curIx++; +# +# valueType = VectorValueType.XB8; +# values = asBytes; +# } +# else if (parseState.GetArgSliceByRef(curIx).Span.EqualsUpperCaseSpanIgnoringCase("SB8"u8)) +# { +# curIx++; +# if (curIx >= parseState.Count) +# { +# return AbortWithWrongNumberOfArguments("VADD"); +# } +# +# var asBytes = parseState.GetArgSliceByRef(curIx).Span; +# curIx++; +# +# valueType = VectorValueType.SB8; +# values = asBytes; +# } +# +# +# --- 2b. In the quant guard (around line 349), allow Q8 through: +# +# FIND: +# if (quantType != VectorQuantType.XPreQ8 && quantType != VectorQuantType.NoQuant) +# +# REPLACE WITH: +# if (quantType != VectorQuantType.XPreQ8 && quantType != VectorQuantType.NoQuant && quantType != VectorQuantType.Q8) +# +# +# --- 2c. In NetworkVSIM, after the "XB8" parsing block, add an "SB8" block. +# +# FIND (the XB8 block in VSIM, around line 502): +# else if (kind.Span.EqualsUpperCaseSpanIgnoringCase("XB8"u8)) +# { +# if (curIx >= parseState.Count) +# { +# return AbortWithWrongNumberOfArguments("VSIM"); +# } +# +# var asBytes = parseState.GetArgSliceByRef(curIx).Span; +# +# valueType = VectorValueType.XB8; +# values = asBytes; +# curIx++; +# } +# +# REPLACE WITH: +# else if (kind.Span.EqualsUpperCaseSpanIgnoringCase("XB8"u8)) +# { +# if (curIx >= parseState.Count) +# { +# return AbortWithWrongNumberOfArguments("VSIM"); +# } +# +# var asBytes = parseState.GetArgSliceByRef(curIx).Span; +# +# valueType = VectorValueType.XB8; +# values = asBytes; +# curIx++; +# } +# else if (kind.Span.EqualsUpperCaseSpanIgnoringCase("SB8"u8)) +# { +# if (curIx >= parseState.Count) +# { +# return AbortWithWrongNumberOfArguments("VSIM"); +# } +# +# var asBytes = parseState.GetArgSliceByRef(curIx).Span; +# +# valueType = VectorValueType.SB8; +# values = asBytes; +# curIx++; +# } +# +# +# ══════════════════════════════════════════════════════════════════════ +# 3. libs/server/Resp/Vector/DiskANNService.cs +# ══════════════════════════════════════════════════════════════════════ +# +# --- 3a. In the Insert method, add SB8 arm after XB8 (around line 79): +# +# FIND: +# else if (vectorType == VectorValueType.XB8) +# { +# vector_len = vector.Length; +# } +# else +# { +# throw new NotImplementedException($"{vectorType}"); +# } +# +# REPLACE WITH: +# else if (vectorType == VectorValueType.XB8) +# { +# vector_len = vector.Length; +# } +# else if (vectorType == VectorValueType.SB8) +# { +# vector_len = vector.Length; +# } +# else +# { +# throw new NotImplementedException($"{vectorType}"); +# } +# +# +# --- 3b. In SearchVector method, same pattern (around line 117): +# +# FIND: +# else if (vectorType == VectorValueType.XB8) +# { +# vector_len = vector.Length; +# } +# else +# { +# throw new NotImplementedException($"{vectorType}"); +# } +# +# REPLACE WITH: +# else if (vectorType == VectorValueType.XB8) +# { +# vector_len = vector.Length; +# } +# else if (vectorType == VectorValueType.SB8) +# { +# vector_len = vector.Length; +# } +# else +# { +# throw new NotImplementedException($"{vectorType}"); +# } +# +# +# ══════════════════════════════════════════════════════════════════════ +# 4. libs/server/Resp/Vector/VectorManager.cs +# ══════════════════════════════════════════════════════════════════════ +# +# --- 4a. In CalculateValueDimensions (around line 945), add SB8: +# +# FIND: +# else if (valueType == VectorValueType.XB8) +# { +# return (uint)(values.Length); +# } +# else +# { +# throw new NotImplementedException($"{valueType}"); +# } +# +# REPLACE WITH: +# else if (valueType == VectorValueType.XB8) +# { +# return (uint)(values.Length); +# } +# else if (valueType == VectorValueType.SB8) +# { +# return (uint)(values.Length); +# } +# else +# { +# throw new NotImplementedException($"{valueType}"); +# } +# +# +# --- 4b. In TryGetEmbedding (around line 920), add Q8 dequant before the +# throw NotImplementedException: +# +# FIND: +# else if (quantType == VectorQuantType.XPreQ8) +# { +# for (var i = 0; i < asBytes.Length; i++) +# { +# into[i] = from[i]; +# } +# } +# else +# { +# // TODO: Handle Q8 and BIN as they are implemented +# throw new NotImplementedException($"Unexpected quantization: {quantType}"); +# } +# +# REPLACE WITH: +# else if (quantType == VectorQuantType.XPreQ8) +# { +# for (var i = 0; i < asBytes.Length; i++) +# { +# into[i] = from[i]; +# } +# } +# else if (quantType == VectorQuantType.Q8) +# { +# // Q8 stores signed bytes; dequantize by sign-extending to float +# for (var i = 0; i < asBytes.Length; i++) +# { +# into[i] = (float)(sbyte)from[i]; +# } +# } +# else +# { +# throw new NotImplementedException($"Unexpected quantization: {quantType}"); +# } +# +# ══════════════════════════════════════════════════════════════════════ +# End of patch instructions. +# ══════════════════════════════════════════════════════════════════════