diff --git a/README.md b/README.md index fffaebf..38bb30c 100644 --- a/README.md +++ b/README.md @@ -155,7 +155,7 @@ Options: --ignore-errors Keep going on search error --quantization - [possible values: none, binary, binary2bit, binary1p5bit, scalar, product-x4, product-x8, product-x16, product-x32, product-x64] + [possible values: none, binary, binary2bit, binary1p5bit, turbo1bit, turbo1p5bit, turbo2bit, turbo4bit, scalar, product-x4, product-x8, product-x16, product-x32, product-x64] --quantization-in-ram Keep quantized vectors in memory [possible values: true, false] --quantization-rescore diff --git a/src/args.rs b/src/args.rs index c664bf7..cf025a4 100644 --- a/src/args.rs +++ b/src/args.rs @@ -10,6 +10,10 @@ pub enum QuantizationArg { Binary, Binary2bit, Binary1p5bit, + Turbo1bit, + Turbo1p5bit, + Turbo2bit, + Turbo4bit, Scalar, ProductX4, ProductX8, diff --git a/src/collection.rs b/src/collection.rs index 4b7e399..cdec774 100644 --- a/src/collection.rs +++ b/src/collection.rs @@ -15,7 +15,8 @@ use qdrant_client::qdrant::{ MultiVectorConfigBuilder, OptimizersConfigDiffBuilder, ProductQuantizationBuilder, QuantizationType, ScalarQuantizationBuilder, ShardingMethod, SparseIndexConfigBuilder, SparseVectorConfig, SparseVectorParamsBuilder, TextIndexParamsBuilder, TokenizerType, - UuidIndexParamsBuilder, VectorParams, VectorParamsMap, VectorsConfig, + TurboQuantBitSize, TurboQuantizationBuilder, UuidIndexParamsBuilder, VectorParams, + VectorParamsMap, VectorsConfig, }; use tokio::time::sleep; @@ -220,6 +221,26 @@ pub async fn recreate_collection(args: &Args, stopped: Arc) -> Resul BinaryQuantizationBuilder::new(args.quantization_in_ram.unwrap_or_default()) .encoding(BinaryQuantizationEncoding::OneAndHalfBits), ), + QuantizationArg::Turbo1bit => create_collection_builder.quantization_config( + TurboQuantizationBuilder::new() + .bits(TurboQuantBitSize::Bits1) + .always_ram(args.quantization_in_ram.unwrap_or_default()), + ), + QuantizationArg::Turbo1p5bit => create_collection_builder.quantization_config( + TurboQuantizationBuilder::new() + .bits(TurboQuantBitSize::Bits15) + .always_ram(args.quantization_in_ram.unwrap_or_default()), + ), + QuantizationArg::Turbo2bit => create_collection_builder.quantization_config( + TurboQuantizationBuilder::new() + .bits(TurboQuantBitSize::Bits2) + .always_ram(args.quantization_in_ram.unwrap_or_default()), + ), + QuantizationArg::Turbo4bit => create_collection_builder.quantization_config( + TurboQuantizationBuilder::new() + .bits(TurboQuantBitSize::Bits4) + .always_ram(args.quantization_in_ram.unwrap_or_default()), + ), quantization => { let compression = match quantization { QuantizationArg::ProductX4 => CompressionRatio::X4, @@ -231,6 +252,10 @@ pub async fn recreate_collection(args: &Args, stopped: Arc) -> Resul | QuantizationArg::Binary | QuantizationArg::Binary2bit | QuantizationArg::Binary1p5bit + | QuantizationArg::Turbo1bit + | QuantizationArg::Turbo1p5bit + | QuantizationArg::Turbo2bit + | QuantizationArg::Turbo4bit | QuantizationArg::None => { unreachable!() }