diff --git a/Cargo.lock b/Cargo.lock index 4d69022..ddc9228 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1391,9 +1391,8 @@ dependencies = [ [[package]] name = "qdrant-client" -version = "1.17.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f5d0a9b168ecf8f30a3eb7e8f4766e3050701242ffbe99838b58e6c4251e7211" +version = "1.16.1-dev" +source = "git+https://github.com/qdrant/rust-client?branch=v1-18-x#578ec507905037c8bca2aabd0a057acab6d09453" dependencies = [ "anyhow", "derive_builder", diff --git a/Cargo.toml b/Cargo.toml index ab2f8e1..17f93a3 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -17,7 +17,7 @@ ctrlc = "3.5.2" futures = "0.3.32" indicatif = "0.18.4" memmap2 = "0.9.10" -qdrant-client = "1.17.0" +qdrant-client = { git = "https://github.com/qdrant/rust-client", branch = "v1-18-x" } # Workaround until we release the client. rand = "0.10.1" rand_distr = "0.6.0" serde = "1.0" @@ -31,4 +31,4 @@ tracing = { version = "0.1.44", features = ["log"] } [profile.release] lto = "fat" -codegen-units = 1 \ No newline at end of file +codegen-units = 1 diff --git a/src/args.rs b/src/args.rs index fded104..ee96464 100644 --- a/src/args.rs +++ b/src/args.rs @@ -16,6 +16,10 @@ pub enum QuantizationArg { ProductX16, ProductX32, ProductX64, + Turbo1Bit, + Turbo1_5Bit, + Turbo2Bit, + Turbo4Bit, } /// Big F*cking Benchmark tool for stress-testing Qdrant @@ -341,6 +345,10 @@ pub struct Args { #[clap(long)] pub quantization_oversampling: Option, + /// Disable datafitting in turbo-quant quantization. Requires --quantization turboX-bit + #[clap(long)] + pub turbo_quant_disable_data_fit: Option, + /// Delay between requests in milliseconds #[clap(long, value_parser = parse_number)] pub delay: Option, diff --git a/src/collection.rs b/src/collection.rs index 7667c38..b8a8512 100644 --- a/src/collection.rs +++ b/src/collection.rs @@ -14,8 +14,8 @@ use qdrant_client::qdrant::{ KeywordIndexParamsBuilder, MultiVectorComparator, MultiVectorConfigBuilder, OptimizersConfigDiffBuilder, ProductQuantizationBuilder, QuantizationType, ScalarQuantizationBuilder, ShardingMethod, SparseIndexConfigBuilder, SparseVectorConfig, - SparseVectorParamsBuilder, TextIndexParamsBuilder, TokenizerType, UuidIndexParamsBuilder, - VectorParams, VectorParamsMap, VectorsConfig, + SparseVectorParamsBuilder, TextIndexParamsBuilder, TokenizerType, TurboQuantBitSize, + TurboQuantizationBuilder, UuidIndexParamsBuilder, VectorParams, VectorParamsMap, VectorsConfig, }; use tokio::time::sleep; @@ -220,6 +220,39 @@ pub async fn recreate_collection(args: &Args, stopped: Arc) -> Resul BinaryQuantizationBuilder::new(args.quantization_in_ram.unwrap_or_default()) .encoding(BinaryQuantizationEncoding::OneAndHalfBits), ), + QuantizationArg::Turbo1Bit + | QuantizationArg::Turbo1_5Bit + | QuantizationArg::Turbo2Bit + | QuantizationArg::Turbo4Bit => { + let size = match quantization { + QuantizationArg::Turbo1Bit => TurboQuantBitSize::Bits1, + QuantizationArg::Turbo1_5Bit => TurboQuantBitSize::Bits15, + QuantizationArg::Turbo2Bit => TurboQuantBitSize::Bits2, + QuantizationArg::Turbo4Bit => TurboQuantBitSize::Bits4, + QuantizationArg::None + | QuantizationArg::Binary + | QuantizationArg::Binary2bit + | QuantizationArg::Binary1p5bit + | QuantizationArg::Scalar + | QuantizationArg::ProductX4 + | QuantizationArg::ProductX8 + | QuantizationArg::ProductX16 + | QuantizationArg::ProductX32 + | QuantizationArg::ProductX64 => unreachable!(), + }; + + let data_fit_disabled = args.turbo_quant_disable_data_fit == Some(true); + + let tq_builder = TurboQuantizationBuilder::new() + .always_ram(args.quantization_in_ram.unwrap_or_default()) + .bits(size); + + create_collection_builder.quantization_config(if data_fit_disabled { + tq_builder.data_fit_disabled() + } else { + tq_builder + }) + } quantization => { let compression = match quantization { QuantizationArg::ProductX4 => CompressionRatio::X4, @@ -231,7 +264,11 @@ pub async fn recreate_collection(args: &Args, stopped: Arc) -> Resul | QuantizationArg::Binary | QuantizationArg::Binary2bit | QuantizationArg::Binary1p5bit - | QuantizationArg::None => { + | QuantizationArg::None + | QuantizationArg::Turbo1Bit + | QuantizationArg::Turbo1_5Bit + | QuantizationArg::Turbo2Bit + | QuantizationArg::Turbo4Bit => { unreachable!() } };