Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 2 additions & 3 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 2 additions & 2 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ ctrlc = "3.5.2"
futures = "0.3.32"
indicatif = "0.18.4"
memmap2 = "0.9.10"
qdrant-client = "1.17.0"
qdrant-client = { git = "https://github.com/qdrant/rust-client", branch = "v1-18-x" } # Workaround until we release the client.
rand = "0.10.1"
rand_distr = "0.6.0"
serde = "1.0"
Expand All @@ -31,4 +31,4 @@ tracing = { version = "0.1.44", features = ["log"] }

[profile.release]
lto = "fat"
codegen-units = 1
codegen-units = 1
8 changes: 8 additions & 0 deletions src/args.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,10 @@ pub enum QuantizationArg {
ProductX16,
ProductX32,
ProductX64,
Turbo1Bit,
Turbo1_5Bit,
Turbo2Bit,
Turbo4Bit,
}

/// Big F*cking Benchmark tool for stress-testing Qdrant
Expand Down Expand Up @@ -341,6 +345,10 @@ pub struct Args {
#[clap(long)]
pub quantization_oversampling: Option<f64>,

/// Disable data fitting in turbo quantization. Requires --quantization turboX-bit
#[clap(long)]
pub turbo_quant_disable_data_fit: Option<bool>,

/// Delay between requests in milliseconds
#[clap(long, value_parser = parse_number)]
pub delay: Option<usize>,
Expand Down
43 changes: 40 additions & 3 deletions src/collection.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,8 @@ use qdrant_client::qdrant::{
KeywordIndexParamsBuilder, MultiVectorComparator, MultiVectorConfigBuilder,
OptimizersConfigDiffBuilder, ProductQuantizationBuilder, QuantizationType,
ScalarQuantizationBuilder, ShardingMethod, SparseIndexConfigBuilder, SparseVectorConfig,
SparseVectorParamsBuilder, TextIndexParamsBuilder, TokenizerType, UuidIndexParamsBuilder,
VectorParams, VectorParamsMap, VectorsConfig,
SparseVectorParamsBuilder, TextIndexParamsBuilder, TokenizerType, TurboQuantBitSize,
TurboQuantizationBuilder, UuidIndexParamsBuilder, VectorParams, VectorParamsMap, VectorsConfig,
};
use tokio::time::sleep;

Expand Down Expand Up @@ -220,6 +220,39 @@ pub async fn recreate_collection(args: &Args, stopped: Arc<AtomicBool>) -> Resul
BinaryQuantizationBuilder::new(args.quantization_in_ram.unwrap_or_default())
.encoding(BinaryQuantizationEncoding::OneAndHalfBits),
),
QuantizationArg::Turbo1Bit
| QuantizationArg::Turbo1_5Bit
| QuantizationArg::Turbo2Bit
| QuantizationArg::Turbo4Bit => {
let size = match quantization {
QuantizationArg::Turbo1Bit => TurboQuantBitSize::Bits1,
QuantizationArg::Turbo1_5Bit => TurboQuantBitSize::Bits15,
QuantizationArg::Turbo2Bit => TurboQuantBitSize::Bits2,
QuantizationArg::Turbo4Bit => TurboQuantBitSize::Bits4,
QuantizationArg::None
| QuantizationArg::Binary
| QuantizationArg::Binary2bit
| QuantizationArg::Binary1p5bit
| QuantizationArg::Scalar
| QuantizationArg::ProductX4
| QuantizationArg::ProductX8
| QuantizationArg::ProductX16
| QuantizationArg::ProductX32
| QuantizationArg::ProductX64 => unreachable!(),
};

let data_fit_disabled = args.turbo_quant_disable_data_fit == Some(true);

let tq_builder = TurboQuantizationBuilder::new()
.always_ram(args.quantization_in_ram.unwrap_or_default())
.bits(size);

create_collection_builder.quantization_config(if data_fit_disabled {
tq_builder.data_fit_disabled()
} else {
tq_builder
})
}
quantization => {
let compression = match quantization {
QuantizationArg::ProductX4 => CompressionRatio::X4,
Expand All @@ -231,7 +264,11 @@ pub async fn recreate_collection(args: &Args, stopped: Arc<AtomicBool>) -> Resul
| QuantizationArg::Binary
| QuantizationArg::Binary2bit
| QuantizationArg::Binary1p5bit
| QuantizationArg::None => {
| QuantizationArg::None
| QuantizationArg::Turbo1Bit
| QuantizationArg::Turbo1_5Bit
| QuantizationArg::Turbo2Bit
| QuantizationArg::Turbo4Bit => {
unreachable!()
}
};
Expand Down