Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
38 commits
Select commit Hold shift + click to select a range
dcdc377
Make benchmarks stateful.
Apr 28, 2026
d60ac07
Checkpoint.
Apr 28, 2026
6cc92bd
Checkopint.
Apr 29, 2026
f050e3d
Oh boy.
Apr 29, 2026
2d03d11
Merge remote-tracking branch 'origin/main' into mhildebr/benchmark-pl…
May 1, 2026
aa78836
Fix delimiters.
May 1, 2026
ccf891a
Revert formatting.
May 1, 2026
2e060ea
Make plugins slightly more flexible.
May 1, 2026
7a85df8
Merge branch 'main' into mhildebr/benchmark-plugins
hildebrandmw May 1, 2026
fc8008a
Add determinant-diversity plugin support on search-plugin architecture
narendatha May 4, 2026
298d1b8
Integrate determinant-diversity via disk search_with post-processor
narendatha May 4, 2026
fbd34fd
Restrict determinant-diversity to async full-precision topk
narendatha May 4, 2026
851174e
Keep single async determinant-diversity example JSON
narendatha May 4, 2026
ed0c918
Improve plugin matching resilience via phase-shape helpers
narendatha May 5, 2026
3a6aa1a
Use SearchPhaseKind::as_str in benchmark plugin kinds
narendatha May 5, 2026
ccfe4d7
remove serde defaults
narendatha May 5, 2026
01e194d
Merge remote-tracking branch 'origin/main' into u/narendatha/det_div_…
narendatha May 5, 2026
86883b7
minor merge fix
narendatha May 5, 2026
554bc7f
hook up actual algorithm, replace placeholder.
narendatha May 5, 2026
8c59e6f
WIP: Trait bound experiment for async determinant-diversity - HRTB pr…
narendatha May 5, 2026
b73abc8
apply mark's beautiful fix for lifetime issue
narendatha May 6, 2026
d1884c3
Fix async determinant-diversity: wire real vectors, timing metrics, r…
narendatha May 6, 2026
701ce8e
Fix CI clippy-features spherical plugin errors and apply formatting
narendatha May 6, 2026
6b935d3
Add determinant-diversity support for async and disk-index benchmarks
narendatha May 6, 2026
6f47ba3
Merge branch 'u/narendatha/det_div_plugins' of https://github.com/mic…
narendatha May 6, 2026
8acbba2
imrpove code coverage
narendatha May 6, 2026
a48e255
minor fix
narendatha May 6, 2026
468b5d2
cargo fmt
narendatha May 6, 2026
d9e66ba
WIP: Benchmarks refactoring - threading fix, rich struct params, as_s…
narendatha May 13, 2026
9cda6e0
Add post-processor generic parameter to KNN struct in benchmark-core
narendatha May 13, 2026
8b07676
Task 5: Create unified validation struct for DeterminantDiversity in …
narendatha May 13, 2026
8ce2130
Task 6: Add module-level documentation to determinant_diversity_post_…
narendatha May 13, 2026
309ecb3
Task 7: Add algorithmic tests to determinant_diversity_post_process.rs
narendatha May 13, 2026
3be546a
Task 8: Merge similar routines in determinant_diversity_post_process.rs
narendatha May 13, 2026
f92c3be
Task 9: Replace Vec<Vec<f32>> with Matrix for residuals storage
narendatha May 13, 2026
10b0182
Task 10: Move determinant_diversity_post_process out of async_ module
narendatha May 13, 2026
ca20a24
Refactor determinant-diversity benchmark path
narendatha May 13, 2026
f538583
cargo fmt and clippy fixes for CI
narendatha May 13, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
73 changes: 68 additions & 5 deletions diskann-benchmark-core/src/search/graph/knn.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ use crate::{
};

/// A built-in helper for benchmarking the K-nearest neighbors method
/// [`graph::DiskANNIndex::search`].
/// [`graph::DiskANNIndex::search`] with optional post-processing support.
///
/// This is intended to be used in conjunction with [`search::search`] or
/// [`search::search_all`] and provides some basic additional metrics for
Expand All @@ -31,21 +31,29 @@ use crate::{
///
/// The provided implementation of [`Search`] accepts [`graph::search::Knn`]
/// and returns [`Metrics`] as additional output.
///
/// # Type Parameters
///
/// - `DP`: The data provider type
/// - `T`: The query element type
/// - `S`: The search strategy type
/// - `PP`: Optional post-processor type (defaults to `()` for no post-processing)
#[derive(Debug)]
pub struct KNN<DP, T, S>
pub struct KNN<DP, T, S, PP = ()>
where
DP: provider::DataProvider,
{
index: Arc<graph::DiskANNIndex<DP>>,
queries: Arc<Matrix<T>>,
strategy: Strategy<S>,
post_processor: Option<PP>,
}

impl<DP, T, S> KNN<DP, T, S>
impl<DP, T, S> KNN<DP, T, S, ()>
where
DP: provider::DataProvider,
{
/// Construct a new [`KNN`] searcher.
/// Construct a new [`KNN`] searcher without post-processing.
///
/// If `strategy` is one of the container variants of [`Strategy`], its length
/// must match the number of rows in `queries`. If this is the case, then the
Expand All @@ -67,10 +75,58 @@ where
index,
queries,
strategy,
post_processor: None,
}))
}
}

impl<DP, T, S, PP> KNN<DP, T, S, PP>
where
    DP: provider::DataProvider,
{
    /// Build a shared [`KNN`] searcher that carries the given `post_processor`.
    ///
    /// The `strategy` is checked against the number of rows in `queries` before
    /// the searcher is assembled; the supplied `post_processor` is stored as
    /// `Some(..)`.
    ///
    /// # Errors
    ///
    /// Returns an error if the number of elements in `strategy` is not compatible
    /// with the number of rows in `queries` (as reported by
    /// `strategy.length_compatible`).
    pub fn with_postprocessor(
        index: Arc<graph::DiskANNIndex<DP>>,
        queries: Arc<Matrix<T>>,
        strategy: Strategy<S>,
        post_processor: PP,
    ) -> anyhow::Result<Arc<Self>> {
        // Validate first so that no searcher is built for a mismatched strategy.
        let num_queries = queries.nrows();
        strategy.length_compatible(num_queries)?;

        let searcher = Self {
            index,
            queries,
            strategy,
            post_processor: Some(post_processor),
        };
        Ok(Arc::new(searcher))
    }

    /// Borrow the shared handle to the underlying index.
    pub fn index(&self) -> &Arc<graph::DiskANNIndex<DP>> {
        &self.index
    }

    /// Borrow the shared handle to the query matrix.
    pub fn queries(&self) -> &Arc<Matrix<T>> {
        &self.queries
    }

    /// Borrow the search strategy this searcher was constructed with.
    pub fn strategy(&self) -> &Strategy<S> {
        &self.strategy
    }

    /// Borrow the stored post-processor slot; `None` when no post-processor was
    /// supplied at construction.
    pub fn post_processor(&self) -> &Option<PP> {
        &self.post_processor
    }
}

/// Additional metrics collected during [`KNN`] search.
///
/// # Note
Expand All @@ -85,7 +141,14 @@ pub struct Metrics {
pub hops: u32,
}

impl<DP, T, S> Search for KNN<DP, T, S>
impl Metrics {
/// Construct a new metrics value.
pub fn new(comparisons: u32, hops: u32) -> Self {
Self { comparisons, hops }
}
}

impl<DP, T, S> Search for KNN<DP, T, S, ()>
where
DP: provider::DataProvider<Context: Default, ExternalId: search::Id>,
S: for<'a> glue::DefaultSearchStrategy<DP, &'a [T], DP::ExternalId> + Clone + AsyncFriendly,
Expand Down
51 changes: 51 additions & 0 deletions diskann-benchmark/example/async-determinant-diversity.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
{
"search_directories": [
"test_data/disk_index_search"
],
"jobs": [
{
"type": "graph-index-build",
"content": {
"source": {
"index-source": "Build",
"data_type": "float32",
"data": "disk_index_siftsmall_learn_256pts_data.fbin",
"distance": "squared_l2",
"max_degree": 32,
"l_build": 50,
"alpha": 1.2,
"backedge_ratio": 1.0,
"num_threads": 1,
"start_point_strategy": "medoid",
"num_insert_attempts": 1,
"saturate_inserts": false
},
"search_phase": {
"search-type": "topk",
"queries": "disk_index_sample_query_10pts.fbin",
"groundtruth": "disk_index_10pts_idx_uint32_truth_search_res.bin",
"reps": 5,
"num_threads": [
1
],
"post_processor": {
"type": "determinant-diversity",
"power": 2.0,
"eta": 0.01
},
"runs": [
{
"search_n": 20,
"search_l": [
20,
30,
40
],
"recall_k": 10
}
]
}
}
}
]
}
42 changes: 42 additions & 0 deletions diskann-benchmark/example/disk-index-determinant-diversity.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
{
"search_directories": [
"test_data/disk_index_search"
],
"jobs": [
{
"type": "disk-index",
"content": {
"source": {
"disk-index-source": "Build",
"data_type": "float32",
"data": "disk_index_siftsmall_learn_256pts_data.fbin",
"distance": "squared_l2",
"dim": 128,
"max_degree": 32,
"l_build": 50,
"num_threads": 1,
"build_ram_limit_gb": 2.0,
"num_pq_chunks": 128,
"quantization_type": "FP",
"save_path": "siftsmall_index_full_det_div"
},
"search_phase": {
"queries": "disk_index_sample_query_10pts.fbin",
"groundtruth": "disk_index_10pts_idx_uint32_truth_search_res.bin",
"search_list": [10, 20, 40],
"beam_width": 4,
"recall_at": 10,
"num_threads": 1,
"is_flat_search": false,
"distance": "squared_l2",
"vector_filters_file": null,
"post_processor": {
"type": "determinant-diversity",
"power": 2.0,
"eta": 1.0
}
}
}
}
]
}
37 changes: 25 additions & 12 deletions diskann-benchmark/src/backend/disk_index/search.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,8 @@ use diskann_benchmark_runner::{files::InputFile, utils::MicroSeconds};
use diskann_disk::{
data_model::{AdHoc, CachingStrategy},
search::provider::{
disk_provider::DiskIndexSearcher, disk_vertex_provider_factory::DiskVertexProviderFactory,
disk_provider::{DiskIndexSearcher, SearchPostProcessorKind},
disk_vertex_provider_factory::DiskVertexProviderFactory,
},
storage::disk_index_reader::DiskIndexReader,
utils::{instrumentation::PerfLogger, statistics, AlignedFileReaderFactory, QueryStatistics},
Expand All @@ -32,7 +33,10 @@ use serde::{Deserialize, Serialize};

use crate::{
backend::disk_index::json_spancollector::JsonSpanCollector,
inputs::disk::{DiskIndexLoad, DiskSearchPhase},
inputs::{
disk::{DiskIndexLoad, DiskSearchPhase},
post_processor::TopkPostProcessor,
},
utils::{datafiles, SimilarityMeasure},
};

Expand Down Expand Up @@ -264,6 +268,14 @@ where
zipped.for_each_in_pool(
pool.as_ref(),
|(((((q, vf), id_chunk), dist_chunk), stats), rc)| {
let post_processor = search_params.post_processor.as_ref().map(
|TopkPostProcessor::DeterminantDiversity { power, eta }| {
SearchPostProcessorKind::DeterminantDiversity {
power: *power,
eta: *eta,
}
},
);
let vector_filter = if search_params.vector_filters_file.is_none() {
None
} else {
Expand All @@ -277,20 +289,21 @@ where
l,
Some(search_params.beam_width),
vector_filter,
post_processor,
search_params.is_flat_search,
) {
Ok(search_result) => {
*stats = search_result.stats.query_statistics;
*rc = search_result.results.len() as u32;
let actual_results = search_result
.results
.len()
.min(search_params.recall_at as usize);
for (i, result_item) in search_result
.results
.iter()
.take(actual_results)
.enumerate()
let base_count = (search_result.stats.result_count as usize)
.min(search_params.recall_at as usize)
.min(search_result.results.len());

*rc = base_count as u32;
id_chunk.fill(0);
dist_chunk.fill(0.0);

for (i, result_item) in
search_result.results.iter().take(base_count).enumerate()
{
id_chunk[i] = result_item.vertex_id;
dist_chunk[i] = result_item.distance;
Expand Down
Loading
Loading