Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 25 additions & 0 deletions benchmarks/bench.sh
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,8 @@ topk_tpch: Benchmark of top-k (sorting with limit) queries on TPC-H
external_aggr: External aggregation benchmark on TPC-H dataset (SF=1)
wide_schema: Small-projection queries on a wide synthetic dataset (1024 cols × 256 files) — measures per-file metadata overhead
(runs both 'wide' and 'narrow' subgroups: narrow is an internal baseline; the wide-vs-narrow ratio is the signal)
adversarial_filter: Conjunct-ordering stress test for adaptive filter reordering (synthetic data, generated inline)
(set DATAFUSION_EXECUTION_ADAPTIVE_FILTER_REORDERING=true to enable the optimization; ADV_ROWS sizes the data)

# ClickBench Benchmarks
clickbench_1: ClickBench queries against a single parquet file
Expand Down Expand Up @@ -245,6 +247,10 @@ main() {
wide_schema)
data_wide_schema
;;
adversarial_filter)
# Data is generated inline by the suite's init SQL.
echo "adversarial_filter: no external data to generate"
;;
tpcds)
data_tpcds
;;
Expand Down Expand Up @@ -458,6 +464,9 @@ main() {
wide_schema)
run_wide_schema
;;
adversarial_filter)
run_adversarial_filter
;;
tpcds)
run_tpcds
;;
Expand Down Expand Up @@ -778,6 +787,22 @@ run_wide_schema() {
bash -c "$SQL_CARGO_COMMAND"
}

# Benchmark driver for the adversarial_filter suite.
#
# There is no separate data-generation step: the suite builds its synthetic
# table inline. Two environment variables shape a run and are forwarded
# explicitly to the SQL benchmark harness:
#   DATAFUSION_EXECUTION_ADAPTIVE_FILTER_REORDERING  optimization under test
#                                                    (defaults to "false")
#   ADV_ROWS                                         rows of synthetic data
#                                                    (defaults to 10000000)
# SIMULATE_LATENCY is passed through as-is, and BENCH_QUERY is only added to
# the environment when QUERY is set and non-empty. Flag-on vs flag-off
# comparisons are therefore driven purely by the caller's environment.
run_adversarial_filter() {
    # Resolve the defaults once so the banner and the forwarded values agree.
    local reorder_flag="${DATAFUSION_EXECUTION_ADAPTIVE_FILTER_REORDERING:-false}"
    local row_count="${ADV_ROWS:-10000000}"

    echo "Running adversarial_filter benchmark (adaptive_filter_reordering=${reorder_flag})..."

    # The inner quotes keep a QUERY containing whitespace as a single
    # BENCH_QUERY=... word even though the outer expansion is unquoted.
    debug_run env \
        BENCH_NAME=adversarial_filter \
        DATAFUSION_EXECUTION_ADAPTIVE_FILTER_REORDERING="${reorder_flag}" \
        ADV_ROWS="${row_count}" \
        SIMULATE_LATENCY="${SIMULATE_LATENCY}" \
        ${QUERY:+BENCH_QUERY="${QUERY}"} \
        bash -c "$SQL_CARGO_COMMAND"
}

# Runs the tpch in memory (needs tpch parquet data)
run_tpch_mem() {
SCALE_FACTOR=$1
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
name = "adversarial_filter"
description = "Conjunct-ordering stress test for adaptive filter reordering: five equally-expensive regexp predicates whose selective member is written last. Enable the optimization with DATAFUSION_EXECUTION_ADAPTIVE_FILTER_REORDERING=true; size the synthetic data with ADV_ROWS (default 10M)."
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
-- Five expensive regexp predicates with the VERY selective one ('rare', ~0.1%)
-- written LAST. Neither SQL order, the cheap/expensive heuristic (#22343), nor
-- BinaryExpr's leftmost-only pre-selection reorders it, so the baseline scans
-- every predicate over ~every row. Only runtime measurement promotes `rare` to
-- gate the rest. This is where adaptive reordering wins.

name Q01
group adversarial_filter

init sql_benchmarks/adversarial_filter/init/set_config.sql

load sql_benchmarks/adversarial_filter/init/load.sql

assert I
SELECT COUNT(*) > 0 FROM adv WHERE regexp_like(s, 'rare');
----
true

run
SELECT count(*) FROM adv
WHERE regexp_like(s, 'aaa')
AND regexp_like(s, 'bbb')
AND regexp_like(s, 'ccc')
AND regexp_like(s, 'ddd')
AND regexp_like(s, 'rare');

cleanup sql_benchmarks/adversarial_filter/init/cleanup.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
-- Control: the same five predicates, but the selective one ('rare') FIRST.
-- BinaryExpr's AND short-circuit already gates on a leftmost selective
-- conjunct, so the baseline is already near-optimal and the flag is ~neutral
-- here. This isolates the Q01 win as purely an ordering fix (and confirms the
-- adaptive path adds no measurable overhead once it can't help).

name Q02
group adversarial_filter

init sql_benchmarks/adversarial_filter/init/set_config.sql

load sql_benchmarks/adversarial_filter/init/load.sql

assert I
SELECT COUNT(*) > 0 FROM adv WHERE regexp_like(s, 'rare');
----
true

run
SELECT count(*) FROM adv
WHERE regexp_like(s, 'rare')
AND regexp_like(s, 'aaa')
AND regexp_like(s, 'bbb')
AND regexp_like(s, 'ccc')
AND regexp_like(s, 'ddd');

cleanup sql_benchmarks/adversarial_filter/init/cleanup.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
DROP TABLE IF EXISTS adv;
25 changes: 25 additions & 0 deletions benchmarks/sql_benchmarks/adversarial_filter/init/load.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
-- Synthetic dataset generated inline (no external data files). Each row's
-- string column `s` embeds five markers separated by 180 chars of 'q' filler
-- (six runs of 30), so every regexp_like has a long value to search (i.e.
-- each predicate is expensive), with deliberately different selectivity.
-- Marker presence is keyed on `value` from generate_series:
--
-- 'aaa' present in 90% of rows (value % 10 <> 0)
-- 'bbb' present in ~86% of rows (value % 7 <> 0)
-- 'ccc' present in 80% of rows (value % 5 <> 0)
-- 'ddd' present in 75% of rows (value % 4 <> 0)
-- 'rare' present in ~0.1% of rows (value % 1009 = 5) <- the selective one
--
-- NOTE: the moduli are NOT all pairwise coprime (10 shares a factor with both
-- 5 and 4), so the markers are not fully independent: value % 5 <> 0 implies
-- value % 10 <> 0, i.e. every row containing 'ccc' also contains 'aaa'. Only
-- 7 and 1009 are coprime to every other modulus. The five-way AND is still
-- non-empty once ADV_ROWS >= 1014 (value = 1014 carries all five markers;
-- value = 5 has 'rare' but lacks 'ccc').
--
-- The row count comes from the ADV_ROWS environment variable, defaulting to
-- 10,000,000 when it is unset or empty.
CREATE TABLE adv AS
SELECT
repeat('q', 30)
|| CASE WHEN value % 10 <> 0 THEN 'aaa' ELSE 'zzz' END
|| repeat('q', 30)
|| CASE WHEN value % 7 <> 0 THEN 'bbb' ELSE 'zzz' END
|| repeat('q', 30)
|| CASE WHEN value % 5 <> 0 THEN 'ccc' ELSE 'zzz' END
|| repeat('q', 30)
|| CASE WHEN value % 4 <> 0 THEN 'ddd' ELSE 'zzz' END
|| repeat('q', 30)
|| CASE WHEN value % 1009 = 5 THEN 'rare' ELSE 'zzzz' END
|| repeat('q', 30) AS s
FROM generate_series(1, ${ADV_ROWS:-10000000});
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
-- Toggle the optimization under test. Off by default (acts as the baseline);
-- set DATAFUSION_EXECUTION_ADAPTIVE_FILTER_REORDERING=true to enable it. This
-- is the same env var the dfbench suites pick up via SessionConfig::from_env;
-- the SQL bench harness uses SessionContext::new(), so we wire it in explicitly
-- here via env interpolation.
set datafusion.execution.adaptive_filter_reordering = ${DATAFUSION_EXECUTION_ADAPTIVE_FILTER_REORDERING:-false};
11 changes: 11 additions & 0 deletions datafusion/common/src/config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -662,6 +662,17 @@ config_namespace! {
/// tables with a highly-selective join filter, but is also slightly slower.
pub enforce_batch_size_in_joins: bool, default = false

/// (experimental) When enabled, `FilterExec` adaptively reorders the
/// conjuncts of a conjunctive predicate at runtime. It measures each
/// conjunct's selectivity and evaluation cost on the rows that reach it
/// and runs the conjuncts that discard the most rows per unit of CPU
/// time first, so cheap-and-selective predicates gate expensive ones.
/// Reordering never changes query results (only the evaluation order of
/// a conjunction) but can change observable side effects of fallible
/// predicates, so it is off by default. Predicates containing volatile
/// expressions are never reordered.
pub adaptive_filter_reordering: bool, default = false

/// Size (bytes) of data buffer DataFusion uses when writing output files.
/// This affects the size of the data chunks that are uploaded to remote
/// object stores (e.g. AWS S3). If very large (>= 100 GiB) output files are being
Expand Down
40 changes: 40 additions & 0 deletions datafusion/physical-expr-common/src/adaptive/mod.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

//! Shared substrate for adaptive (measurement-driven) filtering.
//!
//! Adaptive filter policies observe how predicates behave at runtime and
//! re-decide accordingly — the parquet scan adapts filter *placement*
//! (row-level vs. post-scan vs. dropped), and an adaptive `FilterExec` could
//! adapt conjunct evaluation *order*. Both need the same ingredients:
//!
//! - per-predicate online **selectivity + cost** measurement with confidence
//! intervals — [`SelectivityStats`];
//! - a concurrent **registry** keyed by a caller-local [`FilterId`], with
//! per-predicate skip flags so an optional predicate can be made a no-op
//! mid-stream — [`AdaptiveStatsRegistry`].
//!
//! What stays with each consumer is *policy*: the per-batch effectiveness
//! metric it feeds in, and the ranking/decision function it computes over the
//! snapshots. This module intentionally contains no placement or ordering
//! logic.

pub mod registry;
pub mod stats;

pub use registry::AdaptiveStatsRegistry;
pub use stats::{FilterId, SelectivityStats};
Loading
Loading