Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
39 commits
Select commit Hold shift + click to select a range
37768b8
fix: Enable sliding window execution for covar_pop, covar_samp, and c…
pchintar Jun 13, 2026
f931728
fix: handle `date_bin` negative subsecond and overflow cases (#22610)
kumarUjjawal Jun 13, 2026
e5f7af1
feat(spark): add `concat_ws` with array support (#20928)
davidlghellin Jun 13, 2026
574a1e6
fix: preserve Spark next_day whitespace validation (#22720)
xfocus3 Jun 13, 2026
3bece3d
Upgrade minimal tokio-postgres version to address security advisory (…
AdamGS Jun 13, 2026
58e37a0
Clearly gate sliding SUM(DISTINCT) type support (#22866)
kumarUjjawal Jun 13, 2026
a7280b8
FFI: plumb `placement` for `FFI_ScalarUDF` (#22608)
Amogh-2404 Jun 13, 2026
78033fa
refactor: introduce ProbeEnd state in NestedLoopJoinExec (#22865)
nathanb9 Jun 13, 2026
cb2542c
fix: TRY_CAST returns NULL for timestamp/date overflow (#22897)
fengys1996 Jun 14, 2026
d428760
fix: count shared buffers once in hash join build-side memory account…
jordepic Jun 14, 2026
6520315
fix(topk): call attempt_early_completion when filter rejects entire b…
ajegou Jun 15, 2026
99895e6
refactor: Simplify heap size estimation for types that own no heap al…
mkleen Jun 15, 2026
e20763c
refactor(hash-aggr): Migrate the partial aggregation skip optimizatio…
2010YOUY01 Jun 15, 2026
dede33c
refactor(hash-aggr): Migrate existing tests on `GroupsHashAggregateSt…
2010YOUY01 Jun 15, 2026
c14379b
refactor: remove `opt_filter` in `GroupsAccumulator::merge_batch` (#2…
haohuaijin Jun 15, 2026
127731b
Include `null_aware` status in the relevant Join node display impleme…
AdamGS Jun 16, 2026
49b99bb
chore(deps): bump pyjwt from 2.12.0 to 2.13.0 (#22966)
dependabot[bot] Jun 16, 2026
a66c898
ci: Setup valid `Cargo.lock` for `depcheck` to unblock CI (#22933)
AdamGS Jun 16, 2026
a1e88e2
feat: decimal support for gcd and lcm (#22655)
theirix Jun 16, 2026
152d8c4
Add `file_row_index` UDF to query file-level row indexes from Parquet…
AdamGS Jun 16, 2026
9849513
chore(deps-dev): bump launch-editor from 2.10.0 to 2.14.1 in /datafus…
dependabot[bot] Jun 16, 2026
15bc933
chore(deps): bump cryptography from 46.0.7 to 48.0.1 (#22968)
dependabot[bot] Jun 16, 2026
3c6734e
refactor: Simplify heap size estimation for arrays (#22954)
mkleen Jun 16, 2026
8cda78b
Remove orphaned `snowflake_flatten_validation.sql` script (#22938)
AdamGS Jun 16, 2026
baa497d
fix: Disable join dynamic filters for null-equal joins (#22965)
neilconway Jun 16, 2026
0fb650a
chore(deps): bump insta-cmd from 0.6.0 to 0.7.0 (#22976)
dependabot[bot] Jun 16, 2026
46d241d
chore(deps): update maturin requirement from <2,>=1.13.3 to >=1.14.0,…
dependabot[bot] Jun 16, 2026
2282d23
chore(deps): bump taiki-e/install-action from 2.81.8 to 2.81.11 (#22973)
dependabot[bot] Jun 16, 2026
fbd64b4
chore(deps): update pydata-sphinx-theme requirement from <1,>=0.18.0 …
dependabot[bot] Jun 16, 2026
ae5f3f5
chore(deps): bump prost-build from 0.14.3 to 0.14.4 (#22843)
dependabot[bot] Jun 16, 2026
fa271ce
refactor: Update SortMergeJoin to use async spill abstractions (#22230)
pantShrey Jun 16, 2026
6176a6d
Add `.gitignore` for `proto-models` (#22977)
Jefffrey Jun 16, 2026
d5f03d9
Fix leaf expression reconciliation (#22971)
cetra3 Jun 16, 2026
408dad3
Add MERGE INTO types to datafusion-expr (#20763)
wirybeaver Jun 16, 2026
c7e9284
refactor: use raw view access in do_append_val_inner and consolidate …
EeshanBembi Jun 16, 2026
a0e6d49
Make LogicalPlan::Unnest expression/rebuild contracts consistent (#22…
nathanb9 Jun 16, 2026
96a6096
feat: support reading from stdin in datafusion-cli (#22839)
huan233usc Jun 16, 2026
2c6eada
Add merge_into hook to TableProvider trait
wirybeaver Mar 6, 2026
cb89e53
Add SQL and physical planner support for MERGE INTO
wirybeaver Mar 6, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/audit.yml
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ jobs:
steps:
- uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 # v6.0.3
- name: Install cargo-audit
uses: taiki-e/install-action@0631aa6515c7d545823c67cfae7ef4fc7f490154 # v2.81.8
uses: taiki-e/install-action@15449e3094499af05d8d964a1c884208e4b8b595 # v2.81.11
with:
tool: cargo-audit
- name: Run audit check
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/breaking_changes_detector.yml
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,7 @@ jobs:

- name: Install cargo-semver-checks
if: steps.changed_crates.outputs.packages != ''
uses: taiki-e/install-action@0631aa6515c7d545823c67cfae7ef4fc7f490154 # v2.81.8
uses: taiki-e/install-action@15449e3094499af05d8d964a1c884208e4b8b595 # v2.81.11
with:
tool: cargo-semver-checks

Expand Down
4 changes: 2 additions & 2 deletions .github/workflows/dependencies.yml
Original file line number Diff line number Diff line change
Expand Up @@ -50,9 +50,9 @@ jobs:
with:
rust-version: stable
- name: Check dependencies
working-directory: dev/depcheck
run: |
cd dev/depcheck
cargo run
cargo run --locked
detect-unused-dependencies:
name: Detect Unused Dependencies
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/dev.yml
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ jobs:
source ci/scripts/utils/tool_versions.sh
echo "LYCHEE_VERSION=${LYCHEE_VERSION}" >> "$GITHUB_ENV"
- name: Install lychee
uses: taiki-e/install-action@0631aa6515c7d545823c67cfae7ef4fc7f490154 # v2.81.8
uses: taiki-e/install-action@15449e3094499af05d8d964a1c884208e4b8b595 # v2.81.11
with:
tool: lychee@${{ env.LYCHEE_VERSION }}
- name: Run markdown link check
Expand Down
4 changes: 2 additions & 2 deletions .github/workflows/rust.yml
Original file line number Diff line number Diff line change
Expand Up @@ -429,7 +429,7 @@ jobs:
sudo apt-get update -qq
sudo apt-get install -y -qq clang
- name: Setup wasm-pack
uses: taiki-e/install-action@0631aa6515c7d545823c67cfae7ef4fc7f490154 # v2.81.8
uses: taiki-e/install-action@15449e3094499af05d8d964a1c884208e4b8b595 # v2.81.11
with:
tool: wasm-pack
- name: Run tests with headless mode
Expand Down Expand Up @@ -773,7 +773,7 @@ jobs:
- name: Setup Rust toolchain
uses: ./.github/actions/setup-builder
- name: Install cargo-msrv
uses: taiki-e/install-action@0631aa6515c7d545823c67cfae7ef4fc7f490154 # v2.81.8
uses: taiki-e/install-action@15449e3094499af05d8d964a1c884208e4b8b595 # v2.81.11
with:
tool: cargo-msrv

Expand Down
40 changes: 20 additions & 20 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion datafusion-cli/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@ workspace = true
[dev-dependencies]
ctor = { workspace = true }
insta = { workspace = true }
insta-cmd = "0.6.0"
insta-cmd = "0.7.0"
rstest = { workspace = true }
testcontainers-modules = { workspace = true, features = ["minio"] }
# Makes sure `test_display_pg_json` behaves in a consistent way regardless of
Expand Down
9 changes: 7 additions & 2 deletions datafusion-cli/src/exec.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ use crate::print_format::PrintFormat;
use crate::{
command::{Command, OutputFormat},
helper::CliHelper,
object_storage::get_object_store,
object_storage::{get_object_store, stdin::StdinUtils},
print_options::{MaxRows, PrintOptions},
};
use datafusion::common::instant::Instant;
Expand Down Expand Up @@ -417,9 +417,14 @@ async fn create_plan(
// Note that cmd is a mutable reference so that create_external_table function can remove all
// datafusion-cli specific options before passing through to datafusion. Otherwise, datafusion
// will raise Configuration errors.
if let LogicalPlan::Ddl(DdlStatement::CreateExternalTable(cmd)) = &plan {
if let LogicalPlan::Ddl(DdlStatement::CreateExternalTable(cmd)) = &mut plan {
// To support custom formats, treat error as None
let format = config_file_type_from_str(&cmd.file_type);

// Expose stdin (e.g. `cat data.csv | datafusion-cli`) as a `stdin://`
// object store, registered like any other scheme in `get_object_store`.
cmd.location = StdinUtils::rewrite_location(&cmd.location, format.as_ref());

register_object_store_and_config_extensions(
ctx,
&cmd.location,
Expand Down
31 changes: 29 additions & 2 deletions datafusion-cli/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ use datafusion_cli::functions::{
use datafusion_cli::object_storage::instrumented::{
InstrumentedObjectStoreMode, InstrumentedObjectStoreRegistry,
};
use datafusion_cli::object_storage::{StdinCarriesCommands, is_stdin_location};
use datafusion_cli::{
DATAFUSION_CLI_VERSION, exec,
pool_type::PoolType,
Expand Down Expand Up @@ -158,6 +159,23 @@ struct Args {
object_store_profiling: InstrumentedObjectStoreMode,
}

impl Args {
    /// Returns `true` when neither `-c` commands nor `-f` files were given.
    ///
    /// In that situation the CLI falls through to the REPL, whose SQL
    /// arrives on stdin, either typed interactively or piped in.
    fn repl_mode(&self) -> bool {
        let has_commands = !self.command.is_empty();
        let has_files = !self.file.is_empty();
        !(has_commands || has_files)
    }

    /// Returns `true` when stdin is already claimed as the source of SQL.
    ///
    /// Two situations qualify:
    /// - the REPL (no `-c`/`-f`), reading SQL interactively or piped;
    /// - an explicit `-f /dev/stdin` (or another stdin pseudo-path accepted
    ///   by `is_stdin_location`), where the SQL file *is* stdin.
    ///
    /// Either way stdin is spoken for and cannot additionally back a
    /// `LOCATION '/dev/stdin'` table.
    fn reads_sql_from_stdin(&self) -> bool {
        if self.repl_mode() {
            return true;
        }
        self.file.iter().any(|path| is_stdin_location(path))
    }
}

#[tokio::main]
/// Calls [`main_inner`], then handles printing errors and returning the correct exit code
pub async fn main() -> ExitCode {
Expand Down Expand Up @@ -268,6 +286,7 @@ async fn main_inner() -> Result<()> {
instrumented_registry: Arc::clone(&instrumented_registry),
};

let repl_mode = args.repl_mode();
let commands = args.command;
let files = args.file;
let rc = match args.rc {
Expand All @@ -285,7 +304,7 @@ async fn main_inner() -> Result<()> {
}
};

if commands.is_empty() && files.is_empty() {
if repl_mode {
if !rc.is_empty() {
exec::exec_from_files(&ctx, rc, &print_options).await?;
}
Expand Down Expand Up @@ -330,8 +349,16 @@ fn get_session_config(args: &Args) -> Result<SessionConfig> {
config_options.format.null = String::from("NULL");
}

let session_config =
let mut session_config =
SessionConfig::from(config_options).with_information_schema(true);

if args.reads_sql_from_stdin() {
// When stdin carries the session's SQL — the REPL (including any rc
// file run before it) or an explicit `-f /dev/stdin` — it cannot also
// serve as a data source for `LOCATION '/dev/stdin'`.
session_config = session_config.with_extension(Arc::new(StdinCarriesCommands));
}

Ok(session_config)
}

Expand Down
6 changes: 6 additions & 0 deletions datafusion-cli/src/object_storage.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,9 @@
// under the License.

pub mod instrumented;
pub(crate) mod stdin;

pub use stdin::{StdinCarriesCommands, is_stdin_location};

use async_trait::async_trait;
use aws_config::BehaviorVersion;
Expand Down Expand Up @@ -564,6 +567,9 @@ pub(crate) async fn get_object_store(
.with_url(url.origin().ascii_serialization())
.build()?,
),
_ if scheme == stdin::StdinUtils::SCHEME => {
stdin::StdinUtils::get_or_create(state, url).await?
}
_ => {
// For other types, try to get from `object_store_registry`:
state
Expand Down
Loading
Loading