From 4c7d2536509198f7d0f5e4a7de16ca963f48c12a Mon Sep 17 00:00:00 2001 From: Christian McArthur Date: Mon, 30 Mar 2026 08:00:07 -0400 Subject: [PATCH] feat: add optional JSON functions support Add `datafusion-functions-json` as an optional feature (`json`), giving Python users `json_get_str`, `json_get`, `->`, `->>` and other JSON operators in SQL queries. When built with `--features json`, JSON functions are automatically registered with every SessionContext. Default builds are unaffected. Tested locally: json_get_str extracts values, nested paths work, GROUP BY on extracted JSON fields works. Changes: - Add `datafusion-functions-json` to workspace dependencies - Add optional dependency and `json` feature flag to core crate - Register JSON functions in SessionContext creation when feature is enabled Co-Authored-By: Claude Opus 4.6 (1M context) --- Cargo.toml | 1 + crates/core/Cargo.toml | 2 ++ crates/core/src/context.rs | 11 ++++++++++- 3 files changed, 13 insertions(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 346f6da3e..813234daa 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -48,6 +48,7 @@ datafusion-common = { version = "53", default-features = false } datafusion-functions-aggregate = { version = "53" } datafusion-functions-window = { version = "53" } datafusion-expr = { version = "53" } +datafusion-functions-json = { version = "0.53" } prost = "0.14.3" serde_json = "1" uuid = { version = "1.23" } diff --git a/crates/core/Cargo.toml b/crates/core/Cargo.toml index 3e2b01c8e..7a1ee4651 100644 --- a/crates/core/Cargo.toml +++ b/crates/core/Cargo.toml @@ -53,6 +53,7 @@ datafusion = { workspace = true, features = ["avro", "unicode_expressions"] } datafusion-substrait = { workspace = true, optional = true } datafusion-proto = { workspace = true } datafusion-ffi = { workspace = true } +datafusion-functions-json = { workspace = true, optional = true } prost = { workspace = true } # keep in line with `datafusion-substrait` serde_json = { workspace = true } 
uuid = { workspace = true, features = ["v4"] } @@ -74,6 +75,7 @@ pyo3-build-config = { workspace = true } [features] default = ["mimalloc"] +json = ["dep:datafusion-functions-json"] protoc = ["datafusion-substrait/protoc"] substrait = ["dep:datafusion-substrait"] diff --git a/crates/core/src/context.rs b/crates/core/src/context.rs index 53994d2f5..906fd96d2 100644 --- a/crates/core/src/context.rs +++ b/crates/core/src/context.rs @@ -390,7 +390,16 @@ impl PySessionContext { .with_runtime_env(runtime) .with_default_features() .build(); - let ctx = Arc::new(SessionContext::new_with_state(session_state)); + let mut ctx = SessionContext::new_with_state(session_state); + + // Register JSON functions (json_get, json_get_str, ->, ->>) when feature is enabled + #[cfg(feature = "json")] + datafusion_functions_json::register_all(&mut ctx) + .map_err(|e| PyErr::new::( + format!("Failed to register JSON functions: {e}"), + ))?; + + let ctx = Arc::new(ctx); let logical_codec = Self::default_logical_codec(&ctx); Ok(PySessionContext { ctx, logical_codec }) }