Skip to content

Commit 7a90164

Browse files
cpsievert and claude committed
feat(python): support data= dict parameter on execute() for inline DataFrames
Allows passing a dict of DataFrames to reader.execute() and the module-level execute() function, which are registered before query execution and unregistered afterward (cleanup happens even on error). Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
1 parent e94b8bd commit 7a90164

3 files changed

Lines changed: 210 additions & 11 deletions

File tree

ggsql-python/python/ggsql/_ggsql.pyi

Lines changed: 20 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -94,7 +94,12 @@ class DuckDBReader:
9494
"""
9595
...
9696

97-
def execute(self, query: str) -> Spec:
97+
def execute(
98+
self,
99+
query: str,
100+
*,
101+
data: dict[str, pl.DataFrame] | None = None,
102+
) -> Spec:
98103
"""Execute a ggsql query and return the visualization specification.
99104
100105
This is the main entry point for creating visualizations. It parses
@@ -105,6 +110,10 @@ class DuckDBReader:
105110
----------
106111
query
107112
The ggsql query (SQL + VISUALISE clause).
113+
data
114+
Optional dictionary mapping table names to DataFrames. Tables are
115+
registered before execution and unregistered afterward (even on
116+
error).
108117
109118
Returns
110119
-------
@@ -385,7 +394,12 @@ def validate(query: str) -> Validated:
385394
"""
386395
...
387396

388-
def execute(query: str, reader: object) -> Spec:
397+
def execute(
398+
query: str,
399+
reader: object,
400+
*,
401+
data: dict[str, pl.DataFrame] | None = None,
402+
) -> Spec:
389403
"""Execute a ggsql query with a reader (native or custom Python object).
390404
391405
This is a convenience function for custom readers. For native readers,
@@ -399,6 +413,10 @@ def execute(query: str, reader: object) -> Spec:
399413
The database reader to execute SQL against. Can be a native
400414
``DuckDBReader`` for optimal performance, or any Python object with
401415
an ``execute_sql(sql: str) -> polars.DataFrame`` method.
416+
data
417+
Optional dictionary mapping table names to DataFrames. Tables are
418+
registered before execution and unregistered afterward (even on
419+
error).
402420
403421
Returns
404422
-------

ggsql-python/src/lib.rs

Lines changed: 113 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -352,6 +352,9 @@ impl PyDuckDBReader {
352352
/// ----------
353353
/// query : str
354354
/// The ggsql query (SQL + VISUALISE clause).
355+
/// data : dict[str, polars.DataFrame] | None
356+
/// Optional dictionary mapping table names to DataFrames. Tables are
357+
/// registered before execution and unregistered afterward (even on error).
355358
///
356359
/// Returns
357360
/// -------
@@ -369,11 +372,48 @@ impl PyDuckDBReader {
369372
/// >>> spec = reader.execute("SELECT 1 AS x, 2 AS y VISUALISE x, y DRAW point")
370373
/// >>> writer = VegaLiteWriter()
371374
/// >>> json_output = writer.render(spec)
372-
fn execute(&self, query: &str) -> PyResult<PySpec> {
373-
self.inner
375+
#[pyo3(signature = (query, *, data=None))]
376+
fn execute(&self, py: Python<'_>, query: &str, data: Option<&Bound<'_, PyDict>>) -> PyResult<PySpec> {
377+
// Register DataFrames from data dict
378+
let registered_names = if let Some(data_dict) = data {
379+
self.register_data_dict(py, data_dict)?
380+
} else {
381+
vec![]
382+
};
383+
384+
// Execute query (capture result, don't return early)
385+
let result = self.inner
374386
.execute(query)
375387
.map(|s| PySpec { inner: s })
376-
.map_err(ggsql_err_to_py)
388+
.map_err(ggsql_err_to_py);
389+
390+
// Cleanup: unregister temporary tables (even on error)
391+
for name in &registered_names {
392+
let _ = self.inner.unregister(name);
393+
}
394+
395+
result
396+
}
397+
}
398+
399+
impl PyDuckDBReader {
400+
/// Register DataFrames from a Python dict. Returns list of registered names for cleanup.
401+
/// This is a private Rust helper, not exposed to Python.
402+
fn register_data_dict(
403+
&self,
404+
py: Python<'_>,
405+
data: &Bound<'_, PyDict>,
406+
) -> PyResult<Vec<String>> {
407+
let mut names = Vec::new();
408+
for (key, value) in data.iter() {
409+
let name: String = key.extract()?;
410+
let df = py_to_polars(py, &value)?;
411+
self.inner
412+
.register(&name, df, true)
413+
.map_err(ggsql_err_to_py)?;
414+
names.push(name);
415+
}
416+
Ok(names)
377417
}
378418
}
379419

@@ -741,6 +781,9 @@ fn validate(query: &str) -> PyResult<PyValidated> {
741781
/// The database reader to execute SQL against. Can be a native Reader
742782
/// for optimal performance, or any Python object with an
743783
/// `execute_sql(sql: str) -> polars.DataFrame` method.
784+
/// data : dict[str, polars.DataFrame] | None
785+
/// Optional dictionary mapping table names to DataFrames. Tables are
786+
/// registered before execution and unregistered afterward (even on error).
744787
///
745788
/// Returns
746789
/// -------
@@ -767,19 +810,80 @@ fn validate(query: &str) -> PyResult<PyValidated> {
767810
/// >>> reader = MyReader()
768811
/// >>> spec = execute("SELECT * FROM data VISUALISE x, y DRAW point", reader)
769812
#[pyfunction]
770-
fn execute(query: &str, reader: &Bound<'_, PyAny>) -> PyResult<PySpec> {
771-
// Fast path: try all known native reader types
772-
// Add new native readers to this list as they're implemented
773-
try_native_readers!(query, reader, PyDuckDBReader);
813+
#[pyo3(signature = (query, reader, *, data=None))]
814+
fn execute(py: Python<'_>, query: &str, reader: &Bound<'_, PyAny>, data: Option<&Bound<'_, PyDict>>) -> PyResult<PySpec> {
815+
// Native reader fast path: DuckDBReader
816+
// Note: we can't use the try_native_readers! macro here because it uses `return`
817+
// which would skip cleanup of registered tables.
818+
if let Ok(native) = reader.downcast::<PyDuckDBReader>() {
819+
// Register DataFrames if provided
820+
let registered_names = if let Some(data_dict) = data {
821+
native.borrow().register_data_dict(py, data_dict)?
822+
} else {
823+
vec![]
824+
};
825+
826+
// Execute (capture result for cleanup)
827+
let result = native.borrow().inner.execute(query)
828+
.map(|s| PySpec { inner: s })
829+
.map_err(ggsql_err_to_py);
830+
831+
// Cleanup: unregister temporary tables (even on error)
832+
for name in &registered_names {
833+
let _ = native.borrow().inner.unregister(name);
834+
}
835+
836+
return result;
837+
}
774838

775839
// Bridge path: wrap Python object as Reader
840+
// Register DataFrames if provided
841+
let registered_names = if let Some(data_dict) = data {
842+
register_data_on_reader(py, reader, data_dict)?
843+
} else {
844+
vec![]
845+
};
846+
776847
let bridge = PyReaderBridge {
777848
obj: reader.clone().unbind(),
778849
};
779-
bridge
850+
let result = bridge
780851
.execute(query)
781852
.map(|s| PySpec { inner: s })
782-
.map_err(ggsql_err_to_py)
853+
.map_err(ggsql_err_to_py);
854+
855+
// Cleanup for bridge path
856+
for name in &registered_names {
857+
let _ = call_unregister(py, reader, name);
858+
}
859+
860+
result
861+
}
862+
863+
/// Register DataFrames from a Python dict onto a Python reader object.
864+
/// Returns list of registered names for cleanup.
865+
fn register_data_on_reader(
866+
py: Python<'_>,
867+
reader: &Bound<'_, PyAny>,
868+
data: &Bound<'_, PyDict>,
869+
) -> PyResult<Vec<String>> {
870+
let mut names = Vec::new();
871+
for (key, value) in data.iter() {
872+
let name: String = key.extract()?;
873+
let df = py_to_polars(py, &value)?;
874+
let py_df = polars_to_py(py, &df)?;
875+
reader.call_method("register", (&name, py_df, true), None)?;
876+
names.push(name);
877+
}
878+
Ok(names)
879+
}
880+
881+
/// Call unregister on a reader if the method exists.
882+
fn call_unregister(_py: Python<'_>, reader: &Bound<'_, PyAny>, name: &str) -> PyResult<()> {
883+
if reader.hasattr("unregister")? {
884+
reader.call_method1("unregister", (name,))?;
885+
}
886+
Ok(())
783887
}
784888

785889
// ============================================================================

ggsql-python/tests/test_ggsql.py

Lines changed: 77 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -656,6 +656,83 @@ def test_render_chart_facet(self):
656656
assert isinstance(chart, altair.FacetChart)
657657

658658

659+
class TestExecuteWithData:
    """Exercise the ``data=`` keyword accepted by ``execute()``.

    Covers ``DuckDBReader.execute()`` and the module-level ``ggsql.execute()``:
    inline DataFrames must be queryable during the call and gone afterwards.
    """

    def test_execute_with_single_dataframe(self):
        """A one-entry data dict makes the frame queryable under its key."""
        frame = pl.DataFrame({"x": [1, 2, 3], "y": [10, 20, 30]})
        duck = ggsql.DuckDBReader("duckdb://memory")
        query = "SELECT * FROM mydata VISUALISE x, y DRAW point"
        result = duck.execute(query, data={"mydata": frame})
        assert result.metadata()["rows"] == 3

    def test_execute_with_multiple_dataframes(self):
        """Several frames can be passed at once and joined in the query."""
        left = pl.DataFrame({"id": [1, 2, 3], "y": [10, 20, 30]})
        right = pl.DataFrame({"id": [2, 3], "category": ["A", "B"]})
        duck = ggsql.DuckDBReader("duckdb://memory")
        query = (
            "SELECT t1.id AS x, t1.y FROM t1 JOIN t2 ON t1.id = t2.id "
            "VISUALISE x, y DRAW point"
        )
        result = duck.execute(query, data={"t1": left, "t2": right})
        # Only ids 2 and 3 appear in both frames.
        assert result.metadata()["rows"] == 2

    def test_execute_with_data_cleans_up(self):
        """Frames passed via data= are no longer registered after success."""
        frame = pl.DataFrame({"x": [1, 2, 3], "y": [10, 20, 30]})
        duck = ggsql.DuckDBReader("duckdb://memory")
        duck.execute(
            "SELECT * FROM temp VISUALISE x, y DRAW point",
            data={"temp": frame},
        )
        # The temporary table must be gone, so selecting from it has to fail.
        with pytest.raises((ggsql.ReaderError, ValueError)):
            duck.execute_sql("SELECT * FROM temp")

    def test_execute_with_data_cleans_up_on_error(self):
        """Frames are unregistered even when the query itself fails."""
        frame = pl.DataFrame({"x": [1, 2, 3], "y": [10, 20, 30]})
        duck = ggsql.DuckDBReader("duckdb://memory")
        failing_query = "SELECT * FROM temp VISUALISE DRAW not_a_geom"
        with pytest.raises((ggsql.ParseError, ggsql.ValidationError, ValueError)):
            duck.execute(failing_query, data={"temp": frame})
        # Cleanup must have run despite the failure above.
        with pytest.raises((ggsql.ReaderError, ValueError)):
            duck.execute_sql("SELECT * FROM temp")

    def test_execute_without_data_still_works(self):
        """Omitting data= keeps the previous execute() behaviour."""
        duck = ggsql.DuckDBReader("duckdb://memory")
        result = duck.execute("SELECT 1 AS x, 2 AS y VISUALISE x, y DRAW point")
        assert result.metadata()["rows"] == 1

    def test_execute_with_empty_data(self):
        """An empty data dict is accepted and registers nothing."""
        duck = ggsql.DuckDBReader("duckdb://memory")
        query = "SELECT 1 AS x, 2 AS y VISUALISE x, y DRAW point"
        result = duck.execute(query, data={})
        assert result.metadata()["rows"] == 1

    def test_module_execute_with_data(self):
        """The module-level execute() accepts data= as well."""
        frame = pl.DataFrame({"x": [1, 2, 3], "y": [10, 20, 30]})
        duck = ggsql.DuckDBReader("duckdb://memory")
        query = "SELECT * FROM mydata VISUALISE x, y DRAW point"
        result = ggsql.execute(query, duck, data={"mydata": frame})
        assert result.metadata()["rows"] == 3
734+
735+
659736
class TestTypeStubs:
660737
"""Tests for type stub presence and correctness."""
661738

0 commit comments

Comments
 (0)