Skip to content

Commit bfbd7e9

Browse files
cpsievert and claude committed
feat(python): support data= dict parameter on execute() for inline DataFrames
Allows passing a dict of DataFrames to reader.execute() and to the module-level execute() function. Each DataFrame is registered as a table under its dict key before query execution and unregistered afterward (cleanup happens even if execution fails). Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
1 parent f2a8bd3 commit bfbd7e9

3 files changed

Lines changed: 210 additions & 11 deletions

File tree

ggsql-python/python/ggsql/_ggsql.pyi

Lines changed: 20 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -94,7 +94,12 @@ class DuckDBReader:
9494
"""
9595
...
9696

97-
def execute(self, query: str) -> Spec:
97+
def execute(
98+
self,
99+
query: str,
100+
*,
101+
data: dict[str, pl.DataFrame] | None = None,
102+
) -> Spec:
98103
"""Execute a ggsql query and return the visualization specification.
99104
100105
This is the main entry point for creating visualizations. It parses
@@ -105,6 +110,10 @@ class DuckDBReader:
105110
----------
106111
query
107112
The ggsql query (SQL + VISUALISE clause).
113+
data
114+
Optional dictionary mapping table names to DataFrames. Tables are
115+
registered before execution and unregistered afterward (even on
116+
error).
108117
109118
Returns
110119
-------
@@ -385,7 +394,12 @@ def validate(query: str) -> Validated:
385394
"""
386395
...
387396

388-
def execute(query: str, reader: object) -> Spec:
397+
def execute(
398+
query: str,
399+
reader: object,
400+
*,
401+
data: dict[str, pl.DataFrame] | None = None,
402+
) -> Spec:
389403
"""Execute a ggsql query with a reader (native or custom Python object).
390404
391405
This is a convenience function for custom readers. For native readers,
@@ -399,6 +413,10 @@ def execute(query: str, reader: object) -> Spec:
399413
The database reader to execute SQL against. Can be a native
400414
``DuckDBReader`` for optimal performance, or any Python object with
401415
an ``execute_sql(sql: str) -> polars.DataFrame`` method.
416+
data
417+
Optional dictionary mapping table names to DataFrames. Tables are
418+
registered before execution and unregistered afterward (even on
419+
error).
402420
403421
Returns
404422
-------

ggsql-python/src/lib.rs

Lines changed: 113 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -333,6 +333,9 @@ impl PyDuckDBReader {
333333
/// ----------
334334
/// query : str
335335
/// The ggsql query (SQL + VISUALISE clause).
336+
/// data : dict[str, polars.DataFrame] | None
337+
/// Optional dictionary mapping table names to DataFrames. Tables are
338+
/// registered before execution and unregistered afterward (even on error).
336339
///
337340
/// Returns
338341
/// -------
@@ -350,11 +353,48 @@ impl PyDuckDBReader {
350353
/// >>> spec = reader.execute("SELECT 1 AS x, 2 AS y VISUALISE x, y DRAW point")
351354
/// >>> writer = VegaLiteWriter()
352355
/// >>> json_output = writer.render(spec)
353-
fn execute(&self, query: &str) -> PyResult<PySpec> {
354-
self.inner
356+
#[pyo3(signature = (query, *, data=None))]
357+
fn execute(&self, py: Python<'_>, query: &str, data: Option<&Bound<'_, PyDict>>) -> PyResult<PySpec> {
358+
// Register DataFrames from data dict
359+
let registered_names = if let Some(data_dict) = data {
360+
self.register_data_dict(py, data_dict)?
361+
} else {
362+
vec![]
363+
};
364+
365+
// Execute query (capture result, don't return early)
366+
let result = self.inner
355367
.execute(query)
356368
.map(|s| PySpec { inner: s })
357-
.map_err(ggsql_err_to_py)
369+
.map_err(ggsql_err_to_py);
370+
371+
// Cleanup: unregister temporary tables (even on error)
372+
for name in &registered_names {
373+
let _ = self.inner.unregister(name);
374+
}
375+
376+
result
377+
}
378+
}
379+
380+
impl PyDuckDBReader {
381+
/// Register DataFrames from a Python dict. Returns list of registered names for cleanup.
382+
/// This is a private Rust helper, not exposed to Python.
383+
fn register_data_dict(
384+
&self,
385+
py: Python<'_>,
386+
data: &Bound<'_, PyDict>,
387+
) -> PyResult<Vec<String>> {
388+
let mut names = Vec::new();
389+
for (key, value) in data.iter() {
390+
let name: String = key.extract()?;
391+
let df = py_to_polars(py, &value)?;
392+
self.inner
393+
.register(&name, df, true)
394+
.map_err(ggsql_err_to_py)?;
395+
names.push(name);
396+
}
397+
Ok(names)
358398
}
359399
}
360400

@@ -725,6 +765,9 @@ fn validate(query: &str) -> PyResult<PyValidated> {
725765
/// The database reader to execute SQL against. Can be a native Reader
726766
/// for optimal performance, or any Python object with an
727767
/// `execute_sql(sql: str) -> polars.DataFrame` method.
768+
/// data : dict[str, polars.DataFrame] | None
769+
/// Optional dictionary mapping table names to DataFrames. Tables are
770+
/// registered before execution and unregistered afterward (even on error).
728771
///
729772
/// Returns
730773
/// -------
@@ -751,19 +794,80 @@ fn validate(query: &str) -> PyResult<PyValidated> {
751794
/// >>> reader = MyReader()
752795
/// >>> spec = execute("SELECT * FROM data VISUALISE x, y DRAW point", reader)
753796
#[pyfunction]
754-
fn execute(query: &str, reader: &Bound<'_, PyAny>) -> PyResult<PySpec> {
755-
// Fast path: try all known native reader types
756-
// Add new native readers to this list as they're implemented
757-
try_native_readers!(query, reader, PyDuckDBReader);
797+
#[pyo3(signature = (query, reader, *, data=None))]
798+
fn execute(py: Python<'_>, query: &str, reader: &Bound<'_, PyAny>, data: Option<&Bound<'_, PyDict>>) -> PyResult<PySpec> {
799+
// Native reader fast path: DuckDBReader
800+
// Note: we can't use the try_native_readers! macro here because it uses `return`
801+
// which would skip cleanup of registered tables.
802+
if let Ok(native) = reader.downcast::<PyDuckDBReader>() {
803+
// Register DataFrames if provided
804+
let registered_names = if let Some(data_dict) = data {
805+
native.borrow().register_data_dict(py, data_dict)?
806+
} else {
807+
vec![]
808+
};
809+
810+
// Execute (capture result for cleanup)
811+
let result = native.borrow().inner.execute(query)
812+
.map(|s| PySpec { inner: s })
813+
.map_err(ggsql_err_to_py);
814+
815+
// Cleanup: unregister temporary tables (even on error)
816+
for name in &registered_names {
817+
let _ = native.borrow().inner.unregister(name);
818+
}
819+
820+
return result;
821+
}
758822

759823
// Bridge path: wrap Python object as Reader
824+
// Register DataFrames if provided
825+
let registered_names = if let Some(data_dict) = data {
826+
register_data_on_reader(py, reader, data_dict)?
827+
} else {
828+
vec![]
829+
};
830+
760831
let bridge = PyReaderBridge {
761832
obj: reader.clone().unbind(),
762833
};
763-
bridge
834+
let result = bridge
764835
.execute(query)
765836
.map(|s| PySpec { inner: s })
766-
.map_err(ggsql_err_to_py)
837+
.map_err(ggsql_err_to_py);
838+
839+
// Cleanup for bridge path
840+
for name in &registered_names {
841+
let _ = call_unregister(py, reader, name);
842+
}
843+
844+
result
845+
}
846+
847+
/// Register DataFrames from a Python dict onto a Python reader object.
848+
/// Returns list of registered names for cleanup.
849+
fn register_data_on_reader(
850+
py: Python<'_>,
851+
reader: &Bound<'_, PyAny>,
852+
data: &Bound<'_, PyDict>,
853+
) -> PyResult<Vec<String>> {
854+
let mut names = Vec::new();
855+
for (key, value) in data.iter() {
856+
let name: String = key.extract()?;
857+
let df = py_to_polars(py, &value)?;
858+
let py_df = polars_to_py(py, &df)?;
859+
reader.call_method("register", (&name, py_df, true), None)?;
860+
names.push(name);
861+
}
862+
Ok(names)
863+
}
864+
865+
/// Call unregister on a reader if the method exists.
866+
fn call_unregister(_py: Python<'_>, reader: &Bound<'_, PyAny>, name: &str) -> PyResult<()> {
867+
if reader.hasattr("unregister")? {
868+
reader.call_method1("unregister", (name,))?;
869+
}
870+
Ok(())
767871
}
768872

769873
// ============================================================================

ggsql-python/tests/test_ggsql.py

Lines changed: 77 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -665,6 +665,83 @@ def test_render_chart_kwargs_forwarded(self):
665665
assert isinstance(chart, altair.TopLevelMixin)
666666

667667

668+
class TestExecuteWithData:
    """Exercise the ``data=`` keyword accepted by ``execute()``."""

    def test_execute_with_single_dataframe(self):
        """One DataFrame in the data dict is queryable under its key."""
        reader = ggsql.DuckDBReader("duckdb://memory")
        tables = {"mydata": pl.DataFrame({"x": [1, 2, 3], "y": [10, 20, 30]})}
        query = "SELECT * FROM mydata VISUALISE x, y DRAW point"

        spec = reader.execute(query, data=tables)

        assert spec.metadata()["rows"] == 3

    def test_execute_with_multiple_dataframes(self):
        """Several DataFrames in the data dict can be joined in one query."""
        reader = ggsql.DuckDBReader("duckdb://memory")
        left = pl.DataFrame({"id": [1, 2, 3], "y": [10, 20, 30]})
        right = pl.DataFrame({"id": [2, 3], "category": ["A", "B"]})
        query = (
            "SELECT t1.id AS x, t1.y FROM t1 JOIN t2 ON t1.id = t2.id "
            "VISUALISE x, y DRAW point"
        )

        spec = reader.execute(query, data={"t1": left, "t2": right})

        assert spec.metadata()["rows"] == 2

    def test_execute_with_data_cleans_up(self):
        """Tables supplied through data= are gone once execute() returns."""
        reader = ggsql.DuckDBReader("duckdb://memory")
        frame = pl.DataFrame({"x": [1, 2, 3], "y": [10, 20, 30]})
        query = "SELECT * FROM temp VISUALISE x, y DRAW point"

        reader.execute(query, data={"temp": frame})

        # The temporary table was unregistered, so selecting from it must fail.
        with pytest.raises((ggsql.ReaderError, ValueError)):
            reader.execute_sql("SELECT * FROM temp")

    def test_execute_with_data_cleans_up_on_error(self):
        """Tables supplied through data= are gone even when execute() raises."""
        reader = ggsql.DuckDBReader("duckdb://memory")
        frame = pl.DataFrame({"x": [1, 2, 3], "y": [10, 20, 30]})
        bad_query = "SELECT * FROM temp VISUALISE DRAW not_a_geom"
        expected_errors = (ggsql.ParseError, ggsql.ValidationError, ValueError)

        with pytest.raises(expected_errors):
            reader.execute(bad_query, data={"temp": frame})

        # Cleanup must have run despite the failure above.
        with pytest.raises((ggsql.ReaderError, ValueError)):
            reader.execute_sql("SELECT * FROM temp")

    def test_execute_without_data_still_works(self):
        """Omitting data= keeps the previous behaviour of execute()."""
        reader = ggsql.DuckDBReader("duckdb://memory")
        query = "SELECT 1 AS x, 2 AS y VISUALISE x, y DRAW point"

        spec = reader.execute(query)

        assert spec.metadata()["rows"] == 1

    def test_execute_with_empty_data(self):
        """An empty data= dict is accepted and registers nothing."""
        reader = ggsql.DuckDBReader("duckdb://memory")
        query = "SELECT 1 AS x, 2 AS y VISUALISE x, y DRAW point"

        spec = reader.execute(query, data={})

        assert spec.metadata()["rows"] == 1

    def test_module_execute_with_data(self):
        """The module-level execute() accepts data= as well."""
        reader = ggsql.DuckDBReader("duckdb://memory")
        frame = pl.DataFrame({"x": [1, 2, 3], "y": [10, 20, 30]})
        query = "SELECT * FROM mydata VISUALISE x, y DRAW point"

        spec = ggsql.execute(query, reader, data={"mydata": frame})

        assert spec.metadata()["rows"] == 3
743+
744+
668745
class TestTypeStubs:
669746
"""Tests for type stub presence and correctness."""
670747

0 commit comments

Comments
 (0)