Skip to content

Commit f71dbc2

Browse files
cpsievertclaude
andcommitted
fix(python): improve type stubs, data= cleanup safety, and exception specificity
- Update type stubs to document typed exceptions (ParseError, ReaderError, etc.) instead of generic ValueError - Guard data= cleanup against destroying pre-existing tables by checking table existence before registration - Pass replace as keyword arg in bridge register_data_on_reader for compatibility with custom readers that omit the parameter - Tighten test assertions to expect specific exception types - Remove unused try_native_readers! macro Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
1 parent bfbd7e9 commit f71dbc2

3 files changed

Lines changed: 69 additions & 37 deletions

File tree

ggsql-python/python/ggsql/_ggsql.pyi

Lines changed: 18 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ class DuckDBReader:
3131
3232
Raises
3333
------
34-
ValueError
34+
ReaderError
3535
If the connection string is invalid or the database cannot be opened.
3636
"""
3737

@@ -51,7 +51,7 @@ class DuckDBReader:
5151
5252
Raises
5353
------
54-
ValueError
54+
ReaderError
5555
If the SQL is invalid or execution fails.
5656
"""
5757
...
@@ -74,7 +74,7 @@ class DuckDBReader:
7474
7575
Raises
7676
------
77-
ValueError
77+
ReaderError
7878
If registration fails or the table name is invalid.
7979
"""
8080
...
@@ -89,7 +89,7 @@ class DuckDBReader:
8989
9090
Raises
9191
------
92-
ValueError
92+
ReaderError
9393
If the table was not registered or unregistration fails.
9494
"""
9595
...
@@ -122,9 +122,12 @@ class DuckDBReader:
122122
123123
Raises
124124
------
125-
ValueError
126-
If the query syntax is invalid, has no VISUALISE clause, or
127-
SQL execution fails.
125+
ParseError
126+
If the query syntax is invalid.
127+
ValidationError
128+
If the query has no VISUALISE clause or fails semantic checks.
129+
ReaderError
130+
If SQL execution fails.
128131
"""
129132
...
130133

@@ -154,7 +157,7 @@ class VegaLiteWriter:
154157
155158
Raises
156159
------
157-
ValueError
160+
WriterError
158161
If rendering fails.
159162
"""
160163
...
@@ -388,7 +391,7 @@ def validate(query: str) -> Validated:
388391
389392
Raises
390393
------
391-
ValueError
394+
ParseError
392395
If validation fails unexpectedly (syntax errors are captured in
393396
the returned ``Validated`` object, not raised).
394397
"""
@@ -425,7 +428,11 @@ def execute(
425428
426429
Raises
427430
------
428-
ValueError
429-
If parsing, validation, or SQL execution fails.
431+
ParseError
432+
If the query syntax is invalid.
433+
ValidationError
434+
If semantic validation fails.
435+
ReaderError
436+
If SQL execution fails.
430437
"""
431438
...

ggsql-python/src/lib.rs

Lines changed: 30 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -187,24 +187,6 @@ impl Reader for PyReaderBridge {
187187
}
188188
}
189189

190-
// ============================================================================
191-
// Native Reader Detection Macro
192-
// ============================================================================
193-
194-
/// Macro to try native readers and fall back to bridge.
195-
/// Adding new native readers = add to the macro invocation list.
196-
macro_rules! try_native_readers {
197-
($query:expr, $reader:expr, $($native_type:ty),*) => {{
198-
$(
199-
if let Ok(native) = $reader.downcast::<$native_type>() {
200-
return native.borrow().inner.execute($query)
201-
.map(|s| PySpec { inner: s })
202-
.map_err(ggsql_err_to_py);
203-
}
204-
)*
205-
}};
206-
}
207-
208190
// ============================================================================
209191
// PyDuckDBReader
210192
// ============================================================================
@@ -378,8 +360,16 @@ impl PyDuckDBReader {
378360
}
379361

380362
impl PyDuckDBReader {
381-
/// Register DataFrames from a Python dict. Returns list of registered names for cleanup.
382-
/// This is a private Rust helper, not exposed to Python.
363+
/// Check whether a table already exists in the reader.
364+
fn table_exists(&self, name: &str) -> bool {
365+
// A lightweight probe: try to select zero rows from the table.
366+
self.inner
367+
.execute_sql(&format!("SELECT 1 FROM {name} LIMIT 0"))
368+
.is_ok()
369+
}
370+
371+
/// Register DataFrames from a Python dict. Returns list of *newly created*
372+
/// table names for cleanup (pre-existing tables are not tracked).
383373
fn register_data_dict(
384374
&self,
385375
py: Python<'_>,
@@ -388,11 +378,14 @@ impl PyDuckDBReader {
388378
let mut names = Vec::new();
389379
for (key, value) in data.iter() {
390380
let name: String = key.extract()?;
381+
let existed = self.table_exists(&name);
391382
let df = py_to_polars(py, &value)?;
392383
self.inner
393384
.register(&name, df, true)
394385
.map_err(ggsql_err_to_py)?;
395-
names.push(name);
386+
if !existed {
387+
names.push(name);
388+
}
396389
}
397390
Ok(names)
398391
}
@@ -846,6 +839,15 @@ fn execute(py: Python<'_>, query: &str, reader: &Bound<'_, PyAny>, data: Option<
846839

847840
/// Register DataFrames from a Python dict onto a Python reader object.
848841
/// Returns list of registered names for cleanup.
842+
/// Check whether a table exists via a Python reader's execute_sql method.
843+
fn reader_table_exists(reader: &Bound<'_, PyAny>, name: &str) -> bool {
844+
reader
845+
.call_method1("execute_sql", (format!("SELECT 1 FROM {name} LIMIT 0"),))
846+
.is_ok()
847+
}
848+
849+
/// Register DataFrames from a Python dict onto a Python reader object.
850+
/// Returns list of *newly created* table names for cleanup.
849851
fn register_data_on_reader(
850852
py: Python<'_>,
851853
reader: &Bound<'_, PyAny>,
@@ -854,10 +856,15 @@ fn register_data_on_reader(
854856
let mut names = Vec::new();
855857
for (key, value) in data.iter() {
856858
let name: String = key.extract()?;
859+
let existed = reader_table_exists(reader, &name);
857860
let df = py_to_polars(py, &value)?;
858861
let py_df = polars_to_py(py, &df)?;
859-
reader.call_method("register", (&name, py_df, true), None)?;
860-
names.push(name);
862+
let kwargs = PyDict::new(py);
863+
kwargs.set_item("replace", true)?;
864+
reader.call_method("register", (&name, py_df), Some(&kwargs))?;
865+
if !existed {
866+
names.push(name);
867+
}
861868
}
862869
Ok(names)
863870
}

ggsql-python/tests/test_ggsql.py

Lines changed: 21 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -699,20 +699,20 @@ def test_execute_with_data_cleans_up(self):
699699
data={"temp": df},
700700
)
701701
# Table should be cleaned up — querying it should fail
702-
with pytest.raises((ggsql.ReaderError, ValueError)):
702+
with pytest.raises(ggsql.ReaderError):
703703
reader.execute_sql("SELECT * FROM temp")
704704

705705
def test_execute_with_data_cleans_up_on_error(self):
706706
"""DataFrames are unregistered even if execution fails."""
707707
reader = ggsql.DuckDBReader("duckdb://memory")
708708
df = pl.DataFrame({"x": [1, 2, 3], "y": [10, 20, 30]})
709-
with pytest.raises((ggsql.ParseError, ggsql.ValidationError, ValueError)):
709+
with pytest.raises(ggsql.ParseError):
710710
reader.execute(
711711
"SELECT * FROM temp VISUALISE DRAW not_a_geom",
712712
data={"temp": df},
713713
)
714714
# Table should still be cleaned up
715-
with pytest.raises((ggsql.ReaderError, ValueError)):
715+
with pytest.raises(ggsql.ReaderError):
716716
reader.execute_sql("SELECT * FROM temp")
717717

718718
def test_execute_without_data_still_works(self):
@@ -730,6 +730,24 @@ def test_execute_with_empty_data(self):
730730
)
731731
assert spec.metadata()["rows"] == 1
732732

733+
def test_execute_with_data_preserves_preexisting_table(self):
734+
"""data= does not unregister a table that existed before the call."""
735+
reader = ggsql.DuckDBReader("duckdb://memory")
736+
existing = pl.DataFrame({"x": [1, 2], "y": [10, 20]})
737+
reader.register("mytable", existing)
738+
739+
# Pass same name via data= — should replace temporarily, then NOT unregister
740+
override = pl.DataFrame({"x": [3, 4, 5], "y": [30, 40, 50]})
741+
spec = reader.execute(
742+
"SELECT * FROM mytable VISUALISE x, y DRAW point",
743+
data={"mytable": override},
744+
)
745+
assert spec.metadata()["rows"] == 3
746+
747+
# The pre-existing table should still be queryable (not unregistered)
748+
result = reader.execute_sql("SELECT * FROM mytable")
749+
assert result.shape[0] > 0
750+
733751
def test_module_execute_with_data(self):
734752
"""Module-level execute() also supports data= parameter."""
735753
reader = ggsql.DuckDBReader("duckdb://memory")

0 commit comments

Comments
 (0)