Skip to content

Commit 16ef14b

Browse files
cpsievertclaude
andcommitted
fix(python): improve type stubs, data= cleanup safety, and exception specificity
- Update type stubs to document typed exceptions (ParseError, ReaderError, etc.) instead of generic ValueError - Guard data= cleanup against destroying pre-existing tables by checking table existence before registration - Pass replace as keyword arg in bridge register_data_on_reader for compatibility with custom readers that omit the parameter - Tighten test assertions to expect specific exception types - Remove unused try_native_readers! macro Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
1 parent 7a90164 commit 16ef14b

3 files changed

Lines changed: 69 additions & 37 deletions

File tree

ggsql-python/python/ggsql/_ggsql.pyi

Lines changed: 18 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ class DuckDBReader:
3131
3232
Raises
3333
------
34-
ValueError
34+
ReaderError
3535
If the connection string is invalid or the database cannot be opened.
3636
"""
3737

@@ -51,7 +51,7 @@ class DuckDBReader:
5151
5252
Raises
5353
------
54-
ValueError
54+
ReaderError
5555
If the SQL is invalid or execution fails.
5656
"""
5757
...
@@ -74,7 +74,7 @@ class DuckDBReader:
7474
7575
Raises
7676
------
77-
ValueError
77+
ReaderError
7878
If registration fails or the table name is invalid.
7979
"""
8080
...
@@ -89,7 +89,7 @@ class DuckDBReader:
8989
9090
Raises
9191
------
92-
ValueError
92+
ReaderError
9393
If the table was not registered or unregistration fails.
9494
"""
9595
...
@@ -122,9 +122,12 @@ class DuckDBReader:
122122
123123
Raises
124124
------
125-
ValueError
126-
If the query syntax is invalid, has no VISUALISE clause, or
127-
SQL execution fails.
125+
ParseError
126+
If the query syntax is invalid.
127+
ValidationError
128+
If the query has no VISUALISE clause or fails semantic checks.
129+
ReaderError
130+
If SQL execution fails.
128131
"""
129132
...
130133

@@ -154,7 +157,7 @@ class VegaLiteWriter:
154157
155158
Raises
156159
------
157-
ValueError
160+
WriterError
158161
If rendering fails.
159162
"""
160163
...
@@ -388,7 +391,7 @@ def validate(query: str) -> Validated:
388391
389392
Raises
390393
------
391-
ValueError
394+
ParseError
392395
If validation fails unexpectedly (syntax errors are captured in
393396
the returned ``Validated`` object, not raised).
394397
"""
@@ -425,7 +428,11 @@ def execute(
425428
426429
Raises
427430
------
428-
ValueError
429-
If parsing, validation, or SQL execution fails.
431+
ParseError
432+
If the query syntax is invalid.
433+
ValidationError
434+
If semantic validation fails.
435+
ReaderError
436+
If SQL execution fails.
430437
"""
431438
...

ggsql-python/src/lib.rs

Lines changed: 30 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -211,24 +211,6 @@ impl Reader for PyReaderBridge {
211211
}
212212
}
213213

214-
// ============================================================================
215-
// Native Reader Detection Macro
216-
// ============================================================================
217-
218-
/// Macro to try native readers and fall back to bridge.
219-
/// Adding new native readers = add to the macro invocation list.
220-
macro_rules! try_native_readers {
221-
($query:expr, $reader:expr, $($native_type:ty),*) => {{
222-
$(
223-
if let Ok(native) = $reader.downcast::<$native_type>() {
224-
return native.borrow().inner.execute($query)
225-
.map(|s| PySpec { inner: s })
226-
.map_err(ggsql_err_to_py);
227-
}
228-
)*
229-
}};
230-
}
231-
232214
// ============================================================================
233215
// PyDuckDBReader
234216
// ============================================================================
@@ -397,8 +379,16 @@ impl PyDuckDBReader {
397379
}
398380

399381
impl PyDuckDBReader {
400-
/// Register DataFrames from a Python dict. Returns list of registered names for cleanup.
401-
/// This is a private Rust helper, not exposed to Python.
382+
/// Check whether a table already exists in the reader.
383+
fn table_exists(&self, name: &str) -> bool {
384+
// A lightweight probe: try to select zero rows from the table.
385+
self.inner
386+
.execute_sql(&format!("SELECT 1 FROM {name} LIMIT 0"))
387+
.is_ok()
388+
}
389+
390+
/// Register DataFrames from a Python dict. Returns list of *newly created*
391+
/// table names for cleanup (pre-existing tables are not tracked).
402392
fn register_data_dict(
403393
&self,
404394
py: Python<'_>,
@@ -407,11 +397,14 @@ impl PyDuckDBReader {
407397
let mut names = Vec::new();
408398
for (key, value) in data.iter() {
409399
let name: String = key.extract()?;
400+
let existed = self.table_exists(&name);
410401
let df = py_to_polars(py, &value)?;
411402
self.inner
412403
.register(&name, df, true)
413404
.map_err(ggsql_err_to_py)?;
414-
names.push(name);
405+
if !existed {
406+
names.push(name);
407+
}
415408
}
416409
Ok(names)
417410
}
@@ -862,6 +855,15 @@ fn execute(py: Python<'_>, query: &str, reader: &Bound<'_, PyAny>, data: Option<
862855

863856
/// Register DataFrames from a Python dict onto a Python reader object.
864857
/// Returns list of registered names for cleanup.
858+
/// Check whether a table exists via a Python reader's execute_sql method.
859+
fn reader_table_exists(reader: &Bound<'_, PyAny>, name: &str) -> bool {
860+
reader
861+
.call_method1("execute_sql", (format!("SELECT 1 FROM {name} LIMIT 0"),))
862+
.is_ok()
863+
}
864+
865+
/// Register DataFrames from a Python dict onto a Python reader object.
866+
/// Returns list of *newly created* table names for cleanup.
865867
fn register_data_on_reader(
866868
py: Python<'_>,
867869
reader: &Bound<'_, PyAny>,
@@ -870,10 +872,15 @@ fn register_data_on_reader(
870872
let mut names = Vec::new();
871873
for (key, value) in data.iter() {
872874
let name: String = key.extract()?;
875+
let existed = reader_table_exists(reader, &name);
873876
let df = py_to_polars(py, &value)?;
874877
let py_df = polars_to_py(py, &df)?;
875-
reader.call_method("register", (&name, py_df, true), None)?;
876-
names.push(name);
878+
let kwargs = PyDict::new(py);
879+
kwargs.set_item("replace", true)?;
880+
reader.call_method("register", (&name, py_df), Some(&kwargs))?;
881+
if !existed {
882+
names.push(name);
883+
}
877884
}
878885
Ok(names)
879886
}

ggsql-python/tests/test_ggsql.py

Lines changed: 21 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -690,20 +690,20 @@ def test_execute_with_data_cleans_up(self):
690690
data={"temp": df},
691691
)
692692
# Table should be cleaned up — querying it should fail
693-
with pytest.raises((ggsql.ReaderError, ValueError)):
693+
with pytest.raises(ggsql.ReaderError):
694694
reader.execute_sql("SELECT * FROM temp")
695695

696696
def test_execute_with_data_cleans_up_on_error(self):
697697
"""DataFrames are unregistered even if execution fails."""
698698
reader = ggsql.DuckDBReader("duckdb://memory")
699699
df = pl.DataFrame({"x": [1, 2, 3], "y": [10, 20, 30]})
700-
with pytest.raises((ggsql.ParseError, ggsql.ValidationError, ValueError)):
700+
with pytest.raises(ggsql.ParseError):
701701
reader.execute(
702702
"SELECT * FROM temp VISUALISE DRAW not_a_geom",
703703
data={"temp": df},
704704
)
705705
# Table should still be cleaned up
706-
with pytest.raises((ggsql.ReaderError, ValueError)):
706+
with pytest.raises(ggsql.ReaderError):
707707
reader.execute_sql("SELECT * FROM temp")
708708

709709
def test_execute_without_data_still_works(self):
@@ -721,6 +721,24 @@ def test_execute_with_empty_data(self):
721721
)
722722
assert spec.metadata()["rows"] == 1
723723

724+
def test_execute_with_data_preserves_preexisting_table(self):
725+
"""data= does not unregister a table that existed before the call."""
726+
reader = ggsql.DuckDBReader("duckdb://memory")
727+
existing = pl.DataFrame({"x": [1, 2], "y": [10, 20]})
728+
reader.register("mytable", existing)
729+
730+
# Pass same name via data= — should replace temporarily, then NOT unregister
731+
override = pl.DataFrame({"x": [3, 4, 5], "y": [30, 40, 50]})
732+
spec = reader.execute(
733+
"SELECT * FROM mytable VISUALISE x, y DRAW point",
734+
data={"mytable": override},
735+
)
736+
assert spec.metadata()["rows"] == 3
737+
738+
# The pre-existing table should still be queryable (not unregistered)
739+
result = reader.execute_sql("SELECT * FROM mytable")
740+
assert result.shape[0] > 0
741+
724742
def test_module_execute_with_data(self):
725743
"""Module-level execute() also supports data= parameter."""
726744
reader = ggsql.DuckDBReader("duckdb://memory")

0 commit comments

Comments
 (0)