Skip to content

Commit 871a2b1

Browse files
authored
Add ggsql WebAssembly library (#126)
* Build ggsql/polars-sql for wasm32-unknown-unknown
* Add GitHub CI to build wasm package
* cargo fmt
* Make register a strictly required method
* cargo fmt
* Use GGSQL_PREFIX
* Add additional register argument where required
* Use existing infrastructure to manipulate CTEs
* Keep clippy, the formatter, and pytest happy
1 parent 3b6b659 commit 871a2b1

40 files changed

Lines changed: 2376 additions & 514 deletions

.cargo/config.toml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
[target.wasm32-unknown-unknown]
2+
rustflags = [
3+
"-C", "link-args=-z stack-size=16777216",
4+
]

.github/workflows/wasm.yaml

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
name: wasm.yaml
2+
3+
on:
4+
push:
5+
branches: [main]
6+
pull_request:
7+
branches: [main]
8+
workflow_dispatch:
9+
10+
jobs:
11+
build:
12+
runs-on: ubuntu-latest
13+
14+
steps:
15+
- name: Check out repository
16+
uses: actions/checkout@v4
17+
18+
- name: Increase disk space
19+
run: |
20+
sudo rm -rf /usr/share/dotnet /usr/local/lib/android /opt/ghc /opt/hostedtoolcache/CodeQL
21+
sudo docker image prune --all --force
22+
sudo docker builder prune -a
23+
24+
- name: Setup Node.js
25+
uses: actions/setup-node@v4
26+
with:
27+
node-version: "20"
28+
29+
- name: Install tree-sitter-cli
30+
run: npm install -g tree-sitter-cli
31+
32+
- name: Install Rust
33+
uses: dtolnay/rust-toolchain@stable
34+
35+
- name: Install LLVM
36+
run: sudo apt-get install -y llvm
37+
38+
- name: Caching
39+
uses: Swatinem/rust-cache@v2
40+
with:
41+
shared-key: ${{ runner.os }}-build
42+
cache-on-failure: true
43+
44+
- name: Install wasm-opt
45+
run: cargo install wasm-opt
46+
47+
- name: Install wasm-pack
48+
run: cargo install wasm-pack
49+
50+
- name: Build WASM package
51+
working-directory: ggsql-wasm
52+
run: wasm-pack build --target web --profile wasm --no-opt
53+
54+
- name: Optimise WASM binary
55+
working-directory: ggsql-wasm
56+
run: wasm-opt pkg/ggsql_wasm_bg.wasm -o pkg/ggsql_wasm_bg.wasm -Oz --all-features

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -104,6 +104,7 @@ docs/_build/
104104
.env.local
105105
.env.production
106106
config.toml
107+
!.cargo/config.toml
107108
secrets.toml
108109

109110
# Generated documentation

CLAUDE.md

Lines changed: 13 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -994,10 +994,12 @@ spec = ggsql.execute(
994994
)
995995
```
996996

997+
Required methods for custom readers (in addition to `execute_sql`):
998+
999+
- `register(name: str, df: polars.DataFrame, replace: bool = False) -> None` - Register a DataFrame as a table
1000+
9971001
Optional methods for custom readers:
9981002

999-
- `supports_register() -> bool` - Return `True` if registration is supported
1000-
- `register(name: str, df: polars.DataFrame) -> None` - Register a DataFrame as a table
10011003
- `unregister(name: str) -> None` - Unregister a previously registered table
10021004

10031005
Native readers (e.g., `DuckDBReader`) use an optimized fast path, while custom Python readers are automatically bridged via IPC serialization.
@@ -1091,15 +1093,15 @@ Where `<global_mapping>` can be:
10911093

10921094
### Clause Types
10931095

1094-
| Clause | Repeatable | Purpose | Example |
1095-
| -------------- | ---------- | ------------------ | ------------------------------------ |
1096-
| `VISUALISE` | ✅ Yes | Entry point | `VISUALISE date AS x, revenue AS y` |
1097-
| `DRAW` | ✅ Yes | Define layers | `DRAW line MAPPING date AS x, value AS y` |
1098-
| `SCALE` | ✅ Yes | Configure scales | `SCALE x VIA date` |
1099-
| `FACET` | ❌ No | Small multiples | `FACET WRAP region` |
1100-
| `COORD` | ❌ No | Coordinate system | `COORD cartesian SETTING xlim => [0,100]` |
1101-
| `LABEL` | ❌ No | Text labels | `LABEL title => 'My Chart', x => 'Date'` |
1102-
| `THEME` | ❌ No | Visual styling | `THEME minimal` |
1096+
| Clause | Repeatable | Purpose | Example |
1097+
| ----------- | ---------- | ----------------- | ----------------------------------------- |
1098+
| `VISUALISE` | ✅ Yes | Entry point | `VISUALISE date AS x, revenue AS y` |
1099+
| `DRAW` | ✅ Yes | Define layers | `DRAW line MAPPING date AS x, value AS y` |
1100+
| `SCALE` | ✅ Yes | Configure scales | `SCALE x VIA date` |
1101+
| `FACET` | ❌ No | Small multiples | `FACET WRAP region` |
1102+
| `COORD` | ❌ No | Coordinate system | `COORD cartesian SETTING xlim => [0,100]` |
1103+
| `LABEL` | ❌ No | Text labels | `LABEL title => 'My Chart', x => 'Date'` |
1104+
| `THEME` | ❌ No | Visual styling | `THEME minimal` |
11031105

11041106
### DRAW Clause (Layers)
11051107

Cargo.toml

Lines changed: 14 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,8 @@ members = [
33
"tree-sitter-ggsql",
44
"src",
55
"ggsql-jupyter",
6-
"ggsql-python"
6+
"ggsql-python",
7+
"ggsql-wasm"
78
]
89
# ggsql-python is excluded from default builds because it's a PyO3 extension
910
# that requires Python dev headers. Build it separately with maturin.
@@ -25,18 +26,18 @@ description = "SQL extension for declarative data visualization"
2526

2627
[workspace.dependencies]
2728
# Parsing
28-
tree-sitter = "0.25"
29+
tree-sitter = "0.26"
2930
csscolorparser = "0.8.1"
3031

3132
# Data processing
32-
polars = { version = "0.52", features = ["lazy", "sql", "ipc"] }
33+
polars = { version = "0.52", default-features = false }
3334
polars-ops = { version = "0.52", features = ["pivot"] }
3435

3536
# Readers
3637
duckdb = { version = "1.4", features = ["bundled", "vtab-arrow"] }
3738
arrow = { version = "56", default-features = false, features = ["ipc"] }
3839
postgres = "0.19"
39-
sqlx = { version = "0.8", features = ["postgres", "runtime-tokio-rustls"] }
40+
sqlx = { version = "0.8", features = ["postgres"] }
4041
rusqlite = "0.32"
4142

4243
# Writers
@@ -68,7 +69,15 @@ uuid = { version = "1.0", features = ["v4"] }
6869

6970
# Web server
7071
axum = "0.7"
71-
tokio = { version = "1.35", features = ["full"] }
72+
tokio = { version = "1.35", default-features = false }
7273
tower-http = { version = "0.5", features = ["cors", "trace"] }
7374
tracing = "0.1"
7475
tracing-subscriber = { version = "0.3", features = ["env-filter"] }
76+
77+
[profile.wasm]
78+
inherits = "release"
79+
opt-level = "z"
80+
lto = true
81+
codegen-units = 1
82+
strip = true
83+
panic = "abort"

ggsql-python/README.md

Lines changed: 7 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -125,10 +125,9 @@ reader = ggsql.DuckDBReader("duckdb:///path/to/file.db") # File database
125125

126126
**Methods:**
127127

128-
- `register(name: str, df: polars.DataFrame)` - Register a DataFrame as a queryable table
128+
- `register(name: str, df: polars.DataFrame, replace: bool = False)` - Register a DataFrame as a queryable table
129129
- `unregister(name: str)` - Unregister a previously registered table
130130
- `execute_sql(sql: str) -> polars.DataFrame` - Execute SQL and return results
131-
- `supports_register() -> bool` - Check if registration is supported
132131

133132
#### `VegaLiteWriter()`
134133

@@ -262,11 +261,10 @@ writer = ggsql.VegaLiteWriter()
262261
json_output = writer.render(spec)
263262
```
264263

265-
**Optional methods** for custom readers:
264+
**Additional methods** for custom readers:
266265

267-
- `supports_register() -> bool` - Return `True` if your reader supports DataFrame registration
268-
- `register(name: str, df: polars.DataFrame) -> None` - Register a DataFrame as a queryable table
269-
- `unregister(name: str) -> None` - Unregister a previously registered table
266+
- `register(name: str, df: polars.DataFrame, replace: bool = False) -> None` - Register a DataFrame as a queryable table (required)
267+
- `unregister(name: str) -> None` - Unregister a previously registered table (optional)
270268

271269
```python
272270
class AdvancedReader:
@@ -279,10 +277,7 @@ class AdvancedReader:
279277
# Your SQL execution logic here
280278
...
281279

282-
def supports_register(self) -> bool:
283-
return True
284-
285-
def register(self, name: str, df: pl.DataFrame) -> None:
280+
def register(self, name: str, df: pl.DataFrame, replace: bool = False) -> None:
286281
self.tables[name] = df
287282

288283
def unregister(self, name: str) -> None:
@@ -313,11 +308,8 @@ class IbisReader:
313308
def execute_sql(self, sql: str) -> pl.DataFrame:
314309
return self.con.con.execute(sql).pl()
315310

316-
def supports_register(self) -> bool:
317-
return True
318-
319-
def register(self, name: str, df: pl.DataFrame) -> None:
320-
self.con.create_table(name, df.to_arrow(), overwrite=True)
311+
def register(self, name: str, df: pl.DataFrame, replace: bool = False) -> None:
312+
self.con.create_table(name, df.to_arrow(), overwrite=replace)
321313

322314
def unregister(self, name: str) -> None:
323315
self.con.drop_table(name)

ggsql-python/src/lib.rs

Lines changed: 13 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -138,29 +138,19 @@ impl Reader for PyReaderBridge {
138138
})
139139
}
140140

141-
fn supports_register(&self) -> bool {
142-
Python::attach(|py| {
143-
self.obj
144-
.bind(py)
145-
.call_method0("supports_register")
146-
.and_then(|r| r.extract::<bool>())
147-
.unwrap_or(false)
148-
})
149-
}
150-
151-
fn register(&mut self, name: &str, df: DataFrame) -> ggsql::Result<()> {
141+
fn register(&self, name: &str, df: DataFrame, replace: bool) -> ggsql::Result<()> {
152142
Python::attach(|py| {
153143
let py_df =
154144
polars_to_py(py, &df).map_err(|e| GgsqlError::ReaderError(e.to_string()))?;
155145
self.obj
156146
.bind(py)
157-
.call_method1("register", (name, py_df))
147+
.call_method1("register", (name, py_df, replace))
158148
.map_err(|e| GgsqlError::ReaderError(format!("Reader.register() failed: {}", e)))?;
159149
Ok(())
160150
})
161151
}
162152

163-
fn unregister(&mut self, name: &str) -> ggsql::Result<()> {
153+
fn unregister(&self, name: &str) -> ggsql::Result<()> {
164154
Python::attach(|py| {
165155
self.obj
166156
.bind(py)
@@ -254,10 +244,17 @@ impl PyDuckDBReader {
254244
/// ------
255245
/// ValueError
256246
/// If registration fails or the table name is invalid.
257-
fn register(&mut self, py: Python<'_>, name: &str, df: &Bound<'_, PyAny>) -> PyResult<()> {
247+
#[pyo3(signature = (name, df, replace=false))]
248+
fn register(
249+
&self,
250+
py: Python<'_>,
251+
name: &str,
252+
df: &Bound<'_, PyAny>,
253+
replace: bool,
254+
) -> PyResult<()> {
258255
let rust_df = py_to_polars(py, df)?;
259256
self.inner
260-
.register(name, rust_df)
257+
.register(name, rust_df, replace)
261258
.map_err(|e| PyErr::new::<pyo3::exceptions::PyValueError, _>(e.to_string()))
262259
}
263260

@@ -272,7 +269,7 @@ impl PyDuckDBReader {
272269
/// ------
273270
/// ValueError
274271
/// If the table wasn't registered via this reader or unregistration fails.
275-
fn unregister(&mut self, name: &str) -> PyResult<()> {
272+
fn unregister(&self, name: &str) -> PyResult<()> {
276273
self.inner
277274
.unregister(name)
278275
.map_err(|e| PyErr::new::<pyo3::exceptions::PyValueError, _>(e.to_string()))
@@ -302,16 +299,6 @@ impl PyDuckDBReader {
302299
polars_to_py(py, &df)
303300
}
304301

305-
/// Check if this reader supports DataFrame registration.
306-
///
307-
/// Returns
308-
/// -------
309-
/// bool
310-
/// True if register() is supported, False otherwise.
311-
fn supports_register(&self) -> bool {
312-
self.inner.supports_register()
313-
}
314-
315302
/// Execute a ggsql query and return the visualization specification.
316303
///
317304
/// This is the main entry point for creating visualizations. It parses

ggsql-python/tests/test_ggsql.py

Lines changed: 12 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -85,10 +85,6 @@ def test_register_and_query(self):
8585
assert isinstance(result, pl.DataFrame)
8686
assert result.shape == (2, 2)
8787

88-
def test_supports_register(self):
89-
reader = ggsql.DuckDBReader("duckdb://memory")
90-
assert reader.supports_register() is True
91-
9288
def test_invalid_connection_string(self):
9389
with pytest.raises(ValueError):
9490
ggsql.DuckDBReader("invalid://connection")
@@ -396,25 +392,6 @@ def test_can_introspect_spec(self):
396392
class TestCustomReader:
397393
"""Tests for custom Python reader support."""
398394

399-
def test_simple_custom_reader(self):
400-
"""Custom reader with execute_sql() method works."""
401-
402-
class SimpleReader:
403-
def __init__(self):
404-
self.conn = duckdb.connect()
405-
self.conn.execute(
406-
"CREATE TABLE data AS SELECT * FROM ("
407-
"VALUES (1, 10), (2, 20), (3, 30)"
408-
") AS t(x, y)"
409-
)
410-
411-
def execute_sql(self, sql: str) -> pl.DataFrame:
412-
return self.conn.execute(sql).pl()
413-
414-
reader = SimpleReader()
415-
spec = ggsql.execute("SELECT * FROM data VISUALISE x, y DRAW point", reader)
416-
assert spec.metadata()["rows"] == 3
417-
418395
def test_custom_reader_with_register(self):
419396
"""Custom reader with register() support."""
420397

@@ -425,10 +402,7 @@ def __init__(self):
425402
def execute_sql(self, sql: str) -> pl.DataFrame:
426403
return self.conn.execute(sql).pl()
427404

428-
def supports_register(self) -> bool:
429-
return True
430-
431-
def register(self, name: str, df: pl.DataFrame) -> None:
405+
def register(self, name: str, df: pl.DataFrame, _replace: bool) -> None:
432406
self.conn.register(name, df)
433407

434408
reader = RegisterReader()
@@ -479,6 +453,9 @@ def __init__(self):
479453
def execute_sql(self, sql: str) -> pl.DataFrame:
480454
return self.conn.execute(sql).pl()
481455

456+
def register(self, name: str, df: pl.DataFrame, _replace: bool) -> None:
457+
self.conn.register(name, df)
458+
482459
reader = DuckDBBackedReader()
483460
spec = ggsql.execute(
484461
"SELECT * FROM data VISUALISE x, y, category AS color DRAW point",
@@ -499,15 +476,17 @@ class RecordingReader:
499476
def __init__(self):
500477
self.conn = duckdb.connect()
501478
self.conn.execute(
502-
"CREATE TABLE data AS SELECT * FROM ("
503-
"VALUES (1, 2)) AS t(x, y)"
479+
"CREATE TABLE data AS SELECT * FROM (VALUES (1, 2)) AS t(x, y)"
504480
)
505481
self.execute_calls = []
506482

507483
def execute_sql(self, sql: str) -> pl.DataFrame:
508484
self.execute_calls.append(sql)
509485
return self.conn.execute(sql).pl()
510486

487+
def register(self, name: str, df: pl.DataFrame, _replace: bool) -> None:
488+
self.conn.register(name, df)
489+
511490
reader = RecordingReader()
512491
ggsql.execute(
513492
"SELECT * FROM data VISUALISE x, y DRAW point",
@@ -530,11 +509,10 @@ def __init__(self):
530509
def execute_sql(self, sql: str) -> pl.DataFrame:
531510
return self.con.con.execute(sql).pl()
532511

533-
def supports_register(self) -> bool:
534-
return True
535-
536-
def register(self, name: str, df: pl.DataFrame) -> None:
537-
self.con.create_table(name, df.to_arrow(), overwrite=True)
512+
def register(
513+
self, name: str, df: pl.DataFrame, replace: bool = True
514+
) -> None:
515+
self.con.create_table(name, df.to_arrow(), overwrite=replace)
538516

539517
def unregister(self, name: str) -> None:
540518
self.con.drop_table(name)

0 commit comments

Comments
 (0)