Skip to content

Commit 938fffb

Browse files
committed
Add sqlite reader
1 parent 7f23445 commit 938fffb

10 files changed

Lines changed: 1054 additions & 55 deletions

File tree

Cargo.toml

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -37,8 +37,7 @@ polars-ops = { version = "0.52", features = ["pivot"] }
3737
duckdb = { version = "1.4", features = ["bundled", "vtab-arrow"] }
3838
arrow = { version = "56", default-features = false, features = ["ipc"] }
3939
postgres = "0.19"
40-
sqlx = { version = "0.8", features = ["postgres"] }
41-
rusqlite = "0.32"
40+
rusqlite = { version = "0.38", features = ["bundled", "chrono", "functions", "window"] }
4241

4342
# Writers
4443
plotters = "0.3"

src/Cargo.toml

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,6 @@ polars-ops.workspace = true
3838
duckdb = { workspace = true, optional = true }
3939
arrow = { workspace = true, optional = true }
4040
postgres = { workspace = true, optional = true }
41-
sqlx = { workspace = true, optional = true }
4241
rusqlite = { workspace = true, optional = true }
4342

4443
# Writers

src/execute/layer.rs

Lines changed: 2 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -518,12 +518,6 @@ where
518518
if stat_rename_exprs.is_empty() {
519519
transformed_query
520520
} else {
521-
let stat_col_names: Vec<String> = stat_columns
522-
.iter()
523-
.map(|s| naming::stat_column(s))
524-
.collect();
525-
let exclude_clause = format!("EXCLUDE ({})", stat_col_names.join(", "));
526-
527521
// If the transformed query uses CTEs (WITH ... SELECT ...),
528522
// we can't wrap it in a subquery because Polars SQL doesn't
529523
// support CTEs inside subqueries. Instead, split into CTE
@@ -536,16 +530,14 @@ where
536530
.and_then(super::cte::split_with_query)
537531
{
538532
format!(
539-
"{}, __ggsql_stat__ AS ({}) SELECT * {}, {} FROM __ggsql_stat__",
533+
"{}, __ggsql_stat__ AS ({}) SELECT *, {} FROM __ggsql_stat__",
540534
cte_prefix,
541535
trailing_select,
542-
exclude_clause,
543536
stat_rename_exprs.join(", ")
544537
)
545538
} else {
546539
format!(
547-
"SELECT * {}, {} FROM ({}) AS __ggsql_stat__",
548-
exclude_clause,
540+
"SELECT *, {} FROM ({}) AS __ggsql_stat__",
549541
stat_rename_exprs.join(", "),
550542
transformed_query
551543
)

src/execute/schema.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -145,7 +145,7 @@ pub fn extract_series_value(
145145

146146
/// Fetch only column types (no min/max) from a query.
147147
///
148-
/// Uses LIMIT 0 to get schema without reading data.
148+
/// Uses LIMIT 1 to obtain the schema while reading at most one row.
149149
/// Returns `(name, dtype, is_discrete)` tuples for each column.
150150
///
151151
/// This is the first phase of the split schema extraction approach:
@@ -157,7 +157,7 @@ where
157157
F: Fn(&str) -> Result<DataFrame>,
158158
{
159159
let schema_query = format!(
160-
"SELECT * FROM ({}) AS {} LIMIT 0",
160+
"SELECT * FROM ({}) AS {} LIMIT 1",
161161
query,
162162
naming::SCHEMA_ALIAS
163163
);

src/plot/layer/geom/boxplot.rs

Lines changed: 23 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -166,16 +166,16 @@ fn boxplot_sql_compute_summary(from: &str, groups: &[String], value: &str, coef:
166166
format!(
167167
"SELECT
168168
*,
169-
GREATEST(q1 - {coef} * (q3 - q1), min) AS lower,
170-
LEAST( q3 + {coef} * (q3 - q1), max) AS upper
169+
MAX(q1 - {coef} * (q3 - q1), min) AS lower,
170+
MIN(q3 + {coef} * (q3 - q1), max) AS upper
171171
FROM (
172172
SELECT
173173
{groups},
174174
MIN({value}) AS min,
175175
MAX({value}) AS max,
176-
QUANTILE_CONT({value}, 0.25) AS q1,
177-
QUANTILE_CONT({value}, 0.50) AS median,
178-
QUANTILE_CONT({value}, 0.75) AS q3
176+
PERCENTILE_CONT(0.25) WITHIN GROUP (ORDER BY {value}) AS q1,
177+
PERCENTILE_CONT(0.50) WITHIN GROUP (ORDER BY {value}) AS median,
178+
PERCENTILE_CONT(0.75) WITHIN GROUP (ORDER BY {value}) AS q3
179179
FROM ({from}) AS __ggsql_qt__
180180
WHERE {value} IS NOT NULL
181181
GROUP BY {groups}
@@ -266,10 +266,8 @@ fn boxplot_sql_append_outliers(
266266
)
267267
{summary_select}
268268
UNION ALL
269-
(
270269
SELECT {groups}, type AS {type_name}, value AS {value_name}, NULL AS {value2_name}
271270
FROM outliers
272-
)
273271
",
274272
summary = from,
275273
outliers = outliers,
@@ -307,32 +305,32 @@ mod tests {
307305
fn test_sql_compute_summary_basic() {
308306
let groups = vec!["category".to_string()];
309307
let result = boxplot_sql_compute_summary("data", &groups, "value", &1.5);
310-
assert!(result.contains("QUANTILE_CONT(value, 0.25)"));
311-
assert!(result.contains("QUANTILE_CONT(value, 0.50)"));
312-
assert!(result.contains("QUANTILE_CONT(value, 0.75)"));
308+
assert!(result.contains("PERCENTILE_CONT(0.25) WITHIN GROUP (ORDER BY value)"));
309+
assert!(result.contains("PERCENTILE_CONT(0.50) WITHIN GROUP (ORDER BY value)"));
310+
assert!(result.contains("PERCENTILE_CONT(0.75) WITHIN GROUP (ORDER BY value)"));
313311
assert!(result.contains("MIN(value) AS min"));
314312
assert!(result.contains("MAX(value) AS max"));
315313
assert!(result.contains("WHERE value IS NOT NULL"));
316314
assert!(result.contains("GROUP BY category"));
317-
assert!(result.contains("GREATEST"));
318-
assert!(result.contains("LEAST"));
315+
assert!(result.contains("MAX(q1 - 1.5"));
316+
assert!(result.contains("MIN(q3 + 1.5"));
319317
}
320318

321319
#[test]
322320
fn test_sql_compute_summary_multiple_groups() {
323321
let groups = vec!["cat".to_string(), "region".to_string()];
324322
let result = boxplot_sql_compute_summary("tbl", &groups, "val", &1.5);
325323
assert!(result.contains("GROUP BY cat, region"));
326-
assert!(result.contains("QUANTILE_CONT(val, 0.25)"));
324+
assert!(result.contains("PERCENTILE_CONT(0.25) WITHIN GROUP (ORDER BY val)"));
327325
}
328326

329327
#[test]
330328
fn test_sql_compute_summary_custom_coef() {
331329
let groups = vec!["pos1".to_string()];
332330
let result = boxplot_sql_compute_summary("q", &groups, "pos2", &2.5);
333331
assert!(result.contains("2.5"));
334-
assert!(result.contains("GREATEST(q1 - 2.5 * (q3 - q1), min)"));
335-
assert!(result.contains("LEAST( q3 + 2.5 * (q3 - q1), max)"));
332+
assert!(result.contains("MAX(q1 - 2.5 * (q3 - q1), min)"));
333+
assert!(result.contains("MIN(q3 + 2.5 * (q3 - q1), max)"));
336334
}
337335

338336
#[test]
@@ -355,16 +353,16 @@ mod tests {
355353

356354
let expected = r#"SELECT
357355
*,
358-
GREATEST(q1 - 1.5 * (q3 - q1), min) AS lower,
359-
LEAST( q3 + 1.5 * (q3 - q1), max) AS upper
356+
MAX(q1 - 1.5 * (q3 - q1), min) AS lower,
357+
MIN(q3 + 1.5 * (q3 - q1), max) AS upper
360358
FROM (
361359
SELECT
362360
category,
363361
MIN(price) AS min,
364362
MAX(price) AS max,
365-
QUANTILE_CONT(price, 0.25) AS q1,
366-
QUANTILE_CONT(price, 0.50) AS median,
367-
QUANTILE_CONT(price, 0.75) AS q3
363+
PERCENTILE_CONT(0.25) WITHIN GROUP (ORDER BY price) AS q1,
364+
PERCENTILE_CONT(0.50) WITHIN GROUP (ORDER BY price) AS median,
365+
PERCENTILE_CONT(0.75) WITHIN GROUP (ORDER BY price) AS q3
368366
FROM (SELECT * FROM sales) AS __ggsql_qt__
369367
WHERE price IS NOT NULL
370368
GROUP BY category
@@ -380,16 +378,16 @@ mod tests {
380378

381379
let expected = r#"SELECT
382380
*,
383-
GREATEST(q1 - 1.5 * (q3 - q1), min) AS lower,
384-
LEAST( q3 + 1.5 * (q3 - q1), max) AS upper
381+
MAX(q1 - 1.5 * (q3 - q1), min) AS lower,
382+
MIN(q3 + 1.5 * (q3 - q1), max) AS upper
385383
FROM (
386384
SELECT
387385
region, product,
388386
MIN(revenue) AS min,
389387
MAX(revenue) AS max,
390-
QUANTILE_CONT(revenue, 0.25) AS q1,
391-
QUANTILE_CONT(revenue, 0.50) AS median,
392-
QUANTILE_CONT(revenue, 0.75) AS q3
388+
PERCENTILE_CONT(0.25) WITHIN GROUP (ORDER BY revenue) AS q1,
389+
PERCENTILE_CONT(0.50) WITHIN GROUP (ORDER BY revenue) AS median,
390+
PERCENTILE_CONT(0.75) WITHIN GROUP (ORDER BY revenue) AS q3
393391
FROM (SELECT * FROM data) AS __ggsql_qt__
394392
WHERE revenue IS NOT NULL
395393
GROUP BY region, product

src/plot/layer/geom/density.rs

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -282,7 +282,7 @@ fn silverman_rule(adjust: f64, value_column: &str) -> String {
282282
// We absorb the adjustment in the 0.9 multiplier of the rule
283283
let adjust = 0.9 * adjust;
284284
format!(
285-
"{adjust} * LEAST(STDDEV({value}), (QUANTILE_CONT({value}, 0.75) - QUANTILE_CONT({value}, 0.25)) / 1.34) * POWER(COUNT(*), -0.2)",
285+
"{adjust} * MIN(SQRT(AVG({value}*{value}) - AVG({value})*AVG({value})), (PERCENTILE_CONT(0.75) WITHIN GROUP (ORDER BY {value}) - PERCENTILE_CONT(0.25) WITHIN GROUP (ORDER BY {value})) / 1.34) * POW(COUNT(*), -0.2)",
286286
adjust = adjust,
287287
value = value_column
288288
)
@@ -340,7 +340,7 @@ fn choose_kde_kernel(parameters: &HashMap<String, ParameterValue>) -> Result<Str
340340
// Use weighted sum for density computation
341341
// Weighted: density = (1/h) × Σ(wi × K((x-xi)/h)) / Σwi
342342
Ok(format!(
343-
"SUM(data.weight * ({kernel})) / ANY_VALUE(bandwidth.bw)",
343+
"SUM(data.weight * ({kernel})) / MIN(bandwidth.bw)",
344344
kernel = kernel
345345
))
346346
}
@@ -662,11 +662,11 @@ mod tests {
662662

663663
let bw_cte = density_sql_bandwidth(query, &groups, "x", &parameters);
664664

665-
// Verify exact SQL structure uses QUANTILE_CONT
666-
let expected = "WITH
665+
// Verify exact SQL structure uses PERCENTILE_CONT
666+
let expected = "WITH RECURSIVE
667667
bandwidth AS (
668668
SELECT
669-
0.9 * LEAST(STDDEV(x), (QUANTILE_CONT(x, 0.75) - QUANTILE_CONT(x, 0.25)) / 1.34) * POWER(COUNT(*), -0.2) AS bw
669+
0.9 * MIN(SQRT(AVG(x*x) - AVG(x)*AVG(x)), (PERCENTILE_CONT(0.75) WITHIN GROUP (ORDER BY x) - PERCENTILE_CONT(0.25) WITHIN GROUP (ORDER BY x)) / 1.34) * POW(COUNT(*), -0.2) AS bw
670670
FROM (SELECT x FROM (VALUES (1.0), (2.0), (3.0), (4.0), (5.0)) AS t(x))
671671
WHERE x IS NOT NULL
672672
@@ -692,11 +692,11 @@ mod tests {
692692

693693
let bw_cte = density_sql_bandwidth(query, &groups, "x", &parameters);
694694

695-
// Verify exact SQL structure uses QUANTILE_CONT with GROUP BY
696-
let expected = "WITH
695+
// Verify exact SQL structure uses PERCENTILE_CONT with GROUP BY
696+
let expected = "WITH RECURSIVE
697697
bandwidth AS (
698698
SELECT
699-
0.9 * LEAST(STDDEV(x), (QUANTILE_CONT(x, 0.75) - QUANTILE_CONT(x, 0.25)) / 1.34) * POWER(COUNT(*), -0.2) AS bw,
699+
0.9 * MIN(SQRT(AVG(x*x) - AVG(x)*AVG(x)), (PERCENTILE_CONT(0.75) WITHIN GROUP (ORDER BY x) - PERCENTILE_CONT(0.25) WITHIN GROUP (ORDER BY x)) / 1.34) * POW(COUNT(*), -0.2) AS bw,
700700
region
701701
FROM (SELECT x, region FROM (VALUES (1.0, 'A'), (2.0, 'A'), (3.0, 'B')) AS t(x, region))
702702
WHERE x IS NOT NULL

src/plot/layer/geom/histogram.rs

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -163,9 +163,11 @@ fn stat_histogram(
163163
w = bin_width
164164
)
165165
} else {
166-
// Right-closed (a, b]: use CEIL - 1 with GREATEST for min value
166+
// Right-closed (a, b]: use CEIL - 1, clamped to 0 minimum
167+
// Use CASE instead of MAX(a,b) because this expression appears in GROUP BY
168+
// where MAX would be interpreted as the aggregate function
167169
format!(
168-
"(GREATEST(CEIL(({x} - {min} + {w} * 0.5) / {w}) - 1, 0)) * {w} + {min} - {w} * 0.5",
170+
"(CASE WHEN CEIL(({x} - {min} + {w} * 0.5) / {w}) - 1 > 0 THEN CEIL(({x} - {min} + {w} * 0.5) / {w}) - 1 ELSE 0 END) * {w} + {min} - {w} * 0.5",
169171
x = x_col,
170172
min = min_val,
171173
w = bin_width

src/plot/scale/scale_type/continuous.rs

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -153,7 +153,7 @@ impl ScaleTypeTrait for Continuous {
153153
///
154154
/// Supports OOB modes:
155155
/// - "censor": CASE WHEN col >= min AND col <= max THEN col ELSE NULL END
156-
/// - "squish": GREATEST(min, LEAST(col, max))
156+
/// - "squish": MAX(min, MIN(col, max))
157157
/// - "keep": No transformation (returns None)
158158
///
159159
/// Only applies when input_range is explicitly specified via FROM clause.
@@ -197,7 +197,7 @@ impl ScaleTypeTrait for Continuous {
197197
column_name, min, column_name, max, column_name
198198
)),
199199
OOB_SQUISH => Some(format!(
200-
"GREATEST({}, LEAST({}, {}))",
200+
"MAX({}, MIN({}, {}))",
201201
min, max, column_name
202202
)),
203203
_ => None, // "keep" = no transformation
@@ -268,9 +268,9 @@ mod tests {
268268

269269
assert!(sql.is_some());
270270
let sql = sql.unwrap();
271-
// Should generate GREATEST/LEAST for squish
272-
assert!(sql.contains("GREATEST"));
273-
assert!(sql.contains("LEAST"));
271+
// Should generate MAX/MIN for squish
272+
assert!(sql.contains("MAX("));
273+
assert!(sql.contains("MIN("));
274274
}
275275

276276
#[test]

src/reader/mod.rs

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,9 @@ pub mod duckdb;
4444
#[cfg(feature = "polars-sql")]
4545
pub mod polars_sql;
4646

47+
#[cfg(feature = "sqlite")]
48+
pub mod sqlite;
49+
4750
pub mod connection;
4851
pub mod data;
4952
mod spec;
@@ -54,6 +57,9 @@ pub use duckdb::DuckDBReader;
5457
#[cfg(feature = "polars-sql")]
5558
pub use polars_sql::PolarsReader;
5659

60+
#[cfg(feature = "sqlite")]
61+
pub use sqlite::SqliteReader;
62+
5763
// ============================================================================
5864
// Spec - Result of reader.execute()
5965
// ============================================================================

0 commit comments

Comments
 (0)