Skip to content

Commit b6a46aa

Browse files
committed
Portable GREATEST/LEAST and various sqlite tweaks
1 parent 0154596 commit b6a46aa

7 files changed

Lines changed: 94 additions & 29 deletions

File tree

Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@ polars-ops = { version = "0.52", features = ["pivot"] }
3737
duckdb = { version = "1.4", features = ["bundled", "vtab-arrow"] }
3838
arrow = { version = "56", default-features = false, features = ["ipc"] }
3939
postgres = "0.19"
40-
rusqlite = { version = "0.38", features = ["bundled"] }
40+
rusqlite = { version = "0.38", features = ["bundled", "functions", "window", "series"] }
4141

4242
# Writers
4343
plotters = "0.3"

src/lib.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@ pub mod writer;
4848

4949
pub mod execute;
5050

51+
pub mod utils;
5152
pub mod validate;
5253

5354
// Re-export key types for convenience

src/plot/layer/geom/boxplot.rs

Lines changed: 15 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ use crate::{
99
geom::types::get_column_name, DefaultAestheticValue, DefaultParam, DefaultParamValue,
1010
ParameterValue, StatResult,
1111
},
12+
utils::{scalar_max, scalar_min},
1213
DataFrame, GgsqlError, Mappings, Result,
1314
};
1415

@@ -163,11 +164,13 @@ fn stat_boxplot(
163164

164165
fn boxplot_sql_compute_summary(from: &str, groups: &[String], value: &str, coef: &f64) -> String {
165166
let groups_str = groups.join(", ");
167+
let lower_expr = scalar_max(&[&format!("q1 - {coef} * (q3 - q1)"), "min"]);
168+
let upper_expr = scalar_min(&[&format!("q3 + {coef} * (q3 - q1)"), "max"]);
166169
format!(
167170
"SELECT
168171
*,
169-
MAX(q1 - {coef} * (q3 - q1), min) AS lower,
170-
MIN(q3 + {coef} * (q3 - q1), max) AS upper
172+
{lower_expr} AS lower,
173+
{upper_expr} AS upper
171174
FROM (
172175
SELECT
173176
{groups},
@@ -180,7 +183,8 @@ fn boxplot_sql_compute_summary(from: &str, groups: &[String], value: &str, coef:
180183
WHERE {value} IS NOT NULL
181184
GROUP BY {groups}
182185
) AS __ggsql_fn__",
183-
coef = coef,
186+
lower_expr = lower_expr,
187+
upper_expr = upper_expr,
184188
groups = groups_str,
185189
value = value,
186190
from = from
@@ -312,8 +316,8 @@ mod tests {
312316
assert!(result.contains("MAX(value) AS max"));
313317
assert!(result.contains("WHERE value IS NOT NULL"));
314318
assert!(result.contains("GROUP BY category"));
315-
assert!(result.contains("MAX(q1 - 1.5"));
316-
assert!(result.contains("MIN(q3 + 1.5"));
319+
assert!(result.contains("SELECT MAX(v) FROM (VALUES (q1 - 1.5"));
320+
assert!(result.contains("SELECT MIN(v) FROM (VALUES (q3 + 1.5"));
317321
}
318322

319323
#[test]
@@ -329,8 +333,8 @@ mod tests {
329333
let groups = vec!["pos1".to_string()];
330334
let result = boxplot_sql_compute_summary("q", &groups, "pos2", &2.5);
331335
assert!(result.contains("2.5"));
332-
assert!(result.contains("MAX(q1 - 2.5 * (q3 - q1), min)"));
333-
assert!(result.contains("MIN(q3 + 2.5 * (q3 - q1), max)"));
336+
assert!(result.contains("SELECT MAX(v) FROM (VALUES (q1 - 2.5 * (q3 - q1)), (min)) AS t(v)"));
337+
assert!(result.contains("SELECT MIN(v) FROM (VALUES (q3 + 2.5 * (q3 - q1)), (max)) AS t(v)"));
334338
}
335339

336340
#[test]
@@ -353,8 +357,8 @@ mod tests {
353357

354358
let expected = r#"SELECT
355359
*,
356-
MAX(q1 - 1.5 * (q3 - q1), min) AS lower,
357-
MIN(q3 + 1.5 * (q3 - q1), max) AS upper
360+
(SELECT MAX(v) FROM (VALUES (q1 - 1.5 * (q3 - q1)), (min)) AS t(v)) AS lower,
361+
(SELECT MIN(v) FROM (VALUES (q3 + 1.5 * (q3 - q1)), (max)) AS t(v)) AS upper
358362
FROM (
359363
SELECT
360364
category,
@@ -378,8 +382,8 @@ mod tests {
378382

379383
let expected = r#"SELECT
380384
*,
381-
MAX(q1 - 1.5 * (q3 - q1), min) AS lower,
382-
MIN(q3 + 1.5 * (q3 - q1), max) AS upper
385+
(SELECT MAX(v) FROM (VALUES (q1 - 1.5 * (q3 - q1)), (min)) AS t(v)) AS lower,
386+
(SELECT MIN(v) FROM (VALUES (q3 + 1.5 * (q3 - q1)), (max)) AS t(v)) AS upper
383387
FROM (
384388
SELECT
385389
region, product,

src/plot/layer/geom/density.rs

Lines changed: 15 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ use crate::{
77
geom::types::get_column_name, DefaultAestheticValue, DefaultParam, DefaultParamValue,
88
ParameterValue, StatResult,
99
},
10+
utils::scalar_min,
1011
GgsqlError, Mappings, Result,
1112
};
1213
use std::collections::HashMap;
@@ -281,11 +282,16 @@ fn silverman_rule(adjust: f64, value_column: &str) -> String {
281282
// The query computes Silverman's rule of thumb (R's `stats::bw.nrd0()`).
282283
// We absorb the adjustment in the 0.9 multiplier of the rule
283284
let adjust = 0.9 * adjust;
284-
format!(
285-
"{adjust} * MIN(SQRT(AVG({value}*{value}) - AVG({value})*AVG({value})), (PERCENTILE_CONT(0.75) WITHIN GROUP (ORDER BY {value}) - PERCENTILE_CONT(0.25) WITHIN GROUP (ORDER BY {value})) / 1.34) * POW(COUNT(*), -0.2)",
286-
adjust = adjust,
287-
value = value_column
288-
)
285+
let stddev = format!(
286+
"SQRT(AVG({v}*{v}) - AVG({v})*AVG({v}))",
287+
v = value_column
288+
);
289+
let iqr = format!(
290+
"(PERCENTILE_CONT(0.75) WITHIN GROUP (ORDER BY {v}) - PERCENTILE_CONT(0.25) WITHIN GROUP (ORDER BY {v})) / 1.34",
291+
v = value_column
292+
);
293+
let min_expr = scalar_min(&[&stddev, &iqr]);
294+
format!("{adjust} * {min_expr} * POW(COUNT(*), -0.2)")
289295
}
290296

291297
fn choose_kde_kernel(parameters: &HashMap<String, ParameterValue>) -> Result<String> {
@@ -533,7 +539,7 @@ mod tests {
533539
FROM (
534540
SELECT
535541
grid.x AS __ggsql_stat_x,
536-
SUM(data.weight * ((EXP(-0.5 * (grid.x - data.val) * (grid.x - data.val) / (bandwidth.bw * bandwidth.bw))) * 0.3989422804014327)) / ANY_VALUE(bandwidth.bw) AS __ggsql_stat_intensity,
542+
SUM(data.weight * ((EXP(-0.5 * (grid.x - data.val) * (grid.x - data.val) / (bandwidth.bw * bandwidth.bw))) * 0.3989422804014327)) / MIN(bandwidth.bw) AS __ggsql_stat_intensity,
537543
SUM(data.weight) AS __norm
538544
FROM data
539545
INNER JOIN bandwidth ON true
@@ -600,7 +606,7 @@ mod tests {
600606
SELECT
601607
grid.x AS __ggsql_stat_x,
602608
grid.region, grid.category,
603-
SUM(data.weight * ((EXP(-0.5 * (grid.x - data.val) * (grid.x - data.val) / (bandwidth.bw * bandwidth.bw))) * 0.3989422804014327)) / ANY_VALUE(bandwidth.bw) AS __ggsql_stat_intensity,
609+
SUM(data.weight * ((EXP(-0.5 * (grid.x - data.val) * (grid.x - data.val) / (bandwidth.bw * bandwidth.bw))) * 0.3989422804014327)) / MIN(bandwidth.bw) AS __ggsql_stat_intensity,
604610
SUM(data.weight) AS __norm
605611
FROM data
606612
INNER JOIN bandwidth ON data.region IS NOT DISTINCT FROM bandwidth.region AND data.category IS NOT DISTINCT FROM bandwidth.category
@@ -666,7 +672,7 @@ mod tests {
666672
let expected = "WITH RECURSIVE
667673
bandwidth AS (
668674
SELECT
669-
0.9 * MIN(SQRT(AVG(x*x) - AVG(x)*AVG(x)), (PERCENTILE_CONT(0.75) WITHIN GROUP (ORDER BY x) - PERCENTILE_CONT(0.25) WITHIN GROUP (ORDER BY x)) / 1.34) * POW(COUNT(*), -0.2) AS bw
675+
0.9 * (SELECT MIN(v) FROM (VALUES (SQRT(AVG(x*x) - AVG(x)*AVG(x))), ((PERCENTILE_CONT(0.75) WITHIN GROUP (ORDER BY x) - PERCENTILE_CONT(0.25) WITHIN GROUP (ORDER BY x)) / 1.34)) AS t(v)) * POW(COUNT(*), -0.2) AS bw
670676
FROM (SELECT x FROM (VALUES (1.0), (2.0), (3.0), (4.0), (5.0)) AS t(x))
671677
WHERE x IS NOT NULL
672678
@@ -696,7 +702,7 @@ mod tests {
696702
let expected = "WITH RECURSIVE
697703
bandwidth AS (
698704
SELECT
699-
0.9 * MIN(SQRT(AVG(x*x) - AVG(x)*AVG(x)), (PERCENTILE_CONT(0.75) WITHIN GROUP (ORDER BY x) - PERCENTILE_CONT(0.25) WITHIN GROUP (ORDER BY x)) / 1.34) * POW(COUNT(*), -0.2) AS bw,
705+
0.9 * (SELECT MIN(v) FROM (VALUES (SQRT(AVG(x*x) - AVG(x)*AVG(x))), ((PERCENTILE_CONT(0.75) WITHIN GROUP (ORDER BY x) - PERCENTILE_CONT(0.25) WITHIN GROUP (ORDER BY x)) / 1.34)) AS t(v)) * POW(COUNT(*), -0.2) AS bw,
700706
region
701707
FROM (SELECT x, region FROM (VALUES (1.0, 'A'), (2.0, 'A'), (3.0, 'B')) AS t(x, region))
702708
WHERE x IS NOT NULL

src/plot/scale/scale_type/continuous.rs

Lines changed: 10 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ use polars::prelude::DataType;
55
use super::{ScaleTypeKind, ScaleTypeTrait, SqlTypeNames, TransformKind, OOB_CENSOR, OOB_SQUISH};
66
use crate::plot::types::{DefaultParam, DefaultParamValue};
77
use crate::plot::{ArrayElement, ParameterValue};
8+
use crate::utils::{scalar_max, scalar_min};
89

910
/// Continuous scale type - for continuous numeric data
1011
#[derive(Debug, Clone, Copy)]
@@ -196,10 +197,12 @@ impl ScaleTypeTrait for Continuous {
196197
"(CASE WHEN {} >= {} AND {} <= {} THEN {} ELSE NULL END)",
197198
column_name, min, column_name, max, column_name
198199
)),
199-
OOB_SQUISH => Some(format!(
200-
"MAX({}, MIN({}, {}))",
201-
min, max, column_name
202-
)),
200+
OOB_SQUISH => {
201+
let min_s = min.to_string();
202+
let max_s = max.to_string();
203+
let inner = scalar_min(&[&max_s, column_name]);
204+
Some(scalar_max(&[&min_s, &inner]))
205+
}
203206
_ => None, // "keep" = no transformation
204207
}
205208
}
@@ -268,9 +271,9 @@ mod tests {
268271

269272
assert!(sql.is_some());
270273
let sql = sql.unwrap();
271-
// Should generate MAX/MIN for squish
272-
assert!(sql.contains("MAX("));
273-
assert!(sql.contains("MIN("));
274+
// Should generate portable scalar MAX/MIN via subquery for squish
275+
assert!(sql.contains("SELECT MAX(v) FROM (VALUES"));
276+
assert!(sql.contains("SELECT MIN(v) FROM (VALUES"));
274277
}
275278

276279
#[test]

src/reader/sqlite.rs

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,6 @@
33
//! Provides a reader for SQLite databases with Polars DataFrame integration.
44
//! Works on both native targets and wasm32-unknown-unknown (via sqlite-wasm-rs).
55
6-
use crate::naming::DATA_PREFIX;
76
use crate::reader::Reader;
87
use crate::{DataFrame, GgsqlError, Result};
98
use chrono::Datelike;

src/utils.rs

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
/// Builds a portable scalar maximum over any number of SQL expressions.
///
/// Stands in for `GREATEST(a, b, ...)`, which is not supported by all
/// backends. Every expression becomes a one-column row of a `VALUES` list and
/// the aggregate `MAX` is taken over those rows, generating:
/// `(SELECT MAX(v) FROM (VALUES (a), (b), ...) AS t(v))`
///
/// An empty `exprs` slice yields the literal `NULL` instead of a `VALUES`
/// clause without rows, which would be a SQL syntax error.
///
/// The expressions are spliced into the result verbatim (only wrapped in
/// parentheses), so callers must pass SQL fragments they already trust.
pub fn scalar_max(exprs: &[&str]) -> String {
    if exprs.is_empty() {
        // Nothing to compare: emit NULL rather than the invalid `(VALUES )`.
        return "NULL".to_string();
    }

    // One parenthesised single-column row per input expression.
    let rows: Vec<String> = exprs.iter().map(|expr| format!("({expr})")).collect();
    format!("(SELECT MAX(v) FROM (VALUES {}) AS t(v))", rows.join(", "))
}
12+
13+
/// Builds a portable scalar minimum over any number of SQL expressions.
///
/// Stands in for `LEAST(a, b, ...)`, which is not supported by all backends.
/// Every expression becomes a one-column row of a `VALUES` list and the
/// aggregate `MIN` is taken over those rows, generating:
/// `(SELECT MIN(v) FROM (VALUES (a), (b), ...) AS t(v))`
///
/// An empty `exprs` slice yields the literal `NULL` instead of a `VALUES`
/// clause without rows, which would be a SQL syntax error.
///
/// The expressions are spliced into the result verbatim (only wrapped in
/// parentheses), so callers must pass SQL fragments they already trust.
pub fn scalar_min(exprs: &[&str]) -> String {
    if exprs.is_empty() {
        // Nothing to compare: emit NULL rather than the invalid `(VALUES )`.
        return "NULL".to_string();
    }

    // One parenthesised single-column row per input expression.
    let rows: Vec<String> = exprs.iter().map(|expr| format!("({expr})")).collect();
    format!("(SELECT MIN(v) FROM (VALUES {}) AS t(v))", rows.join(", "))
}
24+
25+
#[cfg(test)]
mod tests {
    use super::{scalar_max, scalar_min};

    /// Two operands produce a two-row `VALUES` list wrapped in `MAX`.
    #[test]
    fn test_scalar_max_two_args() {
        let generated = scalar_max(&["a", "b"]);
        let expected = "(SELECT MAX(v) FROM (VALUES (a), (b)) AS t(v))";
        assert_eq!(generated, expected);
    }

    /// Two operands produce a two-row `VALUES` list wrapped in `MIN`.
    #[test]
    fn test_scalar_min_two_args() {
        let generated = scalar_min(&["a", "b"]);
        let expected = "(SELECT MIN(v) FROM (VALUES (a), (b)) AS t(v))";
        assert_eq!(generated, expected);
    }

    /// Every additional operand contributes one more row, in input order.
    #[test]
    fn test_scalar_max_three_args() {
        let operands = ["x", "y", "z"];
        let generated = scalar_max(&operands);
        let expected = "(SELECT MAX(v) FROM (VALUES (x), (y), (z)) AS t(v))";
        assert_eq!(generated, expected);
    }
}

0 commit comments

Comments
 (0)