Skip to content

Commit 2eaeedb

Browse files
authored
refactor vegalite (#128)
1 parent 6b88e52 commit 2eaeedb

8 files changed

Lines changed: 3459 additions & 8080 deletions

File tree

src/lib.rs

Lines changed: 10 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -154,9 +154,7 @@ mod integration_tests {
154154

155155
// Data values should be ISO temporal strings
156156
// (DuckDB returns Datetime for DATE + INTERVAL, so we get ISO datetime format)
157-
let data_values = vl_spec["datasets"][naming::GLOBAL_DATA_KEY]
158-
.as_array()
159-
.unwrap();
157+
let data_values = vl_spec["data"]["values"].as_array().unwrap();
160158
let date_str = data_values[0]["date"].as_str().unwrap();
161159
assert!(
162160
date_str.starts_with("2024-01-01"),
@@ -210,9 +208,7 @@ mod integration_tests {
210208
assert_eq!(vl_spec["layer"][0]["encoding"]["x"]["type"], "temporal");
211209

212210
// Data values should be ISO datetime strings
213-
let data_values = vl_spec["datasets"][naming::GLOBAL_DATA_KEY]
214-
.as_array()
215-
.unwrap();
211+
let data_values = vl_spec["data"]["values"].as_array().unwrap();
216212
assert!(data_values[0]["timestamp"]
217213
.as_str()
218214
.unwrap()
@@ -267,9 +263,7 @@ mod integration_tests {
267263
assert_eq!(vl_spec["layer"][0]["encoding"]["y"]["type"], "quantitative");
268264

269265
// Data values should be numbers (not strings!)
270-
let data_values = vl_spec["datasets"][naming::GLOBAL_DATA_KEY]
271-
.as_array()
272-
.unwrap();
266+
let data_values = vl_spec["data"]["values"].as_array().unwrap();
273267
assert_eq!(data_values[0]["int_col"], 1);
274268
assert_eq!(data_values[0]["float_col"], 2.5);
275269
assert_eq!(data_values[0]["bool_col"], true);
@@ -316,9 +310,7 @@ mod integration_tests {
316310
let vl_spec: serde_json::Value = serde_json::from_str(&json_str).unwrap();
317311

318312
// Check null handling in JSON
319-
let data_values = vl_spec["datasets"][naming::GLOBAL_DATA_KEY]
320-
.as_array()
321-
.unwrap();
313+
let data_values = vl_spec["data"]["values"].as_array().unwrap();
322314
assert_eq!(data_values[0]["int_col"], 1);
323315
assert_eq!(data_values[0]["float_col"], 2.5);
324316
assert_eq!(data_values[1]["float_col"], serde_json::Value::Null);
@@ -449,9 +441,7 @@ mod integration_tests {
449441
let vl_spec: serde_json::Value = serde_json::from_str(&json_str).unwrap();
450442

451443
// Check values are preserved
452-
let data_values = vl_spec["datasets"][naming::GLOBAL_DATA_KEY]
453-
.as_array()
454-
.unwrap();
444+
let data_values = vl_spec["data"]["values"].as_array().unwrap();
455445
let small_val = data_values[0]["small"].as_f64().unwrap();
456446
let medium_val = data_values[0]["medium"].as_f64().unwrap();
457447
let large_val = data_values[0]["large"].as_f64().unwrap();
@@ -509,9 +499,7 @@ mod integration_tests {
509499
assert_eq!(vl_spec["layer"][0]["encoding"]["y"]["type"], "quantitative");
510500

511501
// Check values
512-
let data_values = vl_spec["datasets"][naming::GLOBAL_DATA_KEY]
513-
.as_array()
514-
.unwrap();
502+
let data_values = vl_spec["data"]["values"].as_array().unwrap();
515503
assert_eq!(data_values[0]["tiny"], 1);
516504
assert_eq!(data_values[0]["small"], 1000);
517505
assert_eq!(data_values[0]["int"], 1000000);
@@ -594,9 +582,9 @@ mod integration_tests {
594582
"Layer 1 shape should map to prefixed aesthetic-named column"
595583
);
596584

597-
// With unified data approach, all data is in a single global dataset
585+
// With unified data approach, all data is in a single dataset
598586
// Each row has __ggsql_source__ identifying which layer's data it belongs to
599-
let global_data = &vl_spec["datasets"][naming::GLOBAL_DATA_KEY];
587+
let global_data = &vl_spec["data"]["values"];
600588
assert!(
601589
global_data.is_array(),
602590
"Should have unified global data array"
@@ -816,9 +804,9 @@ mod integration_tests {
816804
stroke_col
817805
);
818806

819-
// With unified data approach, all data is in the global dataset
807+
// With unified data approach, all data is in the data.values array
820808
// Verify the stroke value appears in the unified data
821-
let global_data = vl_spec["datasets"][naming::GLOBAL_DATA_KEY]
809+
let global_data = vl_spec["data"]["values"]
822810
.as_array()
823811
.expect("Should have unified global data");
824812

src/reader/mod.rs

Lines changed: 144 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -457,4 +457,148 @@ mod tests {
457457
let result = reader.execute(query);
458458
assert!(result.is_err());
459459
}
460+
461+
#[test]
462+
fn test_binned_fill_legend_renders_threshold_scale() {
463+
// End-to-end test: run a query with a binned `fill` scale through the reader,
// render the result with `VegaLiteWriter`, and inspect the emitted JSON.
464+
// Checks that the `fill` encoding of layer 0 uses a `threshold` scale and that
// its domain, color output and legend values have the expected shape.
// Any failure to connect, execute, render or parse panics via `unwrap()`.
465+
let reader = DuckDBReader::from_connection_string("duckdb://memory").unwrap();
466+
467+
// Create data with values that span the binned range: the four `value` entries
// (15, 35, 55, 85) each lie between a different pair of adjacent breaks.
468+
// Binned scales use FROM [min, max] for range and SETTING breaks => [...] for explicit breaks
469+
let query = r#"
470+
SELECT * FROM (VALUES
471+
(1, 10, 15.0),
472+
(2, 20, 35.0),
473+
(3, 30, 55.0),
474+
(4, 40, 85.0)
475+
) AS t(x, y, value)
476+
VISUALISE
477+
DRAW tile MAPPING x AS x, y AS y, value AS fill
478+
SCALE BINNED fill FROM [0, 100] TO viridis SETTING breaks => [0, 25, 50, 75, 100]
479+
"#;
480+
481+
let spec = reader.execute(query).unwrap();
482+
483+
// Verify spec structure: exactly one layer (the single DRAW clause above)
484+
assert_eq!(spec.plot().layers.len(), 1);
485+
// Note: scales may include auto-generated x/y scales plus the explicit fill scale,
// so only the presence of the fill scale is asserted, not the total scale count
486+
assert!(
487+
spec.plot().find_scale("fill").is_some(),
488+
"Should have a fill scale"
489+
);
490+
491+
// Render to Vega-Lite and parse the output back into a generic JSON value
492+
let writer = VegaLiteWriter::new();
493+
let result = writer.render(&spec).unwrap();
494+
let vl: serde_json::Value = serde_json::from_str(&result).unwrap();
495+
496+
// Verify threshold scale type for fill
// (indexing a missing key yields JSON null, so a missing scale fails the assert below
// rather than panicking here)
497+
let fill_scale = &vl["layer"][0]["encoding"]["fill"]["scale"];
498+
assert_eq!(
499+
fill_scale["type"],
500+
"threshold",
501+
"Binned fill should use threshold scale type. Got: {}",
502+
serde_json::to_string_pretty(&vl["layer"][0]["encoding"]["fill"]).unwrap()
503+
);
504+
505+
// Verify internal breaks as domain (excludes first and last terminals)
506+
// breaks = [0, 25, 50, 75, 100] → domain = [25, 50, 75]
507+
let domain = fill_scale["domain"].as_array().unwrap();
508+
assert_eq!(
509+
domain.len(),
510+
3,
511+
"Threshold domain should have internal breaks only. Got: {:?}",
512+
domain
513+
);
514+
assert_eq!(domain[0], 25.0);
515+
assert_eq!(domain[1], 50.0);
516+
assert_eq!(domain[2], 75.0);
517+
518+
// Verify color output: accept either an explicit `range` array (the viridis palette
519+
// expanded to concrete colors) or a `scheme` equal to "viridis"; the individual
// colors are not checked
520+
assert!(
521+
fill_scale["range"].is_array() || fill_scale["scheme"] == "viridis",
522+
"Should have color range or scheme. Got scale: {}",
523+
serde_json::to_string_pretty(fill_scale).unwrap()
524+
);
525+
526+
// Verify legend values: only the presence and length of the array are checked
527+
// For `fill` alone (single binned legend scale), a gradient legend with all 5 break values is expected
528+
// NOTE(review): the author states symbol legends (multiple binned scales or non-gradient
// aesthetics) would have N-1 values instead; that case is not exercised by this test
529+
let legend_values = &vl["layer"][0]["encoding"]["fill"]["legend"]["values"];
530+
assert!(
531+
legend_values.is_array(),
532+
"Legend should have values array. Got: {}",
533+
serde_json::to_string_pretty(&vl["layer"][0]["encoding"]["fill"]["legend"]).unwrap()
534+
);
535+
let values = legend_values.as_array().unwrap();
536+
assert_eq!(
537+
values.len(),
538+
5,
539+
"Gradient legend should have all 5 break values. Got: {:?}",
540+
values
541+
);
542+
}
543+
544+
#[test]
545+
fn test_binned_color_legend_with_label_mapping() {
546+
// End-to-end test: a binned `color` scale with a RENAMING clause must render as a
// threshold scale whose legend carries a `labelExpr` mentioning the custom labels.
// Any failure to connect, execute, render or parse panics via `unwrap()`.
547+
let reader = DuckDBReader::from_connection_string("duckdb://memory").unwrap();
548+
549+
let query = r#"
550+
SELECT * FROM (VALUES
551+
(1, 10, 20.0),
552+
(2, 20, 60.0),
553+
(3, 30, 90.0)
554+
) AS t(x, y, score)
555+
VISUALISE
556+
DRAW point MAPPING x AS x, y AS y, score AS color
557+
SCALE BINNED color FROM [0, 100] TO ['blue', 'yellow', 'red'] SETTING breaks => [0, 50, 100]
558+
RENAMING 0 => 'Low', 50 => 'High'
559+
"#;
560+
561+
let spec = reader.execute(query).unwrap();
562+
563+
// Render to Vega-Lite and parse the output back into a generic JSON value
let writer = VegaLiteWriter::new();
564+
let result = writer.render(&spec).unwrap();
565+
let vl: serde_json::Value = serde_json::from_str(&result).unwrap();
566+
567+
// Verify threshold scale
568+
// NOTE(review): the author states "color" is mapped to "stroke" for the point geom (not fill);
// the lookup below nevertheless falls back to "fill" when no "stroke" object is present
569+
// Accept both a layered spec (`layer[0].encoding`) and a top-level `encoding`
let encoding = if vl["layer"].is_array() {
570+
&vl["layer"][0]["encoding"]
571+
} else {
572+
&vl["encoding"]
573+
};
574+
// Find the stroke or fill encoding (color maps to one of these); stroke wins if it is an object
575+
let color_encoding = if encoding["stroke"].is_object() {
576+
&encoding["stroke"]
577+
} else {
578+
&encoding["fill"]
579+
};
580+
assert_eq!(
581+
color_encoding["scale"]["type"],
582+
"threshold",
583+
"Binned color should use threshold scale. Got encoding: {}",
584+
serde_json::to_string_pretty(color_encoding).unwrap()
585+
);
586+
587+
// Verify labelExpr exists for custom labels
588+
let legend = &color_encoding["legend"];
589+
assert!(
590+
legend["labelExpr"].is_string(),
591+
"Legend should have labelExpr for custom labels. Got legend: {}",
592+
serde_json::to_string_pretty(legend).unwrap()
593+
);
594+
595+
// The assert above guarantees a string, so the empty-string fallback is never taken here
let label_expr = legend["labelExpr"].as_str().unwrap_or("");
596+
// NOTE(review): per the author, for symbol legends VL generates range-style labels like "0 – 50"
597+
// and our labelExpr should map these to custom range formats; this test only requires
// that at least one of the custom labels ("Low" or "High") appears in the expression
598+
assert!(
599+
label_expr.contains("Low") || label_expr.contains("High"),
600+
"labelExpr should contain custom labels, got: {}",
601+
label_expr
602+
);
603+
}
460604
}

0 commit comments

Comments
 (0)