Skip to content

Commit eece4f2

Browse files
cpsievertclaude
andcommitted
Fix case-sensitive VISUALISE column reference validation
The VISUALISE parser preserves the exact casing from the user's query (e.g., `VISUALISE ROOM_TYPE AS x` stores "ROOM_TYPE"). However, DuckDB lowercases unquoted identifiers in query results, so the schema column is "room_type". Since ggsql quotes column names in generated SQL (making them case-sensitive in DuckDB), this mismatch caused validation errors like: "aesthetic 'x' references non-existent column 'ROOM_TYPE'" Add a normalize_column_names() step early in the prepare_data pipeline that resolves VISUALISE column references to match the actual schema column names using case-insensitive matching. This is reader-agnostic: it normalizes to whatever the reader returns, not specifically to lowercase. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
1 parent ccf4b28 commit eece4f2

1 file changed

Lines changed: 96 additions & 0 deletions

File tree

src/execute/mod.rs

Lines changed: 96 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,57 @@ use crate::reader::Reader;
3333
#[cfg(all(feature = "duckdb", test))]
3434
use crate::reader::DuckDBReader;
3535

36+
// =============================================================================
37+
// Column Name Normalization
38+
// =============================================================================
39+
40+
/// Normalize aesthetic column references to match the actual schema column names.
41+
///
42+
/// DuckDB lowercases unquoted identifiers, but users may write column names in
43+
/// any case in VISUALISE/MAPPING clauses. Since ggsql quotes column names in
44+
/// generated SQL (making them case-sensitive), we must normalize references to
45+
/// match the actual column names returned by the database.
46+
///
47+
/// This function walks all aesthetic mappings (global and per-layer) and replaces
48+
/// column names with the matching schema column name (matched case-insensitively).
49+
fn normalize_column_names(specs: &mut [Plot], layer_schemas: &[Schema]) {
50+
for spec in specs {
51+
// Normalize global mappings using the first layer's schema (global mappings
52+
// are merged into all layers, so any layer's schema suffices for normalization)
53+
if let Some(schema) = layer_schemas.first() {
54+
let schema_name_map: HashMap<String, &str> = schema
55+
.iter()
56+
.map(|c| (c.name.to_lowercase(), c.name.as_str()))
57+
.collect();
58+
for value in spec.global_mappings.aesthetics.values_mut() {
59+
normalize_aesthetic_value(value, &schema_name_map);
60+
}
61+
}
62+
63+
// Normalize per-layer mappings using each layer's own schema
64+
for (layer, schema) in spec.layers.iter_mut().zip(layer_schemas.iter()) {
65+
let schema_name_map: HashMap<String, &str> = schema
66+
.iter()
67+
.map(|c| (c.name.to_lowercase(), c.name.as_str()))
68+
.collect();
69+
for value in layer.mappings.aesthetics.values_mut() {
70+
normalize_aesthetic_value(value, &schema_name_map);
71+
}
72+
}
73+
}
74+
}
75+
76+
/// Normalize a single aesthetic value's column name to match schema casing.
77+
fn normalize_aesthetic_value(value: &mut AestheticValue, schema_name_map: &HashMap<String, &str>) {
78+
if let AestheticValue::Column { name, .. } = value {
79+
if let Some(&actual_name) = schema_name_map.get(&name.to_lowercase()) {
80+
if name.as_str() != actual_name {
81+
*name = actual_name.to_string();
82+
}
83+
}
84+
}
85+
}
86+
3687
// =============================================================================
3788
// Validation
3889
// =============================================================================
@@ -570,6 +621,12 @@ pub fn prepare_data_with_reader<R: Reader>(query: &str, reader: &R) -> Result<Pr
570621
.map(|ti| schema::type_info_to_schema(ti))
571622
.collect();
572623

624+
// Normalize column names in aesthetic references to match actual schema casing.
625+
// DuckDB lowercases unquoted identifiers, but users may write them in any case
626+
// in VISUALISE/MAPPING. Since generated SQL quotes column names (case-sensitive),
627+
// we must normalize before merging or building queries.
628+
normalize_column_names(&mut specs, &layer_schemas);
629+
573630
// Merge global mappings into layer aesthetics and expand wildcards
574631
// Smart wildcard expansion only creates mappings for columns that exist in schema
575632
merge_global_mappings_into_layers(&mut specs, &layer_schemas);
@@ -1243,4 +1300,43 @@ mod tests {
12431300
// Should have fewer rows than original (binned)
12441301
assert!(layer_df.height() < 100);
12451302
}
1303+
1304+
/// Test that VISUALISE column references are matched case-insensitively.
1305+
///
1306+
/// DuckDB lowercases unquoted identifiers, so `SELECT UPPER_COL` returns a
1307+
/// column named `upper_col`. VISUALISE references like `UPPER_COL AS x` must
1308+
/// be normalized to match the actual schema column name before validation
1309+
/// and query building.
1310+
#[cfg(feature = "duckdb")]
1311+
#[test]
1312+
fn test_case_insensitive_column_references() {
1313+
let reader = DuckDBReader::from_connection_string("duckdb://memory").unwrap();
1314+
1315+
// Create a table with uppercase column names via aliasing
1316+
reader
1317+
.connection()
1318+
.execute(
1319+
"CREATE TABLE case_test AS SELECT 'A' AS category, 10 AS value UNION ALL SELECT 'B', 20",
1320+
duckdb::params![],
1321+
)
1322+
.unwrap();
1323+
1324+
// VISUALISE references use UPPERCASE but DuckDB stores them as lowercase
1325+
let query = r#"
1326+
SELECT category, value FROM case_test
1327+
VISUALISE CATEGORY AS x, VALUE AS y
1328+
DRAW bar
1329+
"#;
1330+
1331+
let result = prepare_data_with_reader(query, &reader);
1332+
assert!(
1333+
result.is_ok(),
1334+
"Case-insensitive column references should work: {:?}",
1335+
result.err()
1336+
);
1337+
1338+
let result = result.unwrap();
1339+
let layer_df = result.data.get(&naming::layer_key(0)).unwrap();
1340+
assert_eq!(layer_df.height(), 2);
1341+
}
12461342
}

0 commit comments

Comments
 (0)