Skip to content

Commit b949f97

Browse files
cpsievertclaude
andcommitted
Fix case-sensitive VISUALISE column reference validation
The VISUALISE parser preserves the exact casing from the user's query (e.g., `VISUALISE ROOM_TYPE AS x` stores "ROOM_TYPE"). However, DuckDB lowercases unquoted identifiers in query results, so the schema column is "room_type". Since ggsql quotes column names in generated SQL (making them case-sensitive in DuckDB), this mismatch caused validation errors like: "aesthetic 'x' references non-existent column 'ROOM_TYPE'" Add a normalize_column_names() step early in the prepare_data pipeline that resolves VISUALISE column references to match the actual schema column names using case-insensitive matching. This is reader-agnostic: it normalizes to whatever the reader returns, not specifically to lowercase. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
1 parent ccf4b28 commit b949f97

1 file changed

Lines changed: 105 additions & 0 deletions

File tree

src/execute/mod.rs

Lines changed: 105 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,66 @@ use crate::reader::Reader;
3333
#[cfg(all(feature = "duckdb", test))]
3434
use crate::reader::DuckDBReader;
3535

36+
// =============================================================================
37+
// Column Name Normalization
38+
// =============================================================================
39+
40+
/// Normalize aesthetic column references to match the actual schema column names.
41+
///
42+
/// DuckDB lowercases unquoted identifiers, but users may write column names in
43+
/// any case in VISUALISE/MAPPING clauses. Since ggsql quotes column names in
44+
/// generated SQL (making them case-sensitive), we must normalize references to
45+
/// match the actual column names returned by the database.
46+
///
47+
/// This function walks all aesthetic mappings (global and per-layer) and replaces
48+
/// column names with the matching schema column name (matched case-insensitively).
49+
fn normalize_column_names(specs: &mut [Plot], layer_schemas: &[Schema]) {
50+
for spec in specs {
51+
// Normalize global mappings using the first layer's schema (global mappings
52+
// are merged into all layers, so any layer's schema suffices for normalization)
53+
if let Some(schema) = layer_schemas.first() {
54+
let schema_names: Vec<&str> = schema.iter().map(|c| c.name.as_str()).collect();
55+
for value in spec.global_mappings.aesthetics.values_mut() {
56+
normalize_aesthetic_value(value, &schema_names);
57+
}
58+
}
59+
60+
// Normalize per-layer mappings using each layer's own schema
61+
for (layer, schema) in spec.layers.iter_mut().zip(layer_schemas.iter()) {
62+
let schema_names: Vec<&str> = schema.iter().map(|c| c.name.as_str()).collect();
63+
for value in layer.mappings.aesthetics.values_mut() {
64+
normalize_aesthetic_value(value, &schema_names);
65+
}
66+
}
67+
}
68+
}
69+
70+
/// Normalize a single aesthetic value's column name to match schema casing.
71+
///
72+
/// Only applies case-insensitive normalization when there is no exact match.
73+
/// This preserves correct behavior when the schema contains columns that differ
74+
/// only by case (e.g., via quoted identifiers like `"Foo"` and `"foo"`).
75+
fn normalize_aesthetic_value(value: &mut AestheticValue, schema_names: &[&str]) {
76+
if let AestheticValue::Column { name, .. } = value {
77+
// If there's already an exact match, no normalization needed
78+
if schema_names.contains(&name.as_str()) {
79+
return;
80+
}
81+
82+
// No exact match — try case-insensitive lookup
83+
let name_lower = name.to_lowercase();
84+
let matches: Vec<&&str> = schema_names
85+
.iter()
86+
.filter(|s| s.to_lowercase() == name_lower)
87+
.collect();
88+
89+
// Only normalize if there's exactly one case-insensitive match
90+
if matches.len() == 1 {
91+
*name = matches[0].to_string();
92+
}
93+
}
94+
}
95+
3696
// =============================================================================
3797
// Validation
3898
// =============================================================================
@@ -570,6 +630,12 @@ pub fn prepare_data_with_reader<R: Reader>(query: &str, reader: &R) -> Result<Pr
570630
.map(|ti| schema::type_info_to_schema(ti))
571631
.collect();
572632

633+
// Normalize column names in aesthetic references to match actual schema casing.
634+
// DuckDB lowercases unquoted identifiers, but users may write them in any case
635+
// in VISUALISE/MAPPING. Since generated SQL quotes column names (case-sensitive),
636+
// we must normalize before merging or building queries.
637+
normalize_column_names(&mut specs, &layer_schemas);
638+
573639
// Merge global mappings into layer aesthetics and expand wildcards
574640
// Smart wildcard expansion only creates mappings for columns that exist in schema
575641
merge_global_mappings_into_layers(&mut specs, &layer_schemas);
@@ -1243,4 +1309,43 @@ mod tests {
12431309
// Should have fewer rows than original (binned)
12441310
assert!(layer_df.height() < 100);
12451311
}
1312+
1313+
/// Test that VISUALISE column references are matched case-insensitively.
1314+
///
1315+
/// DuckDB lowercases unquoted identifiers, so `SELECT UPPER_COL` returns a
1316+
/// column named `upper_col`. VISUALISE references like `UPPER_COL AS x` must
1317+
/// be normalized to match the actual schema column name before validation
1318+
/// and query building.
1319+
#[cfg(feature = "duckdb")]
1320+
#[test]
1321+
fn test_case_insensitive_column_references() {
1322+
let reader = DuckDBReader::from_connection_string("duckdb://memory").unwrap();
1323+
1324+
// Create a table with uppercase column names via aliasing
1325+
reader
1326+
.connection()
1327+
.execute(
1328+
"CREATE TABLE case_test AS SELECT 'A' AS category, 10 AS value UNION ALL SELECT 'B', 20",
1329+
duckdb::params![],
1330+
)
1331+
.unwrap();
1332+
1333+
// VISUALISE references use UPPERCASE but DuckDB stores them as lowercase
1334+
let query = r#"
1335+
SELECT category, value FROM case_test
1336+
VISUALISE CATEGORY AS x, VALUE AS y
1337+
DRAW bar
1338+
"#;
1339+
1340+
let result = prepare_data_with_reader(query, &reader);
1341+
assert!(
1342+
result.is_ok(),
1343+
"Case-insensitive column references should work: {:?}",
1344+
result.err()
1345+
);
1346+
1347+
let result = result.unwrap();
1348+
let layer_df = result.data.get(&naming::layer_key(0)).unwrap();
1349+
assert_eq!(layer_df.height(), 2);
1350+
}
12461351
}

0 commit comments

Comments
 (0)