diff --git a/CHANGELOG.md b/CHANGELOG.md index 594a1e82..7e7a23d8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,15 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] ### In Development — v0.5.0 (Rust Core, `rust-core` branch) +- Group-by keys of a mixed aggregate query are named (and materialized) correctly + (#599) — in `RETURN n.name, count(*)` the `n.name` group-key column was named by + its lowered expression, not the `n.name` header openCypher expects; and a + whole-node group key (`RETURN n, count(*)`) lowered to a bare `var_N` (not a real + column) instead of a node value. The `Aggregate` IR op now carries an optional + per-key output alias (`group_aliases`, parallel to `group_by`), filled by the + binder from the RETURN item's `AS` alias or source text and applied by the + lowerer; and group keys are materialized through the same path as terminal RETURN + items (a bare node var → `_node_struct`). Corpus passing **570 → 575** (+5). - Un-aliased aggregate columns are named by their source text (#598/#599) — an aggregate `RETURN` item without `AS` (`RETURN count(*)`, `min(x)`, `sum(i)`) was named `agg_`, failing the TCK's header-exact comparison. It now uses the diff --git a/crates/gf-ir/src/binder.rs b/crates/gf-ir/src/binder.rs index 15acea79..b0138a7d 100644 --- a/crates/gf-ir/src/binder.rs +++ b/crates/gf-ir/src/binder.rs @@ -1052,6 +1052,7 @@ impl Binder { /// aggregate items become [`AggExpr`]s. fn lower_return_aggregate(&self, items: &[ReturnItem], s: &mut BinderState) { let mut group_by: Vec = Vec::new(); + let mut group_aliases: Vec> = Vec::new(); let mut aggs: Vec = Vec::new(); for (idx, item) in items.iter().enumerate() { if let Some(func) = agg_func_of(&item.expr) { @@ -1085,10 +1086,25 @@ impl Binder { .unwrap_or_else(|| format!("agg_{idx}")); aggs.push(AggExpr { func, arg, alias }); } else { - group_by.push(self.lower_expr(&item.expr, item.span, s)); + // Materialize whole-node / path group keys (`RETURN n, count(*)`) + // the same way terminal RETURN items do — a bare node var rewrites + // to `_node_struct`, not a bare `var_N` (which is not a real column + // and would lower against an unbound reference). Non-node exprs + // (`n.name`) fall through to the same `lower_expr` inside. + group_by.push(self.lower_return_item_expr(&item.expr, item.span, s, true)); + // Name the group-key column by the RETURN item's source text + // (or its `AS` alias) so a mixed `RETURN n.name, count(*)` yields + // the `n.name` header openCypher expects (#599). A bare variable + // (`RETURN n, count(*)`) keeps its lowered name — `display` is the + // var name, which is the right column name anyway. + group_aliases.push(item.alias.clone().or_else(|| item.display.clone())); } } - s.builder.push_op_mut(GraphOp::Aggregate { group_by, aggs }); + s.builder.push_op_mut(GraphOp::Aggregate { + group_by, + group_aliases, + aggs, + }); } // ----------------------------------------------------------------------- @@ -2449,7 +2465,7 @@ mod tests { let ast = parse("MATCH (n:Person) RETURN count(n) AS total").unwrap(); let plan = binder.bind(&ast).unwrap(); let agg = plan.ops.iter().find_map(|op| match op { - GraphOp::Aggregate { group_by, aggs } => Some((group_by, aggs)), + GraphOp::Aggregate { group_by, aggs, .. } => Some((group_by, aggs)), _ => None, }); let (group_by, aggs) = agg.expect("count should emit an Aggregate"); @@ -2479,7 +2495,7 @@ mod tests { .ops .iter() .find_map(|op| match op { - GraphOp::Aggregate { group_by, aggs } => Some((group_by, aggs)), + GraphOp::Aggregate { group_by, aggs, .. } => Some((group_by, aggs)), _ => None, }) .expect("Aggregate"); diff --git a/crates/gf-ir/src/plan.rs b/crates/gf-ir/src/plan.rs index e563b5bb..7339901f 100644 --- a/crates/gf-ir/src/plan.rs +++ b/crates/gf-ir/src/plan.rs @@ -125,6 +125,12 @@ pub enum GraphOp { Aggregate { /// Expressions to group by. group_by: Vec, + /// Output column name for each group-by key (parallel to `group_by`): + /// `Some(name)` aliases that key's result column (openCypher names it by + /// the RETURN item's source text), `None` leaves the lowered expr's + /// default name. Empty or shorter-than-`group_by` ⇒ no aliasing (#599). + #[serde(default, skip_serializing_if = "Vec::is_empty")] + group_aliases: Vec>, /// Aggregate expressions. aggs: Vec, }, @@ -626,6 +632,7 @@ mod tests { }, GraphOp::Aggregate { group_by: vec![e0], + group_aliases: vec![Some("g".into())], aggs: vec![AggExpr { func: AggFunc::Count, arg: None, diff --git a/crates/gf-rel/src/lowerer.rs b/crates/gf-rel/src/lowerer.rs index f1e770f0..6eda93ff 100644 --- a/crates/gf-rel/src/lowerer.rs +++ b/crates/gf-rel/src/lowerer.rs @@ -1120,9 +1120,11 @@ fn lower_relational_op( GraphOp::Project { items, distinct } => { lower_project(items, *distinct, input, expr_lowerer) } - GraphOp::Aggregate { group_by, aggs } => { - lower_aggregate(group_by, aggs, input, exprs, expr_lowerer) - } + GraphOp::Aggregate { + group_by, + group_aliases, + aggs, + } => lower_aggregate(group_by, group_aliases, aggs, input, exprs, expr_lowerer), GraphOp::Sort { keys } => lower_sort(keys, input, expr_lowerer), GraphOp::Limit { count } => lower_limit(*count, input), GraphOp::Skip { count } => lower_skip(*count, input), @@ -1181,13 +1183,25 @@ fn lower_project( fn lower_aggregate( group_by: &[ExprId], + group_aliases: &[Option], aggs: &[AggExpr], input: LogicalPlan, _exprs: &ExprArena, lowerer: &ExprLowerer<'_>, ) -> Result { - let group_exprs: Result, LoweringError> = - group_by.iter().map(|&id| lowerer.lower(id)).collect(); + // Each group key may carry an output-column alias (its RETURN source text, so + // a mixed `RETURN n.name, count(*)` produces the `n.name` header — #599). + let group_exprs: Result, LoweringError> = group_by + .iter() + .enumerate() + .map(|(i, &id)| { + let e = lowerer.lower(id)?; + Ok(match group_aliases.get(i).and_then(Option::as_ref) { + Some(alias) => e.alias(alias), + None => e, + }) + }) + .collect(); let aggr_exprs: Result, LoweringError> = aggs .iter() @@ -2356,6 +2370,7 @@ mod tests { .lower_op( &GraphOp::Aggregate { group_by: vec![], + group_aliases: vec![], aggs: vec![agg], }, empty_base(), diff --git a/docs/reference/tck-compliance.md b/docs/reference/tck-compliance.md index 0525eca6..89bf8ae1 100644 --- a/docs/reference/tck-compliance.md +++ b/docs/reference/tck-compliance.md @@ -21,11 +21,11 @@ So "100%" means **every scenario passes or is an explicit, justified xfail**. | | scenarios (cucumber-expanded) | |---|---| -| **Passing** | **570** | -| Not yet passing (failed / skipped) | 3,327 | +| **Passing** | **575** | +| Not yet passing (failed / skipped) | 3,322 | | **Total** | **3,897** | -The whole corpus runs on every CI build; **570** scenarios currently pass. (Total is cucumber's +The whole corpus runs on every CI build; **575** scenarios currently pass. (Total is cucumber's expanded count — Scenario Outlines × `Examples` rows.) ## How it works — advisory whole-corpus run (the v0.4.0 model) diff --git a/tests/tck/coverage_matrix.json b/tests/tck/coverage_matrix.json index d060684f..fb56f0a0 100644 --- a/tests/tck/coverage_matrix.json +++ b/tests/tck/coverage_matrix.json @@ -2,7 +2,7 @@ "_meta": { "corpus": "openCypher TCK tag 2024.3 (vendored, #874)", "scenarios_total": 3897, - "scenarios_passing": 570, + "scenarios_passing": 575, "model": "advisory-whole-corpus", "note": "Advisory whole-corpus model (matches the v0.4.0 run-up). crates/gf-api/tests/bdd runs EVERY scenario (no @skip-rust filter) via cucumber `run()`, which does not exit on failures — individual unsupported scenarios failing does NOT break CI. The gate is PER-SCENARIO (set-based, not just a count, so a regression cannot be masked by an unrelated xpass): the committed baseline `tests/tck/passing_baseline.txt` lists the scenarios that pass (key `::`). The run FAILS if ANY baseline scenario stops passing (regression), and emits a `TCK XPASS` warning listing scenarios that newly pass. This `scenarios_passing` is informational (= baseline line count); `scenarios_total` is cucumber's expanded count of the whole corpus. Lock in xpass with `BLESS_TCK_BASELINE=1 cargo test -p gf-api --test bdd`. NOTE: `@skip-rust` tags are now vestigial for the Rust run (it runs all); `@skip-node` still gates the Node binding's BDD run." } diff --git a/tests/tck/passing_baseline.txt b/tests/tck/passing_baseline.txt index a8ae69c3..aef0e3a0 100644 --- a/tests/tck/passing_baseline.txt +++ b/tests/tck/passing_baseline.txt @@ -1,8 +1,10 @@ +Aggregation1 - Count:34:[1] Count only non-null values Aggregation1 - Count:53:[2] Counting loop relationships Aggregation2 - Min and Max:130:[9] `max()` over list values Aggregation2 - Min and Max:142:[10] `min()` over list values Aggregation2 - Min and Max:34:[1] `max()` over integers Aggregation2 - Min and Max:46:[2] `min()` over integers +Aggregation3 - Sum:34:[1] Sum only non-null values Aggregation8 - DISTINCT:46:[2] Distinct on null Boolean1 - And logical operations:113:[4] Conjunction is commutative on non-null Boolean1 - And logical operations:149:[6] Conjunction is associative on non-null @@ -404,6 +406,9 @@ Return4 - Column renaming:67:[3] Aliasing expressions Return4 - Column renaming:83:[4] Keeping used expression 1 Return4 - Column renaming:99:[5] Keeping used expression 2 Return5 - Implicit grouping with distinct:50:[2] DISTINCT on nullable values +Return6 - Implicit grouping with aggregates:128:[7] Aggregate on property +Return6 - Implicit grouping with aggregates:214:[12] Counting matches per group +Return6 - Implicit grouping with aggregates:34:[1] Return count aggregation over nodes Return8 - Return clause interoperation with other clauses:34:[1] Return aggregation after With filtering ReturnOrderBy1 - Order by a single variable (correct order of values according to their type):109:[6] ORDER BY DESC should order ints in the expected order ReturnOrderBy1 - Order by a single variable (correct order of values according to their type):124:[7] ORDER BY should order floats in the expected order