From db1f0b5b167ad1d21be94ab9d1d6267b24660674 Mon Sep 17 00:00:00 2001 From: David Spencer <1526975+DecisionNerd@users.noreply.github.com> Date: Tue, 23 Jun 2026 13:40:03 -0600 Subject: [PATCH 1/2] feat(ir): lower and execute read-path WITH (#814 slice 1) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A `MATCH … WITH … RETURN` pipeline now projects/renames mid-query and chains; before, `GraphOp::With` hit "operator not yet lowered". Binder (lower_with): - lower each WITH item's expression in the current scope, mint a fresh `out_var` for its alias, then RESET the scope to exactly those aliases (Cypher semantics) — a variable not carried forward is out of scope, so referencing it errors rather than returning a wrong result; - a WITH WHERE / ORDER BY is lowered against the new scope. - Guards (reject, don't mishandle): carrying a whole node/path through WITH (`WITH n` — a node spans multiple columns) and aggregation in WITH. IR: `ProjectItem.out_var` (serde-default) links a WITH alias to the var it introduces, so the lowerer can map it. `None` for terminal RETURN. Lowerer (lower_with_op): project each item to its alias column, register `out_var -> column` in the var-map, then filter on the WITH WHERE over the projected columns. Deferred to later #814 slices: whole-node/path pass-through, aggregation in WITH, write-result RETURN, and mid-statement reads-after-writes. Validated: new e2e (project/rename, WHERE post-projection, scope-reset errors, node-passthrough deferral) — 80 pass; IR goldens updated for `out_var` + the `with_pipeline` golden re-pointed to a scalar WITH; gf-rel/gf-exec/gf-cli green; clippy --workspace -D warnings clean. Co-Authored-By: Claude Opus 4.8 (1M context) --- CHANGELOG.md | 14 +++ crates/gf-api/tests/e2e_baseline.rs | 52 +++++++++++ crates/gf-ir/src/binder.rs | 86 ++++++++++++++++++- crates/gf-ir/src/lib.rs | 6 ++ crates/gf-ir/src/plan.rs | 4 + crates/gf-ir/tests/golden.rs | 6 +- .../ir_goldens/golden__filtered_scan.snap | 3 +- ...named_path_fixed_segment_and_return_p.snap | 12 ++- ...golden__named_path_variable_functions.snap | 10 ++- .../ir_goldens/golden__one_hop_expand.snap | 6 +- .../ir_goldens/golden__optional_match.snap | 6 +- .../ir_goldens/golden__order_by_limit.snap | 3 +- .../tests/ir_goldens/golden__parameter.snap | 3 +- .../ir_goldens/golden__simple_node_scan.snap | 3 +- .../ir_goldens/golden__two_hop_expand.snap | 3 +- .../tests/ir_goldens/golden__unwind.snap | 3 +- .../golden__variable_length_expand.snap | 3 +- .../ir_goldens/golden__with_pipeline.snap | 32 +++---- crates/gf-rel/src/lowerer.rs | 70 +++++++++++++++ 19 files changed, 282 insertions(+), 43 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index fb016fef..d8adafd4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,20 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] ### In Development — v0.5.0 (Rust Core, `rust-core` branch) +- `WITH` (read-path projection) now lowers and executes (#814, slice 1) — a + `MATCH … WITH … RETURN` pipeline projects/renames mid-query and chains, where + before `GraphOp::With` hit "operator not yet lowered". The binder lowers each + `WITH` item's expression in the current scope, mints an `out_var` for its + alias, then **resets the scope** to exactly those aliases (Cypher semantics): + a variable not carried forward is out of scope afterwards and referencing it is + an error, not a silently-wrong result. An optional `WITH … WHERE` filters over + the projected aliases; `ORDER BY`/`SKIP`/`LIMIT`/`DISTINCT` after `WITH` work + via their existing ops. The lowerer projects each item to its alias column and + registers the `out_var` in the var-map so downstream clauses resolve. + Deliberately deferred (rejected with a clear error rather than mishandled): + carrying a **whole node/path** through `WITH` (`WITH n` — a node spans multiple + columns) and **aggregation in `WITH`** (`WITH count(*) …`); write-result + `RETURN` and mid-statement reads-after-writes remain #814's later slices. - Unlabelled `MATCH (n) RETURN n` materializes a whole node value (#889, slice 1) — building on #785 (which required the label in the pattern), a bare match with no label now returns the node with its **real label name and properties**, not diff --git a/crates/gf-api/tests/e2e_baseline.rs b/crates/gf-api/tests/e2e_baseline.rs index b51626ee..b67f80c2 100644 --- a/crates/gf-api/tests/e2e_baseline.rs +++ b/crates/gf-api/tests/e2e_baseline.rs @@ -340,6 +340,58 @@ fn count_aggregate() { assert_eq!(total, 5); } +#[test] +fn with_projects_and_renames() { + // #814: WITH is a mid-pipeline projection; the alias carries to RETURN. + let gf = forge(); + let r = rows(&gf, "MATCH (n:Person) WITH n.name AS nm RETURN nm"); + assert_eq!( + r.stats.rows_produced, 5, + "all five names project through WITH" + ); +} + +#[test] +fn with_where_filters_post_projection() { + // #814: a WITH ... WHERE filters on the projected alias. + let gf = forge(); + let r = rows( + &gf, + "MATCH (n:Person) WITH n.name AS nm WHERE nm = 'Alice' RETURN nm", + ); + assert_eq!( + r.stats.rows_produced, 1, + "only Alice survives the WITH WHERE" + ); +} + +#[test] +fn with_resets_scope_dropping_unprojected_vars() { + // #814: WITH resets the scope to exactly its projected aliases — a variable + // not carried forward is out of scope afterwards, so referencing it is an + // error (not a silently-wrong result). `b` is dropped by `WITH a.name AS x`. + let gf = forge(); + let res = gf.execute("MATCH (a:Person)-[:KNOWS]->(b:Person) WITH a.name AS x RETURN b.name"); + assert!( + res.is_err(), + "`b` is out of scope after WITH a.name AS x; expected an error, got {res:?}" + ); +} + +#[test] +fn with_node_passthrough_is_deferred() { + // #814 deferral: carrying a whole node through WITH (`WITH n`) is not yet + // supported — a node var spans multiple columns. The binder rejects it with + // a clear error rather than producing a wrong result; projecting the node's + // properties (`WITH n.age AS age`) is the supported path. + let gf = forge(); + let res = gf.execute("MATCH (n:Person) WITH n WHERE n.age > 28 RETURN n.name"); + assert!( + res.is_err(), + "WITH of a whole node is deferred; expected an error, got {res:?}" + ); +} + // --------------------------------------------------------------------------- // Read — traversal // --------------------------------------------------------------------------- diff --git a/crates/gf-ir/src/binder.rs b/crates/gf-ir/src/binder.rs index f81665ff..f62d19d8 100644 --- a/crates/gf-ir/src/binder.rs +++ b/crates/gf-ir/src/binder.rs @@ -744,10 +744,86 @@ impl Binder { } fn lower_with(&self, w: &WithClause, s: &mut BinderState) { - // WITH is an intermediate pipeline projection: a bare node var must stay - // a node reference so downstream property access (`n.age`) resolves — do - // NOT materialize it as a whole node value (that is terminal-RETURN only). - let items = self.lower_return_items(&w.items, s, false); + // WITH projects/renames the pipeline and then *resets the scope*: only + // the projected aliases are visible to subsequent clauses (#814). + // + // Each item's expression is lowered in the CURRENT scope (it references + // upstream vars), then a fresh `out_var` is minted for its alias and + // becomes the downstream scope. The lowerer maps `out_var` to the + // projected column. + let mut items: Vec = Vec::with_capacity(w.items.len()); + let mut new_scope: Vec<(String, VarId)> = Vec::new(); + for item in &w.items { + // Reject (rather than silently mishandle) the parts not yet + // supported in WITH — these would otherwise produce wrong results. + if agg_func_of(&item.expr).is_some() { + s.errors.push(BindError::new( + BindErrorKind::UnsupportedClause, + item.span, + "aggregation in WITH is not yet supported (#814 follow-up)".to_string(), + )); + } + if let Expr::Var(VarRef { name, .. }) = &item.expr { + if s.vars + .get(name) + .is_some_and(|v| s.node_vars.contains_key(v)) + { + s.errors.push(BindError::new( + BindErrorKind::UnsupportedClause, + item.span, + format!( + "carrying a whole node (`WITH {name}`) through WITH is not yet \ + supported — project its properties instead (#814 follow-up)" + ), + )); + } + if s.path_vars.contains_key(name) { + s.errors.push(BindError::new( + BindErrorKind::UnsupportedClause, + item.span, + format!( + "carrying a path (`WITH {name}`) through WITH is not yet supported \ + (#814 follow-up)" + ), + )); + } + } + + let expr_id = self.lower_expr(&item.expr, item.span, s); + // Alias: explicit (`expr AS name`), or the variable's own name for a + // bare-variable pass-through (`WITH x`). + let alias = item.alias.clone().or_else(|| match &item.expr { + Expr::Var(VarRef { name, .. }) => Some(name.clone()), + _ => None, + }); + let Some(alias) = alias else { + s.errors.push(BindError::new( + BindErrorKind::UnsupportedClause, + item.span, + "a non-variable WITH item must be aliased (`expr AS name`)".to_string(), + )); + continue; + }; + let out_var = alloc_anon_var(s); + new_scope.push((alias.clone(), out_var)); + items.push(ProjectItem { + expr: expr_id, + alias: Some(alias), + out_var: Some(out_var), + }); + } + + // Scope reset: drop everything, then introduce only the WITH aliases. + s.vars.clear(); + s.node_vars.clear(); + s.edge_vars.clear(); + s.path_vars.clear(); + for (name, v) in new_scope { + s.vars.insert(name, v); + } + + // WITH's WHERE / ORDER BY reference the projected aliases — lower them + // against the new scope. let where_pred = w .where_clause .as_ref() @@ -860,6 +936,8 @@ impl Binder { } _ => None, }), + // RETURN items are terminal — they introduce no downstream scope. + out_var: None, }) .collect() } diff --git a/crates/gf-ir/src/lib.rs b/crates/gf-ir/src/lib.rs index 1e9945e2..b7ed486b 100644 --- a/crates/gf-ir/src/lib.rs +++ b/crates/gf-ir/src/lib.rs @@ -151,6 +151,12 @@ pub struct ProjectItem { pub expr: ExprId, /// Optional output column name. pub alias: Option, + /// For a `WITH` item only: the variable this item introduces into the + /// downstream scope (#814). The lowerer maps it to the projected column so a + /// later clause referencing the alias resolves. `None` for terminal `RETURN` + /// items, which introduce no scope. + #[serde(default)] + pub out_var: Option, } /// An aggregation expression inside an AGGREGATE operator. diff --git a/crates/gf-ir/src/plan.rs b/crates/gf-ir/src/plan.rs index 751d1f27..e563b5bb 100644 --- a/crates/gf-ir/src/plan.rs +++ b/crates/gf-ir/src/plan.rs @@ -429,6 +429,7 @@ mod tests { items: vec![ProjectItem { expr: var_r, alias: Some("n".into()), + out_var: None, }], distinct: false, }, @@ -485,6 +486,7 @@ mod tests { items: vec![ProjectItem { expr: ref_b, alias: Some("b".into()), + out_var: None, }], distinct: false, }, @@ -618,6 +620,7 @@ mod tests { items: vec![ProjectItem { expr: e0, alias: None, + out_var: None, }], distinct: true, }, @@ -682,6 +685,7 @@ mod tests { items: vec![ProjectItem { expr: e0, alias: Some("x".into()), + out_var: None, }], where_predicate: Some(e0), }, diff --git a/crates/gf-ir/tests/golden.rs b/crates/gf-ir/tests/golden.rs index b21db6de..df6f4de3 100644 --- a/crates/gf-ir/tests/golden.rs +++ b/crates/gf-ir/tests/golden.rs @@ -178,8 +178,10 @@ fn order_by_limit() { #[test] fn with_pipeline() { - // WITH introduces a pipeline; return n directly to keep binder scope simple. - let plan = bind_query("MATCH (n:Person) WITH n WHERE n.age > 30 RETURN n.name"); + // WITH projects/renames mid-pipeline and resets the scope to its aliases + // (#814): `nm` is introduced (its `out_var`) and the WHERE + RETURN resolve + // against it. Carrying a whole node (`WITH n`) is deferred — #814 follow-up. + let plan = bind_query("MATCH (n:Person) WITH n.name AS nm WHERE nm = 'Alice' RETURN nm"); golden_settings().bind(|| { insta::assert_json_snapshot!("with_pipeline", plan); }); diff --git a/crates/gf-ir/tests/ir_goldens/golden__filtered_scan.snap b/crates/gf-ir/tests/ir_goldens/golden__filtered_scan.snap index 1d2b1205..e8594663 100644 --- a/crates/gf-ir/tests/ir_goldens/golden__filtered_scan.snap +++ b/crates/gf-ir/tests/ir_goldens/golden__filtered_scan.snap @@ -28,7 +28,8 @@ source: crates/gf-ir/tests/golden.rs "items": [ { "expr": 5, - "alias": null + "alias": null, + "out_var": null } ], "distinct": false diff --git a/crates/gf-ir/tests/ir_goldens/golden__named_path_fixed_segment_and_return_p.snap b/crates/gf-ir/tests/ir_goldens/golden__named_path_fixed_segment_and_return_p.snap index 8cf9fa1d..aafd73f1 100644 --- a/crates/gf-ir/tests/ir_goldens/golden__named_path_fixed_segment_and_return_p.snap +++ b/crates/gf-ir/tests/ir_goldens/golden__named_path_fixed_segment_and_return_p.snap @@ -40,19 +40,23 @@ source: crates/gf-ir/tests/golden.rs "items": [ { "expr": 1, - "alias": "hops" + "alias": "hops", + "out_var": null }, { "expr": 5, - "alias": "ns" + "alias": "ns", + "out_var": null }, { "expr": 8, - "alias": "rels" + "alias": "rels", + "out_var": null }, { "expr": 16, - "alias": "p" + "alias": "p", + "out_var": null } ], "distinct": false diff --git a/crates/gf-ir/tests/ir_goldens/golden__named_path_variable_functions.snap b/crates/gf-ir/tests/ir_goldens/golden__named_path_variable_functions.snap index 61a17034..7fab2beb 100644 --- a/crates/gf-ir/tests/ir_goldens/golden__named_path_variable_functions.snap +++ b/crates/gf-ir/tests/ir_goldens/golden__named_path_variable_functions.snap @@ -1,6 +1,5 @@ --- source: crates/gf-ir/tests/golden.rs -assertion_line: 133 --- { "ir_version": { @@ -41,15 +40,18 @@ assertion_line: 133 "items": [ { "expr": 1, - "alias": "hops" + "alias": "hops", + "out_var": null }, { "expr": 2, - "alias": "rels" + "alias": "rels", + "out_var": null }, { "expr": 5, - "alias": "ns" + "alias": "ns", + "out_var": null } ], "distinct": false diff --git a/crates/gf-ir/tests/ir_goldens/golden__one_hop_expand.snap b/crates/gf-ir/tests/ir_goldens/golden__one_hop_expand.snap index 5aa1fbb7..83bfd88a 100644 --- a/crates/gf-ir/tests/ir_goldens/golden__one_hop_expand.snap +++ b/crates/gf-ir/tests/ir_goldens/golden__one_hop_expand.snap @@ -40,11 +40,13 @@ source: crates/gf-ir/tests/golden.rs "items": [ { "expr": 1, - "alias": null + "alias": null, + "out_var": null }, { "expr": 3, - "alias": null + "alias": null, + "out_var": null } ], "distinct": false diff --git a/crates/gf-ir/tests/ir_goldens/golden__optional_match.snap b/crates/gf-ir/tests/ir_goldens/golden__optional_match.snap index 736a0404..76d26bdb 100644 --- a/crates/gf-ir/tests/ir_goldens/golden__optional_match.snap +++ b/crates/gf-ir/tests/ir_goldens/golden__optional_match.snap @@ -66,11 +66,13 @@ source: crates/gf-ir/tests/golden.rs "items": [ { "expr": 1, - "alias": null + "alias": null, + "out_var": null }, { "expr": 3, - "alias": null + "alias": null, + "out_var": null } ], "distinct": false diff --git a/crates/gf-ir/tests/ir_goldens/golden__order_by_limit.snap b/crates/gf-ir/tests/ir_goldens/golden__order_by_limit.snap index 979a8fac..83b98e8d 100644 --- a/crates/gf-ir/tests/ir_goldens/golden__order_by_limit.snap +++ b/crates/gf-ir/tests/ir_goldens/golden__order_by_limit.snap @@ -23,7 +23,8 @@ source: crates/gf-ir/tests/golden.rs "items": [ { "expr": 1, - "alias": null + "alias": null, + "out_var": null } ], "distinct": false diff --git a/crates/gf-ir/tests/ir_goldens/golden__parameter.snap b/crates/gf-ir/tests/ir_goldens/golden__parameter.snap index 7c868788..cf344d09 100644 --- a/crates/gf-ir/tests/ir_goldens/golden__parameter.snap +++ b/crates/gf-ir/tests/ir_goldens/golden__parameter.snap @@ -28,7 +28,8 @@ source: crates/gf-ir/tests/golden.rs "items": [ { "expr": 5, - "alias": null + "alias": null, + "out_var": null } ], "distinct": false diff --git a/crates/gf-ir/tests/ir_goldens/golden__simple_node_scan.snap b/crates/gf-ir/tests/ir_goldens/golden__simple_node_scan.snap index db399781..f8fb60de 100644 --- a/crates/gf-ir/tests/ir_goldens/golden__simple_node_scan.snap +++ b/crates/gf-ir/tests/ir_goldens/golden__simple_node_scan.snap @@ -23,7 +23,8 @@ source: crates/gf-ir/tests/golden.rs "items": [ { "expr": 1, - "alias": null + "alias": null, + "out_var": null } ], "distinct": false diff --git a/crates/gf-ir/tests/ir_goldens/golden__two_hop_expand.snap b/crates/gf-ir/tests/ir_goldens/golden__two_hop_expand.snap index 28c6cbf4..7e13699e 100644 --- a/crates/gf-ir/tests/ir_goldens/golden__two_hop_expand.snap +++ b/crates/gf-ir/tests/ir_goldens/golden__two_hop_expand.snap @@ -57,7 +57,8 @@ source: crates/gf-ir/tests/golden.rs "items": [ { "expr": 1, - "alias": null + "alias": null, + "out_var": null } ], "distinct": false diff --git a/crates/gf-ir/tests/ir_goldens/golden__unwind.snap b/crates/gf-ir/tests/ir_goldens/golden__unwind.snap index 328cb8a2..85f388a4 100644 --- a/crates/gf-ir/tests/ir_goldens/golden__unwind.snap +++ b/crates/gf-ir/tests/ir_goldens/golden__unwind.snap @@ -23,7 +23,8 @@ source: crates/gf-ir/tests/golden.rs "items": [ { "expr": 4, - "alias": null + "alias": null, + "out_var": null } ], "distinct": false diff --git a/crates/gf-ir/tests/ir_goldens/golden__variable_length_expand.snap b/crates/gf-ir/tests/ir_goldens/golden__variable_length_expand.snap index ce807dda..95d59b5b 100644 --- a/crates/gf-ir/tests/ir_goldens/golden__variable_length_expand.snap +++ b/crates/gf-ir/tests/ir_goldens/golden__variable_length_expand.snap @@ -40,7 +40,8 @@ source: crates/gf-ir/tests/golden.rs "items": [ { "expr": 1, - "alias": null + "alias": null, + "out_var": null } ], "distinct": false diff --git a/crates/gf-ir/tests/ir_goldens/golden__with_pipeline.snap b/crates/gf-ir/tests/ir_goldens/golden__with_pipeline.snap index d9326252..33a452c6 100644 --- a/crates/gf-ir/tests/ir_goldens/golden__with_pipeline.snap +++ b/crates/gf-ir/tests/ir_goldens/golden__with_pipeline.snap @@ -22,8 +22,9 @@ source: crates/gf-ir/tests/golden.rs "With": { "items": [ { - "expr": 0, - "alias": null + "expr": 1, + "alias": "nm", + "out_var": 1 } ], "where_predicate": 4 @@ -33,8 +34,9 @@ source: crates/gf-ir/tests/golden.rs "Project": { "items": [ { - "expr": 6, - "alias": null + "expr": 5, + "alias": null, + "out_var": null } ], "distinct": false @@ -43,39 +45,33 @@ source: crates/gf-ir/tests/golden.rs ], "exprs": { "nodes": [ - { - "VarRef": 0 - }, { "VarRef": 0 }, { "PropertyAccess": { - "base": 1, + "base": 0, "prop": 0 } }, + { + "VarRef": 1 + }, { "Literal": { - "type": "Int", - "value": 30 + "type": "Str", + "value": "Alice" } }, { "BinaryOp": { - "op": "Gt", + "op": "Eq", "left": 2, "right": 3 } }, { - "VarRef": 0 - }, - { - "PropertyAccess": { - "base": 5, - "prop": 1 - } + "VarRef": 1 } ] } diff --git a/crates/gf-rel/src/lowerer.rs b/crates/gf-rel/src/lowerer.rs index 75b2f7c9..768ffae0 100644 --- a/crates/gf-rel/src/lowerer.rs +++ b/crates/gf-rel/src/lowerer.rs @@ -557,6 +557,9 @@ impl<'a> GraphPlanLowerer<'a> { /// Internal helper: creates a fresh [`ExprLowerer`] per op so that /// `var_map` can be borrowed mutably for scan operators in the same loop. + // A flat dispatch over every `GraphOp` kind — long by nature, like the + // binder's `lower_expr`. + #[allow(clippy::too_many_lines)] fn lower_op_with_arena( &self, op: &GraphOp, @@ -694,6 +697,12 @@ impl<'a> GraphPlanLowerer<'a> { } GraphOp::Set { items } => return self.lower_set_op(items, input, exprs, var_map), GraphOp::Remove { items } => return self.lower_remove_op(items, input), + GraphOp::With { + items, + where_predicate, + } => { + return self.lower_with_op(items, *where_predicate, input, exprs, var_map); + } _ => {} } // Relational ops need the expression lowerer; var_map is immutable here. @@ -702,6 +711,64 @@ impl<'a> GraphPlanLowerer<'a> { lower_relational_op(op, input, exprs, &expr_lowerer) } + /// Lower a [`GraphOp::With`] (#814): a mid-pipeline projection that also + /// *introduces a new scope*. Each item is projected to its alias column; the + /// item's `out_var` is then registered in the [`VarMap`] so a later clause + /// referencing the alias resolves. An optional `WHERE` filters over the + /// projected columns (it references the new aliases, so it lowers after the + /// projection with the updated `var_map`). The binder resets the scope to + /// exactly these aliases, so downstream IR only references the new `out_var`s. + fn lower_with_op( + &self, + items: &[ProjectItem], + where_predicate: Option, + input: LogicalPlan, + exprs: &ExprArena, + var_map: &mut VarMap, + ) -> Result { + // 1. Project the items against the CURRENT scope (their expressions + // reference upstream vars). Every WITH item carries an alias. + let projected = { + let lowerer = self.expr_lowerer(exprs, var_map); + let select: Result, LoweringError> = items + .iter() + .map(|item| { + let e = lowerer.lower(item.expr)?; + let name = item.alias.as_deref().ok_or_else(|| { + LoweringError::UnsupportedExpr( + "WITH item without an alias (binder should reject)".into(), + ) + })?; + Ok(e.alias(name)) + }) + .collect(); + LogicalPlanBuilder::from(input) + .project(select?) + .map_err(|e| LoweringError::UnsupportedExpr(e.to_string()))? + .build() + .map_err(|e| LoweringError::UnsupportedExpr(e.to_string()))? + }; + // 2. Introduce the WITH aliases: each item's out_var -> its column. + for item in items { + if let (Some(v), Some(name)) = (item.out_var, item.alias.as_deref()) { + var_map.insert(v, name); + } + } + // 3. Apply WITH's WHERE over the projected columns (updated var_map). + match where_predicate { + Some(pred) => { + let lowerer = self.expr_lowerer(exprs, var_map); + let df_pred = lowerer.lower(pred)?; + LogicalPlanBuilder::from(projected) + .filter(df_pred) + .map_err(|e| LoweringError::UnsupportedExpr(e.to_string()))? + .build() + .map_err(|e| LoweringError::UnsupportedExpr(e.to_string())) + } + None => Ok(projected), + } + } + /// Lower a [`GraphOp::Create`] into a self-contained [`GraphCreateNode`] /// wrapped as a DataFusion [`Extension`]. /// @@ -2142,6 +2209,7 @@ mod tests { let item = ProjectItem { expr: lit, alias: Some("x".into()), + out_var: None, }; let mut var_map = VarMap::new(); let expr_lowerer = ExprLowerer::new(&arena, None, &var_map); @@ -2175,6 +2243,7 @@ mod tests { let item = ProjectItem { expr: lit, alias: None, + out_var: None, }; let mut var_map = VarMap::new(); let expr_lowerer = ExprLowerer::new(&arena, None, &var_map); @@ -2366,6 +2435,7 @@ mod tests { items: vec![ProjectItem { expr: col_expr, alias: Some("name".into()), + out_var: None, }], distinct: false, }) From 231809a80add862e2464c08d022fbe28d121d12c Mon Sep 17 00:00:00 2001 From: David Spencer <1526975+DecisionNerd@users.noreply.github.com> Date: Tue, 23 Jun 2026 13:58:27 -0600 Subject: [PATCH 2/2] fix(ir): recursive WITH-aggregate guard + var-map scope reset (#814, CodeRabbit) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two review fixes: - Recursive aggregate rejection: `expr_contains_aggregate` walks compound expressions (BinaryOp/UnaryOp/Parenthesized/FunctionCall args/Property/ List), so `WITH count(n) + 1 AS c` is rejected at bind time, not only a bare top-level `WITH count(n) AS c`. - Var-map scope reset: `lower_with_op` now clears the VarMap before installing the WITH aliases, so a dropped pre-WITH VarId can't resolve through a later lowering path — mirroring the binder's scope reset. Added `VarMap::clear`. New e2e `with_nested_aggregate_is_rejected`; all WITH e2e + IR goldens green; clippy --workspace -D warnings clean. Co-Authored-By: Claude Opus 4.8 (1M context) --- crates/gf-api/tests/e2e_baseline.rs | 13 +++++++++++++ crates/gf-ir/src/binder.rs | 25 ++++++++++++++++++++++++- crates/gf-rel/src/expr.rs | 6 ++++++ crates/gf-rel/src/lowerer.rs | 6 +++++- 4 files changed, 48 insertions(+), 2 deletions(-) diff --git a/crates/gf-api/tests/e2e_baseline.rs b/crates/gf-api/tests/e2e_baseline.rs index b67f80c2..f6c240eb 100644 --- a/crates/gf-api/tests/e2e_baseline.rs +++ b/crates/gf-api/tests/e2e_baseline.rs @@ -392,6 +392,19 @@ fn with_node_passthrough_is_deferred() { ); } +#[test] +fn with_nested_aggregate_is_rejected() { + // #814 deferral: aggregation in WITH is not yet supported. The guard is + // recursive, so a nested aggregate (`count(n) + 1`) is rejected too, not + // only a bare top-level `WITH count(n) AS c`. + let gf = forge(); + let res = gf.execute("MATCH (n:Person) WITH count(n) + 1 AS c RETURN c"); + assert!( + res.is_err(), + "a nested aggregate in WITH should be rejected, got {res:?}" + ); +} + // --------------------------------------------------------------------------- // Read — traversal // --------------------------------------------------------------------------- diff --git a/crates/gf-ir/src/binder.rs b/crates/gf-ir/src/binder.rs index f62d19d8..7fb84db3 100644 --- a/crates/gf-ir/src/binder.rs +++ b/crates/gf-ir/src/binder.rs @@ -756,7 +756,9 @@ impl Binder { for item in &w.items { // Reject (rather than silently mishandle) the parts not yet // supported in WITH — these would otherwise produce wrong results. - if agg_func_of(&item.expr).is_some() { + // The aggregate check is recursive: `WITH count(n) + 1 AS c` must be + // caught too, not only a top-level `WITH count(n) AS c`. + if expr_contains_aggregate(&item.expr) { s.errors.push(BindError::new( BindErrorKind::UnsupportedClause, item.span, @@ -1542,6 +1544,27 @@ fn agg_func_of(expr: &Expr) -> Option { } } +/// Whether `expr` contains an aggregate function *anywhere* — `count(n) + 1`, +/// `(sum(n.x))`, `[max(n.a)]`, etc., not only a bare top-level call. Used to +/// reject aggregation inside a WITH item, which is not yet supported (#814). +fn expr_contains_aggregate(expr: &Expr) -> bool { + if agg_func_of(expr).is_some() { + return true; + } + match expr { + Expr::BinaryOp(b) => expr_contains_aggregate(&b.left) || expr_contains_aggregate(&b.right), + Expr::UnaryOp(u) => expr_contains_aggregate(&u.expr), + Expr::Parenthesized { inner, .. } => expr_contains_aggregate(inner), + Expr::FunctionCall(c) => c.args.iter().any(expr_contains_aggregate), + Expr::Property(p) => expr_contains_aggregate(&p.object), + Expr::List(l) => l.elements.iter().any(expr_contains_aggregate), + // Other variants (Map, Case, comprehensions, …) are not recursed into; + // an aggregate slipping through still fails at lowering (UnknownFunction), + // so the worst case is a less specific error, never a wrong result. + _ => false, + } +} + fn ensure_var(name: Option<&String>, s: &mut BinderState) -> VarId { if let Some(n) = name { ensure_var_name(n, s) diff --git a/crates/gf-rel/src/expr.rs b/crates/gf-rel/src/expr.rs index ff13edef..3758bda7 100644 --- a/crates/gf-rel/src/expr.rs +++ b/crates/gf-rel/src/expr.rs @@ -46,6 +46,12 @@ impl VarMap { self.0.insert(var.0, col_name.into()); } + /// Drops every registered variable. Used to install a fresh `WITH` scope so + /// pre-`WITH` variables stop resolving (mirrors the binder's scope reset). + pub fn clear(&mut self) { + self.0.clear(); + } + /// Returns the column name for `var`, or `None` if it has not been registered. #[must_use] pub fn get(&self, var: VarId) -> Option<&str> { diff --git a/crates/gf-rel/src/lowerer.rs b/crates/gf-rel/src/lowerer.rs index 768ffae0..0cd89007 100644 --- a/crates/gf-rel/src/lowerer.rs +++ b/crates/gf-rel/src/lowerer.rs @@ -748,7 +748,11 @@ impl<'a> GraphPlanLowerer<'a> { .build() .map_err(|e| LoweringError::UnsupportedExpr(e.to_string()))? }; - // 2. Introduce the WITH aliases: each item's out_var -> its column. + // 2. Install the new scope: WITH resets it to exactly its aliases, so + // drop every pre-WITH variable (mirroring the binder) before mapping + // each item's out_var to its projected column. This stops a dropped + // VarId from resolving through a later lowering path. + var_map.clear(); for item in items { if let (Some(v), Some(name)) = (item.out_var, item.alias.as_deref()) { var_map.insert(v, name);