From b2f4e3398fd7e8cc182f888c5b7767c22833a4dc Mon Sep 17 00:00:00 2001 From: David Spencer <1526975+DecisionNerd@users.noreply.github.com> Date: Tue, 23 Jun 2026 15:31:28 -0600 Subject: [PATCH] feat(ir): WITH forwards a whole node (#814 slice 2) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `MATCH (n) WITH n …` now carries a node through the WITH scope reset instead of erroring (the prior #814 deferral). A node variable spans multiple columns (var_.node_uuid + properties), so: - Lowerer (lower_with_op): a WITH item that is a bare VarRef to a node var (one with a NodeScan shape) forwards ALL input columns under that var's qualifier unchanged, rather than projecting a single column. The scope reset then maps the var to its column prefix. - Binder (lower_with): a no-rename node item (`WITH n` / `WITH n AS n`) reuses the node's var (no fresh mint) and re-registers it in node_vars after the reset, so a later `RETURN n` materializes a whole node value (#785) and `n.x` resolves. Still deferred (clear errors): renaming a node (`WITH n AS m`, needs column re-qualification) and forwarding a relationship/path. Validated: new e2e `with_forwards_whole_node_then_property` and `with_forwards_whole_node_then_node_value` (84 pass); IR/rel goldens unchanged (with_pipeline uses scalar WITH); clippy --workspace -D warnings + fmt clean. Co-Authored-By: Claude Opus 4.8 (1M context) --- CHANGELOG.md | 8 +++ crates/gf-api/tests/e2e_baseline.rs | 44 +++++++++++++---- crates/gf-ir/src/binder.rs | 70 +++++++++++++++++++++----- crates/gf-rel/src/lowerer.rs | 77 +++++++++++++++++++---------- 4 files changed, 153 insertions(+), 46 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index f2c02fa3..c2b9cb92 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,14 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] ### In Development — v0.5.0 (Rust Core, `rust-core` branch) +- `WITH` forwards a whole node (#814, slice 2) — `MATCH (n) WITH n …` now carries + the node through the scope reset instead of erroring. A node variable spans + several columns (`var_.node_uuid` + properties), so the lowerer forwards all + of them under the var's qualifier (rather than projecting a single column), and + the binder reuses the var + re-registers it as a node so a later `RETURN n` + still materializes a whole node value and `n.x` resolves. Still deferred: + renaming a node through `WITH` (`WITH n AS m`, needs column re-qualification) + and forwarding a relationship/path — both rejected with a clear error. - Multi-pattern MATCH + an anonymous-endpoint correctness fix (#598 read-path) — two related fixes: - **Comma-separated patterns are a cross product.** `MATCH (a), (b)` now diff --git a/crates/gf-api/tests/e2e_baseline.rs b/crates/gf-api/tests/e2e_baseline.rs index de257b7a..ca3bc16e 100644 --- a/crates/gf-api/tests/e2e_baseline.rs +++ b/crates/gf-api/tests/e2e_baseline.rs @@ -445,17 +445,43 @@ fn with_resets_scope_dropping_unprojected_vars() { } #[test] -fn with_node_passthrough_is_deferred() { - // #814 deferral: carrying a whole node through WITH (`WITH n`) is not yet - // supported — a node var spans multiple columns. The binder rejects it with - // a clear error rather than producing a wrong result; projecting the node's - // properties (`WITH n.age AS age`) is the supported path. +fn with_forwards_whole_node_then_property() { + // #814: a whole node is carried through WITH (`WITH n`) — all its columns + // forward, so a later WHERE / property access still resolves. Alice (30) and + // Carol (35) are > 28. let gf = forge(); - let res = gf.execute("MATCH (n:Person) WITH n WHERE n.age > 28 RETURN n.name"); - assert!( - res.is_err(), - "WITH of a whole node is deferred; expected an error, got {res:?}" + let r = rows( + &gf, + "MATCH (n:Person) WITH n WHERE n.age > 28 RETURN n.name", ); + assert_eq!(r.stats.rows_produced, 2, "Alice (30) and Carol (35)"); +} + +#[test] +fn with_forwards_whole_node_then_node_value() { + // #814 + #785: a node forwarded through WITH still materializes as a whole + // node value in a terminal RETURN (label + properties). + let gf = forge(); + let r = rows(&gf, "MATCH (n:Person {name:'Alice'}) WITH n RETURN n"); + assert_eq!(r.stats.rows_produced, 1); + let batch = r + .batches + .iter() + .find(|b| b.num_rows() > 0) + .expect("a non-empty result batch"); + let node = batch + .column_by_name("n") + .expect("result column `n`") + .as_any() + .downcast_ref::() + .expect("node-value Struct after WITH"); + let name = node + .column_by_name("name") + .expect("name property") + .as_any() + .downcast_ref::() + .expect("Utf8"); + assert_eq!(name.value(0), "Alice"); } #[test] diff --git a/crates/gf-ir/src/binder.rs b/crates/gf-ir/src/binder.rs index d1ad6714..3621c5de 100644 --- a/crates/gf-ir/src/binder.rs +++ b/crates/gf-ir/src/binder.rs @@ -752,6 +752,9 @@ impl Binder { s.builder.push_op_mut(GraphOp::Filter { predicate: pred }); } + // Item classification (aggregate/node-forward/path/scalar) + scope reset + + // ORDER BY/SKIP/LIMIT make this long but linear, like the RETURN lowering. + #[allow(clippy::too_many_lines)] fn lower_with(&self, w: &WithClause, s: &mut BinderState) { // WITH projects/renames the pipeline and then *resets the scope*: only // the projected aliases are visible to subsequent clauses (#814). @@ -762,6 +765,10 @@ impl Binder { // projected column. let mut items: Vec = Vec::with_capacity(w.items.len()); let mut new_scope: Vec<(String, VarId)> = Vec::new(); + // Node variables forwarded WHOLE (`WITH n`): (out_var, label) to restore + // into `node_vars` after the scope reset, so a later `RETURN n` / `n.x` + // still treats them as nodes. + let mut forwarded_nodes: Vec<(VarId, Option)> = Vec::new(); for item in &w.items { // Reject (rather than silently mishandle) the parts not yet // supported in WITH — these would otherwise produce wrong results. @@ -774,35 +781,72 @@ impl Binder { "aggregation in WITH is not yet supported (#814 follow-up)".to_string(), )); } - if let Expr::Var(VarRef { name, .. }) = &item.expr { - if s.vars + + // A bare node variable forwarded WITHOUT a rename (`WITH n` or + // `WITH n AS n`) is carried through whole: reuse its var so its + // columns (and a downstream `RETURN n`) resolve. Renaming a node + // (`WITH n AS m`) and forwarding a path are still deferred — they + // need column re-qualification / path-struct carry. + let node_forward = match &item.expr { + Expr::Var(VarRef { name, .. }) => s + .vars .get(name) - .is_some_and(|v| s.node_vars.contains_key(v)) - { + .copied() + .filter(|v| s.node_vars.contains_key(v)) + .filter(|_| item.alias.as_deref().is_none_or(|a| a == name)), + _ => None, + }; + if let Expr::Var(VarRef { name, .. }) = &item.expr { + if s.path_vars.contains_key(name) { s.errors.push(BindError::new( BindErrorKind::UnsupportedClause, item.span, format!( - "carrying a whole node (`WITH {name}`) through WITH is not yet \ - supported — project its properties instead (#814 follow-up)" + "carrying a path (`WITH {name}`) through WITH is not yet supported \ + (#814 follow-up)" ), )); } - if s.path_vars.contains_key(name) { + if node_forward.is_none() + && item.alias.as_deref().is_some_and(|a| a != name) + && s.vars + .get(name) + .is_some_and(|v| s.node_vars.contains_key(v)) + { s.errors.push(BindError::new( BindErrorKind::UnsupportedClause, item.span, format!( - "carrying a path (`WITH {name}`) through WITH is not yet supported \ - (#814 follow-up)" + "renaming a whole node through WITH (`WITH {name} AS …`) is not yet \ + supported — project its properties instead (#814 follow-up)" ), )); } } let expr_id = self.lower_expr(&item.expr, item.span, s); + + if let Some(v) = node_forward { + // Forward the whole node: reuse its var (no fresh mint), keep its + // label for `node_vars`. The lowerer projects all of var_'s + // columns for this item. + let label = s.node_vars.get(&v).cloned().flatten(); + let name = match &item.expr { + Expr::Var(VarRef { name, .. }) => name.clone(), + _ => unreachable!("node_forward is Some only for a Var item"), + }; + forwarded_nodes.push((v, label)); + new_scope.push((name.clone(), v)); + items.push(ProjectItem { + expr: expr_id, + alias: Some(name), + out_var: Some(v), + }); + continue; + } + // Alias: explicit (`expr AS name`), or the variable's own name for a - // bare-variable pass-through (`WITH x`). + // bare scalar-variable pass-through (`WITH x`). let alias = item.alias.clone().or_else(|| match &item.expr { Expr::Var(VarRef { name, .. }) => Some(name.clone()), _ => None, @@ -824,7 +868,8 @@ impl Binder { }); } - // Scope reset: drop everything, then introduce only the WITH aliases. + // Scope reset: drop everything, then introduce only the WITH aliases + // (and re-register forwarded whole-node vars as nodes). s.vars.clear(); s.node_vars.clear(); s.edge_vars.clear(); @@ -832,6 +877,9 @@ impl Binder { for (name, v) in new_scope { s.vars.insert(name, v); } + for (v, label) in forwarded_nodes { + s.node_vars.insert(v, label); + } // WITH's WHERE / ORDER BY reference the projected aliases — lower them // against the new scope. diff --git a/crates/gf-rel/src/lowerer.rs b/crates/gf-rel/src/lowerer.rs index f59548e3..b07d8c26 100644 --- a/crates/gf-rel/src/lowerer.rs +++ b/crates/gf-rel/src/lowerer.rs @@ -743,36 +743,61 @@ impl<'a> GraphPlanLowerer<'a> { var_map: &mut VarMap, ) -> Result { // 1. Project the items against the CURRENT scope (their expressions - // reference upstream vars). Every WITH item carries an alias. - let projected = { + // reference upstream vars). A scalar item projects its expression + // aliased to its name; a whole-node item (`WITH n`) forwards ALL of + // the node's columns (a node var spans `var_.node_uuid` + props) + // so a downstream `RETURN n` / `n.x` still resolves. `new_scope` + // collects each out_var's resulting column (or column prefix). + let node_shapes = self.node_shapes.borrow(); + let mut select: Vec = Vec::new(); + let mut new_scope: Vec<(VarId, String)> = Vec::new(); + { let lowerer = self.expr_lowerer(exprs, var_map); - let select: Result, LoweringError> = items - .iter() - .map(|item| { - let e = lowerer.lower(item.expr)?; - let name = item.alias.as_deref().ok_or_else(|| { - LoweringError::UnsupportedExpr( - "WITH item without an alias (binder should reject)".into(), - ) - })?; - Ok(e.alias(name)) - }) - .collect(); - LogicalPlanBuilder::from(input) - .project(select?) - .map_err(|e| LoweringError::UnsupportedExpr(e.to_string()))? - .build() - .map_err(|e| LoweringError::UnsupportedExpr(e.to_string()))? - }; + for item in items { + let name = item.alias.as_deref().ok_or_else(|| { + LoweringError::UnsupportedExpr( + "WITH item without an alias (binder should reject)".into(), + ) + })?; + // Whole-node forwarding: a bare VarRef to a node var (one with a + // NodeScan shape). Forward every input column under that var's + // qualifier, unchanged. + if let IrExpr::VarRef(v) = exprs.get(item.expr) + && node_shapes.contains_key(&v.0) + { + let prefix = var_map + .get(*v) + .ok_or(LoweringError::UnboundVar(v.0))? + .to_string(); + for (qualifier, field) in input.schema().iter() { + if qualifier.is_some_and(|q| q.table() == prefix) { + select.push(DfExpr::Column(datafusion::common::Column::new( + qualifier.cloned(), + field.name(), + ))); + } + } + new_scope.push((*v, prefix)); + } else { + select.push(lowerer.lower(item.expr)?.alias(name)); + if let Some(v) = item.out_var { + new_scope.push((v, name.to_string())); + } + } + } + } + let projected = LogicalPlanBuilder::from(input) + .project(select) + .map_err(|e| LoweringError::UnsupportedExpr(e.to_string()))? + .build() + .map_err(|e| LoweringError::UnsupportedExpr(e.to_string()))?; // 2. Install the new scope: WITH resets it to exactly its aliases, so // drop every pre-WITH variable (mirroring the binder) before mapping - // each item's out_var to its projected column. This stops a dropped - // VarId from resolving through a later lowering path. + // each out_var to its projected column / prefix. + drop(node_shapes); var_map.clear(); - for item in items { - if let (Some(v), Some(name)) = (item.out_var, item.alias.as_deref()) { - var_map.insert(v, name); - } + for (v, col) in new_scope { + var_map.insert(v, col); } // 3. Apply WITH's WHERE over the projected columns (updated var_map). match where_predicate {