diff --git a/CHANGELOG.md b/CHANGELOG.md index 37cddae8..e0e7ab2f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,20 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] ### In Development — v0.5.0 (Rust Core, `rust-core` branch) +- `startNode(r)` / `endNode(r)` return the endpoint **node value** (#753) — over + a matched fixed-hop relationship these now return the start/end node with + identity, labels, and readable properties (reusing the #785 node-value + materialization), not a bare UUID. The binder records each fixed hop's + `(src, dst)` node vars at `Expand` and rewrites the calls onto them: as a node + reference for property access (`startNode(r).name`) and, in a terminal RETURN, + as a whole node value (`RETURN endNode(r)`). Endpoints follow the edge's + **direction** — an incoming `(a)<-[r]-(b)` resolves `startNode(r)` to `b` — not + the pattern's left/right order. Deliberately deferred (fall through to the + usual `UnknownFunction` error rather than risk a wrong answer): variable-length + edges (the var binds to a relationship *list*), undirected edges (orientation + is per matched row), and `OPTIONAL MATCH` edges (an unmatched `r` is null; + honoring `startNode(null) = null` needs an edge-uuid null gate — #889). Closes + the #743 deferral. - Neighborhood-proportional traversal latency (#838) — variable-length and single-hop expansion now read only the **reached destination** node records (`read_nodes_filtered`, an `node_id`-filtered read mirroring the #830 edge diff --git a/crates/gf-api/tests/e2e_baseline.rs b/crates/gf-api/tests/e2e_baseline.rs index 353ba066..6464dd8b 100644 --- a/crates/gf-api/tests/e2e_baseline.rs +++ b/crates/gf-api/tests/e2e_baseline.rs @@ -142,6 +142,143 @@ fn match_returns_whole_node_value() { assert_eq!(name_str.value(0), "Alice"); } +#[test] +fn start_end_node_return_whole_node() { + // #753: startNode(r) / endNode(r) over a matched relationship return the + // endpoint node's whole value (identity + labels + properties), reusing the + // #785 materialization — not the raw UUID. + let gf = forge(); + let r = rows( + &gf, + "MATCH (a:Person {name:'Alice'})-[r:KNOWS]->(b:Person {name:'Bob'}) RETURN startNode(r)", + ); + assert_eq!(r.stats.rows_produced, 1, "Alice KNOWS Bob is one edge"); + let batch = r + .batches + .iter() + .find(|b| b.num_rows() > 0) + .expect("a non-empty result batch"); + // The column name is the expression display string (no friendly alias for a + // function call), so address it positionally. + let col = batch.column(0); + let node = col + .as_any() + .downcast_ref::() + .unwrap_or_else(|| { + panic!( + "startNode(r) should be a node-value Struct, got {:?}", + col.data_type() + ) + }); + // The start node is Alice — label + property both readable off the value. + let labels = node.column_by_name("labels").expect("labels field"); + let list = labels + .as_any() + .downcast_ref::() + .expect("labels is a List"); + let label_vals = list.value(0); + let label_strs = label_vals + .as_any() + .downcast_ref::() + .expect("labels are Utf8"); + assert_eq!(label_strs.value(0), "Person"); + let name = node.column_by_name("name").expect("name property"); + let name_str = name + .as_any() + .downcast_ref::() + .expect("name is Utf8"); + assert_eq!(name_str.value(0), "Alice", "startNode(r) is the start node"); +} + +#[test] +fn start_end_node_property_access() { + // #753: property access on startNode(r) / endNode(r) resolves against the + // src / dst node's columns — `startNode(r).name` == the start node's name. + let gf = forge(); + let r = rows( + &gf, + "MATCH (a:Person {name:'Alice'})-[r:KNOWS]->(b:Person {name:'Bob'}) \ + RETURN startNode(r).name AS s, endNode(r).name AS e", + ); + assert_eq!(r.stats.rows_produced, 1); + let batch = r + .batches + .iter() + .find(|b| b.num_rows() > 0) + .expect("a non-empty result batch"); + let s = batch + .column_by_name("s") + .expect("s") + .as_any() + .downcast_ref::() + .expect("Utf8"); + let e = batch + .column_by_name("e") + .expect("e") + .as_any() + .downcast_ref::() + .expect("Utf8"); + assert_eq!(s.value(0), "Alice", "startNode(r).name"); + assert_eq!(e.value(0), "Bob", "endNode(r).name"); +} + +#[test] +fn start_end_node_respects_incoming_direction() { + // #753: the relationship's start/end follow the edge *direction*, not the + // pattern's left/right order. For `(Bob)<-[r:KNOWS]-(b)`, the only match is + // Alice KNOWS Bob, so startNode(r) is Alice (the edge source) and endNode(r) + // is Bob — even though Bob is the pattern's left-hand node. + let gf = forge(); + let r = rows( + &gf, + "MATCH (a:Person {name:'Bob'})<-[r:KNOWS]-(b:Person) \ + RETURN startNode(r).name AS s, endNode(r).name AS e", + ); + assert_eq!(r.stats.rows_produced, 1, "only Alice KNOWS Bob"); + let batch = r + .batches + .iter() + .find(|b| b.num_rows() > 0) + .expect("a non-empty result batch"); + let s = batch + .column_by_name("s") + .expect("s") + .as_any() + .downcast_ref::() + .expect("Utf8"); + let e = batch + .column_by_name("e") + .expect("e") + .as_any() + .downcast_ref::() + .expect("Utf8"); + assert_eq!( + s.value(0), + "Alice", + "startNode = edge source, not pattern-left" + ); + assert_eq!(e.value(0), "Bob", "endNode = edge target"); +} + +#[test] +fn start_node_over_optional_edge_is_deferred() { + // #753 deferral: over an OPTIONAL-matched relationship, `r` is null on an + // unmatched row and Cypher requires `startNode(r)` to be null. Honoring that + // needs an edge-uuid null gate on endpoint materialization (#889), so the + // rewrite is suppressed for optional edges. The suppression surfaces as a + // bind-time `UnknownFunction` (data-independent), but the query uses Eve — + // who has no outgoing edges — so the represented scenario is a genuinely + // unmatched optional, not one papered over by a row that happens to match. + let gf = forge(); + let res = gf.execute( + "MATCH (a:Person {name:'Eve'}) OPTIONAL MATCH (a)-[r:KNOWS]->(b) RETURN startNode(r)", + ); + assert!( + res.is_err(), + "optional-edge startNode is deferred; expected an error, got {res:?}" + ); +} + #[test] fn count_aggregate() { let gf = forge(); diff --git a/crates/gf-ir/src/binder.rs b/crates/gf-ir/src/binder.rs index bb22a713..f81665ff 100644 --- a/crates/gf-ir/src/binder.rs +++ b/crates/gf-ir/src/binder.rs @@ -145,6 +145,7 @@ impl Binder { vars: HashMap::new(), path_vars: HashMap::new(), node_vars: HashMap::new(), + edge_vars: HashMap::new(), next_var: 0, builder, errors: Vec::new(), @@ -216,6 +217,7 @@ impl Binder { vars: s.vars.clone(), path_vars: s.path_vars.clone(), node_vars: s.node_vars.clone(), + edge_vars: s.edge_vars.clone(), next_var: s.next_var, builder: GraphPlan::builder("openCypher").ontology_mode(self.mode), errors: Vec::new(), @@ -239,6 +241,17 @@ impl Binder { for (v, label) in sub_state.node_vars { s.node_vars.entry(v).or_insert(label); } + // Deliberately do NOT propagate `edge_vars` out of an OPTIONAL MATCH. + // After an unmatched optional row `r` is null, and Cypher requires + // `startNode(r)`/`endNode(r)` to be null — but the rewrite resolves + // to the endpoint node var, which on such a row is the *outer*, + // non-null var (e.g. `a` in `MATCH (a) OPTIONAL MATCH (a)-[r]->(b)`). + // Without an edge-uuid null gate on endpoint materialization that + // would return a wrong non-null value, so optional edges stay + // unresolved here and `startNode`/`endNode` fall through to + // UnknownFunction. (Edges still resolve inside the optional's own + // WHERE, where matched rows have a non-null `r`.) Null-gated endpoint + // values are a node-value-completeness follow-up (#889). s.errors.extend(sub_state.errors); s.warnings.extend(sub_state.warnings); let child = sub_state.builder.build(); @@ -325,6 +338,28 @@ impl Binder { max_hops, }); + // Record the relationship's endpoints so `startNode(r)` / + // `endNode(r)` can return the start/end node value (#753). + // `src_var`/`dst_var` are the pattern's traversal-left/right + // vars; the relationship's true start/end follow its + // *direction*: an outgoing `(a)-[r]->(b)` starts at `a`, an + // incoming `(a)<-[r]-(b)` starts at `b`. An undirected edge's + // orientation is per matched row, so a static left/right + // rewrite could pick the wrong endpoint — skip it (startNode/ + // endNode then fall through to UnknownFunction). Only fixed + // single hops qualify: a variable-length edge var binds to a + // *list*, not one relationship. + if !is_var_hop { + let endpoints = match dir { + Direction::Out => Some((src_var, dst_var)), + Direction::In => Some((dst_var, src_var)), + Direction::Undirected => None, + }; + if let Some(endpoints) = endpoints { + s.edge_vars.insert(edge_var, endpoints); + } + } + if path_nodes.is_empty() { path_nodes.push(src_var); } @@ -840,24 +875,22 @@ impl Binder { s: &mut BinderState, materialize_nodes: bool, ) -> ExprId { - if materialize_nodes - && let Expr::Var(VarRef { name, .. }) = expr - && let Some(&v) = s.vars.get(name) - && let Some(label_opt) = s.node_vars.get(&v).cloned() - { - let var_ref = s.builder.push_expr(IrExpr::VarRef(v)); - let mut args = vec![var_ref]; - // Pass the pattern's label name through: the lowerer's ontology map - // is empty in exploratory mode, so the label can only come from the - // bind-time pattern. Absent (one arg) for an unlabelled match. - if let Some(label) = label_opt { - let lit = s.builder.push_expr(IrExpr::Literal(IrLiteral::Str(label))); - args.push(lit); + if materialize_nodes { + // A bare node var (`RETURN n`, #785) or a relationship endpoint + // (`RETURN startNode(r)` / `endNode(r)`, #753) materializes a whole + // node value. Both resolve to a node var bound to a `NodeScan`. + let node_var = match expr { + Expr::Var(VarRef { name, .. }) => s + .vars + .get(name) + .copied() + .filter(|v| s.node_vars.contains_key(v)), + Expr::FunctionCall(call) => Self::resolve_endpoint_node(call, s), + _ => None, + }; + if let Some(v) = node_var { + return Self::node_struct_expr(v, s); } - return s.builder.push_expr(IrExpr::FunctionCall { - name: "_node_struct".to_string(), - args, - }); } self.lower_expr(expr, span, s) } @@ -1067,6 +1100,13 @@ impl Binder { Expr::FunctionCall(call) => { if let Some(id) = Self::lower_path_function(call, s) { id + } else if let Some(node_var) = Self::resolve_endpoint_node(call, s) { + // `startNode(r)` / `endNode(r)` over a matched relationship + // is the src / dst node var (#753). As a bare value it is a + // node reference; property access (`startNode(r).name`) + // resolves against that var's columns. A terminal RETURN + // upgrades it to a whole node value in `lower_return_item_expr`. + s.builder.push_expr(IrExpr::VarRef(node_var)) } else { let fn_name = call.name.join("."); let ir_args: Vec = call @@ -1174,6 +1214,51 @@ impl Binder { Some(id) } + /// Resolve `startNode(r)` / `endNode(r)` over a fixed-hop matched + /// relationship to the relationship's src / dst node variable (#753). + /// + /// Returns `None` for anything else — a non-matching name, a non-variable + /// argument, an unbound name, or an edge var that is not a recorded + /// fixed-hop endpoint (variable-length edges bind to a list, not one + /// relationship) — so the caller falls through to generic lowering, where + /// the function name surfaces as the usual `UnknownFunction` error. + fn resolve_endpoint_node(call: &FunctionCall, s: &BinderState) -> Option { + let [fn_name] = call.name.as_slice() else { + return None; + }; + let start = match fn_name.to_ascii_lowercase().as_str() { + "startnode" => true, + "endnode" => false, + _ => return None, + }; + let [Expr::Var(VarRef { name, .. })] = call.args.as_slice() else { + return None; + }; + let edge_var = s.vars.get(name)?; + let (src, dst) = s.edge_vars.get(edge_var)?; + Some(if start { *src } else { *dst }) + } + + /// Build a `_node_struct` call that materializes the given node var as a + /// whole node value — identity + labels + properties (#785). Shared by a + /// bare `RETURN n` and by `RETURN startNode(r)` / `endNode(r)` (#753). + fn node_struct_expr(var: VarId, s: &mut BinderState) -> ExprId { + // The pattern's label name, passed through because the lowerer's + // ontology map is empty in exploratory mode; absent for an unlabelled + // match (one arg). + let label_opt = s.node_vars.get(&var).cloned().flatten(); + let var_ref = s.builder.push_expr(IrExpr::VarRef(var)); + let mut args = vec![var_ref]; + if let Some(label) = label_opt { + let lit = s.builder.push_expr(IrExpr::Literal(IrLiteral::Str(label))); + args.push(lit); + } + s.builder.push_expr(IrExpr::FunctionCall { + name: "_node_struct".to_string(), + args, + }) + } + /// IR for `nodes(p)`: the traversal node sequence as a /// `List` value. /// @@ -1419,6 +1504,12 @@ struct BinderState { /// of these materializes a whole node value (#785); the label is passed /// through to lowering since the ontology map is empty in exploratory mode. node_vars: HashMap>, + /// Fixed-hop relationship variables mapped to their `(src, dst)` node vars, + /// recorded at `Expand` build. `startNode(r)` / `endNode(r)` over a matched + /// relationship rewrite onto these endpoints (#753) — so they return the + /// node value (reusing #785), not the raw UUID. Variable-length edges bind + /// to a list, not one relationship, so they are deliberately not recorded. + edge_vars: HashMap, next_var: u32, builder: GraphPlanBuilder, errors: Vec, @@ -1825,6 +1916,7 @@ mod tests { vars: HashMap::new(), path_vars: HashMap::new(), node_vars: HashMap::new(), + edge_vars: HashMap::new(), next_var: 0, builder: GraphPlan::builder("openCypher").ontology_mode(OntologyMode::Advisory), errors: Vec::new(),