Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 14 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,20 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
## [Unreleased]

### In Development — v0.5.0 (Rust Core, `rust-core` branch)
- `startNode(r)` / `endNode(r)` return the endpoint **node value** (#753) — over
a matched fixed-hop relationship these now return the start/end node with
identity, labels, and readable properties (reusing the #785 node-value
materialization), not a bare UUID. The binder records each fixed hop's
`(src, dst)` node vars at `Expand` and rewrites the calls onto them: as a node
reference for property access (`startNode(r).name`) and, in a terminal RETURN,
as a whole node value (`RETURN endNode(r)`). Endpoints follow the edge's
**direction** — an incoming `(a)<-[r]-(b)` resolves `startNode(r)` to `b` — not
the pattern's left/right order. Deliberately deferred (fall through to the
usual `UnknownFunction` error rather than risk a wrong answer): variable-length
edges (the var binds to a relationship *list*), undirected edges (orientation
is per matched row), and `OPTIONAL MATCH` edges (an unmatched `r` is null;
honoring `startNode(null) = null` needs an edge-uuid null gate — #889). Closes
the #743 deferral.
- Neighborhood-proportional traversal latency (#838) — variable-length and
single-hop expansion now read only the **reached destination** node records
(`read_nodes_filtered`, an `node_id`-filtered read mirroring the #830 edge
Expand Down
137 changes: 137 additions & 0 deletions crates/gf-api/tests/e2e_baseline.rs
Original file line number Diff line number Diff line change
Expand Up @@ -142,6 +142,143 @@ fn match_returns_whole_node_value() {
assert_eq!(name_str.value(0), "Alice");
}

#[test]
fn start_end_node_return_whole_node() {
// #753: startNode(r) / endNode(r) over a matched relationship return the
// endpoint node's whole value (identity + labels + properties), reusing the
// #785 materialization — not the raw UUID.
let gf = forge();
let r = rows(
&gf,
"MATCH (a:Person {name:'Alice'})-[r:KNOWS]->(b:Person {name:'Bob'}) RETURN startNode(r)",
);
assert_eq!(r.stats.rows_produced, 1, "Alice KNOWS Bob is one edge");
let batch = r
.batches
.iter()
.find(|b| b.num_rows() > 0)
.expect("a non-empty result batch");
// The column name is the expression display string (no friendly alias for a
// function call), so address it positionally.
let col = batch.column(0);
let node = col
.as_any()
.downcast_ref::<StructArray>()
.unwrap_or_else(|| {
panic!(
"startNode(r) should be a node-value Struct, got {:?}",
col.data_type()
)
});
// The start node is Alice — label + property both readable off the value.
let labels = node.column_by_name("labels").expect("labels field");
let list = labels
.as_any()
.downcast_ref::<ListArray>()
.expect("labels is a List");
let label_vals = list.value(0);
let label_strs = label_vals
.as_any()
.downcast_ref::<StringArray>()
.expect("labels are Utf8");
assert_eq!(label_strs.value(0), "Person");
let name = node.column_by_name("name").expect("name property");
let name_str = name
.as_any()
.downcast_ref::<StringArray>()
.expect("name is Utf8");
assert_eq!(name_str.value(0), "Alice", "startNode(r) is the start node");
}

#[test]
fn start_end_node_property_access() {
// #753: property access on startNode(r) / endNode(r) resolves against the
// src / dst node's columns — `startNode(r).name` == the start node's name.
let gf = forge();
let r = rows(
&gf,
"MATCH (a:Person {name:'Alice'})-[r:KNOWS]->(b:Person {name:'Bob'}) \
RETURN startNode(r).name AS s, endNode(r).name AS e",
);
assert_eq!(r.stats.rows_produced, 1);
let batch = r
.batches
.iter()
.find(|b| b.num_rows() > 0)
.expect("a non-empty result batch");
let s = batch
.column_by_name("s")
.expect("s")
.as_any()
.downcast_ref::<StringArray>()
.expect("Utf8");
let e = batch
.column_by_name("e")
.expect("e")
.as_any()
.downcast_ref::<StringArray>()
.expect("Utf8");
assert_eq!(s.value(0), "Alice", "startNode(r).name");
assert_eq!(e.value(0), "Bob", "endNode(r).name");
}

#[test]
fn start_end_node_respects_incoming_direction() {
// #753: the relationship's start/end follow the edge *direction*, not the
// pattern's left/right order. For `(Bob)<-[r:KNOWS]-(b)`, the only match is
// Alice KNOWS Bob, so startNode(r) is Alice (the edge source) and endNode(r)
// is Bob — even though Bob is the pattern's left-hand node.
let gf = forge();
let r = rows(
&gf,
"MATCH (a:Person {name:'Bob'})<-[r:KNOWS]-(b:Person) \
RETURN startNode(r).name AS s, endNode(r).name AS e",
);
assert_eq!(r.stats.rows_produced, 1, "only Alice KNOWS Bob");
let batch = r
.batches
.iter()
.find(|b| b.num_rows() > 0)
.expect("a non-empty result batch");
let s = batch
.column_by_name("s")
.expect("s")
.as_any()
.downcast_ref::<StringArray>()
.expect("Utf8");
let e = batch
.column_by_name("e")
.expect("e")
.as_any()
.downcast_ref::<StringArray>()
.expect("Utf8");
assert_eq!(
s.value(0),
"Alice",
"startNode = edge source, not pattern-left"
);
assert_eq!(e.value(0), "Bob", "endNode = edge target");
}

#[test]
fn start_node_over_optional_edge_is_deferred() {
// #753 deferral: over an OPTIONAL-matched relationship, `r` is null on an
// unmatched row and Cypher requires `startNode(r)` to be null. Honoring that
// needs an edge-uuid null gate on endpoint materialization (#889), so the
// rewrite is suppressed for optional edges. The suppression surfaces as a
// bind-time `UnknownFunction` (data-independent), but the query uses Eve —
// who has no outgoing edges — so the represented scenario is a genuinely
// unmatched optional, not one papered over by a row that happens to match.
let gf = forge();
let res = gf.execute(
"MATCH (a:Person {name:'Eve'}) OPTIONAL MATCH (a)-[r:KNOWS]->(b) RETURN startNode(r)",
);
assert!(
res.is_err(),
"optional-edge startNode is deferred; expected an error, got {res:?}"
);
Comment thread
coderabbitai[bot] marked this conversation as resolved.
}

#[test]
fn count_aggregate() {
let gf = forge();
Expand Down
126 changes: 109 additions & 17 deletions crates/gf-ir/src/binder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -145,6 +145,7 @@ impl Binder {
vars: HashMap::new(),
path_vars: HashMap::new(),
node_vars: HashMap::new(),
edge_vars: HashMap::new(),
next_var: 0,
builder,
errors: Vec::new(),
Expand Down Expand Up @@ -216,6 +217,7 @@ impl Binder {
vars: s.vars.clone(),
path_vars: s.path_vars.clone(),
node_vars: s.node_vars.clone(),
edge_vars: s.edge_vars.clone(),
next_var: s.next_var,
builder: GraphPlan::builder("openCypher").ontology_mode(self.mode),
errors: Vec::new(),
Expand All @@ -239,6 +241,17 @@ impl Binder {
for (v, label) in sub_state.node_vars {
s.node_vars.entry(v).or_insert(label);
}
// Deliberately do NOT propagate `edge_vars` out of an OPTIONAL MATCH.
// After an unmatched optional row `r` is null, and Cypher requires
// `startNode(r)`/`endNode(r)` to be null — but the rewrite resolves
// to the endpoint node var, which on such a row is the *outer*,
// non-null var (e.g. `a` in `MATCH (a) OPTIONAL MATCH (a)-[r]->(b)`).
// Without an edge-uuid null gate on endpoint materialization that
// would return a wrong non-null value, so optional edges stay
// unresolved here and `startNode`/`endNode` fall through to
// UnknownFunction. (Edges still resolve inside the optional's own
// WHERE, where matched rows have a non-null `r`.) Null-gated endpoint
// values are a node-value-completeness follow-up (#889).
s.errors.extend(sub_state.errors);
s.warnings.extend(sub_state.warnings);
let child = sub_state.builder.build();
Expand Down Expand Up @@ -325,6 +338,28 @@ impl Binder {
max_hops,
});

// Record the relationship's endpoints so `startNode(r)` /
// `endNode(r)` can return the start/end node value (#753).
// `src_var`/`dst_var` are the pattern's traversal-left/right
// vars; the relationship's true start/end follow its
// *direction*: an outgoing `(a)-[r]->(b)` starts at `a`, an
// incoming `(a)<-[r]-(b)` starts at `b`. An undirected edge's
// orientation is per matched row, so a static left/right
// rewrite could pick the wrong endpoint — skip it (startNode/
// endNode then fall through to UnknownFunction). Only fixed
// single hops qualify: a variable-length edge var binds to a
// *list*, not one relationship.
if !is_var_hop {
let endpoints = match dir {
Direction::Out => Some((src_var, dst_var)),
Direction::In => Some((dst_var, src_var)),
Direction::Undirected => None,
};
if let Some(endpoints) = endpoints {
s.edge_vars.insert(edge_var, endpoints);
}
}
Comment thread
coderabbitai[bot] marked this conversation as resolved.

if path_nodes.is_empty() {
path_nodes.push(src_var);
}
Expand Down Expand Up @@ -840,24 +875,22 @@ impl Binder {
s: &mut BinderState,
materialize_nodes: bool,
) -> ExprId {
if materialize_nodes
&& let Expr::Var(VarRef { name, .. }) = expr
&& let Some(&v) = s.vars.get(name)
&& let Some(label_opt) = s.node_vars.get(&v).cloned()
{
let var_ref = s.builder.push_expr(IrExpr::VarRef(v));
let mut args = vec![var_ref];
// Pass the pattern's label name through: the lowerer's ontology map
// is empty in exploratory mode, so the label can only come from the
// bind-time pattern. Absent (one arg) for an unlabelled match.
if let Some(label) = label_opt {
let lit = s.builder.push_expr(IrExpr::Literal(IrLiteral::Str(label)));
args.push(lit);
if materialize_nodes {
// A bare node var (`RETURN n`, #785) or a relationship endpoint
// (`RETURN startNode(r)` / `endNode(r)`, #753) materializes a whole
// node value. Both resolve to a node var bound to a `NodeScan`.
let node_var = match expr {
Expr::Var(VarRef { name, .. }) => s
.vars
.get(name)
.copied()
.filter(|v| s.node_vars.contains_key(v)),
Expr::FunctionCall(call) => Self::resolve_endpoint_node(call, s),
_ => None,
};
if let Some(v) = node_var {
return Self::node_struct_expr(v, s);
}
return s.builder.push_expr(IrExpr::FunctionCall {
name: "_node_struct".to_string(),
args,
});
}
self.lower_expr(expr, span, s)
}
Expand Down Expand Up @@ -1067,6 +1100,13 @@ impl Binder {
Expr::FunctionCall(call) => {
if let Some(id) = Self::lower_path_function(call, s) {
id
} else if let Some(node_var) = Self::resolve_endpoint_node(call, s) {
// `startNode(r)` / `endNode(r)` over a matched relationship
// is the src / dst node var (#753). As a bare value it is a
// node reference; property access (`startNode(r).name`)
// resolves against that var's columns. A terminal RETURN
// upgrades it to a whole node value in `lower_return_item_expr`.
s.builder.push_expr(IrExpr::VarRef(node_var))
} else {
let fn_name = call.name.join(".");
let ir_args: Vec<ExprId> = call
Expand Down Expand Up @@ -1174,6 +1214,51 @@ impl Binder {
Some(id)
}

/// Resolve `startNode(r)` / `endNode(r)` over a fixed-hop matched
/// relationship to the relationship's src / dst node variable (#753).
///
/// Returns `None` for anything else — a non-matching name, a non-variable
/// argument, an unbound name, or an edge var that is not a recorded
/// fixed-hop endpoint (variable-length edges bind to a list, not one
/// relationship) — so the caller falls through to generic lowering, where
/// the function name surfaces as the usual `UnknownFunction` error.
fn resolve_endpoint_node(call: &FunctionCall, s: &BinderState) -> Option<VarId> {
let [fn_name] = call.name.as_slice() else {
return None;
};
let start = match fn_name.to_ascii_lowercase().as_str() {
"startnode" => true,
"endnode" => false,
_ => return None,
};
let [Expr::Var(VarRef { name, .. })] = call.args.as_slice() else {
return None;
};
let edge_var = s.vars.get(name)?;
let (src, dst) = s.edge_vars.get(edge_var)?;
Some(if start { *src } else { *dst })
}

/// Build a `_node_struct` call that materializes the given node var as a
/// whole node value — identity + labels + properties (#785). Shared by a
/// bare `RETURN n` and by `RETURN startNode(r)` / `endNode(r)` (#753).
fn node_struct_expr(var: VarId, s: &mut BinderState) -> ExprId {
// The pattern's label name, passed through because the lowerer's
// ontology map is empty in exploratory mode; absent for an unlabelled
// match (one arg).
let label_opt = s.node_vars.get(&var).cloned().flatten();
let var_ref = s.builder.push_expr(IrExpr::VarRef(var));
let mut args = vec![var_ref];
if let Some(label) = label_opt {
let lit = s.builder.push_expr(IrExpr::Literal(IrLiteral::Str(label)));
args.push(lit);
}
s.builder.push_expr(IrExpr::FunctionCall {
name: "_node_struct".to_string(),
args,
})
}

/// IR for `nodes(p)`: the traversal node sequence as a
/// `List<Struct{node_uuid}>` value.
///
Expand Down Expand Up @@ -1419,6 +1504,12 @@ struct BinderState {
/// of these materializes a whole node value (#785); the label is passed
/// through to lowering since the ontology map is empty in exploratory mode.
node_vars: HashMap<VarId, Option<String>>,
/// Fixed-hop relationship variables mapped to their `(src, dst)` node vars,
/// recorded at `Expand` build. `startNode(r)` / `endNode(r)` over a matched
/// relationship rewrite onto these endpoints (#753) — so they return the
/// node value (reusing #785), not the raw UUID. Variable-length edges bind
/// to a list, not one relationship, so they are deliberately not recorded.
edge_vars: HashMap<VarId, (VarId, VarId)>,
next_var: u32,
builder: GraphPlanBuilder,
errors: Vec<BindError>,
Expand Down Expand Up @@ -1825,6 +1916,7 @@ mod tests {
vars: HashMap::new(),
path_vars: HashMap::new(),
node_vars: HashMap::new(),
edge_vars: HashMap::new(),
next_var: 0,
builder: GraphPlan::builder("openCypher").ontology_mode(OntologyMode::Advisory),
errors: Vec::new(),
Expand Down
Loading