diff --git a/crates/tempyr-core/src/ops.rs b/crates/tempyr-core/src/ops.rs index 7ca6304..19ed3c6 100644 --- a/crates/tempyr-core/src/ops.rs +++ b/crates/tempyr-core/src/ops.rs @@ -94,43 +94,94 @@ pub fn rename_node_slug(graph_dir: &Path, old_id: &str, new_slug: &str) -> Resul /// Resolve a node query to a full node ID. /// /// Accepts: -/// - Full hybrid ID: `session-replay-a1b2c3` (exact filename match) -/// - Legacy ID: `feat-session-replay` (exact filename match) +/// - Full hybrid ID: `session-replay-a1b2c3` (matches filename, falls back to frontmatter id) +/// - Legacy ID: `feat-session-replay` (matches filename, falls back to frontmatter id) /// - 6-char suffix only: `a1b2c3` (scans filenames for `-{suffix}.md`) /// /// Returns an error if no match or multiple matches found. pub fn resolve_node_id(graph_dir: &Path, query: &str) -> Result { - // Try exact match first (fastest path) + // Fast path: filename matches `{query}.md`. Walks at any depth >= 2 so we + // stay consistent with `Graph::load_from_directory` and `find_node_file`. + // We still parse the file to canonicalize via frontmatter — `id:` is the + // source of truth, and a filename can drift from it. let exact_filename = format!("{query}.md"); for entry in WalkDir::new(graph_dir) .min_depth(2) - .max_depth(2) .into_iter() .filter_map(|e| e.ok()) { - if entry.file_name().to_string_lossy() == exact_filename { + if entry.file_name().to_string_lossy() != exact_filename { + continue; + } + let path = entry.path(); + let Ok(content) = std::fs::read_to_string(path) else { + continue; + }; + let Ok(node) = parse_node(&content, path.to_path_buf()) else { + continue; + }; + return Ok(node.id().to_string()); + } + + // Slow path: filename doesn't match — scan frontmatter, matching + // `find_node_file`'s behavior. Required for legitimate files like + // `/README.md` whose on-disk name doesn't match their `id:`. + for entry in WalkDir::new(graph_dir) + .min_depth(2) + .into_iter() + .filter_map(|e| e.ok()) + { + let path = entry.path(); + if path.extension().is_none_or(|ext| ext != "md") { + continue; + } + let Ok(content) = std::fs::read_to_string(path) else { + continue; + }; + let Ok(node) = parse_node(&content, path.to_path_buf()) else { + continue; + }; + if node.id() == query { return Ok(query.to_string()); } } - // Try suffix-only match if query looks like a valid 6-char suffix + // Try suffix-only match if query looks like a valid 6-char suffix. + // Walks unbounded (same as the paths above) so suffix lookups behave + // consistently with the rest of the graph traversals. The filename + // pattern is the cheap prefilter; the actual match is on the parsed + // frontmatter `id:` so a drifted filename can't surface a wrong ID. if id::is_valid_suffix(query) { let suffix_pattern = format!("-{query}.md"); + let suffix_id_pattern = format!("-{query}"); let mut matches = Vec::new(); for entry in WalkDir::new(graph_dir) .min_depth(2) - .max_depth(2) .into_iter() .filter_map(|e| e.ok()) { + let path = entry.path(); + if path.extension().is_none_or(|ext| ext != "md") { + continue; + } let name = entry.file_name().to_string_lossy(); - if name.ends_with(&suffix_pattern) { - let stem = name.strip_suffix(".md").unwrap(); - matches.push(stem.to_string()); + if !name.ends_with(&suffix_pattern) { + continue; + } + let Ok(content) = std::fs::read_to_string(path) else { + continue; + }; + let Ok(node) = parse_node(&content, path.to_path_buf()) else { + continue; + }; + if node.id().ends_with(&suffix_id_pattern) { + matches.push(node.id().to_string()); } } + matches.sort(); + matches.dedup(); match matches.len() { 0 => {} 1 => return Ok(matches.into_iter().next().unwrap()), @@ -261,11 +312,17 @@ pub fn remove_edge( /// For every edge A->B (type X), checks that B has the reverse edge B->A (reverse(X)). /// Missing reverses are added and the affected files are written. /// Returns the list of (node_id, added_edge) pairs. +/// +/// All path resolution and edge accumulation happens in-memory first, before +/// any file writes. This way a misaligned node (filename != id, target not in +/// the loaded graph, etc.) is caught up-front instead of partial-writing some +/// files and then aborting mid-loop. pub fn repair_reverse_edges( graph_dir: &Path, schema: &Schema, ) -> Result> { use crate::graph::Graph; + use std::collections::HashMap; let graph = Graph::load_from_directory(graph_dir, schema.clone())?; let mut repairs: Vec<(String, String, String)> = Vec::new(); @@ -300,14 +357,30 @@ pub fn repair_reverse_edges( repairs.sort(); repairs.dedup(); - // Apply repairs: add reverse edges to target files + // Build the set of file mutations in memory. Keyed by target_id so we + // accumulate multiple new reverse edges into a single in-memory copy of + // each target node before writing it. We rely on the file_path the graph + // captured at load — `find_node_file` would reconstruct it from the id + // and would miss files whose on-disk name doesn't match the id (e.g. + // `/README.md` with `id: hub--readme`). + let mut pending: HashMap = HashMap::new(); for (target_id, source_id, reverse_type) in &repairs { - let target_path = find_node_file(graph_dir, target_id)?; - let content = std::fs::read_to_string(&target_path)?; - let mut target_node = parse_node(&content, target_path.clone())?; + use std::collections::hash_map::Entry; + let target = match pending.entry(target_id.clone()) { + Entry::Occupied(e) => e.into_mut(), + Entry::Vacant(v) => { + let target_node = graph.get_node(target_id).ok_or_else(|| { + TempyrError::NotFound(format!( + "Cannot repair: target node '{target_id}' not in graph" + )) + })?; + v.insert(target_node.clone()) + } + }; - // Skip if already present (may have been added by a prior repair in this batch) - let already_has = target_node + // Skip if already present (defensive — fixed-graph load wouldn't show + // it as missing, but a prior repair in this batch may have added it). + let already_has = target .frontmatter .edges .iter() @@ -316,14 +389,18 @@ pub fn repair_reverse_edges( continue; } - target_node + target .frontmatter .edges .push(EdgeEntry::new(source_id, reverse_type)); - sort_edges(&mut target_node.frontmatter.edges); - target_node.frontmatter.updated = Some(Utc::now()); + } - atomic_write(&target_path, &serialize_node(&target_node)?)?; + // Commit: only now do we touch the filesystem. + let now = Utc::now(); + for node in pending.values_mut() { + sort_edges(&mut node.frontmatter.edges); + node.frontmatter.updated = Some(now); + atomic_write(&node.file_path, &serialize_node(node)?)?; } Ok(repairs) @@ -518,12 +595,18 @@ fn find_type_directory(graph_dir: &Path, node_type: &str) -> Result { } /// Find a node file by its ID, searching all subdirectories. +/// +/// Two-phase lookup so we don't require `filename == id`: most files do +/// follow that convention (and we hit them in the fast path), but a node's +/// `id:` in frontmatter is the source of truth, and files like +/// `/README.md` with a non-matching id are legitimate. pub fn find_node_file(graph_dir: &Path, node_id: &str) -> Result { let filename = format!("{node_id}.md"); + // Fast path: filename matches `.md`. Walks at any depth >= 2 so we + // stay consistent with `Graph::load_from_directory`. for entry in WalkDir::new(graph_dir) .min_depth(2) - .max_depth(2) .into_iter() .filter_map(|e| e.ok()) { @@ -532,6 +615,29 @@ pub fn find_node_file(graph_dir: &Path, node_id: &str) -> Result { } } + // Slow path: filename doesn't match — fall back to scanning frontmatter. + // Only triggers when the fast path misses, so cost is bounded to graphs + // that actually have a misaligned file. + for entry in WalkDir::new(graph_dir) + .min_depth(2) + .into_iter() + .filter_map(|e| e.ok()) + { + let path = entry.path(); + if path.extension().is_none_or(|ext| ext != "md") { + continue; + } + let Ok(content) = std::fs::read_to_string(path) else { + continue; + }; + let Ok(node) = parse_node(&content, path.to_path_buf()) else { + continue; + }; + if node.id() == node_id { + return Ok(path.to_path_buf()); + } + } + Err(TempyrError::NotFound(format!( "Node file not found: {node_id}" ))) @@ -1145,4 +1251,192 @@ mod tests { let result = resolve_node_id(&graph_dir, "nonexistent"); assert!(result.is_err()); } + + #[test] + fn test_resolve_node_id_fast_path_canonicalizes_via_frontmatter() { + // Filename happens to match `{query}.md` but the canonical id in + // frontmatter differs. We return the frontmatter id so callers + // never get back a stem that no node actually answers to. + let tmp = setup_graph_dir(); + let graph_dir = tmp.path().join("graph"); + + write_node( + &graph_dir, + "features", + "wrong-name", + "---\nid: real-name\ntype: feature\nstatus: draft\nowner: alice\n---\n# Real\n", + ); + + let resolved = resolve_node_id(&graph_dir, "wrong-name").unwrap(); + assert_eq!(resolved, "real-name"); + } + + #[test] + fn test_resolve_node_id_suffix_canonicalizes_via_frontmatter() { + // Filename matches `*-{suffix}.md` but frontmatter id differs. + // The suffix loop must push the parsed `node.id()` (only when it + // also ends with `-{suffix}`), never the filename stem. + let tmp = setup_graph_dir(); + let graph_dir = tmp.path().join("graph"); + + write_node( + &graph_dir, + "tasks", + "foo-abc123", + "---\nid: bar-abc123\ntype: task\nstatus: backlog\n---\n# Bar\n", + ); + + let resolved = resolve_node_id(&graph_dir, "abc123").unwrap(); + assert_eq!(resolved, "bar-abc123"); + } + + #[test] + fn test_resolve_node_id_by_suffix_in_nested_directory() { + // Suffix lookup must walk the same depth range as + // `Graph::load_from_directory` — a node nested below the type dir + // (e.g. `tasks/2026-q2/.md`) is loaded by the graph and must be + // resolvable by its suffix as well. + let tmp = setup_graph_dir(); + let graph_dir = tmp.path().join("graph"); + let nested = graph_dir.join("tasks").join("2026-q2"); + std::fs::create_dir_all(&nested).unwrap(); + + // Hand-write so we control the path (depth 3, below the type dir). + std::fs::write( + nested.join("ship-it-abc123.md"), + "---\nid: ship-it-abc123\ntype: task\nstatus: backlog\n---\n# Ship It\n", + ) + .unwrap(); + + let resolved = resolve_node_id(&graph_dir, "abc123").unwrap(); + assert_eq!(resolved, "ship-it-abc123"); + } + + #[test] + fn test_resolve_node_id_by_frontmatter_when_filename_differs() { + // Same shape as `find_node_file`'s misaligned-file regression: file + // at `/README.md` with a non-matching id must still be + // resolvable, so MCP/CLI lookups against such nodes don't fail. + let tmp = setup_graph_dir(); + let graph_dir = tmp.path().join("graph"); + + std::fs::write( + graph_dir.join("features/README.md"), + "---\nid: feat-overview\ntype: feature\nstatus: draft\nowner: alice\n---\n# Overview\n", + ) + .unwrap(); + + let resolved = resolve_node_id(&graph_dir, "feat-overview").unwrap(); + assert_eq!(resolved, "feat-overview"); + } + + // Regression: nodes can legitimately live at paths where the filename does + // not match the id (e.g., `features/README.md` with `id: feat-overview`). + // `find_node_file` must fall back to parsing frontmatter, and + // `repair_reverse_edges` must use the loaded graph's path rather than + // re-deriving `/.md` — otherwise it crashes mid-loop and + // leaves partial writes on disk. + #[test] + fn test_find_node_file_resolves_by_frontmatter_id_when_filename_differs() { + let tmp = setup_graph_dir(); + let graph_dir = tmp.path().join("graph"); + + // File is named README.md but its id is something else. + std::fs::write( + graph_dir.join("features/README.md"), + "---\nid: feat-overview\ntype: feature\nstatus: draft\nowner: alice\n---\n# Overview\n", + ) + .unwrap(); + + let path = find_node_file(&graph_dir, "feat-overview").unwrap(); + assert_eq!(path, graph_dir.join("features/README.md")); + } + + #[test] + fn test_repair_reverse_edges_writes_to_misaligned_target() { + let tmp = setup_graph_dir(); + let graph_dir = tmp.path().join("graph"); + let schema = make_schema(); + + // Target file lives at features/README.md but its frontmatter id is + // `feat-overview` — i.e., filename != id, the shape the bug report + // describes (`/README.md` with `id: hub--readme`). + std::fs::write( + graph_dir.join("features/README.md"), + "---\nid: feat-overview\ntype: feature\nstatus: draft\nowner: alice\n---\n# Overview\n", + ) + .unwrap(); + + // Source has the forward edge; target is missing the reverse. + std::fs::write( + graph_dir.join("epics/epic-a.md"), + "---\nid: epic-a\ntype: epic\nstatus: draft\nowner: alice\nedges:\n - target: feat-overview\n type: parent_of\n---\n# Epic A\n", + ) + .unwrap(); + + let repairs = repair_reverse_edges(&graph_dir, &schema).unwrap(); + assert_eq!( + repairs.len(), + 1, + "expected one missing reverse: {repairs:?}" + ); + + // The reverse edge must be written to the file we actually loaded the + // target from (features/README.md), NOT to a phantom path derived from + // `/.md`. + let updated = std::fs::read_to_string(graph_dir.join("features/README.md")).unwrap(); + let node = parse_node(&updated, PathBuf::from("test")).unwrap(); + assert!( + node.edges() + .iter() + .any(|e| e.target == "epic-a" && e.edge_type == "child_of"), + "expected reverse edge added to README.md, got edges: {:?}", + node.edges() + ); + + // The phantom path must not have been created. + assert!( + !graph_dir.join("features/feat-overview.md").exists(), + "repair must not create a phantom file at /.md" + ); + } + + #[test] + fn test_repair_reverse_edges_is_atomic_when_target_absent() { + // If some other node references a target that's not on disk, the + // pre-fix code would write to a few targets and then crash on the + // missing one, leaving partial writes. With the fix, we resolve all + // targets through the loaded graph first; a target that isn't in the + // graph is simply skipped (it's a dangling edge, surfaced separately + // by `validate`), so no partial writes ever land. + let tmp = setup_graph_dir(); + let graph_dir = tmp.path().join("graph"); + let schema = make_schema(); + + // Source references a target that exists on disk and one that doesn't. + std::fs::write( + graph_dir.join("epics/epic-a.md"), + "---\nid: epic-a\ntype: epic\nstatus: draft\nowner: alice\nedges:\n - target: feat-real\n type: parent_of\n - target: feat-ghost\n type: parent_of\n---\n# Epic A\n", + ) + .unwrap(); + std::fs::write( + graph_dir.join("features/feat-real.md"), + "---\nid: feat-real\ntype: feature\nstatus: draft\nowner: alice\n---\n# Real\n", + ) + .unwrap(); + + let repairs = repair_reverse_edges(&graph_dir, &schema).unwrap(); + // Only the existing target gets a repair entry; the dangling one is + // silently skipped during collection. + assert_eq!(repairs.len(), 1); + assert_eq!(repairs[0].0, "feat-real"); + + let updated = std::fs::read_to_string(graph_dir.join("features/feat-real.md")).unwrap(); + let node = parse_node(&updated, PathBuf::from("test")).unwrap(); + assert!( + node.edges() + .iter() + .any(|e| e.target == "epic-a" && e.edge_type == "child_of") + ); + } }