From 5a53322a76bc2499b2245ed5e098f9e84d6b7030 Mon Sep 17 00:00:00 2001 From: Caleb Leak Date: Sat, 2 May 2026 16:19:45 -0700 Subject: [PATCH 1/4] feat(snapshot): dedupe rebuild work, lock per key, and prune the store MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Eliminates three sources of waste in the shared snapshot index store under `/tempyr/snapshots//`: 1. `tempyr index rebuild` short-circuits to a free seed when the shared snapshot for the current graph state already exists, instead of rebuilding from scratch and producing identical bytes. Adds `--force` for corruption recovery. 2. New `SnapshotBuildLock` (per snapshot key, exclusive file lock under `snapshots/.locks/`) serializes concurrent rebuilds. A second worktree racing on the same never-seen graph state waits for the first to publish, then short-circuits through path 1. 3. New `tempyr snapshot {prune,list}` commands implement Nix-style hybrid retention: pinned (live worktree cursors cross-checked against `git worktree list`) ∪ recent buffer (`--keep-recent`, default 20) ∪ soft size cap (`--max-size`, default 500 MB). Two-phase rename-then- remove avoids open-file races on Windows. Auto-runs at the tail of `index rebuild` and `index update`. `tempyr doctor` now reports snapshot-store dir count + total bytes with a `consider \`tempyr snapshot prune\`` hint above the configurable thresholds. `is_snapshot_key` and `short_path_hash` are exposed from `tempyr-core::project` so health and prune use identical filtering. Co-Authored-By: Claude Opus 4.7 (1M context) --- Cargo.lock | 33 +- crates/tempyr-cli/Cargo.toml | 1 + crates/tempyr-cli/src/commands/doctor.rs | 75 ++ crates/tempyr-cli/src/commands/index_cmd.rs | 395 ++++++++- crates/tempyr-cli/src/commands/init.rs | 2 +- crates/tempyr-cli/src/commands/mod.rs | 1 + .../tempyr-cli/src/commands/snapshot_cmd.rs | 792 ++++++++++++++++++ crates/tempyr-cli/src/config.rs | 20 +- crates/tempyr-cli/src/main.rs | 71 +- crates/tempyr-core/src/project.rs | 135 ++- crates/tempyr-index/Cargo.toml | 1 + crates/tempyr-index/src/health.rs | 57 ++ 12 files changed, 1566 insertions(+), 17 deletions(-) create mode 100644 crates/tempyr-cli/src/commands/snapshot_cmd.rs diff --git a/Cargo.lock b/Cargo.lock index dae51aa..2d15015 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -906,6 +906,17 @@ dependencies = [ "simd-adler32", ] +[[package]] +name = "filetime" +version = "0.2.27" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f98844151eee8917efc50bd9e8318cb963ae8b297431495d3f758616ea5c57db" +dependencies = [ + "cfg-if", + "libc", + "libredox", +] + [[package]] name = "find-msvc-tools" version = "0.1.9" @@ -1657,7 +1668,10 @@ version = "0.1.15" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7ddbf48fd451246b1f8c2610bd3b4ac0cc6e149d89832867093ab69a17194f08" dependencies = [ + "bitflags", "libc", + "plain", + "redox_syscall 0.7.4", ] [[package]] @@ -2096,7 +2110,7 @@ checksum = "2621685985a2ebf1c516881c026032ac7deafcda1a2c9b7850dc81e3dfcb64c1" dependencies = [ "cfg-if", "libc", - "redox_syscall", + "redox_syscall 0.5.18", "smallvec", "windows-link", ] @@ -2152,6 +2166,12 @@ version = "0.3.32" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7edddbd0b52d732b21ad9a5fab5c704c14cd949e5e9a1ec5929a24fded1b904c" +[[package]] +name = "plain" +version = "0.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b4596b6d070b27117e987119b4dac604f3c58cfb0b191112e24771b2faeac1a6" + [[package]] name = "png" version = "0.18.1" @@ -2454,6 +2474,15 @@ dependencies = [ "bitflags", ] +[[package]] +name = "redox_syscall" +version = "0.7.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f450ad9c3b1da563fb6948a8e0fb0fb9269711c9c73d9ea1de5058c79c8d643a" +dependencies = [ + "bitflags", +] + [[package]] name = "redox_users" version = "0.5.2" @@ -3104,6 +3133,7 @@ dependencies = [ "chrono", "clap", "crossterm", + "filetime", "libc", "predicates", "ratatui", @@ -3161,6 +3191,7 @@ dependencies = [ "thiserror", "tokio", "toml", + "walkdir", ] [[package]] diff --git a/crates/tempyr-cli/Cargo.toml b/crates/tempyr-cli/Cargo.toml index d991bb4..5f6f06f 100644 --- a/crates/tempyr-cli/Cargo.toml +++ b/crates/tempyr-cli/Cargo.toml @@ -34,6 +34,7 @@ tempfile = { workspace = true } assert_cmd = "2" predicates = "3" rmcp = { workspace = true } +filetime = "0.2" [target.'cfg(unix)'.dev-dependencies] libc = { workspace = true } diff --git a/crates/tempyr-cli/src/commands/doctor.rs b/crates/tempyr-cli/src/commands/doctor.rs index a8e574b..fbb19f6 100644 --- a/crates/tempyr-cli/src/commands/doctor.rs +++ b/crates/tempyr-cli/src/commands/doctor.rs @@ -5,6 +5,7 @@ use std::io::{self, Write}; +use crate::commands::snapshot_cmd::{SNAPSHOT_STORE_HINT_BYTES, SNAPSHOT_STORE_HINT_DIRS}; use crate::config::ProjectContext; use tempyr_index::health::{self, HealthInputs, HealthReport}; use tempyr_journal::path as jpath; @@ -197,6 +198,21 @@ pub(crate) fn render_text(out: &mut impl Write, report: &HealthReport) -> io::Re if let Some(count) = report.index.embedding_count_for_index { writeln!(out, " embedded nodes: {count}")?; } + if let (Some(count), Some(bytes)) = ( + report.index.snapshot_store_count, + report.index.snapshot_store_bytes, + ) { + let hint = if count > SNAPSHOT_STORE_HINT_DIRS || bytes > SNAPSHOT_STORE_HINT_BYTES { + "consider `tempyr snapshot prune`" + } else { + "ok" + }; + writeln!( + out, + " snapshot store: {count} dirs, {} ({hint})", + human_bytes(bytes), + )?; + } if let Some(journal) = &report.journal { writeln!(out, "\nJournal")?; @@ -255,6 +271,21 @@ fn missing_marker(exists: bool) -> &'static str { if exists { "" } else { " (MISSING)" } } +fn human_bytes(bytes: u64) -> String { + const KB: u64 = 1024; + const MB: u64 = KB * 1024; + const GB: u64 = MB * 1024; + if bytes >= GB { + format!("{:.2} GB", bytes as f64 / GB as f64) + } else if bytes >= MB { + format!("{:.1} MB", bytes as f64 / MB as f64) + } else if bytes >= KB { + format!("{:.1} KB", bytes as f64 / KB as f64) + } else { + format!("{bytes} B") + } +} + #[cfg(test)] mod tests { use super::*; @@ -318,6 +349,8 @@ mod tests { snapshot_key_error: None, fts_entries: None, embedding_count_for_index: None, + snapshot_store_count: None, + snapshot_store_bytes: None, }, journal: None, warnings: vec!["something is amiss".to_string()], @@ -397,6 +430,48 @@ mod tests { assert!(!out.contains("\nJournal\n")); } + #[test] + fn human_bytes_formats_known_magnitudes() { + assert_eq!(human_bytes(0), "0 B"); + assert_eq!(human_bytes(512), "512 B"); + assert_eq!(human_bytes(2 * 1024), "2.0 KB"); + assert_eq!(human_bytes(3 * 1024 * 1024), "3.0 MB"); + assert_eq!(human_bytes(4 * 1024 * 1024 * 1024), "4.00 GB"); + } + + #[test] + fn render_text_includes_snapshot_store_when_populated() { + let mut report = fixture_report(); + report.index.snapshot_store_count = Some(884); + report.index.snapshot_store_bytes = Some(1_649_267_441); // ~1.54 GB + let out = render_to_string(&report); + + assert!(out.contains("snapshot store: 884 dirs")); + assert!(out.contains("GB")); + assert!( + out.contains("consider `tempyr snapshot prune`"), + "high-usage stores should hint at prune; got:\n{out}" + ); + } + + #[test] + fn render_text_marks_small_snapshot_store_ok() { + let mut report = fixture_report(); + report.index.snapshot_store_count = Some(5); + report.index.snapshot_store_bytes = Some(8 * 1024 * 1024); + let out = render_to_string(&report); + assert!(out.contains("snapshot store: 5 dirs")); + assert!(out.contains("ok")); + assert!(!out.contains("consider `tempyr snapshot prune`")); + } + + #[test] + fn render_text_omits_snapshot_store_when_unprobed() { + let report = fixture_report(); // snapshot_store_count is None + let out = render_to_string(&report); + assert!(!out.contains("snapshot store:")); + } + #[test] fn render_text_includes_journal_section_when_present() { use tempyr_journal::JournalHealthReport; diff --git a/crates/tempyr-cli/src/commands/index_cmd.rs b/crates/tempyr-cli/src/commands/index_cmd.rs index 9bbc283..f04b5a2 100644 --- a/crates/tempyr-cli/src/commands/index_cmd.rs +++ b/crates/tempyr-cli/src/commands/index_cmd.rs @@ -1,15 +1,139 @@ use std::path::Path; +use std::thread; +use std::time::{Duration, Instant}; +use crate::commands::snapshot_cmd::{self, PruneOptions}; use crate::config::ProjectContext; use tempyr_core::graph::Graph; +use tempyr_core::project::SnapshotBuildLock; use tempyr_index::embeddings::{self, EmbeddingStore}; use tempyr_index::indexer::Index; -pub fn run_rebuild(ctx: &ProjectContext, json: bool, skip_embeddings: bool) -> anyhow::Result<()> { +/// How long to wait for a competing builder to finish before giving up. +/// Used both for the "wait for the other process to publish the snapshot" +/// poll and the "block on the build lock" retry. Builds are typically a +/// few seconds; the budget is generous to cover cold caches. +const BUILD_LOCK_WAIT: Duration = Duration::from_secs(60); + +/// Polling interval while waiting for another process to publish a snapshot +/// or release the build lock. +const BUILD_LOCK_POLL: Duration = Duration::from_millis(100); + +/// Outcome of negotiating with concurrent rebuilds for the current snapshot. +#[cfg_attr(test, derive(Debug))] +enum RebuildSlot { + /// Caller holds the build lock and must rebuild the snapshot from scratch. + /// Held until the caller drops the value at end of scope. + Build(#[allow(dead_code)] SnapshotBuildLock), + /// The shared snapshot exists; caller should seed-and-report rather than + /// rebuild. The bool records whether another process built it during this + /// invocation (true) or whether it was already there at entry (false). + UseExisting { built_by_other: bool }, +} + +pub fn run_rebuild( + ctx: &ProjectContext, + json: bool, + skip_embeddings: bool, + force: bool, +) -> anyhow::Result<()> { let graph = Graph::load_from_directory(&ctx.graph_dir, ctx.schema.clone())?; - let (snapshot_key, index_path) = ctx.ensure_active_index_seeded()?; + let (snapshot_key, _) = ctx.ensure_active_index_seeded()?; + let shared = ctx.shared_snapshot_index_path()?; + + let slot = negotiate_rebuild_slot(ctx, &shared, force, BUILD_LOCK_WAIT)?; + if let RebuildSlot::UseExisting { built_by_other } = slot { + return seed_and_report( + ctx, + &graph, + &snapshot_key, + json, + skip_embeddings, + built_by_other, + ); + } + + rebuild_from_scratch(ctx, &graph, &snapshot_key, json, skip_embeddings)?; + + // Best-effort snapshot store maintenance. Failures here do not break the + // rebuild — pruning is a hygiene step. + let _ = snapshot_cmd::run_prune( + ctx, + &PruneOptions::default(), + false, + snapshot_cmd::PruneOutput::Silent, + ); + + Ok(()) +} + +/// Decide whether this caller should rebuild the snapshot or reuse an +/// existing one, while serializing against any concurrent builder. +/// +/// Returns: +/// - [`RebuildSlot::UseExisting`] when `force = false` and either (a) the +/// shared snapshot is already on disk at entry, or (b) it appears while +/// we wait for another process to publish. +/// - [`RebuildSlot::Build`] holding the per-key build lock, when this +/// caller must rebuild. Drop the value to release. +/// +/// `wait_budget` bounds **both** the wait-for-publish poll and the +/// block-acquire retry, so the worst-case total wait is roughly +/// `2 * wait_budget`. Tests can pass a small budget to exercise the +/// blocking path without sleeping for the production default. +fn negotiate_rebuild_slot( + ctx: &ProjectContext, + shared: &Path, + force: bool, + wait_budget: Duration, +) -> anyhow::Result { + if !force && shared.exists() { + return Ok(RebuildSlot::UseExisting { + built_by_other: false, + }); + } + + if let Some(lock) = ctx.try_acquire_snapshot_build_lock()? { + if !force && shared.exists() { + // Another process won the race and published while we were + // taking the lock. Release and use what they built. + drop(lock); + return Ok(RebuildSlot::UseExisting { + built_by_other: true, + }); + } + return Ok(RebuildSlot::Build(lock)); + } + + // Lock is held by another rebuild. Wait for them to publish. + wait_for_snapshot(shared, wait_budget); + if !force && shared.exists() { + return Ok(RebuildSlot::UseExisting { + built_by_other: true, + }); + } + + // Other builder didn't publish in time. Block on the lock so the work + // is still serialized — better to wait than to double-build. + let lock = acquire_build_lock_blocking(ctx, wait_budget)?; + if !force && shared.exists() { + drop(lock); + return Ok(RebuildSlot::UseExisting { + built_by_other: true, + }); + } + Ok(RebuildSlot::Build(lock)) +} + +fn rebuild_from_scratch( + ctx: &ProjectContext, + graph: &Graph, + snapshot_key: &str, + json: bool, + skip_embeddings: bool, +) -> anyhow::Result<()> { + let (_key, index_path) = ctx.ensure_active_index_seeded()?; - // Remove existing index if index_path.exists() { std::fs::remove_file(&index_path)?; } @@ -18,12 +142,11 @@ pub fn run_rebuild(ctx: &ProjectContext, json: bool, skip_embeddings: bool) -> a } let index = Index::create(&index_path)?; - let stats = index.rebuild(&graph)?; + let stats = index.rebuild(graph)?; - // Try to generate embeddings - let embed_result = maybe_embed(&graph, ctx, skip_embeddings); - ctx.write_active_snapshot_key(&snapshot_key)?; - ctx.publish_active_snapshot(&snapshot_key)?; + let embed_result = maybe_embed(graph, ctx, skip_embeddings); + ctx.write_active_snapshot_key(snapshot_key)?; + ctx.publish_active_snapshot(snapshot_key)?; if json { let mut result = serde_json::json!({ @@ -31,6 +154,7 @@ pub fn run_rebuild(ctx: &ProjectContext, json: bool, skip_embeddings: bool) -> a "edge_count": stats.edge_count, "fts_entries": stats.fts_entries, "nodes_by_type": stats.nodes_by_type, + "source": "rebuilt", }); if let Ok(ref es) = embed_result { result["embeddings"] = serde_json::json!({ @@ -54,12 +178,96 @@ pub fn run_rebuild(ctx: &ProjectContext, json: bool, skip_embeddings: bool) -> a Ok(()) } +fn wait_for_snapshot(path: &Path, max: Duration) { + let deadline = Instant::now() + max; + while Instant::now() < deadline { + if path.exists() { + return; + } + thread::sleep(BUILD_LOCK_POLL); + } +} + +fn acquire_build_lock_blocking( + ctx: &ProjectContext, + wait_budget: Duration, +) -> anyhow::Result { + let deadline = Instant::now() + wait_budget; + loop { + if let Some(lock) = ctx.try_acquire_snapshot_build_lock()? { + return Ok(lock); + } + if Instant::now() >= deadline { + anyhow::bail!( + "Could not acquire snapshot build lock within {wait_budget:?}; another rebuild is still running." + ); + } + thread::sleep(BUILD_LOCK_POLL); + } +} + +fn seed_and_report( + ctx: &ProjectContext, + graph: &Graph, + snapshot_key: &str, + json: bool, + skip_embeddings: bool, + waited_for_concurrent_builder: bool, +) -> anyhow::Result<()> { + let index_path = ctx.queryable_index_path()?; + let index = Index::open(&index_path)?; + let stats = index.stats()?; + let embed_result = maybe_embed(graph, ctx, skip_embeddings); + ctx.write_active_snapshot_key(snapshot_key)?; + + let source = if waited_for_concurrent_builder { + "snapshot_built_by_other_process" + } else { + "existing_snapshot" + }; + + if json { + let mut result = serde_json::json!({ + "node_count": stats.node_count, + "edge_count": stats.edge_count, + "fts_entries": stats.fts_entries, + "nodes_by_type": stats.nodes_by_type, + "source": source, + "snapshot_key": snapshot_key, + }); + if let Ok(ref es) = embed_result { + result["embeddings"] = serde_json::json!({ + "embedded": es.embedded, + "cached": es.skipped, + "dimensions": es.dimensions, + }); + } + println!("{}", serde_json::to_string_pretty(&result)?); + } else { + let prefix = if waited_for_concurrent_builder { + "Index reused after concurrent build" + } else { + "Index reused from existing snapshot" + }; + println!( + "{prefix}: {} nodes, {} edges, {} FTS entries (snapshot {snapshot_key})", + stats.node_count, stats.edge_count, stats.fts_entries + ); + for (node_type, count) in &stats.nodes_by_type { + println!(" {node_type}: {count}"); + } + render_embedding_message(&embed_result); + } + + Ok(()) +} + pub fn run_update(ctx: &ProjectContext, json: bool, skip_embeddings: bool) -> anyhow::Result<()> { let graph = Graph::load_from_directory(&ctx.graph_dir, ctx.schema.clone())?; let (snapshot_key, index_path) = ctx.ensure_active_index_seeded()?; if !index_path.exists() { - return run_rebuild(ctx, json, skip_embeddings); + return run_rebuild(ctx, json, skip_embeddings, false); } let index = Index::open(&index_path)?; @@ -87,6 +295,14 @@ pub fn run_update(ctx: &ProjectContext, json: bool, skip_embeddings: bool) -> an render_embedding_message(&embed_result); } + // Tier 3: best-effort snapshot store maintenance. + let _ = snapshot_cmd::run_prune( + ctx, + &PruneOptions::default(), + false, + snapshot_cmd::PruneOutput::Silent, + ); + Ok(()) } @@ -228,3 +444,164 @@ fn render_stats( Ok(()) } + +#[cfg(test)] +mod tests { + use super::*; + use std::fs; + use tempyr_core::project as project_mod; + + fn make_ctx(tmp: &Path) -> ProjectContext { + let tempyr_dir = tmp.join(".tempyr"); + fs::create_dir_all(&tempyr_dir).unwrap(); + fs::create_dir_all(tmp.join("graph")).unwrap(); + fs::write(tempyr_dir.join("schema.toml"), "name = 'x'\n").unwrap(); + let cache = project_mod::cache_layout(tmp, &tempyr_dir); + ProjectContext { + root: tmp.to_path_buf(), + graph_dir: tmp.join("graph"), + tempyr_dir, + cache, + schema: tempyr_core::schema::Schema { + meta: tempyr_core::schema::SchemaMeta { + version: "1".to_string(), + description: String::new(), + }, + node_types: Default::default(), + edge_types: Default::default(), + }, + } + } + + fn touch_shared_snapshot(ctx: &ProjectContext) { + let shared = ctx.shared_snapshot_index_path().unwrap(); + fs::create_dir_all(shared.parent().unwrap()).unwrap(); + fs::write(&shared, b"pretend index").unwrap(); + } + + #[test] + fn slot_uses_existing_when_snapshot_present_and_not_forced() { + let tmp = tempfile::tempdir().unwrap(); + let ctx = make_ctx(tmp.path()); + touch_shared_snapshot(&ctx); + let shared = ctx.shared_snapshot_index_path().unwrap(); + + let slot = negotiate_rebuild_slot(&ctx, &shared, false, Duration::from_millis(50)).unwrap(); + + assert!(matches!( + slot, + RebuildSlot::UseExisting { + built_by_other: false + } + )); + } + + #[test] + fn slot_takes_lock_to_build_when_snapshot_missing() { + let tmp = tempfile::tempdir().unwrap(); + let ctx = make_ctx(tmp.path()); + let shared = ctx.shared_snapshot_index_path().unwrap(); + + let slot = negotiate_rebuild_slot(&ctx, &shared, false, Duration::from_millis(50)).unwrap(); + + assert!(matches!(slot, RebuildSlot::Build(_))); + } + + #[test] + fn slot_forces_build_even_when_snapshot_present() { + let tmp = tempfile::tempdir().unwrap(); + let ctx = make_ctx(tmp.path()); + touch_shared_snapshot(&ctx); + let shared = ctx.shared_snapshot_index_path().unwrap(); + + // force=true must bypass the short-circuit and take the build lock. + let slot = negotiate_rebuild_slot(&ctx, &shared, true, Duration::from_millis(50)).unwrap(); + assert!(matches!(slot, RebuildSlot::Build(_))); + } + + #[test] + fn slot_blocking_acquire_succeeds_after_other_releases() { + // Simulate: another process holds the build lock, never publishes a + // snapshot, then releases. The waiter should escalate from "wait for + // snapshot" → "block-acquire" → finally take the lock and rebuild. + let tmp = tempfile::tempdir().unwrap(); + let ctx = make_ctx(tmp.path()); + let shared = ctx.shared_snapshot_index_path().unwrap(); + + let other_lock = ctx.try_acquire_snapshot_build_lock().unwrap().unwrap(); + + let cache = ctx.cache.clone(); + let snapshot_key = { + let layout = + project_mod::IndexLayout::resolve(&ctx.root, &ctx.graph_dir, &ctx.tempyr_dir) + .unwrap(); + layout.snapshot_key().unwrap() + }; + + let releaser = std::thread::spawn(move || { + // Hold long enough that the waiter exhausts the snapshot poll + // and enters the blocking-acquire loop, then release. + std::thread::sleep(Duration::from_millis(60)); + drop(other_lock); + // Keep `cache` and `snapshot_key` alive for the duration so the + // lock-file path stays stable. + let _ = (&cache, &snapshot_key); + }); + + // Total budget = 2 * wait_budget = 200ms. Release happens at ~60ms, + // so the blocking-acquire phase should succeed well within that. + let slot = + negotiate_rebuild_slot(&ctx, &shared, false, Duration::from_millis(100)).unwrap(); + assert!(matches!(slot, RebuildSlot::Build(_))); + releaser.join().unwrap(); + } + + #[test] + fn slot_uses_snapshot_when_other_publishes_during_wait() { + // Other process holds the lock, publishes the snapshot during the + // wait, then releases. The waiter should short-circuit to UseExisting + // without re-doing the work. + let tmp = tempfile::tempdir().unwrap(); + let ctx = make_ctx(tmp.path()); + let shared = ctx.shared_snapshot_index_path().unwrap(); + let shared_for_thread = shared.clone(); + + let other_lock = ctx.try_acquire_snapshot_build_lock().unwrap().unwrap(); + + let publisher = std::thread::spawn(move || { + std::thread::sleep(Duration::from_millis(40)); + fs::create_dir_all(shared_for_thread.parent().unwrap()).unwrap(); + fs::write(&shared_for_thread, b"published").unwrap(); + drop(other_lock); + }); + + let slot = + negotiate_rebuild_slot(&ctx, &shared, false, Duration::from_millis(500)).unwrap(); + assert!(matches!( + slot, + RebuildSlot::UseExisting { + built_by_other: true + } + )); + publisher.join().unwrap(); + } + + #[test] + fn slot_blocking_acquire_times_out_when_lock_never_releases() { + let tmp = tempfile::tempdir().unwrap(); + let ctx = make_ctx(tmp.path()); + let shared = ctx.shared_snapshot_index_path().unwrap(); + + let _held = ctx.try_acquire_snapshot_build_lock().unwrap().unwrap(); + + // With no publisher and no release, total time bounded by 2*budget. + // Use a small budget so the test is fast. + let err = + negotiate_rebuild_slot(&ctx, &shared, false, Duration::from_millis(60)).unwrap_err(); + assert!( + err.to_string() + .contains("Could not acquire snapshot build lock"), + "unexpected error: {err}" + ); + } +} diff --git a/crates/tempyr-cli/src/commands/init.rs b/crates/tempyr-cli/src/commands/init.rs index 9d5942a..509804c 100644 --- a/crates/tempyr-cli/src/commands/init.rs +++ b/crates/tempyr-cli/src/commands/init.rs @@ -271,7 +271,7 @@ fn initialize_project(root: &Path, selections: &OnboardingSelections) -> anyhow: let ctx = ProjectContext::find(Some(root.join("graph").as_path()))?; println!(); println!("Running initial index rebuild..."); - if let Err(err) = index_cmd::run_rebuild(&ctx, false, false) { + if let Err(err) = index_cmd::run_rebuild(&ctx, false, false, false) { eprintln!("Warning: initial index rebuild failed: {err}"); } } diff --git a/crates/tempyr-cli/src/commands/mod.rs b/crates/tempyr-cli/src/commands/mod.rs index dac8dd4..0cf56d7 100644 --- a/crates/tempyr-cli/src/commands/mod.rs +++ b/crates/tempyr-cli/src/commands/mod.rs @@ -23,6 +23,7 @@ pub(crate) mod process_utils; pub mod rename; pub mod render_cmd; pub mod search; +pub mod snapshot_cmd; pub mod status_cmd; pub mod traverse; pub mod update; diff --git a/crates/tempyr-cli/src/commands/snapshot_cmd.rs b/crates/tempyr-cli/src/commands/snapshot_cmd.rs new file mode 100644 index 0000000..6820d28 --- /dev/null +++ b/crates/tempyr-cli/src/commands/snapshot_cmd.rs @@ -0,0 +1,792 @@ +//! Snapshot store management. +//! +//! The shared snapshot store at `/tempyr/snapshots//` +//! accumulates one ~1.8 MB SQLite index per unique graph state ever indexed +//! by any worktree. With no upper bound, it can balloon to gigabytes. +//! +//! `tempyr snapshot prune` enforces a Nix-style hybrid retention policy: +//! +//! 1. **Pinned set** — every snapshot key cited by a live worktree's +//! `/worktrees//snapshot-key.txt` cursor is permanent. +//! Stale worktree dirs (no matching `.git/worktrees//` private admin +//! dir, when `git worktree list` is available) are excluded from the pin +//! set so their old cursors don't keep dead snapshots alive forever. +//! 2. **Recent buffer** — after pinning, keep up to `--keep-recent` of the +//! most-recently-modified snapshots, even if they are not pinned. This +//! cushions branch-switching workflows so the immediate previous state is +//! not evicted when a new snapshot lands. +//! 3. **Size cap** — among everything that is neither pinned nor in the +//! recent buffer, evict in least-recently-modified order until the total +//! snapshot-store size is under `--max-size`. +//! +//! Deletion is two-phase to dodge open-file races on Windows: each victim is +//! first renamed to a sibling `/snapshots/.gc---/` +//! (invisible to fresh `current_index_path` lookups), then `remove_dir_all` +//! is attempted. If removal fails (most often because a long-running query +//! still holds the file open), the renamed dir is left for the next pass. + +use std::collections::HashSet; +use std::fs; +use std::io; +use std::path::{Path, PathBuf}; +use std::time::{SystemTime, UNIX_EPOCH}; + +use anyhow::Context; +use serde::Serialize; +use tempyr_core::project::{self, CacheLayout}; + +use crate::config::ProjectContext; + +/// Prefix marking a snapshot dir that has been atomically renamed for +/// deletion by [`run_prune`] but not yet `remove_dir_all`-ed. New readers +/// look up snapshots by exact key, so a renamed dir is invisible to them. +const GC_PREFIX: &str = ".gc-"; + +/// Default size cap for `tempyr snapshot prune --max-size`. At ~1.8 MB per +/// snapshot this fits ~280 snapshots, comfortably more than the typical +/// working set across worktrees on one repo. Also surfaced by `tempyr +/// doctor` (see [`SNAPSHOT_STORE_HINT_BYTES`]). +pub const DEFAULT_MAX_SIZE_BYTES: u64 = 500 * 1024 * 1024; + +/// Default size of the "recent buffer" — the number of newest snapshots to +/// keep beyond the pinned set, so branch-switching doesn't immediately +/// evict the previous state. +pub const DEFAULT_KEEP_RECENT: usize = 20; + +/// `tempyr doctor` switches the snapshot-store summary from "ok" to +/// "consider `tempyr snapshot prune`" when either threshold is exceeded. +/// Set well below [`DEFAULT_MAX_SIZE_BYTES`] so users see the hint before +/// they hit the cap. +pub const SNAPSHOT_STORE_HINT_DIRS: usize = 200; +pub const SNAPSHOT_STORE_HINT_BYTES: u64 = 256 * 1024 * 1024; + +#[derive(Debug, Clone)] +pub struct PruneOptions { + pub keep_recent: usize, + pub max_size_bytes: u64, +} + +impl Default for PruneOptions { + fn default() -> Self { + Self { + keep_recent: DEFAULT_KEEP_RECENT, + max_size_bytes: DEFAULT_MAX_SIZE_BYTES, + } + } +} + +/// Outcome of one [`run_prune`] invocation. +/// +/// All three "kept" counts sum to the number of snapshots remaining on +/// disk after the prune. Each snapshot is counted in exactly one bucket: +/// +/// - `kept_pinned`: cited by a live worktree's `snapshot-key.txt` cursor. +/// - `kept_buffer`: not pinned, but among the most-recent `keep_recent` by mtime. +/// - `kept_under_cap`: neither pinned nor in the buffer, but kept because +/// evicting them wasn't necessary to fit under `max_size_bytes`. +/// +/// `total_bytes_after_estimate` is the sum of sizes for all three kept +/// buckets — it's the on-disk size we expect after eviction completes. +#[derive(Debug, Serialize)] +pub struct PruneReport { + pub kept_pinned: usize, + pub kept_buffer: usize, + pub kept_under_cap: usize, + pub evicted: Vec, + /// Snapshots whose `rename` to a `.gc-*` stub or `remove_dir_all` + /// failed (most often a long-running reader still holds the SQLite + /// file open on Windows). The stub is left in place; the next prune + /// pass sweeps it up. + pub failures: Vec, + pub total_bytes_before: u64, + pub total_bytes_after_estimate: u64, +} + +#[derive(Debug, Serialize)] +pub struct EvictedEntry { + pub snapshot_key: String, + pub bytes: u64, +} + +#[derive(Debug, Serialize)] +pub struct EvictionFailure { + pub snapshot_key: String, + pub message: String, +} + +#[derive(Debug)] +struct SnapshotEntry { + snapshot_key: String, + path: PathBuf, + bytes: u64, + modified_secs: u64, +} + +/// Parse `--max-size` strings like `200`, `200K`, `500M`, `2G`. +pub fn parse_size(input: &str) -> anyhow::Result { + let trimmed = input.trim(); + if trimmed.is_empty() { + anyhow::bail!("size value is empty"); + } + let bytes = trimmed.as_bytes(); + let last = bytes[bytes.len() - 1]; + let (digits, multiplier): (&str, u64) = match last { + b'k' | b'K' => (&trimmed[..trimmed.len() - 1], 1024), + b'm' | b'M' => (&trimmed[..trimmed.len() - 1], 1024 * 1024), + b'g' | b'G' => (&trimmed[..trimmed.len() - 1], 1024 * 1024 * 1024), + b'0'..=b'9' => (trimmed, 1), + _ => anyhow::bail!("unrecognized size suffix in {input:?}; use plain bytes or K/M/G"), + }; + let n: u64 = digits + .parse() + .with_context(|| format!("could not parse size value {input:?}"))?; + Ok(n.saturating_mul(multiplier)) +} + +/// Output format for `tempyr snapshot prune`. Auto-prune callers +/// (rebuild/update tails) use [`PruneOutput::Silent`]; the CLI passes +/// `Human` or `Json` based on the global `--json` flag. +#[derive(Debug, Clone, Copy)] +pub enum PruneOutput { + Silent, + Human, + Json, +} + +pub fn run_prune( + ctx: &ProjectContext, + opts: &PruneOptions, + dry_run: bool, + output: PruneOutput, +) -> anyhow::Result { + let cache = ctx.cache_layout(); + let snapshots_root = cache.snapshots_root(); + + let report = if snapshots_root.exists() { + plan_and_evict(&snapshots_root, cache, opts, dry_run)? + } else { + PruneReport { + kept_pinned: 0, + kept_buffer: 0, + kept_under_cap: 0, + evicted: Vec::new(), + failures: Vec::new(), + total_bytes_before: 0, + total_bytes_after_estimate: 0, + } + }; + + render_prune_output(&report, dry_run, output)?; + Ok(report) +} + +fn plan_and_evict( + snapshots_root: &Path, + cache: &CacheLayout, + opts: &PruneOptions, + dry_run: bool, +) -> anyhow::Result { + let mut entries = enumerate_snapshots(snapshots_root)?; + entries.sort_by_key(|e| std::cmp::Reverse(e.modified_secs)); + + let pinned = collect_pin_set(cache); + + // Pass 1: keep all pinned, plus the most-recent `keep_recent` of the rest. + let mut kept: HashSet = HashSet::new(); + let mut kept_pinned = 0; + let mut kept_buffer = 0; + for entry in &entries { + if pinned.contains(&entry.snapshot_key) && kept.insert(entry.snapshot_key.clone()) { + kept_pinned += 1; + } + } + for entry in &entries { + if kept_buffer >= opts.keep_recent { + break; + } + if kept.insert(entry.snapshot_key.clone()) { + kept_buffer += 1; + } + } + + // Pass 2: among unkept, walk newest-first and keep each whose size fits + // under the remaining cap headroom. Older unkept entries that can't fit + // are evicted. This is a soft cap, not a hard LRU: a single huge + // snapshot doesn't kick out smaller older ones that do fit. + let total_bytes_before: u64 = entries.iter().map(|e| e.bytes).sum(); + let kept_bytes: u64 = entries + .iter() + .filter(|e| kept.contains(&e.snapshot_key)) + .map(|e| e.bytes) + .sum(); + let mut running = kept_bytes; + let mut kept_under_cap = 0; + let mut evict: Vec<&SnapshotEntry> = Vec::new(); + let mut unkept: Vec<&SnapshotEntry> = entries + .iter() + .filter(|e| !kept.contains(&e.snapshot_key)) + .collect(); + unkept.sort_by_key(|e| std::cmp::Reverse(e.modified_secs)); + for entry in &unkept { + if running + entry.bytes <= opts.max_size_bytes { + running += entry.bytes; + kept_under_cap += 1; + } else { + evict.push(*entry); + } + } + + let mut report = PruneReport { + kept_pinned, + kept_buffer, + kept_under_cap, + evicted: Vec::new(), + failures: Vec::new(), + total_bytes_before, + total_bytes_after_estimate: running, + }; + + for entry in &evict { + if dry_run { + report.evicted.push(EvictedEntry { + snapshot_key: entry.snapshot_key.clone(), + bytes: entry.bytes, + }); + continue; + } + match two_phase_remove(&entry.path) { + Ok(()) => report.evicted.push(EvictedEntry { + snapshot_key: entry.snapshot_key.clone(), + bytes: entry.bytes, + }), + Err(err) => report.failures.push(EvictionFailure { + snapshot_key: entry.snapshot_key.clone(), + message: err.to_string(), + }), + } + } + + // Sweep `.gc-*` stubs that prior runs left behind (e.g. EBUSY on Windows). + if !dry_run { + let _ = sweep_orphaned_gc_dirs(snapshots_root); + } + + Ok(report) +} + +fn render_prune_output( + report: &PruneReport, + dry_run: bool, + output: PruneOutput, +) -> anyhow::Result<()> { + match output { + PruneOutput::Silent => Ok(()), + PruneOutput::Json => { + println!("{}", serde_json::to_string_pretty(report)?); + Ok(()) + } + PruneOutput::Human => { + let verb = if dry_run { "would evict" } else { "evicted" }; + let total_kept = report.kept_pinned + report.kept_buffer + report.kept_under_cap; + println!( + "Snapshot prune{}: {verb} {} snapshots, kept {total_kept} ({} pinned, {} buffer, {} under cap)", + if dry_run { " (dry run)" } else { "" }, + report.evicted.len(), + report.kept_pinned, + report.kept_buffer, + report.kept_under_cap, + ); + if !report.failures.is_empty() { + println!(" {} eviction(s) failed:", report.failures.len()); + for failure in &report.failures { + println!(" {}: {}", failure.snapshot_key, failure.message); + } + } + Ok(()) + } + } +} + +pub fn run_list(ctx: &ProjectContext, json: bool) -> anyhow::Result<()> { + let cache = ctx.cache_layout(); + let snapshots_root = cache.snapshots_root(); + let entries = if snapshots_root.exists() { + enumerate_snapshots(&snapshots_root)? + } else { + Vec::new() + }; + let pinned = collect_pin_set(cache); + + if json { + #[derive(Serialize)] + struct Row<'a> { + snapshot_key: &'a str, + bytes: u64, + modified_secs: u64, + pinned: bool, + } + let rows: Vec = entries + .iter() + .map(|e| Row { + snapshot_key: &e.snapshot_key, + bytes: e.bytes, + modified_secs: e.modified_secs, + pinned: pinned.contains(&e.snapshot_key), + }) + .collect(); + println!("{}", serde_json::to_string_pretty(&rows)?); + } else { + println!("Snapshots in {}:", snapshots_root.display()); + for entry in &entries { + let marker = if pinned.contains(&entry.snapshot_key) { + "PINNED" + } else { + " " + }; + println!( + " {marker} {} {:>10} bytes mtime={}", + entry.snapshot_key, entry.bytes, entry.modified_secs + ); + } + println!( + "Total: {} snapshots, {} pinned", + entries.len(), + entries + .iter() + .filter(|e| pinned.contains(&e.snapshot_key)) + .count() + ); + } + Ok(()) +} + +fn enumerate_snapshots(snapshots_root: &Path) -> io::Result> { + let mut out = Vec::new(); + for entry in fs::read_dir(snapshots_root)? { + let entry = entry?; + let file_type = entry.file_type()?; + if !file_type.is_dir() { + continue; + } + let name = match entry.file_name().into_string() { + Ok(name) => name, + Err(_) => continue, + }; + // Skip the `.locks` coordination dir and any partial-prune `.gc-*` + // stubs. `is_snapshot_key` accepts only the 16-hex-char names. + if !project::is_snapshot_key(&name) { + continue; + } + let path = entry.path(); + let bytes = directory_size(&path).unwrap_or(0); + let modified_secs = directory_modified_secs(&path).unwrap_or(0); + out.push(SnapshotEntry { + snapshot_key: name, + path, + bytes, + modified_secs, + }); + } + Ok(out) +} + +/// Build the set of snapshot keys that no eviction may touch. +/// +/// Each per-worktree `/worktrees//snapshot-key.txt` +/// is a GC root pinning whatever snapshot key it contains. When `git +/// worktree list` works against the cache's owning repo we additionally +/// drop cursors whose `wt-id` no longer corresponds to a live worktree — +/// those are dangling pointers from worktrees the user deleted. When +/// git is unavailable we trust every cursor on disk: better to retain +/// too many snapshots than to silently evict one a worktree is still +/// pointing at. +fn collect_pin_set(cache: &CacheLayout) -> HashSet { + let mut pinned = HashSet::new(); + let worktrees_root = cache.worktrees_root(); + let live_wt_ids = live_worktree_ids(cache); + let Ok(read) = fs::read_dir(&worktrees_root) else { + return pinned; + }; + for entry in read.flatten() { + let Ok(file_type) = entry.file_type() else { + continue; + }; + if !file_type.is_dir() { + continue; + } + let cursor_path = entry.path().join("snapshot-key.txt"); + let Ok(raw) = fs::read_to_string(&cursor_path) else { + continue; + }; + let key = raw.trim().to_string(); + if !project::is_snapshot_key(&key) { + continue; + } + + // When we have a live worktree list, drop cursors whose worktree-id + // is not present — those are dangling. Without git we keep all of + // them. + if let Some(ref live) = live_wt_ids { + let wt_id = entry.file_name().to_string_lossy().to_string(); + if !live.contains(&wt_id) { + continue; + } + } + pinned.insert(key); + } + pinned +} + +/// Best-effort enumeration of live git worktree-ids for the repo that +/// owns this snapshot store. Each id is [`project::short_path_hash`] of +/// that worktree's private `.git` admin dir — the same hash +/// [`tempyr_core::project::IndexLayout`] uses to derive each worktree's +/// cache subdir. +/// +/// We pass `--git-dir=` rather than relying on the +/// process cwd, so a `cargo test` running from the Tempyr repo doesn't +/// accidentally treat the *test's* synthetic store as belonging to the +/// Tempyr worktree. Returns `None` when the cache isn't backed by a git +/// repo (non-git tempyr projects keep their cache under `.tempyr/cache/`) +/// or when git is unavailable; callers treat `None` as "trust all +/// on-disk cursors" so we never over-evict. +fn live_worktree_ids(cache: &CacheLayout) -> Option> { + // For git-backed projects, `cache.shared_root` is `/tempyr`. + // Bail out if the parent doesn't look like a git common dir (no `HEAD` + // file, no `commondir` file pointing elsewhere). + let common_dir = cache.shared_root.parent()?; + if !common_dir.join("HEAD").is_file() && !common_dir.join("commondir").is_file() { + return None; + } + let output = std::process::Command::new("git") + .arg(format!("--git-dir={}", common_dir.display())) + .args(["worktree", "list", "--porcelain"]) + .output() + .ok()?; + if !output.status.success() { + return None; + } + let mut ids = HashSet::new(); + let stdout = String::from_utf8_lossy(&output.stdout); + for line in stdout.lines() { + let Some(path) = line.strip_prefix("worktree ") else { + continue; + }; + if let Some(dirs) = project::resolve_git_dirs(&PathBuf::from(path)) { + ids.insert(project::short_path_hash(&dirs.git_dir)); + } + } + Some(ids) +} + +fn directory_size(path: &Path) -> io::Result { + let mut total: u64 = 0; + for entry in walkdir::WalkDir::new(path) { + let Ok(entry) = entry else { continue }; + let metadata = match entry.metadata() { + Ok(m) => m, + Err(_) => continue, + }; + if metadata.is_file() { + total = total.saturating_add(metadata.len()); + } + } + Ok(total) +} + +fn directory_modified_secs(path: &Path) -> io::Result { + let metadata = fs::metadata(path)?; + Ok(metadata + .modified() + .ok() + .and_then(|m| m.duration_since(UNIX_EPOCH).ok()) + .map(|d| d.as_secs()) + .unwrap_or(0)) +} + +fn two_phase_remove(snapshot_dir: &Path) -> io::Result<()> { + let parent = snapshot_dir + .parent() + .ok_or_else(|| io::Error::other("snapshot dir has no parent"))?; + let name = snapshot_dir + .file_name() + .ok_or_else(|| io::Error::other("snapshot dir has no name"))? + .to_string_lossy() + .to_string(); + let nonce = SystemTime::now() + .duration_since(UNIX_EPOCH) + .unwrap_or_default() + .as_nanos(); + let staged = parent.join(format!("{GC_PREFIX}{name}-{}-{nonce}", std::process::id())); + + fs::rename(snapshot_dir, &staged)?; + // Try to remove the renamed dir. On Windows this can fail with EBUSY + // if a long-running reader still has the index file open; in that + // case the staged dir lingers and the next prune sweeps it up. + fs::remove_dir_all(&staged) +} + +fn sweep_orphaned_gc_dirs(snapshots_root: &Path) -> io::Result<()> { + for entry in fs::read_dir(snapshots_root)? { + let Ok(entry) = entry else { continue }; + let name = entry.file_name(); + let name = name.to_string_lossy(); + if !name.starts_with(GC_PREFIX) { + continue; + } + let _ = fs::remove_dir_all(entry.path()); + } + Ok(()) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn parse_size_accepts_plain_bytes() { + assert_eq!(parse_size("1024").unwrap(), 1024); + } + + #[test] + fn parse_size_accepts_suffixes() { + assert_eq!(parse_size("1K").unwrap(), 1024); + assert_eq!(parse_size("2k").unwrap(), 2048); + assert_eq!(parse_size("3M").unwrap(), 3 * 1024 * 1024); + assert_eq!(parse_size("4g").unwrap(), 4 * 1024 * 1024 * 1024); + } + + #[test] + fn parse_size_rejects_unknown_suffix() { + assert!(parse_size("1T").is_err()); + assert!(parse_size("1.5M").is_err()); + assert!(parse_size("").is_err()); + } + + use std::fs; + + /// Build a synthetic snapshot store under `tmp` with the given keys, each + /// containing an `index.db` of `bytes_per_snapshot` bytes. Modified-time + /// is back-dated so the first key is oldest and the last key is newest. + fn populate_snapshot_store( + tmp: &Path, + keys: &[&str], + bytes_per_snapshot: usize, + ) -> CacheLayout { + let cache = CacheLayout { + shared_root: tmp.to_path_buf(), + worktree_root: tmp.join("worktrees").join("default"), + }; + let snapshots_root = cache.snapshots_root(); + fs::create_dir_all(&snapshots_root).unwrap(); + let payload = vec![0u8; bytes_per_snapshot]; + for (i, key) in keys.iter().enumerate() { + let dir = snapshots_root.join(key); + fs::create_dir_all(&dir).unwrap(); + let path = dir.join("index.db"); + fs::write(&path, &payload).unwrap(); + // Set mtime via filetime so test ordering is deterministic. + let mtime = filetime_for_offset_secs((keys.len() - i) as i64 * 60); + filetime::set_file_mtime(&path, mtime).unwrap(); + filetime::set_file_mtime(&dir, mtime).unwrap(); + } + cache + } + + fn write_worktree_cursor(cache: &CacheLayout, wt_id: &str, snapshot_key: &str) { + let dir = cache.worktrees_root().join(wt_id); + fs::create_dir_all(&dir).unwrap(); + fs::write(dir.join("snapshot-key.txt"), snapshot_key).unwrap(); + } + + fn filetime_for_offset_secs(secs_ago: i64) -> filetime::FileTime { + let now = std::time::SystemTime::now(); + let target = if secs_ago >= 0 { + now - std::time::Duration::from_secs(secs_ago as u64) + } else { + now + std::time::Duration::from_secs((-secs_ago) as u64) + }; + filetime::FileTime::from_system_time(target) + } + + fn make_ctx_for_cache(cache: &CacheLayout) -> ProjectContext { + let root = cache.shared_root.clone(); + let tempyr_dir = root.join(".tempyr"); + std::fs::create_dir_all(&tempyr_dir).unwrap(); + ProjectContext { + root: root.clone(), + graph_dir: root.join("graph"), + tempyr_dir, + cache: cache.clone(), + schema: tempyr_core::schema::Schema { + meta: tempyr_core::schema::SchemaMeta { + version: "1".to_string(), + description: String::new(), + }, + node_types: Default::default(), + edge_types: Default::default(), + }, + } + } + + fn keys_on_disk(cache: &CacheLayout) -> Vec { + let mut keys: Vec = fs::read_dir(cache.snapshots_root()) + .unwrap() + .filter_map(|e| e.ok()) + .filter(|e| e.file_type().map(|t| t.is_dir()).unwrap_or(false)) + .filter_map(|e| e.file_name().into_string().ok()) + .filter(|n| n.len() == 16 && n.bytes().all(|b| b.is_ascii_hexdigit())) + .collect(); + keys.sort(); + keys + } + + #[test] + fn prune_keeps_pinned_snapshots() { + let tmp = tempfile::tempdir().unwrap(); + let keys = [ + "0000000000000001", + "0000000000000002", + "0000000000000003", + "0000000000000004", + ]; + let cache = populate_snapshot_store(tmp.path(), &keys, 8); + write_worktree_cursor(&cache, "wt-a", "0000000000000001"); + let ctx = make_ctx_for_cache(&cache); + + let opts = PruneOptions { + keep_recent: 0, + max_size_bytes: 1, // force eviction of everything not pinned/buffered + }; + let report = run_prune(&ctx, &opts, false, PruneOutput::Silent).unwrap(); + + // The pinned key must survive even though it's the oldest. + let remaining = keys_on_disk(&cache); + assert!(remaining.contains(&"0000000000000001".to_string())); + assert_eq!(report.kept_pinned, 1); + // Everything else evicted. + assert!( + report + .evicted + .iter() + .all(|e| e.snapshot_key != "0000000000000001") + ); + assert_eq!(remaining.len(), 1); + } + + #[test] + fn prune_keeps_recent_buffer_above_pin_set() { + let tmp = tempfile::tempdir().unwrap(); + let keys = [ + "1111111111111111", // oldest + "2222222222222222", + "3333333333333333", + "4444444444444444", + "5555555555555555", // newest + ]; + let cache = populate_snapshot_store(tmp.path(), &keys, 8); + // No worktree cursors → no pins. + let ctx = make_ctx_for_cache(&cache); + + let opts = PruneOptions { + keep_recent: 2, + max_size_bytes: 1, // cap forces only buffer to survive + }; + let report = run_prune(&ctx, &opts, false, PruneOutput::Silent).unwrap(); + + assert_eq!(report.kept_pinned, 0); + assert_eq!(report.kept_buffer, 2); + // Only the 2 newest survive. + let remaining = keys_on_disk(&cache); + assert_eq!( + remaining, + vec![ + "4444444444444444".to_string(), + "5555555555555555".to_string() + ] + ); + } + + #[test] + fn prune_evicts_lru_under_size_cap() { + let tmp = tempfile::tempdir().unwrap(); + let keys = [ + "aaaaaaaaaaaaaaaa", + "bbbbbbbbbbbbbbbb", + "cccccccccccccccc", + "dddddddddddddddd", + ]; + // 100 bytes per snapshot → 400 total. + let cache = populate_snapshot_store(tmp.path(), &keys, 100); + let ctx = make_ctx_for_cache(&cache); + + let opts = PruneOptions { + keep_recent: 0, + max_size_bytes: 250, // fits 2 snapshots, evict 2 + }; + let report = run_prune(&ctx, &opts, false, PruneOutput::Silent).unwrap(); + + // The 2 newest survive (cccc + dddd). + let remaining = keys_on_disk(&cache); + assert_eq!( + remaining, + vec![ + "cccccccccccccccc".to_string(), + "dddddddddddddddd".to_string() + ] + ); + assert_eq!(report.evicted.len(), 2); + assert!(report.total_bytes_after_estimate <= opts.max_size_bytes); + } + + #[test] + fn prune_dry_run_does_not_delete() { + let tmp = tempfile::tempdir().unwrap(); + let keys = ["1234567890abcdef", "abcdef1234567890"]; + let cache = populate_snapshot_store(tmp.path(), &keys, 50); + let ctx = make_ctx_for_cache(&cache); + + let opts = PruneOptions { + keep_recent: 0, + max_size_bytes: 1, + }; + let report = run_prune(&ctx, &opts, true, PruneOutput::Silent).unwrap(); + + assert_eq!(report.evicted.len(), 2); + assert_eq!(keys_on_disk(&cache).len(), 2, "dry run must not delete"); + } + + #[test] + fn prune_skips_locks_dir_and_gc_stubs() { + let tmp = tempfile::tempdir().unwrap(); + let keys = ["0123456789abcdef"]; + let cache = populate_snapshot_store(tmp.path(), &keys, 8); + // Create a .locks dir and a .gc-* stub — these must be ignored. + std::fs::create_dir_all(cache.snapshot_locks_dir()).unwrap(); + std::fs::create_dir_all(cache.snapshots_root().join(".gc-stale-12345")).unwrap(); + std::fs::write( + cache + .snapshots_root() + .join(".gc-stale-12345") + .join("index.db"), + b"orphan", + ) + .unwrap(); + + let ctx = make_ctx_for_cache(&cache); + let opts = PruneOptions { + keep_recent: 100, + max_size_bytes: u64::MAX, + }; + let report = run_prune(&ctx, &opts, false, PruneOutput::Silent).unwrap(); + + // Real snapshot survived. + assert!(keys_on_disk(&cache).contains(&"0123456789abcdef".to_string())); + // Orphan .gc-* dirs got swept. + assert!(!cache.snapshots_root().join(".gc-stale-12345").exists()); + // .locks dir unaffected. + assert!(cache.snapshot_locks_dir().exists()); + // Nothing real evicted. + assert!(report.evicted.is_empty()); + } +} diff --git a/crates/tempyr-cli/src/config.rs b/crates/tempyr-cli/src/config.rs index 6244692..0e18a41 100644 --- a/crates/tempyr-cli/src/config.rs +++ b/crates/tempyr-cli/src/config.rs @@ -2,7 +2,7 @@ use std::path::{Path, PathBuf}; use tempyr_core::graph::Graph; use tempyr_core::project; -use tempyr_core::project::{CacheLayout, IndexLayout}; +use tempyr_core::project::{CacheLayout, IndexLayout, SnapshotBuildLock}; use tempyr_core::schema::Schema; use tempyr_index::embeddings::{ self, EmbeddingConfig, ResolvedEmbeddingConfig, resolve_embedding_config, @@ -101,6 +101,24 @@ impl ProjectContext { Ok(layout.publish_active_snapshot()?) } + /// Path to the shared content-addressed snapshot index for the current + /// graph state. The file may or may not exist yet. + pub fn shared_snapshot_index_path(&self) -> anyhow::Result { + let layout = self.index_layout()?; + Ok(layout.shared_snapshot_index_path()?) + } + + /// Try to acquire the per-snapshot build lock. Returns `Ok(None)` if + /// another process is already building this snapshot. + pub fn try_acquire_snapshot_build_lock(&self) -> anyhow::Result> { + let layout = self.index_layout()?; + Ok(layout.try_acquire_snapshot_build_lock()?) + } + + pub fn cache_layout(&self) -> &CacheLayout { + &self.cache + } + /// Refresh the index so query commands keep working after graph mutations. pub fn refresh_index_for_current_snapshot(&self) -> anyhow::Result<()> { let layout = self.index_layout()?; diff --git a/crates/tempyr-cli/src/main.rs b/crates/tempyr-cli/src/main.rs index e36d83b..8818f50 100644 --- a/crates/tempyr-cli/src/main.rs +++ b/crates/tempyr-cli/src/main.rs @@ -248,6 +248,13 @@ pub enum Commands { action: IndexAction, }, + /// Snapshot store management (the content-addressed shared index store + /// under `/tempyr/snapshots/`). + Snapshot { + #[command(subcommand)] + action: SnapshotAction, + }, + /// Linear integration Linear { #[command(subcommand)] @@ -313,6 +320,33 @@ pub enum InterviewAction { List, } +#[derive(Subcommand)] +pub enum SnapshotAction { + /// Prune historical snapshots from the shared store. Pinned snapshots + /// (cited by any live worktree's `snapshot-key.txt`) are never evicted. + /// A configurable buffer of the most-recent snapshots is also kept; + /// beyond pinned + buffer, snapshots are evicted in LRU order until the + /// total snapshot-store size is under the cap. + Prune { + /// Keep at least this many of the most-recently-modified snapshots + /// even if they are not pinned. + #[arg(long, default_value_t = 20)] + keep_recent: usize, + /// Soft cap on the total bytes used by `/snapshots/`. + /// Accepts plain bytes or a suffix like `500M`, `2G`. The pruner + /// evicts unpinned, non-buffered snapshots in LRU order until the + /// total falls below this value. Pinned snapshots are never counted + /// against the cap. + #[arg(long, default_value = "500M")] + max_size: String, + /// Show what would be deleted without removing anything. + #[arg(long)] + dry_run: bool, + }, + /// List snapshots in the shared store with their pinned/buffer status. + List, +} + #[derive(Subcommand)] pub enum IndexAction { /// Full index rebuild from source files @@ -320,6 +354,14 @@ pub enum IndexAction { /// Refresh structural search data only; do not call embedding providers #[arg(long)] skip_embeddings: bool, + /// Rebuild from scratch even when the shared snapshot already exists. + /// Use this to recover from a corrupted snapshot index. Without this + /// flag, `rebuild` short-circuits to a free seed when the snapshot + /// for the current graph state has already been built (by this + /// worktree or another), since rebuilding would produce identical + /// output. + #[arg(long)] + force: bool, }, /// Incremental update (changed files only) Update { @@ -775,15 +817,38 @@ fn run(cli: Cli) -> anyhow::Result<()> { Commands::Index { action } => { let ctx = config::ProjectContext::find(cli.graph_dir.as_deref())?; match action { - IndexAction::Rebuild { skip_embeddings } => { - commands::index_cmd::run_rebuild(&ctx, cli.json, skip_embeddings) - } + IndexAction::Rebuild { + skip_embeddings, + force, + } => commands::index_cmd::run_rebuild(&ctx, cli.json, skip_embeddings, force), IndexAction::Update { skip_embeddings } => { commands::index_cmd::run_update(&ctx, cli.json, skip_embeddings) } IndexAction::Stats => commands::index_cmd::run_stats(&ctx, cli.json), } } + Commands::Snapshot { action } => { + let ctx = config::ProjectContext::find(cli.graph_dir.as_deref())?; + match action { + SnapshotAction::Prune { + keep_recent, + max_size, + dry_run, + } => { + let opts = commands::snapshot_cmd::PruneOptions { + keep_recent, + max_size_bytes: commands::snapshot_cmd::parse_size(&max_size)?, + }; + let output = if cli.json { + commands::snapshot_cmd::PruneOutput::Json + } else { + commands::snapshot_cmd::PruneOutput::Human + }; + commands::snapshot_cmd::run_prune(&ctx, &opts, dry_run, output).map(|_| ()) + } + SnapshotAction::List => commands::snapshot_cmd::run_list(&ctx, cli.json), + } + } Commands::Linear { action } => { let ctx = config::ProjectContext::find(cli.graph_dir.as_deref())?; match action { diff --git a/crates/tempyr-core/src/project.rs b/crates/tempyr-core/src/project.rs index 202fa83..ef2a131 100644 --- a/crates/tempyr-core/src/project.rs +++ b/crates/tempyr-core/src/project.rs @@ -10,7 +10,7 @@ //! from a different working directory than the active repo. use std::cell::RefCell; -use std::fs; +use std::fs::{self, File, OpenOptions}; use std::io; use std::path::{Path, PathBuf}; use std::time::{SystemTime, UNIX_EPOCH}; @@ -84,6 +84,27 @@ impl CacheLayout { .join("index.db") } + pub fn snapshots_root(&self) -> PathBuf { + self.shared_root.join("snapshots") + } + + /// Directory holding per-snapshot build coordination locks. + /// Lock files live here so the snapshot dir itself does not need to exist + /// before a builder takes the lock. + pub fn snapshot_locks_dir(&self) -> PathBuf { + self.shared_root.join("snapshots").join(".locks") + } + + pub fn snapshot_build_lock_path(&self, snapshot_key: &str) -> PathBuf { + self.snapshot_locks_dir() + .join(format!("{snapshot_key}.build.lock")) + } + + /// Root containing every per-worktree cursor directory. + pub fn worktrees_root(&self) -> PathBuf { + self.shared_root.join("worktrees") + } + pub fn embeddings_dir(&self) -> PathBuf { self.shared_root.join("embeddings") } @@ -98,6 +119,42 @@ pub struct IndexLayout { pub legacy_index_path: PathBuf, } +/// Held exclusive lock for building one specific snapshot key. Two worktrees +/// that race to build the same never-seen graph state will see one acquire +/// and the other receive `None` — the loser should wait briefly then re-check +/// whether the shared snapshot now exists rather than redoing the work. +/// +/// Drop releases the OS lock; the lockfile is left behind under +/// `/snapshots/.locks/` for reuse by the next builder. +#[derive(Debug)] +pub struct SnapshotBuildLock { + _file: File, +} + +impl SnapshotBuildLock { + /// Attempt a non-blocking exclusive acquire of the per-key build lock. + pub fn try_acquire(cache: &CacheLayout, snapshot_key: &str) -> io::Result> { + let locks_dir = cache.snapshot_locks_dir(); + fs::create_dir_all(&locks_dir)?; + let path = cache.snapshot_build_lock_path(snapshot_key); + + // `read(true)` is required for `File::try_lock` on Windows even though + // we never read — same constraint observed in PublisherLock. + let file = OpenOptions::new() + .read(true) + .write(true) + .create(true) + .truncate(false) + .open(&path)?; + + match file.try_lock() { + Ok(()) => Ok(Some(SnapshotBuildLock { _file: file })), + Err(std::fs::TryLockError::WouldBlock) => Ok(None), + Err(std::fs::TryLockError::Error(e)) => Err(e), + } + } +} + struct StagedActiveIndex { active_path: PathBuf, staged_path: PathBuf, @@ -207,6 +264,13 @@ impl IndexLayout { Ok(self.cache.snapshot_index_path(&self.snapshot_key()?)) } + /// Try to acquire the build lock for the current snapshot key. + /// Caller decides whether to wait, retry, or short-circuit if `None`. + pub fn try_acquire_snapshot_build_lock(&self) -> io::Result> { + let snapshot_key = self.snapshot_key()?; + SnapshotBuildLock::try_acquire(&self.cache, &snapshot_key) + } + pub fn current_index_path(&self) -> io::Result> { let snapshot_key = self.snapshot_key()?; let shared = self.cache.snapshot_index_path(&snapshot_key); @@ -711,7 +775,11 @@ fn read_commondir(git_dir: &Path) -> Option { fs::canonicalize(resolved).ok() } -fn short_path_hash(path: &Path) -> String { +/// Stable per-worktree id derived from the path of the worktree's private +/// `.git` admin dir. Used to namespace per-worktree state under +/// `/worktrees//` and exposed so snapshot pruning can +/// recompute the same id from a `git worktree list` result. +pub fn short_path_hash(path: &Path) -> String { let canonical = fs::canonicalize(path).unwrap_or_else(|_| path.to_path_buf()); let mut hasher = Hasher::new(); hasher.update(canonical.to_string_lossy().as_bytes()); @@ -719,6 +787,14 @@ fn short_path_hash(path: &Path) -> String { hex[..12].to_string() } +/// Snapshot keys are the first 16 hex chars of a BLAKE3 digest, see +/// [`graph_snapshot_key`]. This predicate filters out the `.locks` +/// coordination dir and any partial-prune `.gc-*` stubs from a `read_dir` +/// over `/snapshots/`. +pub fn is_snapshot_key(name: &str) -> bool { + name.len() == 16 && name.bytes().all(|b| b.is_ascii_hexdigit()) +} + #[cfg(test)] mod tests { use super::*; @@ -1222,6 +1298,61 @@ mod tests { ); } + #[test] + fn is_snapshot_key_accepts_only_canonical_form() { + assert!(is_snapshot_key("12971a01c28ece80")); + assert!(is_snapshot_key("0000000000000000")); + assert!(!is_snapshot_key("")); + assert!(!is_snapshot_key("12971a01c28ece8")); + assert!(!is_snapshot_key("12971a01c28ece800")); + assert!(!is_snapshot_key(".locks")); + assert!(!is_snapshot_key(".gc-12971a01c28ece80")); + assert!(!is_snapshot_key("ZZ971a01c28ece80")); + } + + #[test] + fn snapshot_build_lock_is_exclusive_per_key() { + let tmp = tempfile::tempdir().unwrap(); + let cache = CacheLayout { + shared_root: tmp.path().to_path_buf(), + worktree_root: tmp.path().join("worktrees").join("default"), + }; + let key = "deadbeefcafef00d"; + + let first = SnapshotBuildLock::try_acquire(&cache, key).unwrap(); + assert!(first.is_some()); + + let second = SnapshotBuildLock::try_acquire(&cache, key).unwrap(); + assert!( + second.is_none(), + "second acquire while first still held should return None" + ); + + // Distinct key — no contention. + let other = SnapshotBuildLock::try_acquire(&cache, "abc1234567890def").unwrap(); + assert!(other.is_some()); + + drop(first); + let after_release = SnapshotBuildLock::try_acquire(&cache, key).unwrap(); + assert!(after_release.is_some()); + } + + #[test] + fn snapshot_build_lock_creates_locks_dir() { + let tmp = tempfile::tempdir().unwrap(); + let cache = CacheLayout { + shared_root: tmp.path().to_path_buf(), + worktree_root: tmp.path().join("worktrees").join("default"), + }; + + let _lock = SnapshotBuildLock::try_acquire(&cache, "cafefacecafefade") + .unwrap() + .unwrap(); + + assert!(cache.snapshot_locks_dir().is_dir()); + assert!(cache.snapshot_build_lock_path("cafefacecafefade").is_file()); + } + #[test] fn publish_active_snapshot_does_not_overwrite_existing_shared_snapshot() { let tmp = tempfile::tempdir().unwrap(); diff --git a/crates/tempyr-index/Cargo.toml b/crates/tempyr-index/Cargo.toml index 549de0d..3556d51 100644 --- a/crates/tempyr-index/Cargo.toml +++ b/crates/tempyr-index/Cargo.toml @@ -17,6 +17,7 @@ reqwest = { workspace = true } async-trait = { workspace = true } blake3 = { workspace = true } toml = { workspace = true } +walkdir = { workspace = true } [features] default = ["local-embeddings"] diff --git a/crates/tempyr-index/src/health.rs b/crates/tempyr-index/src/health.rs index 03dfc7b..e9d829d 100644 --- a/crates/tempyr-index/src/health.rs +++ b/crates/tempyr-index/src/health.rs @@ -118,6 +118,13 @@ pub struct IndexSection { pub snapshot_key_error: Option, pub fts_entries: Option, pub embedding_count_for_index: Option, + /// Number of historical snapshot directories under + /// `/snapshots/`. Excludes the `.locks` dir and any + /// `.gc-*` eviction stubs left by a partial prune. + pub snapshot_store_count: Option, + /// Total bytes used by historical snapshot directories. A high number + /// here is the signal to run `tempyr snapshot prune`. + pub snapshot_store_bytes: Option, } const LOCAL_EMBEDDINGS_COMPILED_IN: bool = cfg!(feature = "local-embeddings"); @@ -360,6 +367,8 @@ fn build_index_section(inputs: &HealthInputs<'_>, store_path: Option<&Path>) -> let layout = match IndexLayout::resolve(inputs.root, inputs.graph_dir, inputs.tempyr_dir) { Ok(layout) => layout, Err(err) => { + let (snapshot_store_count, snapshot_store_bytes) = + probe_snapshot_store(&inputs.cache.snapshots_root()); return IndexSection { active_index_path: inputs.cache.active_index_path(), active_index_exists: inputs.cache.active_index_path().exists(), @@ -372,6 +381,8 @@ fn build_index_section(inputs: &HealthInputs<'_>, store_path: Option<&Path>) -> snapshot_key_error: Some(err.to_string()), fts_entries: None, embedding_count_for_index: None, + snapshot_store_count, + snapshot_store_bytes, }; } }; @@ -398,6 +409,9 @@ fn build_index_section(inputs: &HealthInputs<'_>, store_path: Option<&Path>) -> None => (None, None), }; + let (snapshot_store_count, snapshot_store_bytes) = + probe_snapshot_store(&inputs.cache.snapshots_root()); + IndexSection { active_index_path, active_index_exists, @@ -410,7 +424,50 @@ fn build_index_section(inputs: &HealthInputs<'_>, store_path: Option<&Path>) -> snapshot_key_error, fts_entries, embedding_count_for_index, + snapshot_store_count, + snapshot_store_bytes, + } +} + +/// Walk `/snapshots/` and return `(snapshot_dir_count, total_bytes)`. +/// Uses [`project::is_snapshot_key`] so the count agrees with what +/// `tempyr snapshot prune` would consider — the `.locks` coordination dir +/// and any partial-prune `.gc-*` stubs are excluded. Returns `(None, None)` +/// if the dir does not exist or cannot be read (snapshot store unused on +/// this project yet). +fn probe_snapshot_store(snapshots_root: &Path) -> (Option, Option) { + if !snapshots_root.is_dir() { + return (None, None); + } + let read = match std::fs::read_dir(snapshots_root) { + Ok(read) => read, + Err(_) => return (None, None), + }; + let mut count = 0usize; + let mut bytes = 0u64; + for entry in read.flatten() { + let Ok(file_type) = entry.file_type() else { + continue; + }; + if !file_type.is_dir() { + continue; + } + let name = entry.file_name(); + let name = name.to_string_lossy(); + if !project::is_snapshot_key(&name) { + continue; + } + count += 1; + for sub in walkdir::WalkDir::new(entry.path()) { + let Ok(sub) = sub else { continue }; + if let Ok(meta) = sub.metadata() + && meta.is_file() + { + bytes = bytes.saturating_add(meta.len()); + } + } } + (Some(count), Some(bytes)) } fn probe_index(index_path: &Path, store_path: Option<&Path>) -> (Option, Option) { From 1c89c1717d7c40d321a2983603350d42d86a7619 Mon Sep 17 00:00:00 2001 From: Caleb Leak Date: Sat, 2 May 2026 17:46:40 -0700 Subject: [PATCH 2/4] review: address PR feedback on snapshot store changes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Three fixes from review on PR #45, plus a regression test pinning the if-let drop semantics that one piece of feedback misidentified as a bug. - `seed_and_report` now re-seeds the local active.db from the shared snapshot before opening it. Without this, a `built_by_other = true` outcome (concurrent builder published during our wait) would update the worktree cursor to the new key while leaving a stale local index on disk. A subsequent query that fell through to the active path (e.g. after the shared snapshot was pruned) would then read stale bytes against a cursor that claimed they were current. Narrow window but real. - `project::is_snapshot_key` now rejects uppercase A–F. Snapshot keys are produced by BLAKE3 `to_hex()` which always emits lowercase, so any uppercase-named directory under `snapshots/` was created by some foreign process. Treating it as a snapshot would let `tempyr snapshot prune` evict it. Added a paired test that the predicate accepts whatever `graph_snapshot_key()` produces, so the two can't drift. - `health::probe_snapshot_store` now returns `(None, None)` on any IO failure during the walk instead of silently undercounting via `read.flatten()` and per-entry `continue`s. An undercount that hides a multi-GB snapshot store from the doctor's prune hint is worse than a clear "unavailable" signal. Concurrent prune (rename-then-remove) can briefly trigger this; a re-run after prune completes succeeds. - New `build_lock_survives_if_let_match_against_use_existing_arm` test empirically pins the Rust drop semantics in `run_rebuild`: the `if let RebuildSlot::UseExisting { .. } = slot { return ... }` pattern leaves `slot` (and any `Build(SnapshotBuildLock)` it carries) alive for the rest of the function, so the lock is held through `rebuild_from_scratch`. A future refactor that moves out of `slot` in the matched arm would fail this test. Co-Authored-By: Claude Opus 4.7 (1M context) --- .agents/skills/tempyr-interview/SKILL.md | 197 ++++++++++++++++++++ crates/tempyr-cli/src/commands/index_cmd.rs | 40 ++++ crates/tempyr-core/src/project.rs | 38 +++- crates/tempyr-index/src/health.rs | 34 ++-- 4 files changed, 294 insertions(+), 15 deletions(-) create mode 100644 .agents/skills/tempyr-interview/SKILL.md diff --git a/.agents/skills/tempyr-interview/SKILL.md b/.agents/skills/tempyr-interview/SKILL.md new file mode 100644 index 0000000..df9d5d2 --- /dev/null +++ b/.agents/skills/tempyr-interview/SKILL.md @@ -0,0 +1,197 @@ +--- +name: tempyr-interview +description: > + Guides the user through creating graph nodes via structured interview. + Activate when the user wants to: add a feature, create an epic, plan a + project, capture requirements, do a brain dump, or create a PRD/TDD. + Keywords: interview, new feature, brain dump, PRD, TDD, requirements. +allowed-tools: + - mcp__tempyr__interview_start + - mcp__tempyr__interview_answer + - mcp__tempyr__interview_show + - mcp__tempyr__interview_commit + - mcp__tempyr__interview_adjust + - mcp__tempyr__interview_resume + - mcp__tempyr__interview_add_node + - mcp__tempyr__interview_add_edge + - mcp__tempyr__graph_search + - mcp__tempyr__graph_list + - mcp__tempyr__graph_context + - mcp__tempyr__graph_traverse + - mcp__tempyr__graph_get_node + - mcp__tempyr__graph_update_node +--- + +# Tempyr Interview Skill + +You are conducting a structured interview to create knowledge graph nodes. +The MCP server handles state, gap detection, and phase transitions. Your job +is the CONVERSATION — phrasing questions naturally, extracting structured +entities from answers, and presenting proposals clearly. + +## CRITICAL: You are the interviewer, NOT the interviewee + +**NEVER answer your own questions.** You ask questions, then STOP and WAIT +for the user to respond. Every call to `interview_answer` MUST contain text +that the user actually typed — never your own fabricated answers, inferences, +or "obvious" gap-fills. If you think you know the answer from context, you +still ask — the user may disagree, clarify, or have context you lack. + +Concretely: +- After calling `interview_start`, present the gaps/questions and **stop**. +- After each user reply, call `interview_answer` with **their words**, then + present the next questions and **stop**. +- Do NOT batch-answer gaps. Do NOT pre-fill answers from existing graph + context. Do NOT call `interview_answer` multiple times in a row without + user input between each call. +- The only valid argument to `interview_answer` is a quote or close + paraphrase of what the user just said. + +## Core workflow + +### Starting an interview + +When the user describes something they want to build/plan/capture: +1. Call `interview_start` with their input as `brain_dump` +2. The server returns: tentative root node, existing graph context, gaps +3. Present what the server found in the existing graph FIRST +4. Show the tentative root node it created from the brain dump +5. Ask the first 2-3 questions from `next_questions` + +### Processing answers — the extraction loop + +When the user answers a question (or gives additional context — meaning they +typed something and you received it as a user message): + +1. **Record** the answer: call `interview_answer` with the user's actual response +2. **Extract** entities from the answer text. For each entity you identify: + - If the node **already exists in the graph** (not tentative), call + `graph_update_node` with the node_id and the fields to change (body, + status, owner, tags). Only provided fields are overwritten. + - If the node is **new**, call `interview_add_node` with the `session_id`, + a human-readable `slug` (e.g. `session-replay`, `p99-latency`), and + `node_type`. The system generates a 6-char suffix automatically and + returns the full ID. The node is stored as tentative (not written to + disk) until `interview_commit`. + - Call `interview_add_edge` using the `session_id` and the full ID + returned by `interview_add_node`. You can reference tentative nodes + (including the root node), existing graph nodes, or use 6-char suffixes. + - Do NOT include type prefixes in slugs — use `session-replay` not + `feat-session-replay`. The `node_type` field handles typing. +3. **Show** what was created/linked in compact format (see below) +4. **Ask** the next 2-3 questions from the server's gap list + +Alternatively, spawn the `tempyr-extractor` subagent for complex answers +(wall-of-text brain dumps, multi-entity responses). Pass it: +- The question that was asked +- The user's answer +- Current tentative nodes (from `interview_show`) +- Existing graph context + +Then apply its JSON output by calling `interview_add_node` (new) or +`graph_update_node` (existing graph nodes) and `interview_add_edge` for +each entity. + +### How to present tentative nodes + +Use this compact format — NOT full YAML: + +``` +Here's what I've added: ++ constraint: P99 replay load < 2s (from your latency requirement) ++ decision: separate ingestion pipeline (status: proposed) + -> linked to: comp-event-pipeline (existing), constraint-p99-latency (new) +``` + +### How to phrase questions + +The MCP server returns structured gap descriptions with context for +natural phrasing. Use `suggested_angle` as your approach hint. + +Server returns gap objects like: +```json +{ + "gap_type": "MissingSuccessMetric", + "priority": "Required", + "context": "'feat-replay' has no measured_by relationship to any metric.", + "existing_related": ["metric-mttr-reduction"], + "question_type": "Closed", + "suggested_angle": "Ask what success looks like -- quantitative if possible." +} +``` + +When `existing_related` is populated, reference those nodes: +"The observability epic tracks MTTR reduction — does that cover session +replay too, or does this feature need its own success metric?" + +When `existing_related` is empty, ask open-ended: +"How will we know this feature is successful? What would you measure?" + +### Question rules + +- NEVER ask more than 3 questions per turn +- NEVER ask questions the graph already answers — check `existing_related` +- For `Closed` question_type: phrase as confirmation ("Is X the right...?") +- For `Open` question_type: ask one focused question +- For `ForcedChoice` question_type: present the candidates from `existing_related` +- For `Implication` question_type: frame as "have you thought about X?" + with a specific number or consequence + +### Phase transitions + +The server manages phases internally. When the response includes +`phase_changed: true`, acknowledge the shift naturally: + +"Good — I have a clear picture of who this is for and what success looks +like. Let me ask about the technical side now." + +Do NOT announce phase names ("entering Technical phase"). The user +experiences a conversation, not a state machine. + +Phases flow: Discovery -> Product -> Technical -> Decomposition -> Review. +Each phase focuses on different gap types. The server handles this +automatically — just follow the `next_questions` it returns. + +### Handling tangents and corrections + +If the user: +- **Goes off-topic**: still call `interview_answer` — extract any relevant + entities and the gap analysis will catch what's still missing +- **Wants to correct something**: call `interview_adjust` with the node_id + and the changes (body, status, or new_id for renaming) +- **Wants to skip ahead**: call `interview_answer` with "user wants to + skip to technical/decomposition/review" +- **Dumps a wall of text**: spawn the `tempyr-extractor` subagent — it + handles multi-entity extraction better in isolated context + +### Review phase + +When the server returns `"phase": "Review"`: +1. Call `interview_show` to get the full tentative state +2. Present a structured summary organized by node type: + - Features, with their linked personas, metrics, constraints + - Decisions and their rationale + - Tasks and dependencies + - Risks and open questions +3. Show progress: "X nodes, Y edges proposed" +4. Ask: "Anything to add, change, or remove before I commit?" +5. On approval, call `interview_commit` +6. Report the files created and any validation warnings +7. Mention: `tempyr render prd ` or `tempyr render tdd ` + +### Resuming an interrupted interview + +If the user mentions a previous interview or wants to continue: +1. Call `interview_resume` with the session_id +2. The server returns the full current state +3. Summarize where they left off: phase, nodes created, gaps remaining +4. Continue asking questions from `next_questions` + +If the user doesn't know the session_id, they can run +`tempyr interview list` in the terminal to see active sessions. + +### When NOT to interview + +If the user just wants to quickly add a single note or insight: +- Use `graph_add_node` directly, skip the interview +- The interview is for features, epics, and multi-node creation diff --git a/crates/tempyr-cli/src/commands/index_cmd.rs b/crates/tempyr-cli/src/commands/index_cmd.rs index f04b5a2..0281fc9 100644 --- a/crates/tempyr-cli/src/commands/index_cmd.rs +++ b/crates/tempyr-cli/src/commands/index_cmd.rs @@ -214,6 +214,15 @@ fn seed_and_report( skip_embeddings: bool, waited_for_concurrent_builder: bool, ) -> anyhow::Result<()> { + // If a concurrent builder published the snapshot during our wait, the + // initial `ensure_active_index_seeded` (called at the top of `run_rebuild`) + // ran before the snapshot existed and so left the worktree's local index + // unrefreshed. Re-seed now so the local copy and cursor stay in sync with + // the shared snapshot — otherwise a later query that falls through to the + // active path (e.g. after the shared snapshot is pruned) would read stale + // bytes against a cursor that claims they're current. + let _ = ctx.ensure_active_index_seeded()?; + let index_path = ctx.queryable_index_path()?; let index = Index::open(&index_path)?; let stats = index.stats()?; @@ -586,6 +595,37 @@ mod tests { publisher.join().unwrap(); } + /// Regression guard for the if-let drop semantics in `run_rebuild`: + /// `if let RebuildSlot::UseExisting { .. } = slot { return ... }` must + /// leave `slot` (and the `Build(SnapshotBuildLock)` it might carry) + /// alive for the rest of the function, so that the lock is held through + /// `rebuild_from_scratch`. If a future refactor changes this — e.g. + /// moves out of `slot` in the matched arm — the test would fail by + /// observing the lock as available to a second acquirer mid-rebuild. + #[test] + fn build_lock_survives_if_let_match_against_use_existing_arm() { + let tmp = tempfile::tempdir().unwrap(); + let ctx = make_ctx(tmp.path()); + let shared = ctx.shared_snapshot_index_path().unwrap(); + + // Snapshot doesn't exist → negotiate returns Build(lock). + let slot = negotiate_rebuild_slot(&ctx, &shared, false, Duration::from_millis(50)).unwrap(); + assert!(matches!(slot, RebuildSlot::Build(_))); + + // Mirror the exact pattern used in run_rebuild. + if let RebuildSlot::UseExisting { built_by_other: _ } = slot { + unreachable!("Build was constructed but UseExisting matched"); + } + + // After the if-let, `slot` (and its lock) must still be held — + // confirm by trying to acquire from a fresh ProjectContext. + let probe = ctx.try_acquire_snapshot_build_lock().unwrap(); + assert!( + probe.is_none(), + "lock was released too early; if-let against UseExisting arm dropped Build(lock)" + ); + } + #[test] fn slot_blocking_acquire_times_out_when_lock_never_releases() { let tmp = tempfile::tempdir().unwrap(); diff --git a/crates/tempyr-core/src/project.rs b/crates/tempyr-core/src/project.rs index ef2a131..a714f74 100644 --- a/crates/tempyr-core/src/project.rs +++ b/crates/tempyr-core/src/project.rs @@ -787,12 +787,21 @@ pub fn short_path_hash(path: &Path) -> String { hex[..12].to_string() } -/// Snapshot keys are the first 16 hex chars of a BLAKE3 digest, see -/// [`graph_snapshot_key`]. This predicate filters out the `.locks` +/// Snapshot keys are the first 16 lowercase hex chars of a BLAKE3 digest, +/// see [`graph_snapshot_key`]. This predicate filters out the `.locks` /// coordination dir and any partial-prune `.gc-*` stubs from a `read_dir` /// over `/snapshots/`. +/// +/// Lowercase-only matters: BLAKE3's `to_hex()` always emits lowercase, so +/// any uppercase-named directory under `snapshots/` was created by some +/// foreign process, not by Tempyr. Treating it as a snapshot would let +/// `tempyr snapshot prune` evict it — refusing to recognize it is the +/// safer default. pub fn is_snapshot_key(name: &str) -> bool { - name.len() == 16 && name.bytes().all(|b| b.is_ascii_hexdigit()) + name.len() == 16 + && name + .bytes() + .all(|b| b.is_ascii_digit() || (b'a'..=b'f').contains(&b)) } #[cfg(test)] @@ -1302,12 +1311,35 @@ mod tests { fn is_snapshot_key_accepts_only_canonical_form() { assert!(is_snapshot_key("12971a01c28ece80")); assert!(is_snapshot_key("0000000000000000")); + assert!(is_snapshot_key("abcdef0123456789")); assert!(!is_snapshot_key("")); assert!(!is_snapshot_key("12971a01c28ece8")); assert!(!is_snapshot_key("12971a01c28ece800")); assert!(!is_snapshot_key(".locks")); assert!(!is_snapshot_key(".gc-12971a01c28ece80")); assert!(!is_snapshot_key("ZZ971a01c28ece80")); + // Tightened: uppercase A-F is rejected (BLAKE3 to_hex emits lowercase). + assert!(!is_snapshot_key("ABCDEF0123456789")); + assert!(!is_snapshot_key("12971A01C28ECE80")); + } + + #[test] + fn is_snapshot_key_matches_graph_snapshot_key_output() { + // Whatever real snapshot-key the producer emits must satisfy the + // predicate, otherwise prune/health would silently drop it. + let tmp = tempfile::tempdir().unwrap(); + let root = tmp.path(); + let graph_dir = root.join("graph"); + let tempyr_dir = root.join(".tempyr"); + fs::create_dir_all(&graph_dir).unwrap(); + fs::create_dir_all(&tempyr_dir).unwrap(); + fs::write(tempyr_dir.join("schema.toml"), "name = 'x'\n").unwrap(); + + let key = graph_snapshot_key(&graph_dir, &tempyr_dir).unwrap(); + assert!( + is_snapshot_key(&key), + "graph_snapshot_key() output {key:?} must satisfy is_snapshot_key()" + ); } #[test] diff --git a/crates/tempyr-index/src/health.rs b/crates/tempyr-index/src/health.rs index ed3d3db..bc6eaa8 100644 --- a/crates/tempyr-index/src/health.rs +++ b/crates/tempyr-index/src/health.rs @@ -432,22 +432,29 @@ fn build_index_section(inputs: &HealthInputs<'_>, store_path: Option<&Path>) -> /// Walk `/snapshots/` and return `(snapshot_dir_count, total_bytes)`. /// Uses [`project::is_snapshot_key`] so the count agrees with what /// `tempyr snapshot prune` would consider — the `.locks` coordination dir -/// and any partial-prune `.gc-*` stubs are excluded. Returns `(None, None)` -/// if the dir does not exist or cannot be read (snapshot store unused on -/// this project yet). +/// and any partial-prune `.gc-*` stubs are excluded. +/// +/// Returns `(None, None)` for any IO failure rather than reporting +/// partial totals: an undercount that hides a multi-GB store from the +/// `tempyr doctor` prune hint is worse than a clear "unavailable" signal. +/// `tempyr snapshot prune` running concurrently can briefly trigger this +/// (rename-then-remove races with our walk); a re-run of doctor after +/// the prune completes will succeed. fn probe_snapshot_store(snapshots_root: &Path) -> (Option, Option) { if !snapshots_root.is_dir() { return (None, None); } - let read = match std::fs::read_dir(snapshots_root) { - Ok(read) => read, - Err(_) => return (None, None), + let Ok(read) = std::fs::read_dir(snapshots_root) else { + return (None, None); }; let mut count = 0usize; let mut bytes = 0u64; - for entry in read.flatten() { + for entry in read { + let Ok(entry) = entry else { + return (None, None); + }; let Ok(file_type) = entry.file_type() else { - continue; + return (None, None); }; if !file_type.is_dir() { continue; @@ -459,10 +466,13 @@ fn probe_snapshot_store(snapshots_root: &Path) -> (Option, Option) { } count += 1; for sub in walkdir::WalkDir::new(entry.path()) { - let Ok(sub) = sub else { continue }; - if let Ok(meta) = sub.metadata() - && meta.is_file() - { + let Ok(sub) = sub else { + return (None, None); + }; + let Ok(meta) = sub.metadata() else { + return (None, None); + }; + if meta.is_file() { bytes = bytes.saturating_add(meta.len()); } } From b7406e8378ea254c44007a334e9a0c8ad7d5fa60 Mon Sep 17 00:00:00 2001 From: Caleb Leak Date: Sat, 2 May 2026 17:46:58 -0700 Subject: [PATCH 3/4] review: drop accidentally staged .agents/ scratch file MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The .agents/ dir was already untracked before this branch — it's local agent state, not part of the snapshot store work. Got swept in by a `git add -A` in the prior commit; removing here. Co-Authored-By: Claude Opus 4.7 (1M context) --- .agents/skills/tempyr-interview/SKILL.md | 197 ----------------------- 1 file changed, 197 deletions(-) delete mode 100644 .agents/skills/tempyr-interview/SKILL.md diff --git a/.agents/skills/tempyr-interview/SKILL.md b/.agents/skills/tempyr-interview/SKILL.md deleted file mode 100644 index df9d5d2..0000000 --- a/.agents/skills/tempyr-interview/SKILL.md +++ /dev/null @@ -1,197 +0,0 @@ ---- -name: tempyr-interview -description: > - Guides the user through creating graph nodes via structured interview. - Activate when the user wants to: add a feature, create an epic, plan a - project, capture requirements, do a brain dump, or create a PRD/TDD. - Keywords: interview, new feature, brain dump, PRD, TDD, requirements. -allowed-tools: - - mcp__tempyr__interview_start - - mcp__tempyr__interview_answer - - mcp__tempyr__interview_show - - mcp__tempyr__interview_commit - - mcp__tempyr__interview_adjust - - mcp__tempyr__interview_resume - - mcp__tempyr__interview_add_node - - mcp__tempyr__interview_add_edge - - mcp__tempyr__graph_search - - mcp__tempyr__graph_list - - mcp__tempyr__graph_context - - mcp__tempyr__graph_traverse - - mcp__tempyr__graph_get_node - - mcp__tempyr__graph_update_node ---- - -# Tempyr Interview Skill - -You are conducting a structured interview to create knowledge graph nodes. -The MCP server handles state, gap detection, and phase transitions. Your job -is the CONVERSATION — phrasing questions naturally, extracting structured -entities from answers, and presenting proposals clearly. - -## CRITICAL: You are the interviewer, NOT the interviewee - -**NEVER answer your own questions.** You ask questions, then STOP and WAIT -for the user to respond. Every call to `interview_answer` MUST contain text -that the user actually typed — never your own fabricated answers, inferences, -or "obvious" gap-fills. If you think you know the answer from context, you -still ask — the user may disagree, clarify, or have context you lack. - -Concretely: -- After calling `interview_start`, present the gaps/questions and **stop**. -- After each user reply, call `interview_answer` with **their words**, then - present the next questions and **stop**. -- Do NOT batch-answer gaps. Do NOT pre-fill answers from existing graph - context. Do NOT call `interview_answer` multiple times in a row without - user input between each call. -- The only valid argument to `interview_answer` is a quote or close - paraphrase of what the user just said. - -## Core workflow - -### Starting an interview - -When the user describes something they want to build/plan/capture: -1. Call `interview_start` with their input as `brain_dump` -2. The server returns: tentative root node, existing graph context, gaps -3. Present what the server found in the existing graph FIRST -4. Show the tentative root node it created from the brain dump -5. Ask the first 2-3 questions from `next_questions` - -### Processing answers — the extraction loop - -When the user answers a question (or gives additional context — meaning they -typed something and you received it as a user message): - -1. **Record** the answer: call `interview_answer` with the user's actual response -2. **Extract** entities from the answer text. For each entity you identify: - - If the node **already exists in the graph** (not tentative), call - `graph_update_node` with the node_id and the fields to change (body, - status, owner, tags). Only provided fields are overwritten. - - If the node is **new**, call `interview_add_node` with the `session_id`, - a human-readable `slug` (e.g. `session-replay`, `p99-latency`), and - `node_type`. The system generates a 6-char suffix automatically and - returns the full ID. The node is stored as tentative (not written to - disk) until `interview_commit`. - - Call `interview_add_edge` using the `session_id` and the full ID - returned by `interview_add_node`. You can reference tentative nodes - (including the root node), existing graph nodes, or use 6-char suffixes. - - Do NOT include type prefixes in slugs — use `session-replay` not - `feat-session-replay`. The `node_type` field handles typing. -3. **Show** what was created/linked in compact format (see below) -4. **Ask** the next 2-3 questions from the server's gap list - -Alternatively, spawn the `tempyr-extractor` subagent for complex answers -(wall-of-text brain dumps, multi-entity responses). Pass it: -- The question that was asked -- The user's answer -- Current tentative nodes (from `interview_show`) -- Existing graph context - -Then apply its JSON output by calling `interview_add_node` (new) or -`graph_update_node` (existing graph nodes) and `interview_add_edge` for -each entity. - -### How to present tentative nodes - -Use this compact format — NOT full YAML: - -``` -Here's what I've added: -+ constraint: P99 replay load < 2s (from your latency requirement) -+ decision: separate ingestion pipeline (status: proposed) - -> linked to: comp-event-pipeline (existing), constraint-p99-latency (new) -``` - -### How to phrase questions - -The MCP server returns structured gap descriptions with context for -natural phrasing. Use `suggested_angle` as your approach hint. - -Server returns gap objects like: -```json -{ - "gap_type": "MissingSuccessMetric", - "priority": "Required", - "context": "'feat-replay' has no measured_by relationship to any metric.", - "existing_related": ["metric-mttr-reduction"], - "question_type": "Closed", - "suggested_angle": "Ask what success looks like -- quantitative if possible." -} -``` - -When `existing_related` is populated, reference those nodes: -"The observability epic tracks MTTR reduction — does that cover session -replay too, or does this feature need its own success metric?" - -When `existing_related` is empty, ask open-ended: -"How will we know this feature is successful? What would you measure?" - -### Question rules - -- NEVER ask more than 3 questions per turn -- NEVER ask questions the graph already answers — check `existing_related` -- For `Closed` question_type: phrase as confirmation ("Is X the right...?") -- For `Open` question_type: ask one focused question -- For `ForcedChoice` question_type: present the candidates from `existing_related` -- For `Implication` question_type: frame as "have you thought about X?" - with a specific number or consequence - -### Phase transitions - -The server manages phases internally. When the response includes -`phase_changed: true`, acknowledge the shift naturally: - -"Good — I have a clear picture of who this is for and what success looks -like. Let me ask about the technical side now." - -Do NOT announce phase names ("entering Technical phase"). The user -experiences a conversation, not a state machine. - -Phases flow: Discovery -> Product -> Technical -> Decomposition -> Review. -Each phase focuses on different gap types. The server handles this -automatically — just follow the `next_questions` it returns. - -### Handling tangents and corrections - -If the user: -- **Goes off-topic**: still call `interview_answer` — extract any relevant - entities and the gap analysis will catch what's still missing -- **Wants to correct something**: call `interview_adjust` with the node_id - and the changes (body, status, or new_id for renaming) -- **Wants to skip ahead**: call `interview_answer` with "user wants to - skip to technical/decomposition/review" -- **Dumps a wall of text**: spawn the `tempyr-extractor` subagent — it - handles multi-entity extraction better in isolated context - -### Review phase - -When the server returns `"phase": "Review"`: -1. Call `interview_show` to get the full tentative state -2. Present a structured summary organized by node type: - - Features, with their linked personas, metrics, constraints - - Decisions and their rationale - - Tasks and dependencies - - Risks and open questions -3. Show progress: "X nodes, Y edges proposed" -4. Ask: "Anything to add, change, or remove before I commit?" -5. On approval, call `interview_commit` -6. Report the files created and any validation warnings -7. Mention: `tempyr render prd ` or `tempyr render tdd ` - -### Resuming an interrupted interview - -If the user mentions a previous interview or wants to continue: -1. Call `interview_resume` with the session_id -2. The server returns the full current state -3. Summarize where they left off: phase, nodes created, gaps remaining -4. Continue asking questions from `next_questions` - -If the user doesn't know the session_id, they can run -`tempyr interview list` in the terminal to see active sessions. - -### When NOT to interview - -If the user just wants to quickly add a single note or insight: -- Use `graph_add_node` directly, skip the interview -- The interview is for features, epics, and multi-node creation From fe40e73d7163a89f2b9f52838c68e170328c62f2 Mon Sep 17 00:00:00 2001 From: Caleb Leak Date: Sat, 2 May 2026 21:13:16 -0700 Subject: [PATCH 4/4] review: harden snapshot_key validation and clarify empty-store probe Two more fixes from review on PR #45. - `SnapshotBuildLock::try_acquire` now validates `snapshot_key` against `is_snapshot_key()` before doing IO. The path helpers (`snapshot_locks_dir`, `snapshot_build_lock_path`) are pure path construction and would happily produce a path outside the locks directory if given input like `"../../etc/passwd"`. All current internal callers source the key from `IndexLayout::snapshot_key()` (always BLAKE3 hex), but the function is `pub` so external code could pass arbitrary input. Validation at the IO boundary closes that surface without splaying `Result` returns across every path helper. Test: `snapshot_build_lock_rejects_path_traversal_keys`. - `health::probe_snapshot_store` now distinguishes "no snapshot store yet" from "probe failed". A missing snapshots directory returns `(Some(0), Some(0))` so `tempyr doctor` shows `snapshot store: 0 dirs, 0 B (ok)` on a fresh project instead of silently suppressing the line. Real anomalies (path exists but is not a directory, IO errors mid-walk) still return `(None, None)` so doctor's render can signal "unavailable". Tests: missing-dir, non-directory, and a fresh test that the canonical-snapshot filter excludes `.locks` and `.gc-*`. Co-Authored-By: Claude Opus 4.7 (1M context) --- crates/tempyr-core/src/project.rs | 49 +++++++++++++++++++++++++ crates/tempyr-index/src/health.rs | 61 +++++++++++++++++++++++++++++++ 2 files changed, 110 insertions(+) diff --git a/crates/tempyr-core/src/project.rs b/crates/tempyr-core/src/project.rs index a714f74..92ef96c 100644 --- a/crates/tempyr-core/src/project.rs +++ b/crates/tempyr-core/src/project.rs @@ -133,7 +133,23 @@ pub struct SnapshotBuildLock { impl SnapshotBuildLock { /// Attempt a non-blocking exclusive acquire of the per-key build lock. + /// + /// Rejects any `snapshot_key` that does not satisfy [`is_snapshot_key`] + /// — the path helpers it composes (`snapshot_locks_dir` + + /// `snapshot_build_lock_path`) are pure path construction and would + /// happily produce a path outside the locks directory if given an + /// input like `"../../etc/passwd"`. The validation here, at the IO + /// boundary, ensures no caller (internal or external) can use this + /// API to open or lock a file outside the snapshots store. pub fn try_acquire(cache: &CacheLayout, snapshot_key: &str) -> io::Result> { + if !is_snapshot_key(snapshot_key) { + return Err(io::Error::new( + io::ErrorKind::InvalidInput, + format!( + "invalid snapshot key {snapshot_key:?}: expected 16 lowercase hex characters" + ), + )); + } let locks_dir = cache.snapshot_locks_dir(); fs::create_dir_all(&locks_dir)?; let path = cache.snapshot_build_lock_path(snapshot_key); @@ -1369,6 +1385,39 @@ mod tests { assert!(after_release.is_some()); } + #[test] + fn snapshot_build_lock_rejects_path_traversal_keys() { + let tmp = tempfile::tempdir().unwrap(); + let cache = CacheLayout { + shared_root: tmp.path().to_path_buf(), + worktree_root: tmp.path().join("worktrees").join("default"), + }; + + for bad in [ + "..", + "../escape", + "../../etc/passwd", + "ABCDEF1234567890", // wrong case + "deadbeef", // wrong length + "", + "deadbeefcafef00g", // 'g' is not hex + ] { + let err = SnapshotBuildLock::try_acquire(&cache, bad).unwrap_err(); + assert_eq!( + err.kind(), + io::ErrorKind::InvalidInput, + "expected InvalidInput for {bad:?}, got {err:?}" + ); + } + + // Sanity: nothing got created outside the cache root. + let escaped = tmp.path().parent().unwrap().join("etc"); + assert!( + !escaped.exists(), + "rejection must not produce any side effects outside the cache root" + ); + } + #[test] fn snapshot_build_lock_creates_locks_dir() { let tmp = tempfile::tempdir().unwrap(); diff --git a/crates/tempyr-index/src/health.rs b/crates/tempyr-index/src/health.rs index bc6eaa8..3624478 100644 --- a/crates/tempyr-index/src/health.rs +++ b/crates/tempyr-index/src/health.rs @@ -434,6 +434,12 @@ fn build_index_section(inputs: &HealthInputs<'_>, store_path: Option<&Path>) -> /// `tempyr snapshot prune` would consider — the `.locks` coordination dir /// and any partial-prune `.gc-*` stubs are excluded. /// +/// A missing snapshots directory returns `(Some(0), Some(0))` — this is +/// a definitively-empty store, not a probe failure (a fresh project +/// before any rebuild, or a non-git tempyr project before any indexing). +/// Distinguishing this from probe failure lets `tempyr doctor` show +/// `snapshot store: 0 dirs, 0 B (ok)` instead of suppressing the line. +/// /// Returns `(None, None)` for any IO failure rather than reporting /// partial totals: an undercount that hides a multi-GB store from the /// `tempyr doctor` prune hint is worse than a clear "unavailable" signal. @@ -441,7 +447,13 @@ fn build_index_section(inputs: &HealthInputs<'_>, store_path: Option<&Path>) -> /// (rename-then-remove races with our walk); a re-run of doctor after /// the prune completes will succeed. fn probe_snapshot_store(snapshots_root: &Path) -> (Option, Option) { + if !snapshots_root.exists() { + // Fresh project — no rebuild has populated the store yet. + return (Some(0), Some(0)); + } if !snapshots_root.is_dir() { + // Path exists but isn't a directory — that's a real anomaly, + // not "empty". Surface as unavailable. return (None, None); } let Ok(read) = std::fs::read_dir(snapshots_root) else { @@ -656,4 +668,53 @@ allowed_edges = [] .unwrap(); assert!(config_entry.exists); } + + #[test] + fn probe_snapshot_store_treats_missing_dir_as_empty() { + let tmp = tempfile::tempdir().unwrap(); + let snapshots_root = tmp.path().join("never-created"); + + let (count, bytes) = probe_snapshot_store(&snapshots_root); + + assert_eq!( + count, + Some(0), + "missing dir should be empty store, not failure" + ); + assert_eq!(bytes, Some(0)); + } + + #[test] + fn probe_snapshot_store_treats_non_directory_as_unavailable() { + let tmp = tempfile::tempdir().unwrap(); + let snapshots_root = tmp.path().join("not-a-dir"); + fs::write(&snapshots_root, b"oops").unwrap(); + + let (count, bytes) = probe_snapshot_store(&snapshots_root); + + assert_eq!(count, None, "non-directory at the path is a real anomaly"); + assert_eq!(bytes, None); + } + + #[test] + fn probe_snapshot_store_counts_canonical_snapshots_only() { + let tmp = tempfile::tempdir().unwrap(); + let snapshots_root = tmp.path().join("snapshots"); + fs::create_dir_all(&snapshots_root).unwrap(); + + // Two real snapshots + for key in ["0123456789abcdef", "fedcba9876543210"] { + let dir = snapshots_root.join(key); + fs::create_dir_all(&dir).unwrap(); + fs::write(dir.join("index.db"), vec![0u8; 100]).unwrap(); + } + // Plus a `.locks` dir and a `.gc-*` stub that must be ignored. + fs::create_dir_all(snapshots_root.join(".locks")).unwrap(); + fs::create_dir_all(snapshots_root.join(".gc-stale")).unwrap(); + fs::write(snapshots_root.join(".gc-stale").join("index.db"), b"x").unwrap(); + + let (count, bytes) = probe_snapshot_store(&snapshots_root); + assert_eq!(count, Some(2)); + assert_eq!(bytes, Some(200)); + } }