Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "synapse"
version = "0.1.3"
version = "0.1.4"
edition = "2024"

[[bin]]
Expand Down
86 changes: 86 additions & 0 deletions src/graph/ladybug_store.rs
Original file line number Diff line number Diff line change
Expand Up @@ -354,6 +354,92 @@ impl GraphStore for LadybugGraphStore {
)
}

fn link_edges(&self, edges: &[crate::graph::model::GraphEdge]) -> Result<()> {
use crate::graph::model::GraphEdge;
if edges.is_empty() {
return Ok(());
}
let _guard = self.lock.lock().unwrap();
let conn = self.conn()?;

// The Cypher for each edge kind. Same MERGE shape as the per-edge
// `link_*` methods; here the statement is prepared ONCE per kind and
// re-executed per row, all inside one transaction — so we pay a single
// commit instead of one per edge (the source of the end-of-index stall).
let cypher = |e: &GraphEdge| -> &'static str {
match e {
GraphEdge::SymbolReferences { .. } => {
"MATCH (a:Symbol {id: $a}), (b:Symbol {id: $b}) MERGE (a)-[:REFERENCES]->(b)"
}
GraphEdge::SymbolInherits { .. } => {
"MATCH (a:Symbol {id: $a}), (b:Symbol {id: $b}) MERGE (a)-[:INHERITS]->(b)"
}
GraphEdge::SymbolImplements { .. } => {
"MATCH (a:Symbol {id: $a}), (b:Symbol {id: $b}) MERGE (a)-[:IMPLEMENTS]->(b)"
}
GraphEdge::FileImportsPackage { .. } => {
"MATCH (f:File {id: $a}), (k:Package {id: $b}) MERGE (f)-[:IMPORTS_PACKAGE]->(k)"
}
GraphEdge::ProjectContainsFile { .. } => {
"MATCH (p:Project {id: $a}), (f:File {id: $b}) MERGE (p)-[:CONTAINS_FILE]->(f)"
}
}
};
// The two endpoint ids for an edge, in (a, b) order matching the Cypher.
fn endpoints(e: &GraphEdge) -> (&str, &str) {
match e {
GraphEdge::SymbolReferences { from, to }
| GraphEdge::SymbolInherits { from, to }
| GraphEdge::SymbolImplements { from, to } => (from, to),
GraphEdge::FileImportsPackage { file, package } => (file, package),
GraphEdge::ProjectContainsFile { project, file } => (project, file),
}
}

// One prepared statement per distinct edge-kind Cypher, reused across
// all rows of that kind.
let mut prepared: std::collections::HashMap<&'static str, lbug::PreparedStatement> =
std::collections::HashMap::new();

conn.query("BEGIN TRANSACTION")
.map_err(|e| anyhow!("begin transaction: {e}"))?;
// Run the batch; on any error, roll back so a partial batch isn't left
// half-committed, then surface the error.
let result = (|| -> Result<()> {
for edge in edges {
let q = cypher(edge);
if !prepared.contains_key(q) {
let stmt = conn
.prepare(q)
.map_err(|e| anyhow!("preparing `{q}`: {e}"))?;
prepared.insert(q, stmt);
}
let stmt = prepared.get_mut(q).expect("just inserted");
let (a, b) = endpoints(edge);
conn.execute(
stmt,
vec![
("a", Value::String(a.to_string())),
("b", Value::String(b.to_string())),
],
)
.map_err(|e| anyhow!("executing batch edge: {e}"))?;
}
Ok(())
})();
match result {
Ok(()) => conn
.query("COMMIT")
.map(|_| ())
.map_err(|e| anyhow!("commit transaction: {e}")),
Err(err) => {
// Best-effort rollback; report the original error.
let _ = conn.query("ROLLBACK");
Err(err)
}
Comment on lines +430 to +439

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Action required

1. No rollback on commit 🐞 Bug ☼ Reliability

In LadybugGraphStore::link_edges, if the batch executes successfully but the final COMMIT fails, the
function returns the commit error without attempting a rollback/cleanup of the open transaction.
This can leave the database in an open transaction state (and potentially holding locks) until the
connection is torn down.
Agent Prompt
### Issue description
`LadybugGraphStore::link_edges` starts a transaction and rolls back on mid-batch execution errors, but does not roll back when `COMMIT` itself fails. This leaves error handling asymmetric and risks keeping an open transaction/locks around after a failed commit.

### Issue Context
The batched write path uses explicit `BEGIN TRANSACTION` / `COMMIT` statements and reuses prepared statements across edges.

### Fix Focus Areas
- src/graph/ladybug_store.rs[404-440]

### Suggested fix
- Capture the result of `conn.query("COMMIT")`.
- If `COMMIT` returns `Err`, perform a best-effort `ROLLBACK` (ignore rollback errors), then return the commit error with context.
- Optionally, consider a small RAII/guard pattern so any early-return after `BEGIN` triggers rollback unless a `committed` flag is set.

ⓘ Copy this prompt and use it to remediate the issue with your preferred AI generation tools

}
}

fn symbol_type_relations(&self, symbol_name: &str) -> Result<Vec<RelatedItem>> {
let _guard = self.lock.lock().unwrap();
let conn = self.conn()?;
Expand Down
17 changes: 17 additions & 0 deletions src/graph/model.rs
Original file line number Diff line number Diff line change
Expand Up @@ -248,6 +248,23 @@ pub struct RelatedItem {
pub depth: usize,
}

/// A relationship edge to create between two already-upserted nodes, used by
/// the indexer's post-pass to write many edges in one batch. Both ids must
/// already exist as nodes; writing uses `MERGE` so re-runs are idempotent.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum GraphEdge {
/// `Symbol -[:REFERENCES]-> Symbol` (referrer -> referenced).
SymbolReferences { from: String, to: String },
/// `Symbol -[:INHERITS]-> Symbol` (subtype -> base).
SymbolInherits { from: String, to: String },
/// `Symbol -[:IMPLEMENTS]-> Symbol` (implementor -> interface/trait).
SymbolImplements { from: String, to: String },
/// `File -[:IMPORTS_PACKAGE]-> Package`.
FileImportsPackage { file: String, package: String },
/// `Project -[:CONTAINS_FILE]-> File`.
ProjectContainsFile { project: String, file: String },
}

/// Aggregate counts for `status`/`index --stats`.
#[derive(Debug, Clone, Default, PartialEq, Eq, Serialize, Deserialize)]
pub struct IndexStats {
Expand Down
27 changes: 27 additions & 0 deletions src/graph/store.rs
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,33 @@ pub trait GraphStore {
/// type use (the `REFERENCES` edge). Direction is referrer -> referenced.
fn link_symbol_references(&self, from_symbol_id: &str, to_symbol_id: &str) -> Result<()>;

/// Create many relationship edges in a single batch. Backends that support
/// transactions should write the whole batch in one transaction with reused
/// prepared statements — far faster than one `link_*` call per edge during
/// the indexer's post-pass. Idempotent (`MERGE`); endpoints must already
/// exist as nodes. The default implementation falls back to per-edge writes.
fn link_edges(&self, edges: &[crate::graph::model::GraphEdge]) -> Result<()> {
use crate::graph::model::GraphEdge;
for e in edges {
match e {
GraphEdge::SymbolReferences { from, to } => {
self.link_symbol_references(from, to)?
}
GraphEdge::SymbolInherits { from, to } => self.link_symbol_inherits(from, to)?,
GraphEdge::SymbolImplements { from, to } => {
self.link_symbol_implements(from, to)?
}
GraphEdge::FileImportsPackage { file, package } => {
self.link_file_imports_package(file, package)?
}
GraphEdge::ProjectContainsFile { project, file } => {
self.link_project_contains_file(project, file)?
}
}
}
Ok(())
}

fn symbols_matching(&self, query: &SymbolSearchQuery) -> Result<Vec<IndexedSymbol>>;
fn files_matching(&self, query: &FileSearchQuery) -> Result<Vec<IndexedFile>>;
fn related_to_symbol(&self, symbol: &str, depth: usize) -> Result<Vec<RelatedItem>>;
Expand Down
106 changes: 88 additions & 18 deletions src/indexer/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,10 @@ pub struct IndexProgress {
pub projects: usize,
/// Packages discovered so far.
pub packages: usize,
/// The current post-loop phase, when indexing has moved past the per-file
/// scan into edge resolution (e.g. "resolving references"). `None` during
/// the file scan. Lets the UI show what the otherwise-frozen bar is doing.
pub phase: Option<&'static str>,
}

/// A progress observer invoked once per candidate file as indexing proceeds.
Expand Down Expand Up @@ -148,6 +152,7 @@ pub fn index_repo(
symbols: outcome.symbols,
projects: projects_seen,
packages: 0,
phase: None,
};
cb(rel, &snap);
}
Expand Down Expand Up @@ -235,32 +240,80 @@ pub fn index_repo(
}
}

// Post-loop edge-resolution passes. These run after the per-file scan (so
// their link targets all exist) and can be the bulk of wall-clock on large
// repos, so each reports a phase to the progress UI — otherwise the bar
// sits frozen at N/N and looks hung. `report_phase` reuses the same
// callback as the file scan, emitting a snapshot tagged with the phase.
// It's a fn (not a closure) taking the live counts so it doesn't hold a
// borrow of `outcome` across the mutations below.
let report_phase = |phase: &'static str, files: usize, symbols: usize| {
if let Some(cb) = progress {
cb(
"",
&IndexProgress {
processed: total,
total,
files_indexed: files,
symbols,
projects: projects_seen,
packages: 0,
phase: Some(phase),
},
);
}
};

// Resolve collected imports to known packages -> IMPORTS_PACKAGE edges.
// Done after the main loop so every manifest has registered its packages.
if !pending_imports.is_empty() {
report_phase("resolving imports", outcome.files_indexed, outcome.symbols);
outcome.edges += resolve_imports(store, &pending_imports)?;
}

// Resolve supertype relationships -> INHERITS/IMPLEMENTS edges. Done after
// the main loop so all symbols (the link targets) exist.
if !pending_supertypes.is_empty() {
report_phase(
"resolving type relationships",
outcome.files_indexed,
outcome.symbols,
);
outcome.edges += resolve_supertypes(store, &pending_supertypes)?;
}

// Resolve usage references -> REFERENCES edges. Done after the main loop so
// all declarations (the link targets) exist — references are cross-file.
if !pending_references.is_empty() {
report_phase(
"resolving references",
outcome.files_indexed,
outcome.symbols,
);
outcome.edges += resolve_references(store, &pending_references)?;
}

// Associate every indexed file with its nearest owning project manifest,
// creating CONTAINS_FILE edges. We link against the full candidate set (not
// just files touched this run) so ownership is complete after any index.
for rel in &candidates {
if let Some(manifest) = owning_manifest(rel, &manifests) {
store.link_project_contains_file(&project_id(manifest), &file_id(rel))?;
}
}
// Batched into one transaction like the resolve passes above.
report_phase(
"linking project membership",
outcome.files_indexed,
outcome.symbols,
);
let contains_edges: Vec<crate::graph::model::GraphEdge> = candidates
.iter()
.filter_map(|rel| {
owning_manifest(rel, &manifests).map(|manifest| {
crate::graph::model::GraphEdge::ProjectContainsFile {
project: project_id(manifest),
file: file_id(rel),
}
})
})
.collect();
store.link_edges(&contains_edges)?;

// Remove files that no longer exist as candidates (deleted/now-ignored).
if !changed_only {
Expand Down Expand Up @@ -299,7 +352,7 @@ fn resolve_imports(
.map(|p| p.name.as_str())
.collect();

let mut edges = 0;
let mut batch: Vec<crate::graph::model::GraphEdge> = Vec::new();
for (fid, imports, lang) in pending {
// De-dup the resolved package ids per file.
let mut linked: HashSet<String> = HashSet::new();
Expand All @@ -314,11 +367,15 @@ fn resolve_imports(
if let Some(pkg_id) = resolved
&& linked.insert(pkg_id.clone())
{
store.link_file_imports_package(fid, &pkg_id)?;
edges += 1;
batch.push(crate::graph::model::GraphEdge::FileImportsPackage {
file: fid.clone(),
package: pkg_id,
});
}
}
}
let edges = batch.len();
store.link_edges(&batch)?;
Ok(edges)
}

Expand Down Expand Up @@ -347,9 +404,9 @@ fn resolve_supertypes(
store: &dyn GraphStore,
pending: &[(String, Vec<tree_sitter::Supertype>)],
) -> Result<usize> {
use crate::graph::model::{SymbolKind, SymbolSearchQuery};
use crate::graph::model::{GraphEdge, SymbolKind, SymbolSearchQuery};

let mut edges = 0;
let mut batch: Vec<GraphEdge> = Vec::new();
for (file, supers) in pending {
let project_prefix = file.rsplit_once('/').map(|(d, _)| d).unwrap_or("");
for st in supers {
Expand Down Expand Up @@ -405,15 +462,22 @@ fn resolve_supertypes(
matches!(target.kind, SymbolKind::Interface | SymbolKind::Trait)
}
};
if implements {
store.link_symbol_implements(&child.id, &target.id)?;
batch.push(if implements {
GraphEdge::SymbolImplements {
from: child.id.clone(),
to: target.id.clone(),
}
} else {
store.link_symbol_inherits(&child.id, &target.id)?;
}
edges += 1;
GraphEdge::SymbolInherits {
from: child.id.clone(),
to: target.id.clone(),
}
});
}
}
}
let edges = batch.len();
store.link_edges(&batch)?;
Ok(edges)
}

Expand Down Expand Up @@ -445,7 +509,9 @@ fn resolve_references(
// applied per reference below.
let mut to_cache: HashMap<String, Vec<IndexedSymbol>> = HashMap::new();

let mut edges = 0;
// Accumulate edges and write them in one batch (one transaction) at the end
// rather than one DB statement per edge — this is what removes the stall.
let mut batch: Vec<crate::graph::model::GraphEdge> = Vec::new();
for (file, refs) in pending {
let project_prefix = file.rsplit_once('/').map(|(d, _)| d).unwrap_or("");

Expand Down Expand Up @@ -522,11 +588,15 @@ fn resolve_references(
// Deterministic order when an ambiguous name produces multiple edges.
targets.sort_by(|a, b| a.file_path.cmp(&b.file_path).then(a.id.cmp(&b.id)));
for target in targets {
store.link_symbol_references(&from.id, &target.id)?;
edges += 1;
batch.push(crate::graph::model::GraphEdge::SymbolReferences {
from: from.id.clone(),
to: target.id.clone(),
});
}
}
}
let edges = batch.len();
store.link_edges(&batch)?;
Ok(edges)
}

Expand Down
16 changes: 10 additions & 6 deletions src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -139,13 +139,17 @@ fn cmd_index(cwd: &Path, args: cli::IndexArgs) -> Result<()> {
move |current: &str, p: &indexer::IndexProgress| {
pb.set_length(p.total as u64);
pb.set_position(p.processed as u64);
// Colored live stats line + the current file, both below the bar.
// Bottom line: the current file during the scan, or the active
// post-loop phase (e.g. "resolving references…") once the per-file
// scan is done — so the bar at N/N shows work rather than looking
// hung while edges are written.
let bottom = match p.phase {
Some(phase) => format!("\x1b[2m{phase}…\x1b[0m"),
None => format!("\x1b[2m{}\x1b[0m", truncate_middle(current, 64)),
};
pb.set_message(format!(
"\x1b[36mfiles\x1b[0m {} \x1b[36msymbols\x1b[0m {} \x1b[36mprojects\x1b[0m {}\n \x1b[2m{}\x1b[0m",
p.files_indexed,
p.symbols,
p.projects,
truncate_middle(current, 64),
"\x1b[36mfiles\x1b[0m {} \x1b[36msymbols\x1b[0m {} \x1b[36mprojects\x1b[0m {}\n {}",
p.files_indexed, p.symbols, p.projects, bottom,
));
}
});
Expand Down
Loading
Loading