Skip to content

Commit 8a3b197

Browse files
ahavili and claude committed
perf(index): optimize initial indexing from 28.5s to 13s (2.2x faster)
Four targeted optimizations addressing measured bottlenecks in the extraction pipeline for large Swift projects (~2000 files):

- Thread-local tree-sitter parsers: eliminate per-file Parser::new() + set_language() allocation in both Swift and Rust extractors
- Pre-canonicalize module directories: ModuleMap.with_fallback() now canonicalizes dirs once at construction, removing O(M×N) redundant syscalls from the parallel extraction loop
- Outward-expanding snippet search with radius limit: replace O(N×L) full-source scan per node with outward search from expected line, capped at 200 lines. The worst straggler (Strings.generated.swift, 12K nodes, 28K lines) dropped from 18.5s to 0.4s
- Eager index store warmup: buildFileIndex() now runs during prepare_project instead of lazily blocking all rayon threads on first extraction

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent 775cb28 commit 8a3b197

11 files changed

Lines changed: 171 additions & 56 deletions

File tree

grapha-core/src/module.rs

Lines changed: 13 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -26,18 +26,27 @@ impl ModuleMap {
2626
.to_string();
2727
self.modules.insert(name, vec![root.to_path_buf()]);
2828
}
29+
self.canonicalize_dirs();
2930
self
3031
}
3132

33+
/// Rewrites every directory stored in `self.modules` to its canonical form, in place.
///
/// Each path is passed through `canonicalize()` once; a successful result
/// replaces the stored entry. Errors are deliberately ignored, so a directory
/// that cannot be canonicalized keeps the path it was registered with.
///
/// NOTE(review): `module_for_file` now calls `strip_prefix` on the stored
/// directory directly instead of normalizing it per lookup, so a map whose
/// directories never went through this method is matched against raw paths.
/// The test in `grapha-core/src/pipeline.rs` had to add a `with_fallback` call
/// for this reason — confirm that every production construction path does too.
fn canonicalize_dirs(&mut self) {
34+
for dirs in self.modules.values_mut() {
35+
for dir in dirs.iter_mut() {
36+
// Best effort: on `Err` the original entry is left untouched.
if let Ok(canonical) = dir.canonicalize() {
37+
*dir = canonical;
38+
}
39+
}
40+
}
41+
}
42+
3243
pub fn module_for_file(&self, file: &Path) -> Option<String> {
3344
let canonical_file = normalize_path(file);
3445
let mut best_match: Option<(&str, usize)> = None;
3546

3647
for (name, dirs) in &self.modules {
3748
for dir in dirs {
38-
let canonical_dir = normalize_path(dir);
39-
40-
if let Ok(suffix) = canonical_file.strip_prefix(&canonical_dir) {
49+
if let Ok(suffix) = canonical_file.strip_prefix(dir) {
4150
let depth = suffix.components().count();
4251
match best_match {
4352
Some((_, best_depth)) if depth < best_depth => {
@@ -52,7 +61,7 @@ impl ModuleMap {
5261

5362
if best_match.is_none()
5463
&& file.is_relative()
55-
&& let Some(dir_name) = canonical_dir.file_name().and_then(|name| name.to_str())
64+
&& let Some(dir_name) = dir.file_name().and_then(|name| name.to_str())
5665
{
5766
let file_str = file.to_string_lossy();
5867
if file_str.starts_with(dir_name)

grapha-core/src/pipeline.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -213,6 +213,7 @@ mod tests {
213213
};
214214
let mut modules = ModuleMap::new();
215215
modules.modules.insert("core".to_string(), vec![src_dir]);
216+
modules = modules.with_fallback(dir.path());
216217
let file_context = file_context(&project, &modules, &file);
217218

218219
let result = extract_with_registry(&registry, b"fn main() {}", &file_context).unwrap();

grapha-swift/src/bridge.rs

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@ type IndexStoreOpenFn = unsafe extern "C" fn(*const i8, *mut i32) -> *mut std::f
3232
type IndexStoreCloseFn = unsafe extern "C" fn(*mut std::ffi::c_void);
3333
type IndexStoreExtractFn =
3434
unsafe extern "C" fn(*mut std::ffi::c_void, *const i8, *mut u32, *mut i32) -> *const u8;
35+
type IndexStoreWarmupFn = unsafe extern "C" fn(*mut std::ffi::c_void);
3536
type SwiftSyntaxExtractFn = unsafe extern "C" fn(*const i8, usize, *const i8) -> *const i8;
3637
type FreeStringFn = unsafe extern "C" fn(*mut i8);
3738
type FreeBufferFn = unsafe extern "C" fn(*mut u8);
@@ -41,6 +42,7 @@ pub struct SwiftBridge {
4142
pub indexstore_open: IndexStoreOpenFn,
4243
pub indexstore_close: IndexStoreCloseFn,
4344
pub indexstore_extract: IndexStoreExtractFn,
45+
pub indexstore_warmup: Option<IndexStoreWarmupFn>,
4446
pub swiftsyntax_extract: SwiftSyntaxExtractFn,
4547
pub free_string: FreeStringFn,
4648
pub free_buffer: FreeBufferFn,
@@ -69,6 +71,10 @@ impl SwiftBridge {
6971
let indexstore_extract = *lib
7072
.get::<IndexStoreExtractFn>(b"grapha_indexstore_extract")
7173
.ok()?;
74+
let indexstore_warmup = lib
75+
.get::<IndexStoreWarmupFn>(b"grapha_indexstore_warmup")
76+
.ok()
77+
.map(|s| *s);
7278
let swiftsyntax_extract = *lib
7379
.get::<SwiftSyntaxExtractFn>(b"grapha_swiftsyntax_extract")
7480
.ok()?;
@@ -80,6 +86,7 @@ impl SwiftBridge {
8086
indexstore_open,
8187
indexstore_close,
8288
indexstore_extract,
89+
indexstore_warmup,
8390
swiftsyntax_extract,
8491
free_string,
8592
free_buffer,

grapha-swift/src/indexstore.rs

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -138,6 +138,19 @@ fn get_or_open_store(index_store_path: &Path) -> Option<Arc<StoreHandle>> {
138138
})
139139
}
140140

141+
/// Asks the Swift bridge to warm up the index store at `index_store_path`.
///
/// This is a best-effort call with no return value. It silently does nothing when:
/// - the bridge is unavailable (`bridge::bridge()` returns `None`),
/// - the loaded bridge does not provide the optional warmup entry point
///   (`indexstore_warmup` is `None`), or
/// - the store cannot be obtained from `get_or_open_store`.
pub fn warmup_indexstore(index_store_path: &Path) {
142+
let Some(bridge) = bridge::bridge() else {
143+
return;
144+
};
145+
// The warmup function is stored as an `Option`, so its absence is not an error.
let Some(warmup_fn) = bridge.indexstore_warmup else {
146+
return;
147+
};
148+
let Some(handle) = get_or_open_store(index_store_path) else {
149+
return;
150+
};
151+
// SAFETY: `warmup_fn` has the `IndexStoreWarmupFn` signature and receives the
// opaque pointer held by `handle`. Presumably `handle.ptr` is the pointer
// produced by the bridge's open call and stays valid while `handle` is alive
// — TODO confirm against `get_or_open_store` / `StoreHandle`.
unsafe { warmup_fn(handle.ptr) };
152+
}
153+
141154
pub fn extract_from_indexstore(
142155
file_path: &Path,
143156
index_store_path: &Path,

grapha-swift/src/lib.rs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -165,6 +165,9 @@ pub fn init_index_store(project_root: &Path) {
165165

166166
/// Prepares index-store state for `project_root`, then eagerly warms the store.
///
/// First runs `prepare_project_with` with `INDEX_STORE_PATHS` and
/// `discover_index_store`. If `index_store_path(project_root)` then yields a
/// path, that store is warmed via `indexstore::warmup_indexstore`.
fn prepare_project_index_store(project_root: &Path) {
167167
// presumably this records the discovered store path that the lookup below
// reads back — verify against `prepare_project_with`.
prepare_project_with(&INDEX_STORE_PATHS, project_root, discover_index_store);
168+
// Warmup is skipped entirely when no store path is known for this project.
if let Some(store_path) = index_store_path(project_root) {
169+
indexstore::warmup_indexstore(&store_path);
170+
}
168171
}
169172

170173
fn prepare_project_with<F>(

grapha-swift/src/treesitter/extract.rs

Lines changed: 15 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
use std::cell::RefCell;
12
use std::collections::HashMap;
23
use std::path::Path;
34

@@ -9,24 +10,28 @@ use grapha_core::{ExtractionResult, LanguageExtractor};
910
use super::common::*;
1011
use super::swiftui::extract_swiftui_declaration_structure;
1112

13+
// Per-thread tree-sitter parser configured for the Swift grammar.
//
// The initializer creates one `Parser` and sets its language; the value is
// wrapped in a `RefCell` so callers can borrow it mutably to parse.
//
// NOTE(review): a `set_language` failure now panics via `expect` when the
// thread-local is initialized, whereas the removed per-call code propagated it
// with `?` — confirm that a panic is acceptable here.
thread_local! {
14+
static SWIFT_PARSER: RefCell<Parser> = RefCell::new({
15+
let mut p = Parser::new();
16+
p.set_language(&tree_sitter_swift::LANGUAGE.into()).expect("failed to load Swift grammar");
17+
p
18+
});
19+
}
20+
1221
/// Parse Swift source once. Reuse the tree across enrichment passes.
///
/// Mutably borrows the thread-local `SWIFT_PARSER` and parses `source` with no
/// previous tree (`None`).
///
/// # Errors
/// Returns an `anyhow` error when the parser yields no tree.
///
/// NOTE(review): the parser is now shared by every call on a thread. The
/// tree-sitter documentation says a parse that stops early resumes on the next
/// call unless the parser is reset — confirm no timeout or cancellation is
/// configured, or reset the parser on the failure path.
1322
pub fn parse_swift(source: &[u8]) -> anyhow::Result<Tree> {
14-
let mut parser = Parser::new();
15-
parser.set_language(&tree_sitter_swift::LANGUAGE.into())?;
16-
parser
17-
.parse(source, None)
18-
.ok_or_else(|| anyhow::anyhow!("tree-sitter failed to parse Swift source"))
23+
SWIFT_PARSER.with_borrow_mut(|parser| {
24+
parser
25+
.parse(source, None)
26+
.ok_or_else(|| anyhow::anyhow!("tree-sitter failed to parse Swift source"))
27+
})
1928
}
2029

2130
pub struct SwiftExtractor;
2231

2332
impl LanguageExtractor for SwiftExtractor {
2433
fn extract(&self, source: &[u8], file_path: &Path) -> anyhow::Result<ExtractionResult> {
25-
let mut parser = Parser::new();
26-
parser.set_language(&tree_sitter_swift::LANGUAGE.into())?;
27-
let tree = parser
28-
.parse(source, None)
29-
.ok_or_else(|| anyhow::anyhow!("tree-sitter failed to parse Swift source"))?;
34+
let tree = parse_swift(source)?;
3035

3136
let mut result = ExtractionResult::new();
3237
let file_str = file_path.to_string_lossy().to_string();

grapha-swift/swift-bridge/Sources/GraphaSwiftBridge/Bridge.swift

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,14 @@ public func indexstoreExtract(
7272
return UnsafeRawPointer(ptr)
7373
}
7474

75+
/// C entry point `grapha_indexstore_warmup`: warms up the reader registered for `handle`.
///
/// Does nothing when `handle` is nil or when no reader is registered under
/// the handle's address.
@c(grapha_indexstore_warmup)
76+
public func indexstoreWarmup(_ handle: UnsafeMutableRawPointer?) {
77+
guard let handle else { return }
78+
// Readers are looked up by the pointer's bit pattern.
let key = Int(bitPattern: handle)
79+
// Only the lookup happens inside the lock; warmup() runs after it is released.
let reader = _readers.withLock { $0[key] }
80+
reader?.warmup()
81+
}
82+
7583
// MARK: - SwiftSyntax
7684

7785
@c(grapha_swiftsyntax_extract)

grapha-swift/swift-bridge/Sources/GraphaSwiftBridge/IndexStoreReader.swift

Lines changed: 23 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -171,22 +171,12 @@ final class IndexStoreReader: @unchecked Sendable {
171171

172172
// MARK: - Public
173173

174+
/// Ensures the file index is populated ahead of the first `extractFile` call.
func warmup() {
175+
ensureFileIndex()
176+
}
177+
174178
func extractFile(_ filePath: String) -> (UnsafeMutableRawPointer, UInt32)? {
175-
if fileIndex == nil {
176-
if let cached = _sharedFileIndexCache.withLock({ $0[storePath] }) {
177-
fileIndex = cached
178-
} else {
179-
_buildIndexLock.withLock { _ in
180-
if let cached = _sharedFileIndexCache.withLock({ $0[storePath] }) {
181-
fileIndex = cached
182-
} else {
183-
fileIndex = buildFileIndex()
184-
let idx = fileIndex!
185-
_sharedFileIndexCache.withLock { $0[storePath] = idx }
186-
}
187-
}
188-
}
189-
}
179+
ensureFileIndex()
190180

191181
let resolved = resolvePath(filePath)
192182
// Fast last-path-component extraction without Foundation URL
@@ -218,6 +208,24 @@ final class IndexStoreReader: @unchecked Sendable {
218208

219209
// MARK: - File Index (built once)
220210

211+
/// Populates `fileIndex` if it is still nil; otherwise does nothing.
///
/// Lookup order:
/// 1. Reuse the entry for `storePath` in `_sharedFileIndexCache`, if present.
/// 2. Otherwise take `_buildIndexLock`, check the shared cache again, and only
///    if it is still missing call `buildFileIndex()` and publish the result
///    to the shared cache.
///
/// NOTE(review): `fileIndex` itself is read and written here without a visible
/// lock — confirm that a single reader is never used from multiple threads
/// before its index is set.
private func ensureFileIndex() {
212+
if fileIndex == nil {
213+
if let cached = _sharedFileIndexCache.withLock({ $0[storePath] }) {
214+
fileIndex = cached
215+
} else {
216+
_buildIndexLock.withLock { _ in
217+
// Re-check under the build lock: the cache may have been filled
// between the first lookup and acquiring the lock.
if let cached = _sharedFileIndexCache.withLock({ $0[storePath] }) {
218+
fileIndex = cached
219+
} else {
220+
fileIndex = buildFileIndex()
221+
let idx = fileIndex!
222+
_sharedFileIndexCache.withLock { $0[storePath] = idx }
223+
}
224+
}
225+
}
226+
}
227+
}
228+
221229
private func buildFileIndex() -> [String: UnitInfo] {
222230
_cbStore = store
223231
_cbFileIndex = [:]

grapha/src/app/pipeline.rs

Lines changed: 24 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -149,12 +149,18 @@ pub(crate) fn run_pipeline(
149149
let t_read_ns = AtomicU64::new(0);
150150
let t_extract_ns = AtomicU64::new(0);
151151
let t_snippet_ns = AtomicU64::new(0);
152+
let t_file_context_ns = AtomicU64::new(0);
153+
let t_total_per_file_ns = AtomicU64::new(0);
154+
let t_max_single_file_ns = AtomicU64::new(0);
152155
let extraction_cache_entries = Mutex::new(std::collections::HashMap::new());
153156

154157
let results: Vec<_> = all_files
155158
.par_iter()
156159
.filter_map(|file| {
160+
let t_file_start = Instant::now();
161+
let t_fc = Instant::now();
157162
let file_context = grapha_core::file_context(&project_context, &module_map, file);
163+
t_file_context_ns.fetch_add(t_fc.elapsed().as_nanos() as u64, Ordering::Relaxed);
158164
let cache_key = extraction_cache_key(&file_context.relative_path);
159165
if let Some(existing_cache) = existing_extraction_cache
160166
&& let Some(entry) = existing_cache.get(&cache_key)
@@ -169,6 +175,9 @@ pub(crate) fn run_pipeline(
169175
.lock()
170176
.expect("extraction cache mutex poisoned")
171177
.insert(cache_key, entry.clone());
178+
let file_ns = t_file_start.elapsed().as_nanos() as u64;
179+
t_total_per_file_ns.fetch_add(file_ns, Ordering::Relaxed);
180+
t_max_single_file_ns.fetch_max(file_ns, Ordering::Relaxed);
172181
return Some(entry.result.clone());
173182
}
174183

@@ -180,6 +189,9 @@ pub(crate) fn run_pipeline(
180189
if let Some(ref pb) = pb {
181190
pb.inc(1);
182191
}
192+
let file_ns = t_file_start.elapsed().as_nanos() as u64;
193+
t_total_per_file_ns.fetch_add(file_ns, Ordering::Relaxed);
194+
t_max_single_file_ns.fetch_max(file_ns, Ordering::Relaxed);
183195
return None;
184196
}
185197
};
@@ -225,6 +237,9 @@ pub(crate) fn run_pipeline(
225237
.expect("extraction cache mutex poisoned")
226238
.insert(key, entry);
227239
}
240+
let file_ns = t_file_start.elapsed().as_nanos() as u64;
241+
t_total_per_file_ns.fetch_add(file_ns, Ordering::Relaxed);
242+
t_max_single_file_ns.fetch_max(file_ns, Ordering::Relaxed);
228243
Some(result)
229244
}
230245
Err(e) => {
@@ -234,6 +249,9 @@ pub(crate) fn run_pipeline(
234249
eprintln!(" \x1b[33m!\x1b[0m skipping {}: {e}", file.display())
235250
});
236251
}
252+
let file_ns = t_file_start.elapsed().as_nanos() as u64;
253+
t_total_per_file_ns.fetch_add(file_ns, Ordering::Relaxed);
254+
t_max_single_file_ns.fetch_max(file_ns, Ordering::Relaxed);
237255
None
238256
}
239257
}
@@ -279,10 +297,14 @@ pub(crate) fn run_pipeline(
279297
let ts_fb_ms = grapha_swift::TIMING_TS_FALLBACK_NS
280298
.load(std::sync::atomic::Ordering::Relaxed) as f64
281299
/ 1_000_000.0;
300+
let fc_ms = t_file_context_ns.load(Ordering::Relaxed) as f64 / 1_000_000.0;
301+
let total_per_file_ms = t_total_per_file_ns.load(Ordering::Relaxed) as f64 / 1_000_000.0;
302+
let max_single_file_ms = t_max_single_file_ns.load(Ordering::Relaxed) as f64 / 1_000_000.0;
282303
eprintln!(
283-
" thread-summed: read {:.0}ms, extract {:.0}ms, snippet {:.0}ms",
284-
read_ms, extract_ms, snippet_ms
304+
" thread-summed: read {:.0}ms, extract {:.0}ms, snippet {:.0}ms, file_context {:.0}ms, total_per_file {:.0}ms",
305+
read_ms, extract_ms, snippet_ms, fc_ms, total_per_file_ms
285306
);
307+
eprintln!(" max_single_file: {:.0}ms", max_single_file_ms);
286308
eprintln!(
287309
" swift: indexstore {:.0}ms, ts-parse {:.0}ms, doc {:.0}ms, swiftui {:.0}ms, l10n {:.0}ms, asset {:.0}ms, swiftsyntax {:.0}ms, ts-fallback {:.0}ms",
288310
is_ms, ts_parse_ms, doc_ms, swiftui_ms, l10n_ms, asset_ms, ss_ms, ts_fb_ms

grapha/src/extract/rust.rs

Lines changed: 14 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
use std::cell::RefCell;
12
use std::collections::HashMap;
23
use std::path::Path;
34

@@ -9,15 +10,23 @@ use grapha_core::graph::{
910

1011
use super::{ExtractionResult, LanguageExtractor};
1112

13+
// Per-thread tree-sitter parser configured for the Rust grammar.
//
// The initializer creates one `Parser` and sets its language; the value is
// wrapped in a `RefCell` so `extract` can borrow it mutably to parse.
//
// NOTE(review): a `set_language` failure now panics via `expect` when the
// thread-local is initialized, whereas the removed per-call code propagated it
// with `?` — confirm that a panic is acceptable here.
thread_local! {
14+
static RUST_PARSER: RefCell<Parser> = RefCell::new({
15+
let mut p = Parser::new();
16+
p.set_language(&tree_sitter_rust::LANGUAGE.into()).expect("failed to load Rust grammar");
17+
p
18+
});
19+
}
20+
1221
pub struct RustExtractor;
1322

1423
impl LanguageExtractor for RustExtractor {
1524
fn extract(&self, source: &[u8], file_path: &Path) -> anyhow::Result<ExtractionResult> {
16-
let mut parser = Parser::new();
17-
parser.set_language(&tree_sitter_rust::LANGUAGE.into())?;
18-
let tree = parser
19-
.parse(source, None)
20-
.ok_or_else(|| anyhow::anyhow!("tree-sitter failed to parse source"))?;
25+
let tree = RUST_PARSER.with_borrow_mut(|parser| {
26+
parser
27+
.parse(source, None)
28+
.ok_or_else(|| anyhow::anyhow!("tree-sitter failed to parse source"))
29+
})?;
2130

2231
let mut result = ExtractionResult::new();
2332
let file_str = file_path.to_string_lossy().to_string();

0 commit comments

Comments
 (0)