Skip to content

Commit d665e99

Browse files
committed
perf: optimize directory walking to reduce syscalls and allocations
1 parent b6a38f2 commit d665e99

2 files changed

Lines changed: 82 additions & 17 deletions

File tree

src/glob.rs

Lines changed: 27 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -318,10 +318,19 @@ impl Glob {
318318
(None, Some(d)) => Some(d + 1), // Pattern depth + 1 (for root directory)
319319
(None, None) => None, // Unlimited (has **)
320320
};
321+
322+
// Optimization: Only enable accurate symlink detection when needed.
323+
// The `mark` option requires knowing whether an entry is a symlink to avoid
324+
// adding a trailing slash. When following symlinks, walkdir reports the TARGET
325+
// type, so we need an extra syscall to detect the symlink. Skip this overhead
326+
// when not needed.
327+
let need_accurate_symlink_detection = mark && follow;
328+
321329
let walk_options = WalkOptions::new()
322330
.follow_symlinks(follow)
323331
.max_depth(walker_max_depth)
324-
.dot(true);
332+
.dot(true)
333+
.need_accurate_symlink_detection(need_accurate_symlink_detection);
325334

326335
// Pre-compute: check if any pattern requires directory matching (ends with /)
327336
let any_pattern_requires_dir = patterns.iter().any(|p| p.requires_dir());
@@ -448,6 +457,9 @@ impl Glob {
448457
let walker = Walker::new(walk_root.clone(), adjusted_walk_options)
449458
.with_dir_prune_filter(prune_filter);
450459

460+
// Optimization: Check if we have any ignore patterns to avoid unnecessary work
461+
let has_ignore_filter = self.ignore_filter.is_some();
462+
451463
for entry in walker.walk() {
452464
let path = entry.path();
453465

@@ -479,13 +491,6 @@ impl Glob {
479491
rel_str_from_walk_root.into_owned()
480492
};
481493

482-
// For operations that need the actual relative path from cwd
483-
let rel_path = if prefix_to_strip.is_some() {
484-
std::path::PathBuf::from(&normalized)
485-
} else {
486-
rel_path_from_walk_root.to_path_buf()
487-
};
488-
489494
// Check if this path is inside an ignored directory.
490495
// Optimization: Use byte-level comparison instead of char iteration.
491496
if !ignored_dirs.is_empty() {
@@ -502,22 +507,30 @@ impl Glob {
502507
}
503508

504509
// Check ignore patterns
505-
if let Some(ref ignore_filter) = self.ignore_filter {
510+
// Optimization: Only create rel_path and abs_path when we have ignore patterns
511+
if has_ignore_filter {
512+
// For operations that need the actual relative path from cwd
513+
let rel_path = if prefix_to_strip.is_some() {
514+
std::path::PathBuf::from(&normalized)
515+
} else {
516+
rel_path_from_walk_root.to_path_buf()
517+
};
506518
let abs_path = abs_cwd.join(&rel_path);
519+
let ignore_filter = self.ignore_filter.as_ref().unwrap();
507520

508521
// Check if this specific path should be ignored
509522
if ignore_filter.should_ignore(&normalized, &abs_path) {
510523
// If children are also ignored, mark this directory
511524
if entry.is_dir() && ignore_filter.children_ignored(&normalized, &abs_path) {
512-
ignored_dirs.insert(normalized.to_string());
525+
ignored_dirs.insert(normalized.clone());
513526
}
514527
continue;
515528
}
516529

517530
// Also check if this is a directory whose children should be ignored
518531
// (for optimization - skip traversing)
519532
if entry.is_dir() && ignore_filter.children_ignored(&normalized, &abs_path) {
520-
ignored_dirs.insert(normalized.to_string());
533+
ignored_dirs.insert(normalized.clone());
521534
}
522535
}
523536

@@ -633,7 +646,9 @@ impl Glob {
633646

634647
let result = if self.absolute {
635648
// Return absolute path
636-
let abs_path = abs_cwd.join(&rel_path);
649+
// Optimization: Construct the absolute path from normalized string
650+
// instead of first materializing the intermediate relative `rel_path` PathBuf
651+
let abs_path = abs_cwd.join(&normalized);
637652
let mut path = self.format_path(&abs_path);
638653
if self.mark && should_mark_as_dir {
639654
path = self.ensure_trailing_slash(&path);

src/walker.rs

Lines changed: 55 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,10 @@ pub struct WalkOptions {
1212
pub max_depth: Option<usize>,
1313
/// Include dotfiles (files starting with .)
1414
pub dot: bool,
15+
/// Whether to accurately detect symlinks even when following them.
16+
/// This is needed for the `mark` option to correctly NOT add trailing slashes to symlinks.
17+
/// When false, avoids an extra stat call per file (faster).
18+
pub need_accurate_symlink_detection: bool,
1519
}
1620

1721
/// A filter function that can prune directories during walking.
@@ -37,6 +41,11 @@ impl WalkOptions {
3741
self.dot = include_dot;
3842
self
3943
}
44+
45+
pub fn need_accurate_symlink_detection(mut self, need: bool) -> Self {
46+
self.need_accurate_symlink_detection = need;
47+
self
48+
}
4049
}
4150

4251
/// A single entry returned from the walker
@@ -50,6 +59,25 @@ pub struct WalkEntry {
5059
}
5160

5261
impl WalkEntry {
62+
/// Create a WalkEntry from a walkdir DirEntry without checking symlink metadata.
63+
/// This is faster but may not correctly detect symlinks when following links.
64+
/// Use this whenever accurate symlink detection is not required, i.e. unless both `mark` and symlink following are enabled.
65+
#[inline]
66+
pub fn from_dir_entry_fast(entry: &DirEntry) -> Self {
67+
let file_type = entry.file_type();
68+
Self {
69+
path: entry.path().to_path_buf(),
70+
depth: entry.depth(),
71+
is_dir: file_type.is_dir(),
72+
is_file: file_type.is_file(),
73+
is_symlink: file_type.is_symlink(),
74+
}
75+
}
76+
77+
/// Create a WalkEntry from a walkdir DirEntry with full symlink detection.
78+
/// This is slower because it makes an extra syscall for symlink_metadata,
79+
/// but correctly detects symlinks even when following them.
80+
/// Use this when the `mark` option is true and symlinks are being followed.
5381
pub fn from_dir_entry(entry: &DirEntry) -> Self {
5482
let file_type = entry.file_type();
5583
// When following symlinks, walkdir reports the TARGET type, not the symlink type.
@@ -97,6 +125,12 @@ impl WalkEntry {
97125
pub fn file_name(&self) -> Option<&std::ffi::OsStr> {
98126
self.path.file_name()
99127
}
128+
129+
/// Get the file name as a string slice if possible, for fast comparisons.
130+
#[inline]
131+
pub fn file_name_str(&self) -> Option<&str> {
132+
self.path.file_name().and_then(|s| s.to_str())
133+
}
100134
}
101135

102136
/// Directory walker that can traverse filesystem trees
@@ -152,6 +186,15 @@ impl Walker {
152186

153187
let dot = self.options.dot;
154188
let root = self.root.clone();
189+
let need_accurate_symlink = self.options.need_accurate_symlink_detection;
190+
191+
// Choose the appropriate entry creation function based on whether we need
192+
// accurate symlink detection. This avoids an extra syscall per file when not needed.
193+
let create_entry = if need_accurate_symlink {
194+
WalkEntry::from_dir_entry
195+
} else {
196+
WalkEntry::from_dir_entry_fast
197+
};
155198

156199
// If we have a pruning filter, we need to use it in filter_entry
157200
if let Some(ref prune_filter) = self.dir_prune_filter {
@@ -161,6 +204,7 @@ impl Walker {
161204
.into_iter()
162205
.filter_entry(|e| {
163206
// Filter dot files if dot option is false
207+
// Note: the dot check goes through `to_str()` + `starts_with('.')`, so names that are not valid UTF-8 are never treated as dotfiles here
164208
if !dot {
165209
if let Some(name) = e.file_name().to_str() {
166210
if e.depth() > 0 && name.starts_with('.') {
@@ -183,7 +227,7 @@ impl Walker {
183227
true
184228
})
185229
.filter_map(|result| match result {
186-
Ok(entry) => Some(WalkEntry::from_dir_entry(&entry)),
230+
Ok(entry) => Some(create_entry(&entry)),
187231
Err(err) => {
188232
if let Some(path) = err.path() {
189233
if let Ok(meta) = path.symlink_metadata() {
@@ -221,9 +265,9 @@ impl Walker {
221265
}
222266
true
223267
})
224-
.filter_map(|result| {
268+
.filter_map(move |result| {
225269
match result {
226-
Ok(entry) => Some(WalkEntry::from_dir_entry(&entry)),
270+
Ok(entry) => Some(create_entry(&entry)),
227271
Err(err) => {
228272
// For broken symlinks (or other IO errors), try to extract the path
229273
// and return it as an entry. This handles the case where follow_links
@@ -638,8 +682,14 @@ mod tests {
638682
// Create a symlink to the directory
639683
symlink(base.join("real-dir"), base.join("symlink-to-dir")).unwrap();
640684

641-
// Walk WITH following symlinks - check what is_symlink reports
642-
let walker = Walker::new(base.to_path_buf(), WalkOptions::new().follow_symlinks(true));
685+
// Walk WITH following symlinks AND accurate symlink detection enabled
686+
// (This is needed to correctly detect symlinks when following them)
687+
let walker = Walker::new(
688+
base.to_path_buf(),
689+
WalkOptions::new()
690+
.follow_symlinks(true)
691+
.need_accurate_symlink_detection(true),
692+
);
643693
let entries: Vec<_> = walker.walk_sync();
644694

645695
// Find the symlink entry

0 commit comments

Comments
 (0)