Skip to content

Commit 847d836

Browse files
script3r, claude, and happy-otter
committed
Optimize O(n²) deduplication to O(n × m)
Both dedupe_more_specific() and dedupe_more_specific_hits() had nested loops comparing every hit against every other hit, which is O(n²).

The key insight is that deduplication only matters for hits on the same line. By grouping hits by line first (a single O(n) pass), we only compare within each group. This reduces complexity to O(n × m), where m is the number of hits sharing a line (typically 1-3).

Changes:
- Add early return for 0-1 hits
- Group hits by line using HashMap before comparison
- Skip single-hit lines entirely
- Only iterate within same-line groups

This prevents quadratic blowup on crypto-dense files whose findings are spread across many lines (e.g., 100 findings would have been 10,000 comparisons, now ~100-300). Hits that all land on one line are still compared pairwise within that line.

Generated with [Claude Code](https://claude.ai/code) via [Happy](https://happy.engineering)

Co-Authored-By: Claude <noreply@anthropic.com>
Co-Authored-By: Happy <yesreply@happy.engineering>
1 parent 083862b commit 847d836

1 file changed

Lines changed: 57 additions & 29 deletions

File tree

src/scan.rs

Lines changed: 57 additions & 29 deletions
Original file line number | Diff line number | Diff line change
@@ -340,30 +340,44 @@ pub fn find_algorithms<'a>(
340340
}
341341

342342
pub fn dedupe_more_specific_hits<'a>(hits: Vec<AlgorithmHit<'a>>) -> Vec<AlgorithmHit<'a>> {
343+
if hits.len() <= 1 {
344+
return hits;
345+
}
346+
347+
// Group hits by line in one O(n) pass so comparisons stay within a line: O(n × m) overall instead of O(n²)
348+
let mut by_line: HashMap<usize, Vec<usize>> = HashMap::new();
349+
for (idx, hit) in hits.iter().enumerate() {
350+
by_line.entry(hit.line).or_default().push(idx);
351+
}
352+
343353
let mut drop = vec![false; hits.len()];
344-
for i in 0..hits.len() {
345-
if drop[i] {
354+
355+
// Only compare hits on the same line
356+
for indices in by_line.values() {
357+
if indices.len() <= 1 {
346358
continue;
347359
}
348-
for j in 0..hits.len() {
349-
if i == j || drop[j] {
350-
continue;
351-
}
352-
if hits[i].line != hits[j].line {
360+
for &i in indices {
361+
if drop[i] {
353362
continue;
354363
}
355364
let Some(p_i) = primitive_of_metadata(&hits[i]) else {
356365
continue;
357366
};
358-
let Some(p_j) = primitive_of_metadata(&hits[j]) else {
359-
continue;
360-
};
361-
if p_i != p_j {
362-
continue;
363-
}
364-
if is_more_specific(hits[j].algorithm_name, hits[i].algorithm_name) {
365-
drop[i] = true;
366-
break;
367+
for &j in indices {
368+
if i == j || drop[j] {
369+
continue;
370+
}
371+
let Some(p_j) = primitive_of_metadata(&hits[j]) else {
372+
continue;
373+
};
374+
if p_i != p_j {
375+
continue;
376+
}
377+
if is_more_specific(hits[j].algorithm_name, hits[i].algorithm_name) {
378+
drop[i] = true;
379+
break;
380+
}
367381
}
368382
}
369383
}
@@ -656,24 +670,38 @@ fn dedupe_more_specific<'a>(
656670
hits: Vec<AlgorithmHit<'a>>,
657671
primitive_by_alg: &HashMap<String, String>,
658672
) -> Vec<AlgorithmHit<'a>> {
673+
if hits.len() <= 1 {
674+
return hits;
675+
}
676+
677+
// Group hits by line in one O(n) pass so comparisons stay within a line: O(n × m) overall instead of O(n²)
678+
let mut by_line: HashMap<usize, Vec<usize>> = HashMap::new();
679+
for (idx, hit) in hits.iter().enumerate() {
680+
by_line.entry(hit.line).or_default().push(idx);
681+
}
682+
659683
let mut drop = vec![false; hits.len()];
660-
for i in 0..hits.len() {
661-
if drop[i] {
684+
685+
// Only compare hits on the same line
686+
for indices in by_line.values() {
687+
if indices.len() <= 1 {
662688
continue;
663689
}
664-
for j in 0..hits.len() {
665-
if i == j || drop[j] {
690+
for &i in indices {
691+
if drop[i] {
666692
continue;
667693
}
668-
if hits[i].line != hits[j].line {
669-
continue;
670-
}
671-
if !primitives_compatible(&hits[i], &hits[j], primitive_by_alg) {
672-
continue;
673-
}
674-
if is_more_specific(hits[j].algorithm_name, hits[i].algorithm_name) {
675-
drop[i] = true;
676-
break;
694+
for &j in indices {
695+
if i == j || drop[j] {
696+
continue;
697+
}
698+
if !primitives_compatible(&hits[i], &hits[j], primitive_by_alg) {
699+
continue;
700+
}
701+
if is_more_specific(hits[j].algorithm_name, hits[i].algorithm_name) {
702+
drop[i] = true;
703+
break;
704+
}
677705
}
678706
}
679707
}

0 commit comments

Comments
 (0)