From 0ab80c543b802c85673cca08b500209a7861e076 Mon Sep 17 00:00:00 2001 From: Jonathan Manning Date: Tue, 12 May 2026 21:47:49 +0100 Subject: [PATCH 1/2] fix(stats): route reads exceeding --outFilterMultimapNmax to too_many_loci The existing AlignmentStats::record_alignment routing arm for n_alignments > max_multimaps was unreachable on the PE path - the too-many-loci filter returned n_for_mapq = 0, so the caller hit record_alignment(0, ...) and incremented the unmapped bucket. Log.final.out always reported 0 in the too-many-loci field and the per-bucket sum fell 3-of-50000 short of input on the yeast PE test profile (STAR records exactly 3 there on the same data). Return joint_pairs.len() as n_for_mapq from the PE filter site and route through record_alignment with that count, mirroring the SE caller pattern so the _ => arm fires. The read still isn't emitted in the BAM with outSAMmultNmax=-1, matching STAR; only the accounting changes. Fixes #53 Co-Authored-By: Claude --- src/align/read_align.rs | 3 ++- src/lib.rs | 4 ++-- src/stats.rs | 53 +++++++++++++++++++++++++++++++++++++++++ 3 files changed, 57 insertions(+), 3 deletions(-) diff --git a/src/align/read_align.rs b/src/align/read_align.rs index 8d188e8..d177a3c 100644 --- a/src/align/read_align.rs +++ b/src/align/read_align.rs @@ -1084,10 +1084,11 @@ pub fn align_paired_read( // Step 3: TooManyLoci check (post-dedup, matching STAR's ordering: multMapSelect → dedup → TooManyLoci). if joint_pairs.len() > params.out_filter_multimap_nmax as usize { + let n_loci = joint_pairs.len(); return Ok(( Vec::new(), pe_chimeric, - 0, + n_loci, Some(UnmappedReason::TooManyLoci), )); } diff --git a/src/lib.rs b/src/lib.rs index d229763..38bcf52 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1454,8 +1454,8 @@ fn align_reads_paired_end( .collect(); if results.is_empty() { - // Both mates unmapped - stats.record_alignment(0, max_multimaps); + let n_for_stats = if n_for_mapq > 0 { n_for_mapq } else { 0 }; + stats.record_alignment(n_for_stats, max_multimaps); stats.record_unmapped_reason( unmapped_reason.unwrap_or(crate::stats::UnmappedReason::Other), ); diff --git a/src/stats.rs b/src/stats.rs index 64a05de..6d1730c 100644 --- a/src/stats.rs +++ b/src/stats.rs @@ -827,6 +827,59 @@ mod tests { assert_eq!(stats.unmapped_mismatches.load(Ordering::Relaxed), 1); } + #[test] + fn test_too_many_loci_routed_and_totals_conserved() { + let stats = AlignmentStats::new(); + let max_multimaps = 3; + + stats.record_alignment(5, max_multimaps); + stats.record_unmapped_reason(UnmappedReason::TooManyLoci); + + assert_eq!(stats.total_reads.load(Ordering::Relaxed), 1); + assert_eq!(stats.too_many_loci.load(Ordering::Relaxed), 1); + assert_eq!(stats.uniquely_mapped.load(Ordering::Relaxed), 0); + assert_eq!(stats.multi_mapped.load(Ordering::Relaxed), 0); + assert_eq!(stats.unmapped.load(Ordering::Relaxed), 0); + assert_eq!(stats.unmapped_other.load(Ordering::Relaxed), 0); + assert_eq!(stats.unmapped_short.load(Ordering::Relaxed), 0); + assert_eq!(stats.unmapped_mismatches.load(Ordering::Relaxed), 0); + } + + #[test] + fn test_bucket_sum_conserved_with_too_many_loci() { + let stats = AlignmentStats::new(); + let max_multimaps = 10; + + for _ in 0..3 { + stats.record_alignment(1, max_multimaps); + } + for _ in 0..2 { + stats.record_alignment(5, max_multimaps); + } + for _ in 0..4 { + stats.record_alignment(0, max_multimaps); + stats.record_unmapped_reason(UnmappedReason::TooShort); + } + for _ in 0..2 { + stats.record_alignment(15, max_multimaps); + stats.record_unmapped_reason(UnmappedReason::TooManyLoci); + } + + let total = stats.total_reads.load(Ordering::Relaxed); + let unique = stats.uniquely_mapped.load(Ordering::Relaxed); + let multi = stats.multi_mapped.load(Ordering::Relaxed); + let unmapped_short = stats.unmapped_short.load(Ordering::Relaxed); + let unmapped_other = stats.unmapped_other.load(Ordering::Relaxed); + let unmapped_mismatches = stats.unmapped_mismatches.load(Ordering::Relaxed); + let too_many_loci = stats.too_many_loci.load(Ordering::Relaxed); + + assert_eq!(total, 11); + assert_eq!(too_many_loci, 2); + let bucket_sum = + unique + multi + unmapped_short + unmapped_other + unmapped_mismatches + too_many_loci; + assert_eq!(bucket_sum, total); + } + #[test] fn test_splice_motif_aggregation() { use crate::align::score::SpliceMotif; From 3f0377ee494d712994c5b99a69edfb9d0062d7a4 Mon Sep 17 00:00:00 2001 From: Jonathan Manning Date: Tue, 12 May 2026 22:31:38 +0100 Subject: [PATCH 2/2] refactor(stats): drop redundant n_for_mapq conditional in PE caller n_for_mapq is usize, so `if n > 0 { n } else { 0 }` is identical to passing n_for_mapq directly to record_alignment. Match the SE caller shape: the variable already carries the right count from the PE TooManyLoci return site, and record_alignment routes via max_multimaps. Co-Authored-By: Claude Opus 4.7 (1M context) --- src/lib.rs | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 38bcf52..a845dd0 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1454,8 +1454,7 @@ fn align_reads_paired_end( .collect(); if results.is_empty() { - let n_for_stats = if n_for_mapq > 0 { n_for_mapq } else { 0 }; - stats.record_alignment(n_for_stats, max_multimaps); + stats.record_alignment(n_for_mapq, max_multimaps); stats.record_unmapped_reason( unmapped_reason.unwrap_or(crate::stats::UnmappedReason::Other), );