From 1928c6b9e15be82f9b253d65fcb247cbc348b1b0 Mon Sep 17 00:00:00 2001 From: "Philipp A." Date: Fri, 15 May 2026 08:56:12 +0200 Subject: [PATCH] chore: enable pedantic lints --- src/align/read_align.rs | 241 ++++++++++++++++++------------------ src/align/score.rs | 58 +++++---- src/align/seed.rs | 15 +-- src/align/stitch.rs | 15 +-- src/chimeric/detect.rs | 69 +++++------ src/chimeric/output.rs | 8 +- src/chimeric/score.rs | 2 +- src/genome/mod.rs | 6 +- src/index/packed_array.rs | 20 +-- src/index/suffix_array.rs | 6 +- src/io/fastq.rs | 1 + src/io/sam.rs | 5 +- src/junction/gtf.rs | 11 +- src/junction/sj_output.rs | 10 +- src/junction/sjdb_insert.rs | 10 +- src/lib.rs | 38 +++++- src/params.rs | 4 +- src/quant/mod.rs | 5 +- src/quant/transcriptome.rs | 33 +++-- src/stats.rs | 5 +- 20 files changed, 282 insertions(+), 280 deletions(-) diff --git a/src/align/read_align.rs b/src/align/read_align.rs index 8d188e8..91321ad 100644 --- a/src/align/read_align.rs +++ b/src/align/read_align.rs @@ -749,7 +749,7 @@ pub fn align_paired_read( combined_seeds.extend(m2_seeds); // mate_id: positions 0..len1 → mate1(0); positions len1+1.. → RC(mate2)(1). for s in &mut combined_seeds { - s.mate_id = if s.read_pos < len1 { 0 } else { 1 }; + s.mate_id = u8::from(s.read_pos >= len1); } // Cluster combined seeds using the combined read length @@ -831,135 +831,132 @@ pub fn align_paired_read( for wt in &wts { let split_result = split_combined_wt(wt, len1, len2, stitch_is_reverse, scorer.align_intron_min); - match split_result { - Some((m1_wt, m2_wt)) => { - let (m1_read_slice, m1_orig_rev, m2_read_slice, m2_orig_rev) = - if stitch_is_reverse { - // stitch_read = [mate2(0..len2) | SPACER | RC(mate1)(len2+1..)] - ( - &stitch_read[len2 + 1..], // RC(mate1_seq) - true, // mate1 5' at right in RC - &stitch_read[..len2], // mate2_seq - false, // mate2 5' at left - ) - } else { - // stitch_read = [mate1(0..len1) | SPACER | RC(mate2)(len1+1..)] - ( - &stitch_read[..len1], // mate1_seq - false, // mate1 5' at left - &stitch_read[len1 + 1..], // RC(mate2_seq) - true, // mate2 5' at right in RC - ) - }; - - // Suppress inner-side extensions for each mate. - // Inner = 3' end: right for forward (orig_is_rev=false), left for reverse. - let Some(mut t1) = finalize_transcript( - &m1_wt, - m1_read_slice, + if let Some((m1_wt, m2_wt)) = split_result { + let (m1_read_slice, m1_orig_rev, m2_read_slice, m2_orig_rev) = if stitch_is_reverse + { + // stitch_read = [mate2(0..len2) | SPACER | RC(mate1)(len2+1..)] + ( + &stitch_read[len2 + 1..], // RC(mate1_seq) + true, // mate1 5' at right in RC + &stitch_read[..len2], // mate2_seq + false, // mate2 5' at left + ) + } else { + // stitch_read = [mate1(0..len1) | SPACER | RC(mate2)(len1+1..)] + ( + &stitch_read[..len1], // mate1_seq + false, // mate1 5' at left + &stitch_read[len1 + 1..], // RC(mate2_seq) + true, // mate2 5' at right in RC + ) + }; + + // Suppress inner-side extensions for each mate. + // Inner = 3' end: right for forward (orig_is_rev=false), left for reverse. + let Some(mut t1) = finalize_transcript( + &m1_wt, + m1_read_slice, + index, + &scorer, + &stitch_cluster, + m1_orig_rev, + m1_orig_rev, // no_left_ext = inner for reverse (orig_is_rev=true) + !m1_orig_rev, // no_right_ext = inner for forward (orig_is_rev=false) + ) else { + continue; + }; + let Some(mut t2) = finalize_transcript( + &m2_wt, + m2_read_slice, + index, + &scorer, + &stitch_cluster, + m2_orig_rev, + m2_orig_rev, // no_left_ext = inner for reverse (orig_is_rev=true) + !m2_orig_rev, // no_right_ext = inner for forward (orig_is_rev=false) + ) else { + continue; + }; + + if stitch_is_reverse { + t1.is_reverse = true; + t2.is_reverse = false; + } else { + t1.is_reverse = false; + t2.is_reverse = true; + } + t1.read_seq = mate1_seq.to_vec(); + t2.read_seq = mate2_seq.to_vec(); + + if params.chim_segment_min > 0 { + all_m1_transcripts.push(t1.clone()); + all_m2_transcripts.push(t2.clone()); + } + + let combined_span = + t1.genome_end.max(t2.genome_end) - t1.genome_start.min(t2.genome_start); + let combined_wt_score = wt.score + scorer.genomic_length_penalty(combined_span); + + if let Some(pair) = try_pair_transcripts( + &t1, + &t2, + len1, + len2, + params, + combined_score_threshold, + combined_wt_score, + ) { + joint_pairs.push(pair); + } + } else { + // Single-mate WT: save for half-mapped fallback + let all_m1 = wt.exons.iter().all(|e| e.mate_id == 0); + let all_m2 = wt.exons.iter().all(|e| e.mate_id == 1); + if all_m1 { + let (read_slice, orig_rev) = if stitch_is_reverse { + (&stitch_read[len2 + 1..], true) + } else { + (&stitch_read[..len1], false) + }; + if let Some(mut t) = finalize_transcript( + wt, + read_slice, index, &scorer, &stitch_cluster, - m1_orig_rev, - m1_orig_rev, // no_left_ext = inner for reverse (orig_is_rev=true) - !m1_orig_rev, // no_right_ext = inner for forward (orig_is_rev=false) - ) else { - continue; + orig_rev, + false, + false, + ) { + t.is_reverse = stitch_is_reverse; + t.read_seq = mate1_seq.to_vec(); + if params.chim_segment_min > 0 { + all_m1_transcripts.push(t.clone()); + } + single_mate1_transcripts.push(t); + } + } else if all_m2 { + let (read_slice, orig_rev) = if stitch_is_reverse { + (&stitch_read[..len2], false) + } else { + (&stitch_read[len1 + 1..], true) }; - let Some(mut t2) = finalize_transcript( - &m2_wt, - m2_read_slice, + if let Some(mut t) = finalize_transcript( + wt, + read_slice, index, &scorer, &stitch_cluster, - m2_orig_rev, - m2_orig_rev, // no_left_ext = inner for reverse (orig_is_rev=true) - !m2_orig_rev, // no_right_ext = inner for forward (orig_is_rev=false) - ) else { - continue; - }; - - if stitch_is_reverse { - t1.is_reverse = true; - t2.is_reverse = false; - } else { - t1.is_reverse = false; - t2.is_reverse = true; - } - t1.read_seq = mate1_seq.to_vec(); - t2.read_seq = mate2_seq.to_vec(); - - if params.chim_segment_min > 0 { - all_m1_transcripts.push(t1.clone()); - all_m2_transcripts.push(t2.clone()); - } - - let combined_span = - t1.genome_end.max(t2.genome_end) - t1.genome_start.min(t2.genome_start); - let combined_wt_score = wt.score + scorer.genomic_length_penalty(combined_span); - - if let Some(pair) = try_pair_transcripts( - &t1, - &t2, - len1, - len2, - params, - combined_score_threshold, - combined_wt_score, + orig_rev, + false, + false, ) { - joint_pairs.push(pair); - } - } - None => { - // Single-mate WT: save for half-mapped fallback - let all_m1 = wt.exons.iter().all(|e| e.mate_id == 0); - let all_m2 = wt.exons.iter().all(|e| e.mate_id == 1); - if all_m1 { - let (read_slice, orig_rev) = if stitch_is_reverse { - (&stitch_read[len2 + 1..], true) - } else { - (&stitch_read[..len1], false) - }; - if let Some(mut t) = finalize_transcript( - wt, - read_slice, - index, - &scorer, - &stitch_cluster, - orig_rev, - false, - false, - ) { - t.is_reverse = stitch_is_reverse; - t.read_seq = mate1_seq.to_vec(); - if params.chim_segment_min > 0 { - all_m1_transcripts.push(t.clone()); - } - single_mate1_transcripts.push(t); - } - } else if all_m2 { - let (read_slice, orig_rev) = if stitch_is_reverse { - (&stitch_read[..len2], false) - } else { - (&stitch_read[len1 + 1..], true) - }; - if let Some(mut t) = finalize_transcript( - wt, - read_slice, - index, - &scorer, - &stitch_cluster, - orig_rev, - false, - false, - ) { - t.is_reverse = !stitch_is_reverse; - t.read_seq = mate2_seq.to_vec(); - if params.chim_segment_min > 0 { - all_m2_transcripts.push(t.clone()); - } - single_mate2_transcripts.push(t); + t.is_reverse = !stitch_is_reverse; + t.read_seq = mate2_seq.to_vec(); + if params.chim_segment_min > 0 { + all_m2_transcripts.push(t.clone()); } + single_mate2_transcripts.push(t); } } } @@ -2144,7 +2141,7 @@ mod tests { assert_eq!(items[4], (40, 4)); // Tied prefix contains the original three items in some order. let mut top: Vec = items[..3].iter().map(|t| t.1).collect(); - top.sort(); + top.sort_unstable(); assert_eq!(top, vec![0, 1, 2]); } diff --git a/src/align/score.rs b/src/align/score.rs index ea1d249..0a3af0a 100644 --- a/src/align/score.rs +++ b/src/align/score.rs @@ -224,35 +224,39 @@ impl AlignmentScorer { // Score the net indel portion (gg, rg) if gg > 0 && rg > 0 => { let excess = gg - rg; - if excess > 0 { - let del_len = excess as u32; - if del_len >= self.align_intron_min && del_len <= self.align_intron_max { - let rc_donor = genome_pos + rg as u64; - let donor = if is_reverse { - n_genome - rc_donor - del_len as u64 + match excess { + 1.. => { + let del_len = excess as u32; + if del_len >= self.align_intron_min && del_len <= self.align_intron_max { + let rc_donor = genome_pos + rg as u64; + let donor = if is_reverse { + n_genome - rc_donor - del_len as u64 + } else { + rc_donor + }; + let motif = self.detect_splice_motif(donor, del_len, genome); + let score = self.score_splice_junction(&motif); + ( + score, + GapType::SpliceJunction { + intron_len: del_len, + motif, + }, + ) } else { - rc_donor - }; - let motif = self.detect_splice_motif(donor, del_len, genome); - let score = self.score_splice_junction(&motif); - ( - score, - GapType::SpliceJunction { - intron_len: del_len, - motif, - }, - ) - } else { - let score = self.score_del_open + self.score_del_base * del_len as i32; - (score, GapType::Deletion(del_len)) + let score = self.score_del_open + self.score_del_base * del_len as i32; + (score, GapType::Deletion(del_len)) + } + } + ..=-1 => { + let ins_len = (-excess) as u32; + let score = self.score_ins_open + self.score_ins_base * ins_len as i32; + (score, GapType::Insertion(ins_len)) + } + 0 => { + // Equal gaps: no net indel + (0, GapType::Deletion(0)) } - } else if excess < 0 { - let ins_len = (-excess) as u32; - let score = self.score_ins_open + self.score_ins_base * ins_len as i32; - (score, GapType::Insertion(ins_len)) - } else { - // Equal gaps: no net indel - (0, GapType::Deletion(0)) } } // Other cases (negative gaps, etc.) diff --git a/src/align/seed.rs b/src/align/seed.rs index 6efc92d..b9deee4 100644 --- a/src/align/seed.rs +++ b/src/align/seed.rs @@ -366,14 +366,11 @@ fn find_seed_at_position( .sa_index .hierarchical_lookup(kmer_idx, actual_len as u32, n_sa); - let (sa_start, sa_end, matched_level, bounds_tight) = match result { - Some(r) => r, - None => { - return Ok(MmpResult { - seed: None, - advance: 1, - }); - } + let Some((sa_start, sa_end, matched_level, bounds_tight)) = result else { + return Ok(MmpResult { + seed: None, + advance: 1, + }); }; if sa_start >= sa_end { @@ -440,7 +437,7 @@ fn find_seed_at_position( /// Overflow-safe median of two unsigned integers. /// Equivalent to STAR's medianUint2: a/2 + b/2 + (a%2 + b%2)/2 fn median_uint2(a: usize, b: usize) -> usize { - a / 2 + b / 2 + (a % 2 + b % 2) / 2 + a / 2 + b / 2 + usize::midpoint(a % 2, b % 2) } /// Compare read to genome at a specific SA position, starting from offset l_start. diff --git a/src/align/stitch.rs b/src/align/stitch.rs index 8db4308..4d4e81b 100644 --- a/src/align/stitch.rs +++ b/src/align/stitch.rs @@ -236,9 +236,8 @@ fn extend_alignment( }; // Get genome base (with strand offset) - let genome_base = match index.genome.get_base(genome_pos + genome_offset) { - Some(b) => b, - None => break, + let Some(genome_base) = index.genome.get_base(genome_pos + genome_offset) else { + break; }; // Stop at chromosome boundary (padding = 5) @@ -878,7 +877,7 @@ pub fn cluster_seeds( // Phase 5: Build SeedCluster output let mut clusters = Vec::with_capacity(windows.len()); - for window in windows.iter() { + for window in &windows { if !window.alive || window.alignments.is_empty() { continue; } @@ -1946,12 +1945,10 @@ pub(crate) fn finalize_transcript( let t_genome_start = merged_exons .first() - .map(|e| e.genome_start) - .unwrap_or(forward_genome_start); + .map_or(forward_genome_start, |e| e.genome_start); let t_genome_end = merged_exons .last() - .map(|e| e.genome_end) - .unwrap_or(forward_genome_end); + .map_or(forward_genome_end, |e| e.genome_end); // Apply genomic length penalty let genomic_span = t_genome_end - t_genome_start; @@ -2650,7 +2647,7 @@ pub(crate) fn stitch_seeds_core( let mut keep_indices = std::collections::HashSet::new(); for (_diag, mut seeds) in diag_seeds { // Sort by start position - seeds.sort(); + seeds.sort_unstable(); // Merge intervals, keeping the index of the longest seed in each merged group let mut merged_end = seeds[0].1; let mut best_idx = seeds[0].2; diff --git a/src/chimeric/detect.rs b/src/chimeric/detect.rs index d891142..7127ba5 100644 --- a/src/chimeric/detect.rs +++ b/src/chimeric/detect.rs @@ -390,7 +390,7 @@ impl<'a> ChimericDetector<'a> { // Find cluster pairs with chimeric signatures for i in 0..clusters.len() { for j in (i + 1)..clusters.len() { - if self.is_chimeric_signature(&clusters[i], &clusters[j]) { + if is_chimeric_signature(&clusters[i], &clusters[j]) { // Try to build chimeric alignment from these clusters if let Some(chim) = self.build_chimeric_from_clusters( &clusters[i], @@ -408,27 +408,6 @@ impl<'a> ChimericDetector<'a> { Ok(chimeras) } - /// Check if two clusters represent a chimeric signature - fn is_chimeric_signature(&self, c1: &SeedCluster, c2: &SeedCluster) -> bool { - // Different chromosomes - if c1.chr_idx != c2.chr_idx { - return true; - } - - // Different strands (same chromosome) - if c1.is_reverse != c2.is_reverse { - return true; - } - - // Large genomic distance (same chr/strand) - let distance = genomic_distance(c1, c2); - if distance > 1_000_000 { - return true; - } - - false - } - /// Build chimeric alignment from two clusters fn build_chimeric_from_clusters( &self, @@ -529,6 +508,27 @@ fn genomic_distance(c1: &SeedCluster, c2: &SeedCluster) -> u64 { } } +/// Check if two clusters represent a chimeric signature +fn is_chimeric_signature(c1: &SeedCluster, c2: &SeedCluster) -> bool { + // Different chromosomes + if c1.chr_idx != c2.chr_idx { + return true; + } + + // Different strands (same chromosome) + if c1.is_reverse != c2.is_reverse { + return true; + } + + // Large genomic distance (same chr/strand) + let distance = genomic_distance(c1, c2); + if distance > 1_000_000 { + return true; + } + + false +} + /// Detect inter-mate chimeric alignment from two single-mate transcripts. /// /// Fires when mate1 and mate2 map to different chromosomes, opposite-orientation @@ -837,9 +837,8 @@ pub fn detect_chimeric_old( } // No chimeric partner found - let tr2 = match best_tr2 { - Some(t) => t, - None => return Ok(vec![]), + let Some(tr2) = best_tr2 else { + return Ok(vec![]); }; // Score filters @@ -1115,46 +1114,34 @@ mod tests { #[test] fn test_is_chimeric_signature_different_chr() { - let params = Parameters::try_parse_from(vec!["rustar-aligner"]).unwrap(); - let detector = ChimericDetector::new(¶ms); - let c1 = make_test_cluster(0, 1000, 1100, false); let c2 = make_test_cluster(1, 1000, 1100, false); - assert!(detector.is_chimeric_signature(&c1, &c2)); + assert!(is_chimeric_signature(&c1, &c2)); } #[test] fn test_is_chimeric_signature_strand_break() { - let params = Parameters::try_parse_from(vec!["rustar-aligner"]).unwrap(); - let detector = ChimericDetector::new(¶ms); - let c1 = make_test_cluster(0, 1000, 1100, false); let c2 = make_test_cluster(0, 1200, 1300, true); - assert!(detector.is_chimeric_signature(&c1, &c2)); + assert!(is_chimeric_signature(&c1, &c2)); } #[test] fn test_is_chimeric_signature_large_distance() { - let params = Parameters::try_parse_from(vec!["rustar-aligner"]).unwrap(); - let detector = ChimericDetector::new(¶ms); - let c1 = make_test_cluster(0, 1000, 1100, false); let c2 = make_test_cluster(0, 2_000_000, 2_000_100, false); - assert!(detector.is_chimeric_signature(&c1, &c2)); + assert!(is_chimeric_signature(&c1, &c2)); } #[test] fn test_is_chimeric_signature_close_same_strand() { - let params = Parameters::try_parse_from(vec!["rustar-aligner"]).unwrap(); - let detector = ChimericDetector::new(¶ms); - let c1 = make_test_cluster(0, 1000, 1100, false); let c2 = make_test_cluster(0, 1200, 1300, false); - assert!(!detector.is_chimeric_signature(&c1, &c2)); + assert!(!is_chimeric_signature(&c1, &c2)); } // --- transcript_to_segment tests --- diff --git a/src/chimeric/output.rs b/src/chimeric/output.rs index b9c0524..dade145 100644 --- a/src/chimeric/output.rs +++ b/src/chimeric/output.rs @@ -297,8 +297,8 @@ mod tests { // Create mock chimeric alignment (chr9 -> chr22, BCR-ABL fusion) let donor = ChimericSegment { chr_idx: 0, - genome_start: 133738300, - genome_end: 133738363, + genome_start: 133_738_300, + genome_end: 133_738_363, is_reverse: false, read_start: 0, read_end: 63, @@ -309,8 +309,8 @@ mod tests { let acceptor = ChimericSegment { chr_idx: 1, - genome_start: 23632600, - genome_end: 23632637, + genome_start: 23_632_600, + genome_end: 23_632_637, is_reverse: false, read_start: 63, read_end: 100, diff --git a/src/chimeric/score.rs b/src/chimeric/score.rs index ec87fe0..a4e1827 100644 --- a/src/chimeric/score.rs +++ b/src/chimeric/score.rs @@ -108,7 +108,7 @@ pub fn calculate_repeat_length( // Calculate how far we can check let donor_idx = chr_start + donor_pos; - let _acceptor_idx = chr_start + acceptor_pos; + //let acceptor_idx = chr_start + acceptor_pos; let mut repeat_len_donor = 0u32; let mut repeat_len_acceptor = 0u32; diff --git a/src/genome/mod.rs b/src/genome/mod.rs index e3e616a..a48c749 100644 --- a/src/genome/mod.rs +++ b/src/genome/mod.rs @@ -268,8 +268,7 @@ impl Genome { let gtf = params .sjdb_gtf_file .as_ref() - .map(|p| p.display().to_string()) - .unwrap_or_else(|| "-".to_string()); + .map_or_else(|| "-".to_string(), |p| p.display().to_string()); writeln!( f, "### STAR --runMode genomeGenerate --runThreadN {thr} --genomeDir {dir} --genomeFastaFiles {fa} --genomeSAindexNbases {sai} --sjdbGTFfile {gtf} --sjdbOverhang {ov}", @@ -319,8 +318,7 @@ impl Genome { let gtf_str = params .sjdb_gtf_file .as_ref() - .map(|p| p.display().to_string()) - .unwrap_or_else(|| "-".to_string()); + .map_or_else(|| "-".to_string(), |p| p.display().to_string()); writeln!(f, "sjdbGTFfile\t{}", gtf_str).map_err(|e| Error::io(e, &path))?; writeln!(f, "sjdbGTFchrPrefix\t-").map_err(|e| Error::io(e, &path))?; writeln!(f, "sjdbGTFfeatureExon\texon").map_err(|e| Error::io(e, &path))?; diff --git a/src/index/packed_array.rs b/src/index/packed_array.rs index 55a0030..407878e 100644 --- a/src/index/packed_array.rs +++ b/src/index/packed_array.rs @@ -197,11 +197,11 @@ mod tests { let mut arr = PackedArray::new(33, 100); // Human genome SA width let test_values = [ - 0x1FFFFFFFF, // All 33 bits set - 0x100000000, // Bit 32 set (strand bit) - 0x0FFFFFFFF, // Bits 0-31 set (max forward position) + 0x0001_FFFF_FFFF, // All 33 bits set + 0x0001_0000_0000, // Bit 32 set (strand bit) + 0x0000_FFFF_FFFF, // Bits 0-31 set (max forward position) 0, - 12345678, + 12_345_678, ]; for (i, &val) in test_values.iter().enumerate() { @@ -226,13 +226,13 @@ mod tests { fn bit_width_32() { let mut arr = PackedArray::new(32, 10); - arr.write(0, 0xDEADBEEF); - arr.write(1, 0x12345678); - arr.write(5, 0xCAFEBABE); + arr.write(0, 0xDEAD_BEEF); + arr.write(1, 0x1234_5678); + arr.write(5, 0xCAFE_BABE); - assert_eq!(arr.read(0), 0xDEADBEEF); - assert_eq!(arr.read(1), 0x12345678); - assert_eq!(arr.read(5), 0xCAFEBABE); + assert_eq!(arr.read(0), 0xDEAD_BEEF); + assert_eq!(arr.read(1), 0x1234_5678); + assert_eq!(arr.read(5), 0xCAFE_BABE); } #[test] diff --git a/src/index/suffix_array.rs b/src/index/suffix_array.rs index a08d8f7..46b051f 100644 --- a/src/index/suffix_array.rs +++ b/src/index/suffix_array.rs @@ -187,9 +187,9 @@ fn compare_suffixes( } // Normal byte comparison - match byte_a.cmp(&byte_b) { - Ordering::Equal => continue, - other => return other, + let byte_cmp = byte_a.cmp(&byte_b); + if byte_cmp != Ordering::Equal { + return byte_cmp; } } diff --git a/src/io/fastq.rs b/src/io/fastq.rs index 5c0d42a..afbd78e 100644 --- a/src/io/fastq.rs +++ b/src/io/fastq.rs @@ -272,6 +272,7 @@ impl PairedFastqReader { /// /// # Returns /// Base name with mate suffix removed +#[allow(clippy::case_sensitive_file_extension_comparisons)] // false positive pub fn strip_mate_suffix(name: &str) -> String { // First, strip space and everything after (Illumina format) let name = if let Some(pos) = name.find(' ') { diff --git a/src/io/sam.rs b/src/io/sam.rs index 2a6bf39..ee91a43 100644 --- a/src/io/sam.rs +++ b/src/io/sam.rs @@ -152,8 +152,7 @@ impl SamWriter { let cigar_str: String = cigar_ops .iter() .map(|op| format!("{}{:?}", op.len(), op.kind())) - .collect::>() - .join(""); + .collect::(); panic!( "[SAM-MISMATCH] read={} cigar_query_len={} seq_len={} flags={:?} cigar={}", name, @@ -799,7 +798,7 @@ where let mut builder = sam::Header::builder(); // @HD line (default version and unsorted) - builder = builder.set_header(Default::default()); + builder = builder.set_header(Map::default()); // @SQ lines for each reference for (name, length) in refs { diff --git a/src/junction/gtf.rs b/src/junction/gtf.rs index cdf98e0..9258c13 100644 --- a/src/junction/gtf.rs +++ b/src/junction/gtf.rs @@ -179,14 +179,9 @@ pub fn extract_junctions_configured( // Get chromosome index let chr_name = &exons[0].seqname; - let chr_idx = genome.chr_name.iter().position(|name| name == chr_name); - - let chr_idx = match chr_idx { - Some(idx) => idx, - None => { - log::warn!("Skipping transcript on unknown chromosome: {}", chr_name); - continue; - } + let Some(chr_idx) = genome.chr_name.iter().position(|name| name == chr_name) else { + log::warn!("Skipping transcript on unknown chromosome: {chr_name}"); + continue; }; // Convert strand diff --git a/src/junction/sj_output.rs b/src/junction/sj_output.rs index 472dbfc..9f75c95 100644 --- a/src/junction/sj_output.rs +++ b/src/junction/sj_output.rs @@ -49,7 +49,7 @@ pub struct SpliceJunctionStats { impl Clone for SpliceJunctionStats { fn clone(&self) -> Self { let new_map = DashMap::new(); - for entry in self.junctions.iter() { + for entry in &self.junctions { let key = entry.key().clone(); let counts = entry.value(); new_map.insert( @@ -198,11 +198,11 @@ impl SpliceJunctionStats { let intron_len = key.intron_end.saturating_sub(key.intron_start); let intron_max_thresholds = ¶ms.out_sj_filter_intron_max_vs_read_n; let max_intron_for_reads = if total >= 3 { - intron_max_thresholds.get(2).copied().unwrap_or(200000) + intron_max_thresholds.get(2).copied().unwrap_or(200_000) } else if total >= 2 { - intron_max_thresholds.get(1).copied().unwrap_or(100000) + intron_max_thresholds.get(1).copied().unwrap_or(100_000) } else { - intron_max_thresholds.first().copied().unwrap_or(50000) + intron_max_thresholds.first().copied().unwrap_or(50_000) }; if intron_len as i64 > max_intron_for_reads { continue; @@ -271,7 +271,7 @@ impl SpliceJunctionStats { chr_pos_end, key.strand, key.motif, - if *annotated { 1 } else { 0 }, + i32::from(*annotated), unique, multi, max_overhang diff --git a/src/junction/sjdb_insert.rs b/src/junction/sjdb_insert.rs index 01b19c0..cb1ac06 100644 --- a/src/junction/sjdb_insert.rs +++ b/src/junction/sjdb_insert.rs @@ -731,11 +731,11 @@ mod tests { shift_right: 1, strand: 1, }, - // Non-canonical: stored = shifted (139187..139217). + // Non-canonical: stored = shifted (139_187..139_217). PreparedJunction { chr_idx: 0, - start_pos: 139187, - end_pos: 139217, + start_pos: 139_187, + end_pos: 139_217, motif: 0, shift_left: 0, shift_right: 0, @@ -769,8 +769,8 @@ mod tests { // `stored + shift_left + 1`, which is `original + 1`. let noncan = PreparedJunction { chr_idx: 0, - start_pos: 139184, // shifted - end_pos: 139214, + start_pos: 139_184, // shifted + end_pos: 139_214, motif: 0, shift_left: 3, shift_right: 0, diff --git a/src/lib.rs b/src/lib.rs index 53ea6ca..09472fa 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,3 +1,33 @@ +#![warn(clippy::pedantic)] +// TODO: enable these warnings eventually +#![allow( + clippy::cast_lossless, + clippy::cast_possible_truncation, + clippy::cast_possible_wrap, + clippy::cast_precision_loss, + clippy::cast_sign_loss, + clippy::doc_markdown, + clippy::format_collect, + clippy::format_push_string, + clippy::items_after_statements, + clippy::match_same_arms, + clippy::missing_errors_doc, + clippy::missing_panics_doc, + clippy::must_use_candidate, + clippy::needless_pass_by_value, + clippy::redundant_closure_for_method_calls, + clippy::similar_names, + clippy::too_many_lines, + clippy::trivially_copy_pass_by_ref, + clippy::uninlined_format_args, + clippy::unnecessary_wraps +)] +// These should stay disabled +#![allow( + // we have a bunch of “`if !reverse`” + clippy::if_not_else, +)] + pub mod error; pub mod params; @@ -758,10 +788,10 @@ where )?; use noodles::sam::alignment::record::Flags; - for r in rec1s.iter_mut() { + for r in &mut rec1s { *r.flags_mut() |= Flags::SEGMENTED | Flags::FIRST_SEGMENT; } - for r in rec2s.iter_mut() { + for r in &mut rec2s { *r.flags_mut() |= Flags::SEGMENTED | Flags::LAST_SEGMENT; } @@ -1154,7 +1184,7 @@ fn align_reads_single_end( read_count += reads_to_process as u64; // Progress logging - if read_count % 100000 < batch_size as u64 { + if read_count % 100_000 < batch_size as u64 { info!("Processed {} reads...", read_count); } @@ -1721,7 +1751,7 @@ fn align_reads_paired_end( read_count += pairs_to_process as u64; // Progress logging - if read_count % 100000 < batch_size as u64 { + if read_count % 100_000 < batch_size as u64 { info!("Processed {} pairs...", read_count); } diff --git a/src/params.rs b/src/params.rs index d1c1d7c..e2d644c 100644 --- a/src/params.rs +++ b/src/params.rs @@ -748,7 +748,7 @@ impl Parameters { /// - `"Standard"` → {NH, HI, AS, NM, nM} /// - `"All"` → {NH, HI, AS, NM, nM, MD, jM, jI, XS} /// - `"None"` → {} (empty) - /// - Explicit list (e.g. ["NH", "AS"]) → collected as-is + /// - Explicit list (e.g. `["NH", "AS"]`) → collected as-is /// /// `RG` is auto-appended when `--outSAMattrRGline` is set (STAR behavior, /// `Parameters_samAttributes.cpp:201`). @@ -1072,7 +1072,7 @@ mod tests { assert_eq!(p.out_sj_filter_dist_to_other_sjmin, vec![10, 0, 5, 10]); assert_eq!( p.out_sj_filter_intron_max_vs_read_n, - vec![50000, 100000, 200000] + vec![50_000, 100_000, 200_000] ); } diff --git a/src/quant/mod.rs b/src/quant/mod.rs index 9516d99..f50c4ff 100644 --- a/src/quant/mod.rs +++ b/src/quant/mod.rs @@ -64,9 +64,8 @@ impl GeneAnnotation { idx }; - let chr_idx = match genome.chr_name.iter().position(|n| n == &exon.seqname) { - Some(i) => i, - None => continue, + let Some(chr_idx) = genome.chr_name.iter().position(|n| n == &exon.seqname) else { + continue; }; if chr_idx >= n_chrs { continue; diff --git a/src/quant/transcriptome.rs b/src/quant/transcriptome.rs index de625a9..bd2d83a 100644 --- a/src/quant/transcriptome.rs +++ b/src/quant/transcriptome.rs @@ -289,17 +289,14 @@ impl TranscriptomeIndex { // name/biotype slot. Subsequent transcripts with a richer name or // biotype do NOT overwrite (STAR's Transcriptome writer is // first-seen-wins too). - let gene_idx = match gene_id_to_idx.get(&gene_id) { - Some(&i) => i, - None => { - let i = gene_ids.len() as u32; - gene_id_to_idx.insert(gene_id.clone(), i); - gene_ids.push(gene_id.clone()); - gene_names.push(gene_name); - gene_biotypes.push(gene_biotype); - i - } - }; + let gene_idx = gene_id_to_idx.get(&gene_id).copied().unwrap_or_else(|| { + let i = gene_ids.len() as u32; + gene_id_to_idx.insert(gene_id.clone(), i); + gene_ids.push(gene_id.clone()); + gene_names.push(gene_name); + gene_biotypes.push(gene_biotype); + i + }); tr_ids.push(tid.clone()); tr_chr_idx.push(chr_idx); @@ -1089,7 +1086,7 @@ fn align_to_one_transcript( if tr_strand == 2 { let tr_len = tr_length as u64; let lread_u = lread as u64; - for e in proj_exons.iter_mut() { + for e in &mut proj_exons { let len = e.genome_end - e.genome_start; let new_g = tr_len - (e.genome_start + len); e.genome_start = new_g; @@ -1116,8 +1113,8 @@ fn align_to_one_transcript( } // Projected genome bounds = outermost t-space exon positions. - let proj_start = proj_exons.first().map(|e| e.genome_start).unwrap_or(0); - let proj_end = proj_exons.last().map(|e| e.genome_end).unwrap_or(0); + let proj_start = proj_exons.first().map_or(0, |e| e.genome_start); + let proj_end = proj_exons.last().map_or(0, |e| e.genome_end); Some(Transcript { chr_idx: tr_idx, @@ -1335,7 +1332,7 @@ fn find_containing_exon(tr_exons: &[TrExon], pos: u64) -> Option { let mut lo = 0usize; let mut hi = tr_exons.len(); while lo < hi { - let mid = (lo + hi) / 2; + let mid = usize::midpoint(lo, hi); if tr_exons[mid].genome_end <= pos { lo = mid + 1; } else { @@ -1967,8 +1964,8 @@ mod tests { i_frag: 0, }) .collect(); - let gs = proj_exons.first().map(|e| e.genome_start).unwrap_or(0); - let ge = proj_exons.last().map(|e| e.genome_end).unwrap_or(0); + let gs = proj_exons.first().map_or(0, |e| e.genome_start); + let ge = proj_exons.last().map_or(0, |e| e.genome_end); Transcript { chr_idx, genome_start: gs, @@ -2403,7 +2400,7 @@ mod tests { // tr_order must be sorted by (start, end) let sorted_starts: Vec = idx.tr_starts_sorted.clone(); let mut check = sorted_starts.clone(); - check.sort(); + check.sort_unstable(); assert_eq!(sorted_starts, check); // tr_end_max_sorted must be monotonically non-decreasing diff --git a/src/stats.rs b/src/stats.rs index 64a05de..620c798 100644 --- a/src/stats.rs +++ b/src/stats.rs @@ -709,6 +709,7 @@ mod tests { } #[test] + #[allow(clippy::float_cmp)] fn test_empty_stats() { let stats = AlignmentStats::new(); assert_eq!(stats.unique_percent(), 0.0); @@ -893,8 +894,8 @@ mod tests { stats.multi_mapped.store(500, Ordering::Relaxed); stats.too_many_loci.store(50, Ordering::Relaxed); stats.unmapped.store(1150, Ordering::Relaxed); - stats.read_bases.store(1500000, Ordering::Relaxed); - stats.mapped_bases.store(1200000, Ordering::Relaxed); + stats.read_bases.store(1_500_000, Ordering::Relaxed); + stats.mapped_bases.store(1_200_000, Ordering::Relaxed); stats.mapped_mismatches.store(4800, Ordering::Relaxed); stats.mapped_ins_count.store(100, Ordering::Relaxed); stats.mapped_ins_bases.store(150, Ordering::Relaxed);