Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion src/align/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,4 +10,4 @@ pub use read_align::{
};
pub use seed::Seed;
pub use stitch::{SeedCluster, WindowAlignment, stitch_seeds};
pub use transcript::{CigarOp, Exon, Transcript};
pub use transcript::{Exon, Transcript};
87 changes: 45 additions & 42 deletions src/align/read_align.rs
Original file line number Diff line number Diff line change
Expand Up @@ -323,7 +323,6 @@ pub fn align_read(
} else {
"unknown"
};
let cigar_str: String = t.cigar.iter().map(|op| format!("{}", op)).collect();
eprintln!(
" transcript[{}]: chr={}:{}-{} ({}) score={} mm={} junctions={} cigar={}",
ti,
Expand All @@ -334,7 +333,7 @@ pub fn align_read(
t.score,
t.n_mismatch,
t.n_junction,
cigar_str
t.cigar_string()
);
}
}
Expand All @@ -356,14 +355,14 @@ pub fn align_read(
a.genome_start,
a.genome_end,
a.is_reverse,
&a.cigar,
//&a.cigar,
Copy link
Copy Markdown
Member Author

@flying-sheep flying-sheep May 15, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This (and another cmp farther down) are the only casualties: why is this here? Can a vector of cigar ops be “greater” or “less” than another? (other than length)

)
.cmp(&(
b.chr_idx,
b.genome_start,
b.genome_end,
b.is_reverse,
&b.cigar,
//&b.cigar,
))
.then_with(|| b.score.cmp(&a.score))
});
Expand Down Expand Up @@ -467,13 +466,13 @@ pub fn align_read(

// Absolute matched bases
let n_matched = t.n_matched();
if n_matched < params.out_filter_match_nmin {
if n_matched < params.out_filter_match_nmin as usize {
*filter_reasons.entry("match_min").or_insert(0) += 1;
return false;
}

// Relative matched bases: STAR casts to uint (u32)
if n_matched < (params.out_filter_match_nmin_over_lread * lread_m1) as u32 {
if (n_matched as f64) < params.out_filter_match_nmin_over_lread * lread_m1 {
*filter_reasons.entry("match_min_relative").or_insert(0) += 1;
return false;
}
Expand Down Expand Up @@ -519,7 +518,6 @@ pub fn align_read(
match motif.implied_strand() {
Some('+') => has_plus = true,
Some('-') => has_minus = true,
None => {}
_ => {}
}
}
Expand Down Expand Up @@ -633,7 +631,6 @@ pub fn align_read(
} else {
"unknown"
};
let cigar_str: String = t.cigar.iter().map(|op| format!("{}", op)).collect();
eprintln!(
" FINAL[{}]: chr={}:{}-{} ({}) score={} mm={} junctions={} cigar={}",
i,
Expand All @@ -644,7 +641,7 @@ pub fn align_read(
t.score,
t.n_mismatch,
t.n_junction,
cigar_str
t.cigar_string()
);
}
}
Expand Down Expand Up @@ -987,10 +984,9 @@ pub fn align_paired_read(
if pos_cmp != std::cmp::Ordering::Equal {
return pos_cmp;
}
b.combined_wt_score
.cmp(&a.combined_wt_score)
.then_with(|| a.mate1_transcript.cigar.cmp(&b.mate1_transcript.cigar))
.then_with(|| a.mate2_transcript.cigar.cmp(&b.mate2_transcript.cigar))
b.combined_wt_score.cmp(&a.combined_wt_score)
//.then_with(|| a.mate1_transcript.cigar.cmp(&b.mate1_transcript.cigar))
//.then_with(|| a.mate2_transcript.cigar.cmp(&b.mate2_transcript.cigar))
});
joint_pairs.dedup_by(|a, b| {
a.mate1_transcript.chr_idx == b.mate1_transcript.chr_idx
Expand Down Expand Up @@ -1416,18 +1412,18 @@ fn extract_junctions_from_cigar(t: &Transcript) -> Vec<(u64, u64)> {
let mut junctions = Vec::new();
let mut genome_pos = t.genome_start;
for op in &t.cigar {
use crate::align::transcript::CigarOp;
match op {
CigarOp::Match(n) | CigarOp::Equal(n) | CigarOp::Diff(n) | CigarOp::Del(n) => {
genome_pos += *n as u64;
use noodles::sam::alignment::record::cigar::op::Kind;
match op.kind() {
Kind::Match | Kind::SequenceMatch | Kind::SequenceMismatch | Kind::Deletion => {
genome_pos += op.len() as u64;
}
CigarOp::RefSkip(n) => {
Kind::Skip => {
let donor = genome_pos;
let acceptor = genome_pos + *n as u64;
let acceptor = genome_pos + op.len() as u64;
junctions.push((donor, acceptor));
genome_pos = acceptor;
}
CigarOp::Ins(_) | CigarOp::SoftClip(_) | CigarOp::HardClip(_) => {}
Kind::Insertion | Kind::SoftClip | Kind::HardClip | Kind::Pad => {}
Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I added Pad to all the “else” branches. Is that correct?

}
}
junctions
Expand Down Expand Up @@ -1479,6 +1475,7 @@ mod tests {
use crate::index::sa_index::SaIndex;
use crate::index::suffix_array::SuffixArray;
use clap::Parser;
use noodles::sam::alignment::record::cigar;

fn make_test_params() -> Parameters {
// Parse empty args to get default parameters
Expand Down Expand Up @@ -1537,8 +1534,7 @@ mod tests {

#[test]
fn combined_transcript_for_projection_rewrites_mate2_ifrag() {
use crate::align::transcript::CigarOp;

use cigar::op::{Kind, Op};
let make_tr = |gs: u64, ge: u64, rs: usize, re: usize| Transcript {
chr_idx: 0,
genome_start: gs,
Expand All @@ -1551,7 +1547,7 @@ mod tests {
read_end: re,
i_frag: 0,
}],
cigar: vec![CigarOp::Match((ge - gs) as u32)],
cigar: vec![Op::new(Kind::Match, (ge - gs) as usize)],
score: 100,
n_mismatch: 0,
n_gap: 0,
Expand Down Expand Up @@ -1654,7 +1650,8 @@ mod tests {

#[test]
fn test_check_proper_pair_distance() {
use crate::align::transcript::{CigarOp, Exon};
use crate::align::transcript::Exon;
use cigar::op::{Kind, Op};

let params = make_test_params();

Expand All @@ -1671,7 +1668,7 @@ mod tests {
read_end: 100,
i_frag: 0,
}],
cigar: vec![CigarOp::Match(100)],
cigar: vec![Op::new(Kind::Match, 100)],
score: 100,
n_mismatch: 0,
n_gap: 0,
Expand All @@ -1693,7 +1690,7 @@ mod tests {
read_end: 100,
i_frag: 0,
}],
cigar: vec![CigarOp::Match(100)],
cigar: vec![Op::new(Kind::Match, 100)],
score: 100,
n_mismatch: 0,
n_gap: 0,
Expand All @@ -1709,7 +1706,8 @@ mod tests {

#[test]
fn test_check_proper_pair_too_far() {
use crate::align::transcript::{CigarOp, Exon};
use crate::align::transcript::Exon;
use cigar::op::{Kind, Op};

let mut params = make_test_params();
params.align_mates_gap_max = 100;
Expand All @@ -1726,7 +1724,7 @@ mod tests {
read_end: 100,
i_frag: 0,
}],
cigar: vec![CigarOp::Match(100)],
cigar: vec![Op::new(Kind::Match, 100)],
score: 100,
n_mismatch: 0,
n_gap: 0,
Expand All @@ -1748,7 +1746,7 @@ mod tests {
read_end: 100,
i_frag: 0,
}],
cigar: vec![CigarOp::Match(100)],
cigar: vec![Op::new(Kind::Match, 100)],
score: 100,
n_mismatch: 0,
n_gap: 0,
Expand All @@ -1764,7 +1762,8 @@ mod tests {

#[test]
fn test_calculate_insert_size_positive() {
use crate::align::transcript::{CigarOp, Exon};
use crate::align::transcript::Exon;
use cigar::op::{Kind, Op};

// Mate1 is leftmost
let t1 = Transcript {
Expand All @@ -1779,7 +1778,7 @@ mod tests {
read_end: 100,
i_frag: 0,
}],
cigar: vec![CigarOp::Match(100)],
cigar: vec![Op::new(Kind::Match, 100)],
score: 100,
n_mismatch: 0,
n_gap: 0,
Expand All @@ -1801,7 +1800,7 @@ mod tests {
read_end: 100,
i_frag: 0,
}],
cigar: vec![CigarOp::Match(100)],
cigar: vec![Op::new(Kind::Match, 100)],
score: 100,
n_mismatch: 0,
n_gap: 0,
Expand All @@ -1817,8 +1816,9 @@ mod tests {

#[test]
fn test_strand_consistency_filter() {
use crate::align::transcript::{CigarOp, Exon, Transcript};
use crate::align::transcript::{Exon, Transcript};
use crate::params::IntronStrandFilter;
use cigar::op::{Kind, Op};

// Create a transcript with conflicting strand motifs (mixed + and - within one transcript)
let t_inconsistent = Transcript {
Expand All @@ -1833,7 +1833,7 @@ mod tests {
read_end: 100,
i_frag: 0,
}],
cigar: vec![CigarOp::Match(100)],
cigar: vec![Op::new(Kind::Match, 100)],
score: 100,
n_mismatch: 0,
n_gap: 0,
Expand All @@ -1856,7 +1856,7 @@ mod tests {
read_end: 100,
i_frag: 0,
}],
cigar: vec![CigarOp::Match(100)],
cigar: vec![Op::new(Kind::Match, 100)],
score: 100,
n_mismatch: 0,
n_gap: 0,
Expand Down Expand Up @@ -1917,7 +1917,8 @@ mod tests {

#[test]
fn test_calculate_insert_size_negative() {
use crate::align::transcript::{CigarOp, Exon};
use crate::align::transcript::Exon;
use cigar::op::{Kind, Op};

// RF pair: mate1 reverse (right side), mate2 forward (left side).
// STAR reverse cluster: tlen = mate1.genome_end - mate2.genome_start = 1300 - 1000 = 300.
Expand All @@ -1934,7 +1935,7 @@ mod tests {
read_end: 100,
i_frag: 0,
}],
cigar: vec![CigarOp::Match(100)],
cigar: vec![Op::new(Kind::Match, 100)],
score: 100,
n_mismatch: 0,
n_gap: 0,
Expand All @@ -1956,7 +1957,7 @@ mod tests {
read_end: 100,
i_frag: 0,
}],
cigar: vec![CigarOp::Match(100)],
cigar: vec![Op::new(Kind::Match, 100)],
score: 100,
n_mismatch: 0,
n_gap: 0,
Expand All @@ -1973,7 +1974,8 @@ mod tests {
#[test]
fn test_noncanonical_unannotated_filter() {
use crate::align::score::SpliceMotif;
use crate::align::transcript::{CigarOp, Exon, Transcript};
use crate::align::transcript::{Exon, Transcript};
use cigar::op::{Kind, Op};

// Helper: check if a transcript would be filtered by RemoveNoncanonicalUnannotated
// (mirrors the logic in the retain closure)
Expand All @@ -1996,7 +1998,7 @@ mod tests {
read_end: 100,
i_frag: 0,
}],
cigar: vec![CigarOp::Match(100)],
cigar: vec![Op::new(Kind::Match, 100)],
score: 100,
n_mismatch: 0,
n_gap: 0,
Expand Down Expand Up @@ -2067,7 +2069,8 @@ mod tests {

#[test]
fn test_paired_alignment_result_enum_variants() {
use crate::align::transcript::{CigarOp, Exon};
use crate::align::transcript::Exon;
use cigar::op::{Kind, Op};

let transcript = Transcript {
chr_idx: 0,
Expand All @@ -2081,7 +2084,7 @@ mod tests {
read_end: 100,
i_frag: 0,
}],
cigar: vec![CigarOp::Match(100)],
cigar: vec![Op::new(Kind::Match, 100)],
score: 100,
n_mismatch: 0,
n_gap: 0,
Expand Down
Loading
Loading