Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
143 changes: 127 additions & 16 deletions src/io/sam.rs
Original file line number Diff line number Diff line change
Expand Up @@ -103,10 +103,7 @@ impl SamWriter {
};
let effective_n = n_alignments.max(n_for_mapq);
let mapq = calculate_mapq(effective_n, params.out_sam_mapq_unique);
let mut attrs = params.sam_attribute_set();
if params.out_sam_strand_field != "intronMotif" {
attrs.remove("XS");
}
let attrs = params.effective_sam_attribute_set();
let rg_id_owned = params.primary_rg_id()?;
let rg_id = rg_id_owned.as_deref();

Expand Down Expand Up @@ -232,10 +229,7 @@ impl SamWriter {
};
let effective_n = n_alignments.max(n_for_mapq);
let mapq = calculate_mapq(effective_n, params.out_sam_mapq_unique);
let mut attrs = params.sam_attribute_set();
if params.out_sam_strand_field != "intronMotif" {
attrs.remove("XS");
}
let attrs = params.effective_sam_attribute_set();
let rg_id_owned = params.primary_rg_id()?;
let rg_id = rg_id_owned.as_deref();

Expand Down Expand Up @@ -299,10 +293,7 @@ impl SamWriter {
};
let effective_n = n_alignments.max(n_for_mapq);
let mapq = calculate_mapq(effective_n, params.out_sam_mapq_unique);
let mut attrs = params.sam_attribute_set();
if params.out_sam_strand_field != "intronMotif" {
attrs.remove("XS");
}
let attrs = params.effective_sam_attribute_set();
let rg_id_owned = params.primary_rg_id()?;
let rg_id = rg_id_owned.as_deref();

Expand Down Expand Up @@ -385,10 +376,7 @@ impl SamWriter {
let n_alignments = 1usize;
let effective_n = n_alignments.max(n_for_mapq);
let mapq = calculate_mapq(effective_n, params.out_sam_mapq_unique);
let mut attrs = params.sam_attribute_set();
if params.out_sam_strand_field != "intronMotif" {
attrs.remove("XS");
}
let attrs = params.effective_sam_attribute_set();
let rg_id_owned = params.primary_rg_id()?;
let rg_id = rg_id_owned.as_deref();

Expand Down Expand Up @@ -3596,4 +3584,127 @@ mod tests {
assert!(records_m2[0].flags().is_unmapped()); // mate1 is unmapped
assert!(!records_m2[1].flags().is_unmapped()); // mate2 is mapped
}

fn spliced_gtag_transcript() -> Transcript {
Transcript {
chr_idx: 0,
genome_start: 0,
genome_end: 200,
is_reverse: false,
exons: vec![],
cigar: vec![
CigarOp::Match(25),
CigarOp::RefSkip(100),
CigarOp::Match(25),
],
score: 50,
n_mismatch: 0,
n_gap: 0,
n_junction: 1,
junction_motifs: vec![SpliceMotif::GtAg],
junction_annotated: vec![false],
read_seq: vec![0; 4],
}
}

#[test]
fn xs_tag_emitted_when_strand_field_intron_motif_only() {
let genome = make_test_genome();
let params = Parameters::parse_from(vec![
"rustar-aligner",
"--readFilesIn",
"r.fq",
"--outSAMstrandField",
"intronMotif",
]);

let transcripts = vec![spliced_gtag_transcript()];
let read_seq = vec![0, 1, 2, 3];
let read_qual = vec![30, 30, 30, 30];

let records = SamWriter::build_alignment_records(
"read1",
&read_seq,
&read_qual,
&transcripts,
&genome,
&params,
1,
)
.unwrap();

assert_eq!(records.len(), 1);
let data = records[0].data();
assert_eq!(
data.get(&Tag::new(b'X', b'S')),
Some(&Value::Character(b'+')),
"XS:A:+ should be emitted when --outSAMstrandField intronMotif is set"
);
}

#[test]
fn xs_tag_emitted_when_xs_in_attributes_only() {
let genome = make_test_genome();
let params = Parameters::parse_from(vec![
"rustar-aligner",
"--readFilesIn",
"r.fq",
"--outSAMattributes",
"NH",
"HI",
"XS",
]);

let transcripts = vec![spliced_gtag_transcript()];
let read_seq = vec![0, 1, 2, 3];
let read_qual = vec![30, 30, 30, 30];

let records = SamWriter::build_alignment_records(
"read1",
&read_seq,
&read_qual,
&transcripts,
&genome,
&params,
1,
)
.unwrap();

assert_eq!(records.len(), 1);
let data = records[0].data();
assert_eq!(
data.get(&Tag::new(b'X', b'S')),
Some(&Value::Character(b'+')),
"XS:A:+ should be emitted when XS is listed in --outSAMattributes"
);
}

#[test]
fn xs_tag_absent_when_neither_xs_nor_intron_motif_requested() {
let genome = make_test_genome();
let params = Parameters::parse_from(vec!["rustar-aligner", "--readFilesIn", "r.fq"]);

let transcripts = vec![spliced_gtag_transcript()];
let read_seq = vec![0, 1, 2, 3];
let read_qual = vec![30, 30, 30, 30];

let records = SamWriter::build_alignment_records(
"read1",
&read_seq,
&read_qual,
&transcripts,
&genome,
&params,
1,
)
.unwrap();

assert_eq!(records.len(), 1);
let data = records[0].data();
assert_eq!(
data.get(&Tag::new(b'X', b'S')),
None,
"XS should not be emitted under default attributes and strand field"
);
}
}
83 changes: 83 additions & 0 deletions src/params.rs
Original file line number Diff line number Diff line change
Expand Up @@ -777,6 +777,33 @@ impl Parameters {
attrs
}

/// Effective `--outSAMattributes` set after coupling with `--outSAMstrandField`.
///
/// Mirrors STAR's `Parameters_samAttributes.cpp:172-179` and `:213-216`:
/// `XS` in the attribute set and `--outSAMstrandField intronMotif` imply
/// each other. If either is set the returned set contains `XS`; otherwise
/// `XS` is excluded even if it was listed in `--outSAMattributes` without
/// a matching strand field (STAR forces the strand field on, so XS stays).
pub fn effective_sam_attribute_set(&self) -> HashSet<String> {
let mut attrs = self.sam_attribute_set();
if self.effective_out_sam_strand_field() == "intronMotif" {
attrs.insert("XS".to_string());
}
attrs
}

/// Effective `--outSAMstrandField` value after coupling with `--outSAMattributes`.
///
/// `XS` in `--outSAMattributes` forces `intronMotif` even when the user
/// left `--outSAMstrandField` at its default (`None`).
pub fn effective_out_sam_strand_field(&self) -> &str {
if self.out_sam_strand_field == "intronMotif" || self.sam_attribute_set().contains("XS") {
"intronMotif"
} else {
self.out_sam_strand_field.as_str()
}
}

/// True if the user provided a non-default `--outSAMattrRGline`.
pub fn rg_line_set(&self) -> bool {
!self.out_sam_attr_rg_line.is_empty() && self.out_sam_attr_rg_line[0] != "-"
Expand Down Expand Up @@ -953,6 +980,19 @@ impl Parameters {
}
self.rg_ids()?;

let user_xs_in_attrs = self.sam_attribute_set().contains("XS");
let user_strand_intron_motif = self.out_sam_strand_field == "intronMotif";
if user_xs_in_attrs && !user_strand_intron_motif {
log::info!(
"--outSAMattributes contains XS, therefore rustar-aligner will use --outSAMstrandField intronMotif"
);
}
if user_strand_intron_motif && !user_xs_in_attrs {
log::info!(
"--outSAMstrandField=intronMotif, therefore rustar-aligner will output XS attribute"
);
}

// quantMode TranscriptomeSAM requires transcript annotations —
// either via --sjdbGTFfile or pre-generated transcriptInfo.tab
// et al in --genomeDir (persisted at genomeGenerate time). At
Expand Down Expand Up @@ -1488,4 +1528,47 @@ mod tests {
]);
assert_eq!(p.align_sj_stitch_mismatch_nmax, vec![1, -1, 2, 3]);
}

#[test]
fn xs_strand_field_intron_motif_adds_xs_to_attrs() {
let p = parse(&[
"--readFilesIn",
"r.fq",
"--outSAMstrandField",
"intronMotif",
]);
let attrs = p.effective_sam_attribute_set();
assert!(attrs.contains("XS"));
assert_eq!(p.effective_out_sam_strand_field(), "intronMotif");
}

#[test]
fn xs_attr_forces_intron_motif_strand_field() {
let p = parse(&[
"--readFilesIn",
"r.fq",
"--outSAMattributes",
"NH",
"HI",
"XS",
]);
assert_eq!(p.effective_out_sam_strand_field(), "intronMotif");
let attrs = p.effective_sam_attribute_set();
assert!(attrs.contains("XS"));
}

#[test]
fn xs_coupling_dormant_without_user_request() {
let p = parse(&["--readFilesIn", "r.fq"]);
let attrs = p.effective_sam_attribute_set();
assert!(!attrs.contains("XS"));
assert_eq!(p.effective_out_sam_strand_field(), "None");
}

#[test]
fn xs_attr_via_all_preset_couples_strand_field() {
let p = parse(&["--readFilesIn", "r.fq", "--outSAMattributes", "All"]);
assert_eq!(p.effective_out_sam_strand_field(), "intronMotif");
assert!(p.effective_sam_attribute_set().contains("XS"));
}
}
Loading