Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions src/align/read_align.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1505,6 +1505,7 @@ mod tests {
let genome = Genome {
sequence,
n_genome,
n_genome_real: n_genome,
n_chr_real: 1,
chr_name: vec!["chr1".to_string()],
chr_length: vec![10],
Expand Down Expand Up @@ -1535,6 +1536,7 @@ mod tests {
junction_db: crate::junction::SpliceJunctionDb::empty(),
transcriptome: None,
prepared_junctions: Vec::new(),
sjdb_overhang: 0,
}
}

Expand Down
1 change: 1 addition & 0 deletions src/align/score.rs
Original file line number Diff line number Diff line change
Expand Up @@ -637,6 +637,7 @@ mod tests {
Genome {
sequence,
n_genome,
n_genome_real: n_genome,
n_chr_real: 1,
chr_name: vec!["chr1".to_string()],
chr_length: vec![seq.len() as u64],
Expand Down
321 changes: 195 additions & 126 deletions src/align/stitch.rs

Large diffs are not rendered by default.

2 changes: 2 additions & 0 deletions src/chimeric/detect.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1000,6 +1000,7 @@ mod tests {
Genome {
sequence,
n_genome,
n_genome_real: n_genome,
n_chr_real: 2,
chr_name: vec!["chr0".to_string(), "chr1".to_string()],
chr_length: vec![chr_len, chr_len],
Expand Down Expand Up @@ -1027,6 +1028,7 @@ mod tests {
junction_db: SpliceJunctionDb::empty(),
transcriptome: None,
prepared_junctions: Vec::new(),
sjdb_overhang: 0,
}
}

Expand Down
1 change: 1 addition & 0 deletions src/chimeric/output.rs
Original file line number Diff line number Diff line change
Expand Up @@ -443,6 +443,7 @@ mod tests {
Genome {
sequence: vec![0u8; 2048],
n_genome: 1024,
n_genome_real: 1024,
n_chr_real: 2,
chr_name: vec!["chr9".to_string(), "chr22".to_string()],
chr_length: vec![512, 512],
Expand Down
1 change: 1 addition & 0 deletions src/chimeric/score.rs
Original file line number Diff line number Diff line change
Expand Up @@ -173,6 +173,7 @@ mod tests {
Genome {
sequence: seq,
n_genome: 100,
n_genome_real: 100,
n_chr_real: 1,
chr_name: vec!["chr1".to_string()],
chr_start: vec![0],
Expand Down
7 changes: 7 additions & 0 deletions src/genome/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,12 @@ pub struct Genome {
/// Total length of the forward (padded) genome.
pub n_genome: u64,

/// Length of the real-genome forward strand, excluding any appended
/// Gsj splice-junction buffer. Equals `n_genome` before `append_sjdb`
/// and stays pinned at that value afterwards; matches STAR's
/// `mapGen.nGenomeReal`.
pub n_genome_real: u64,

/// Number of real chromosomes (not including scaffold/contigs if excluded).
pub n_chr_real: usize,

Expand Down Expand Up @@ -111,6 +117,7 @@ impl Genome {
Ok(Genome {
sequence,
n_genome,
n_genome_real: n_genome,
n_chr_real,
chr_name,
chr_length,
Expand Down
26 changes: 24 additions & 2 deletions src/index/io.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ use crate::index::packed_array::PackedArray;
use crate::index::sa_index::SaIndex;
use crate::index::suffix_array::SuffixArray;
use crate::junction::SpliceJunctionDb;
use crate::junction::sjdb_insert;
use crate::params::Parameters;
use crate::quant::transcriptome::TranscriptomeIndex;

Expand Down Expand Up @@ -99,13 +100,32 @@ impl GenomeIndex {
);
}

// Reload prepared junctions + sjdbOverhang from sjdbInfo.txt when
// present. STAR appends a Gsj flanking-sequence buffer to the
// genome at build time; align-time code needs the parsed junctions
// to decode SA hits that land inside that buffer back to real
// `(donor, acceptor)` genome positions.
let sjdb_info_path = genome_dir.join("sjdbInfo.txt");
let (prepared_junctions, sjdb_overhang) = if sjdb_info_path.exists() {
let tab = sjdb_insert::read_sjdb_info_tab(&sjdb_info_path, &genome)?;
log::info!(
"Loaded sjdbInfo.txt: {} junctions, sjdbOverhang={}",
tab.junctions.len(),
tab.sjdb_overhang,
);
(tab.junctions, tab.sjdb_overhang)
} else {
(Vec::new(), 0)
};

Ok(GenomeIndex {
genome,
suffix_array,
sa_index,
junction_db,
transcriptome,
prepared_junctions: Vec::new(),
prepared_junctions,
sjdb_overhang,
})
}
}
Expand Down Expand Up @@ -164,7 +184,8 @@ fn load_genome(genome_dir: &Path, _params: &Parameters) -> Result<Genome, Error>
// (real + Gsj) lives in `genomeParameters.txt` under `genomeFileSizes`.
// Prefer that value; fall back to the chr_start boundary for indices
// built without a GTF.
let n_genome = read_genome_file_size(genome_dir)?.unwrap_or(chr_start[n_chr_real]);
let n_genome_real = chr_start[n_chr_real];
let n_genome = read_genome_file_size(genome_dir)?.unwrap_or(n_genome_real);

// Load Genome sequence file
let genome_path = genome_dir.join("Genome");
Expand Down Expand Up @@ -192,6 +213,7 @@ fn load_genome(genome_dir: &Path, _params: &Parameters) -> Result<Genome, Error>
Ok(Genome {
sequence,
n_genome,
n_genome_real,
n_chr_real,
chr_name,
chr_length,
Expand Down
20 changes: 16 additions & 4 deletions src/index/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -27,11 +27,16 @@ pub struct GenomeIndex {
/// `transcriptInfo.tab` + friends at `genomeGenerate`, reloaded at
/// `alignReads` from the same files.
pub transcriptome: Option<TranscriptomeIndex>,
/// Prepared splice junctions (sorted/deduped) used only on the build
/// path to write `sjdbInfo.txt` + `sjdbList.out.tab`. Empty on the
/// load path — those files have already been written and are not
/// needed at align time.
/// Prepared splice junctions in their post-dedup order (the same
/// order they occupy in the Gsj buffer appended to the genome).
/// Populated on both build and load paths when sjdb is present;
/// empty for indices built without a GTF. Used at align time to
/// decode Gsj-region SA hits back to real-genome `(donor, acceptor)`
/// pairs.
pub prepared_junctions: Vec<PreparedJunction>,
/// `sjdbOverhang` recorded in `sjdbInfo.txt`. Zero when no sjdb
/// junctions are present.
pub sjdb_overhang: u32,
}

impl GenomeIndex {
Expand Down Expand Up @@ -143,13 +148,20 @@ impl GenomeIndex {
sa_index.data.len()
);

let sjdb_overhang = if prepared_junctions.is_empty() {
0
} else {
params.sjdb_overhang
};

Ok(GenomeIndex {
genome,
suffix_array,
sa_index,
junction_db,
transcriptome,
prepared_junctions,
sjdb_overhang,
})
}

Expand Down
1 change: 1 addition & 0 deletions src/io/bam.rs
Original file line number Diff line number Diff line change
Expand Up @@ -469,6 +469,7 @@ mod tests {
Genome {
sequence: vec![0, 1, 2, 3, 0, 1, 2, 3], // ACGTACGT
n_genome: 8,
n_genome_real: 8,
n_chr_real: 1,
chr_name: vec!["chr1".to_string()],
chr_length: vec![8],
Expand Down
1 change: 1 addition & 0 deletions src/io/sam.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1338,6 +1338,7 @@ mod tests {
Genome {
sequence: vec![0, 1, 2, 3, 0, 1, 2, 3], // ACGTACGT
n_genome: 8,
n_genome_real: 8,
n_chr_real: 1,
chr_name: vec!["chr1".to_string()],
chr_length: vec![8],
Expand Down
6 changes: 6 additions & 0 deletions src/junction/gtf.rs
Original file line number Diff line number Diff line change
Expand Up @@ -316,6 +316,7 @@ mod tests {
let genome = Genome {
sequence: vec![0; 1000],
n_genome: 1000,
n_genome_real: 1000,
n_chr_real: 1,
chr_start: vec![0, 1000],
chr_length: vec![1000],
Expand Down Expand Up @@ -367,6 +368,7 @@ mod tests {
let genome = Genome {
sequence: vec![0; 1000],
n_genome: 1000,
n_genome_real: 1000,
n_chr_real: 1,
chr_start: vec![0, 1000],
chr_length: vec![1000],
Expand Down Expand Up @@ -445,6 +447,7 @@ mod tests {
let genome = Genome {
sequence: vec![0; 1000],
n_genome: 1000,
n_genome_real: 1000,
n_chr_real: 1,
chr_start: vec![0, 1000],
chr_length: vec![1000],
Expand Down Expand Up @@ -476,6 +479,7 @@ mod tests {
let genome = Genome {
sequence: vec![0; 1000],
n_genome: 1000,
n_genome_real: 1000,
n_chr_real: 1,
chr_start: vec![0, 1000],
chr_length: vec![1000],
Expand Down Expand Up @@ -522,6 +526,7 @@ mod tests {
let genome = Genome {
sequence: vec![0; 1000],
n_genome: 1000,
n_genome_real: 1000,
n_chr_real: 1,
chr_start: vec![0, 1000],
chr_length: vec![1000],
Expand Down Expand Up @@ -605,6 +610,7 @@ mod tests {
let genome = Genome {
sequence: vec![0; 1000],
n_genome: 1000,
n_genome_real: 1000,
n_chr_real: 1,
chr_start: vec![0, 1000],
chr_length: vec![1000],
Expand Down
4 changes: 4 additions & 0 deletions src/junction/sj_output.rs
Original file line number Diff line number Diff line change
Expand Up @@ -516,6 +516,7 @@ mod tests {
let genome = Genome {
sequence: vec![0; 1000],
n_genome: 1000,
n_genome_real: 1000,
n_chr_real: 1,
chr_start: vec![0, 1000],
chr_length: vec![1000],
Expand Down Expand Up @@ -577,6 +578,7 @@ mod tests {
let genome = Genome {
sequence: vec![0; 1000],
n_genome: 1000,
n_genome_real: 1000,
n_chr_real: 1,
chr_start: vec![0, 1000],
chr_length: vec![1000],
Expand Down Expand Up @@ -613,6 +615,7 @@ mod tests {
let genome = Genome {
sequence: vec![0; 1000],
n_genome: 1000,
n_genome_real: 1000,
n_chr_real: 1,
chr_start: vec![0, 1000],
chr_length: vec![1000],
Expand Down Expand Up @@ -696,6 +699,7 @@ mod tests {
let genome = Genome {
sequence: vec![0; 1000],
n_genome: 1000,
n_genome_real: 1000,
n_chr_real: 1,
chr_start: vec![0, 1000],
chr_length: vec![1000],
Expand Down
Loading
Loading