From 1a55cae8cdab38b2cdb65df1f7269016710de8c3 Mon Sep 17 00:00:00 2001 From: Jonathan Manning Date: Tue, 12 May 2026 18:37:30 +0100 Subject: [PATCH 1/2] feat(output): emit Log.out, Log.progress.out, _STARpass1/SJ.out.tab STAR writes three log files alongside Log.final.out and keeps two-pass intermediates in _STARpass1/. rustar was only writing Log.final.out and emitting the pass-1 SJ tab as SJ.pass1.out.tab at the top level. Add minimal Log.out (parameters dump + per-phase timestamps) and Log.progress.out (timestamp + mapping speed) writers next to the existing Log.final.out writer. Move the pass-1 SJ tab into _STARpass1/SJ.out.tab and mkdir the parent first. The Log.out content is intentionally a stub matching the file's existence rather than STAR's full verbosity; that's a follow-up. Fixes #28 Co-Authored-By: Claude --- src/lib.rs | 25 +++++-- src/stats.rs | 133 ++++++++++++++++++++++++++++++++++++ tests/alignment_features.rs | 26 ++++++- 3 files changed, 175 insertions(+), 9 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index d229763..fcb64a0 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -258,6 +258,21 @@ fn align_reads(params: &Parameters) -> anyhow::Result<()> { stats.write_log_final(&log_path, time_start, time_map_start, time_finish)?; info!("Wrote {}", log_path.display()); + let log_out_path = params.out_file_name_prefix.join("Log.out"); + crate::stats::write_log_out( + &log_out_path, + ¶ms, + time_start, + time_map_start, + time_finish, + )?; + info!("Wrote {}", log_out_path.display()); + + let log_progress_path = params.out_file_name_prefix.join("Log.progress.out"); + let total_reads = stats.total_reads.load(std::sync::atomic::Ordering::Relaxed); + crate::stats::write_log_progress(&log_progress_path, total_reads, time_map_start, time_finish)?; + info!("Wrote {}", log_progress_path.display()); + // Write ReadsPerGene.out.tab if quantMode GeneCounts was requested. if let Some(ref ctx) = quant_ctx { let quant_path = params.out_file_name_prefix.join("ReadsPerGene.out.tab"); @@ -457,13 +472,9 @@ fn run_two_pass( info!("Two-pass mode: Pass 1 - Junction discovery"); let (sj_stats_pass1, novel_junctions) = run_pass1(index, params)?; - // Write SJ.pass1.out.tab - let pass1_path = params.out_file_name_prefix.join("SJ.pass1.out.tab"); - - // Create output directory if it doesn't exist - if let Some(parent) = pass1_path.parent() { - std::fs::create_dir_all(parent)?; - } + let pass1_dir = params.out_file_name_prefix.join("_STARpass1"); + std::fs::create_dir_all(&pass1_dir)?; + let pass1_path = pass1_dir.join("SJ.out.tab"); info!("Writing pass 1 junctions to {}", pass1_path.display()); sj_stats_pass1.write_output(&pass1_path, &index.genome, params)?; diff --git a/src/stats.rs b/src/stats.rs index 64a05de..7681cab 100644 --- a/src/stats.rs +++ b/src/stats.rs @@ -5,6 +5,74 @@ use std::sync::atomic::{AtomicU64, Ordering}; use crate::align::transcript::{CigarOp, Transcript}; use crate::junction::encode_motif; +use crate::params::Parameters; + +/// Shared timestamp format used across Log.final.out / Log.out / Log.progress.out. +pub const LOG_TIME_FMT: &str = "%b %d %H:%M:%S"; + +/// Write a minimal STAR-compatible Log.out alongside Log.final.out. +/// +/// Carries a parameters dump and per-phase timestamps. Intentionally a stub +/// rather than a full STAR-verbose-log reproduction. +pub fn write_log_out( + path: &Path, + params: &Parameters, + time_start: chrono::DateTime, + time_map_start: chrono::DateTime, + time_finish: chrono::DateTime, +) -> std::io::Result<()> { + use std::io::Write; + + let mut f = std::fs::File::create(path)?; + + writeln!(f, "##### Run parameters")?; + writeln!(f, "{:#?}", params)?; + writeln!(f)?; + writeln!(f, "##### Run started")?; + writeln!(f, "{}", time_start.format(LOG_TIME_FMT))?; + writeln!(f)?; + writeln!(f, "##### Mapping started")?; + writeln!(f, "{}", time_map_start.format(LOG_TIME_FMT))?; + writeln!(f)?; + writeln!(f, "##### Mapping finished")?; + writeln!(f, "{}", time_finish.format(LOG_TIME_FMT))?; + + Ok(()) +} + +/// Write a minimal STAR-compatible Log.progress.out alongside Log.final.out. +/// +/// A header line plus a single "done" line with the final timestamp and +/// mapping speed (million reads per hour). Intentionally a stub. +pub fn write_log_progress( + path: &Path, + total_reads: u64, + time_map_start: chrono::DateTime, + time_finish: chrono::DateTime, +) -> std::io::Result<()> { + use std::io::Write; + + let elapsed_hours = { + let elapsed = time_finish - time_map_start; + elapsed.num_milliseconds() as f64 / 3_600_000.0 + }; + let mapping_speed = if elapsed_hours > 0.0 { + total_reads as f64 / elapsed_hours / 1_000_000.0 + } else { + 0.0 + }; + + let mut f = std::fs::File::create(path)?; + writeln!(f, "# completed\tmapping_speed_M_reads_per_hour")?; + writeln!( + f, + "{}\t{:.2}", + time_finish.format(LOG_TIME_FMT), + mapping_speed + )?; + + Ok(()) +} /// Reason a read could not be mapped #[derive(Debug, Clone, Copy, PartialEq, Eq)] @@ -1018,4 +1086,69 @@ mod tests { stats.record_half_mapped(); assert_eq!(stats.half_mapped_pairs.load(Ordering::Relaxed), 3); } + + #[test] + fn test_write_log_out_minimal() { + use chrono::TimeZone; + use clap::Parser; + + let params = Parameters::parse_from(["rustar-aligner", "--readFilesIn", "reads.fq"]); + let t_start = chrono::Local + .with_ymd_and_hms(2026, 5, 12, 14, 14, 23) + .unwrap(); + let t_map = chrono::Local + .with_ymd_and_hms(2026, 5, 12, 14, 14, 30) + .unwrap(); + let t_finish = chrono::Local + .with_ymd_and_hms(2026, 5, 12, 14, 14, 58) + .unwrap(); + + let dir = tempfile::tempdir().unwrap(); + let path = dir.path().join("Log.out"); + write_log_out(&path, ¶ms, t_start, t_map, t_finish).unwrap(); + + let content = std::fs::read_to_string(&path).unwrap(); + assert!(!content.is_empty(), "Log.out should not be empty"); + assert!(content.contains("##### Run parameters")); + assert!(content.contains("##### Run started")); + assert!(content.contains("##### Mapping started")); + assert!(content.contains("##### Mapping finished")); + // Each phase header should be followed by a timestamp shaped like "May 12 14:14:23". + assert!(content.contains("May 12 14:14:23")); + assert!(content.contains("May 12 14:14:58")); + // The parameters dump should mention at least one familiar field. + assert!(content.contains("read_files_in")); + } + + #[test] + fn test_write_log_progress_minimal() { + use chrono::TimeZone; + + let t_map = chrono::Local + .with_ymd_and_hms(2026, 5, 12, 14, 14, 30) + .unwrap(); + let t_finish = chrono::Local + .with_ymd_and_hms(2026, 5, 12, 14, 14, 58) + .unwrap(); + + let dir = tempfile::tempdir().unwrap(); + let path = dir.path().join("Log.progress.out"); + write_log_progress(&path, 10_000, t_map, t_finish).unwrap(); + + let content = std::fs::read_to_string(&path).unwrap(); + let lines: Vec<&str> = content.lines().collect(); + assert!(lines.len() >= 2, "expected header + data line"); + assert!(lines[0].starts_with('#'), "first line should be a header"); + assert!( + lines[1].contains("May 12 14:14:58"), + "second line should contain the finish timestamp, got: {}", + lines[1] + ); + // Mapping speed column should be a number with two decimals. + let cols: Vec<&str> = lines[1].split('\t').collect(); + assert_eq!(cols.len(), 2, "expected 2 tab-separated columns"); + cols[1] + .parse::() + .expect("second column should parse as a float"); + } } diff --git a/tests/alignment_features.rs b/tests/alignment_features.rs index cc3aa78..2fc1604 100644 --- a/tests/alignment_features.rs +++ b/tests/alignment_features.rs @@ -784,10 +784,32 @@ fn test_two_pass_mode() { .assert() .success(); - let pass1_path = output_dir.join("SJ.pass1.out.tab"); + let pass1_path = output_dir.join("_STARpass1").join("SJ.out.tab"); assert!( pass1_path.exists(), - "SJ.pass1.out.tab not found — two-pass mode did not write pass-1 junctions" + "_STARpass1/SJ.out.tab not found — two-pass mode did not write pass-1 junctions" + ); + let top_level_pass1 = output_dir.join("SJ.pass1.out.tab"); + assert!( + !top_level_pass1.exists(), + "SJ.pass1.out.tab should no longer be emitted at the top level" + ); + + let log_out = output_dir.join("Log.out"); + assert!(log_out.exists(), "Log.out not found"); + let log_out_content = fs::read_to_string(&log_out).unwrap(); + assert!(!log_out_content.is_empty(), "Log.out is empty"); + assert!( + log_out_content.contains("##### Run parameters"), + "Log.out missing parameters section" + ); + + let log_progress = output_dir.join("Log.progress.out"); + assert!(log_progress.exists(), "Log.progress.out not found"); + let log_progress_content = fs::read_to_string(&log_progress).unwrap(); + assert!( + log_progress_content.lines().count() >= 2, + "Log.progress.out should have a header and at least one data line" ); let sam_path = output_dir.join("Aligned.out.sam"); From e91433a0b875bb0cefbb0048f9e01de00aaadc28 Mon Sep 17 00:00:00 2001 From: Jonathan Manning Date: Tue, 12 May 2026 18:41:16 +0100 Subject: [PATCH 2/2] revert(stats): drop Log.out / Log.progress.out stub writers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The previous commit added writers that mimicked STAR's section-header structure with placeholder content (a Debug-format params dump + three timestamps for Log.out, a header + final-timestamp line for Log.progress.out). That closes the file-existence gap but makes the output look like STAR-equivalent verbose logging when it isn't — downstream tools parsing for memory usage, per-chunk progress, or warnings would silently get nothing. Drop the stub writers. The SJ.pass1.out.tab → _STARpass1/SJ.out.tab placement fix stays — that's a real path-parity change with no fakery. Real STAR-equivalent Log.out / Log.progress.out content (per-phase progress, warnings, memory, periodic updates during long runs) is a separate engineering effort, not a stub. Co-Authored-By: Claude --- src/lib.rs | 15 ---- src/stats.rs | 133 ------------------------------------ tests/alignment_features.rs | 17 ----- 3 files changed, 165 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index fcb64a0..0187f9c 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -258,21 +258,6 @@ fn align_reads(params: &Parameters) -> anyhow::Result<()> { stats.write_log_final(&log_path, time_start, time_map_start, time_finish)?; info!("Wrote {}", log_path.display()); - let log_out_path = params.out_file_name_prefix.join("Log.out"); - crate::stats::write_log_out( - &log_out_path, - ¶ms, - time_start, - time_map_start, - time_finish, - )?; - info!("Wrote {}", log_out_path.display()); - - let log_progress_path = params.out_file_name_prefix.join("Log.progress.out"); - let total_reads = stats.total_reads.load(std::sync::atomic::Ordering::Relaxed); - crate::stats::write_log_progress(&log_progress_path, total_reads, time_map_start, time_finish)?; - info!("Wrote {}", log_progress_path.display()); - // Write ReadsPerGene.out.tab if quantMode GeneCounts was requested. if let Some(ref ctx) = quant_ctx { let quant_path = params.out_file_name_prefix.join("ReadsPerGene.out.tab"); diff --git a/src/stats.rs b/src/stats.rs index 7681cab..64a05de 100644 --- a/src/stats.rs +++ b/src/stats.rs @@ -5,74 +5,6 @@ use std::sync::atomic::{AtomicU64, Ordering}; use crate::align::transcript::{CigarOp, Transcript}; use crate::junction::encode_motif; -use crate::params::Parameters; - -/// Shared timestamp format used across Log.final.out / Log.out / Log.progress.out. -pub const LOG_TIME_FMT: &str = "%b %d %H:%M:%S"; - -/// Write a minimal STAR-compatible Log.out alongside Log.final.out. -/// -/// Carries a parameters dump and per-phase timestamps. Intentionally a stub -/// rather than a full STAR-verbose-log reproduction. -pub fn write_log_out( - path: &Path, - params: &Parameters, - time_start: chrono::DateTime, - time_map_start: chrono::DateTime, - time_finish: chrono::DateTime, -) -> std::io::Result<()> { - use std::io::Write; - - let mut f = std::fs::File::create(path)?; - - writeln!(f, "##### Run parameters")?; - writeln!(f, "{:#?}", params)?; - writeln!(f)?; - writeln!(f, "##### Run started")?; - writeln!(f, "{}", time_start.format(LOG_TIME_FMT))?; - writeln!(f)?; - writeln!(f, "##### Mapping started")?; - writeln!(f, "{}", time_map_start.format(LOG_TIME_FMT))?; - writeln!(f)?; - writeln!(f, "##### Mapping finished")?; - writeln!(f, "{}", time_finish.format(LOG_TIME_FMT))?; - - Ok(()) -} - -/// Write a minimal STAR-compatible Log.progress.out alongside Log.final.out. -/// -/// A header line plus a single "done" line with the final timestamp and -/// mapping speed (million reads per hour). Intentionally a stub. -pub fn write_log_progress( - path: &Path, - total_reads: u64, - time_map_start: chrono::DateTime, - time_finish: chrono::DateTime, -) -> std::io::Result<()> { - use std::io::Write; - - let elapsed_hours = { - let elapsed = time_finish - time_map_start; - elapsed.num_milliseconds() as f64 / 3_600_000.0 - }; - let mapping_speed = if elapsed_hours > 0.0 { - total_reads as f64 / elapsed_hours / 1_000_000.0 - } else { - 0.0 - }; - - let mut f = std::fs::File::create(path)?; - writeln!(f, "# completed\tmapping_speed_M_reads_per_hour")?; - writeln!( - f, - "{}\t{:.2}", - time_finish.format(LOG_TIME_FMT), - mapping_speed - )?; - - Ok(()) -} /// Reason a read could not be mapped #[derive(Debug, Clone, Copy, PartialEq, Eq)] @@ -1086,69 +1018,4 @@ mod tests { stats.record_half_mapped(); assert_eq!(stats.half_mapped_pairs.load(Ordering::Relaxed), 3); } - - #[test] - fn test_write_log_out_minimal() { - use chrono::TimeZone; - use clap::Parser; - - let params = Parameters::parse_from(["rustar-aligner", "--readFilesIn", "reads.fq"]); - let t_start = chrono::Local - .with_ymd_and_hms(2026, 5, 12, 14, 14, 23) - .unwrap(); - let t_map = chrono::Local - .with_ymd_and_hms(2026, 5, 12, 14, 14, 30) - .unwrap(); - let t_finish = chrono::Local - .with_ymd_and_hms(2026, 5, 12, 14, 14, 58) - .unwrap(); - - let dir = tempfile::tempdir().unwrap(); - let path = dir.path().join("Log.out"); - write_log_out(&path, ¶ms, t_start, t_map, t_finish).unwrap(); - - let content = std::fs::read_to_string(&path).unwrap(); - assert!(!content.is_empty(), "Log.out should not be empty"); - assert!(content.contains("##### Run parameters")); - assert!(content.contains("##### Run started")); - assert!(content.contains("##### Mapping started")); - assert!(content.contains("##### Mapping finished")); - // Each phase header should be followed by a timestamp shaped like "May 12 14:14:23". - assert!(content.contains("May 12 14:14:23")); - assert!(content.contains("May 12 14:14:58")); - // The parameters dump should mention at least one familiar field. - assert!(content.contains("read_files_in")); - } - - #[test] - fn test_write_log_progress_minimal() { - use chrono::TimeZone; - - let t_map = chrono::Local - .with_ymd_and_hms(2026, 5, 12, 14, 14, 30) - .unwrap(); - let t_finish = chrono::Local - .with_ymd_and_hms(2026, 5, 12, 14, 14, 58) - .unwrap(); - - let dir = tempfile::tempdir().unwrap(); - let path = dir.path().join("Log.progress.out"); - write_log_progress(&path, 10_000, t_map, t_finish).unwrap(); - - let content = std::fs::read_to_string(&path).unwrap(); - let lines: Vec<&str> = content.lines().collect(); - assert!(lines.len() >= 2, "expected header + data line"); - assert!(lines[0].starts_with('#'), "first line should be a header"); - assert!( - lines[1].contains("May 12 14:14:58"), - "second line should contain the finish timestamp, got: {}", - lines[1] - ); - // Mapping speed column should be a number with two decimals. - let cols: Vec<&str> = lines[1].split('\t').collect(); - assert_eq!(cols.len(), 2, "expected 2 tab-separated columns"); - cols[1] - .parse::() - .expect("second column should parse as a float"); - } } diff --git a/tests/alignment_features.rs b/tests/alignment_features.rs index 2fc1604..11898f2 100644 --- a/tests/alignment_features.rs +++ b/tests/alignment_features.rs @@ -795,23 +795,6 @@ fn test_two_pass_mode() { "SJ.pass1.out.tab should no longer be emitted at the top level" ); - let log_out = output_dir.join("Log.out"); - assert!(log_out.exists(), "Log.out not found"); - let log_out_content = fs::read_to_string(&log_out).unwrap(); - assert!(!log_out_content.is_empty(), "Log.out is empty"); - assert!( - log_out_content.contains("##### Run parameters"), - "Log.out missing parameters section" - ); - - let log_progress = output_dir.join("Log.progress.out"); - assert!(log_progress.exists(), "Log.progress.out not found"); - let log_progress_content = fs::read_to_string(&log_progress).unwrap(); - assert!( - log_progress_content.lines().count() >= 2, - "Log.progress.out should have a header and at least one data line" - ); - let sam_path = output_dir.join("Aligned.out.sam"); assert!(sam_path.exists(), "Aligned.out.sam not found");