diff --git a/src/io/sam.rs b/src/io/sam.rs
index 9e5cff5..b288b4e 100644
--- a/src/io/sam.rs
+++ b/src/io/sam.rs
@@ -17,7 +17,7 @@ use noodles::sam::alignment::record_buf::data::field::value::Array;
 use noodles::sam::alignment::record_buf::{QualityScores, RecordBuf, Sequence};
 use noodles::sam::header::record::value::{
     Map,
-    map::{Program, ReadGroup, tag::Other as HeaderOtherTag},
+    map::{Program, ReadGroup, program::tag as program_tag, tag::Other as HeaderOtherTag},
 };
 use std::collections::HashSet;
 use std::fmt::Write as FmtWrite;
@@ -843,8 +843,27 @@ where
         builder = builder.add_read_group(id, map);
     }
 
-    // @PG line
-    builder = builder.add_program("rustar-aligner", Map::<Program>::default());
+    // @PG line: ID and PN name the program, VN is the crate version, and CL
+    // records the invoking command line (falling back to the program name).
+    let mut pg = Map::<Program>::default();
+    pg.other_fields_mut()
+        .insert(program_tag::NAME, BString::from("rustar-aligner"));
+    pg.other_fields_mut().insert(
+        program_tag::VERSION,
+        BString::from(env!("CARGO_PKG_VERSION")),
+    );
+    // Header fields are TAB-delimited and a header line ends at the newline, so
+    // flatten those characters to spaces to keep the @PG line well-formed.
+    let cl: String = params
+        .command_line
+        .as_deref()
+        .unwrap_or("rustar-aligner")
+        .chars()
+        .map(|c| if matches!(c, '\t' | '\n' | '\r') { ' ' } else { c })
+        .collect();
+    pg.other_fields_mut()
+        .insert(program_tag::COMMAND_LINE, BString::from(cl));
+    builder = builder.add_program("rustar-aligner", pg);
 
     Ok(builder.build())
 }
@@ -1373,6 +1392,76 @@ mod tests {
         assert_eq!(header.reference_sequences().len(), 1);
     }
 
+    #[test]
+    fn test_build_sam_header_pg_line_populated() {
+        let genome = make_test_genome();
+        let mut params = Parameters::parse_from(vec!["rustar-aligner", "--readFilesIn", "test.fq"]);
+        params.command_line =
+            Some("rustar-aligner --readFilesIn test.fq --runThreadN 4".to_string());
+
+        let header = build_sam_header(&genome, &params).unwrap();
+        let programs = header.programs().as_ref();
+        let pg = programs
+            .get(&b"rustar-aligner"[..])
+            .expect("@PG line with ID:rustar-aligner must be present");
+
+        let pn: &[u8] = pg
+            .other_fields()
+            .get(&program_tag::NAME)
+            .expect("PN field must be present")
+            .as_ref();
+        assert_eq!(pn, b"rustar-aligner");
+
+        let vn: &[u8] = pg
+            .other_fields()
+            .get(&program_tag::VERSION)
+            .expect("VN field must be present")
+            .as_ref();
+        assert_eq!(vn, env!("CARGO_PKG_VERSION").as_bytes());
+
+        let cl: &[u8] = pg
+            .other_fields()
+            .get(&program_tag::COMMAND_LINE)
+            .expect("CL field must be present")
+            .as_ref();
+        assert!(!cl.is_empty(), "CL field must be non-empty");
+        assert_eq!(cl, b"rustar-aligner --readFilesIn test.fq --runThreadN 4");
+    }
+
+    #[test]
+    fn test_build_sam_header_pg_line_default_cl_when_unset() {
+        let genome = make_test_genome();
+        let params = Parameters::parse_from(vec!["rustar-aligner", "--readFilesIn", "test.fq"]);
+        assert!(params.command_line.is_none());
+
+        let header = build_sam_header(&genome, &params).unwrap();
+        let programs = header.programs().as_ref();
+        let pg = programs.get(&b"rustar-aligner"[..]).unwrap();
+        let cl: &[u8] = pg
+            .other_fields()
+            .get(&program_tag::COMMAND_LINE)
+            .expect("CL field must be present even when command_line is None")
+            .as_ref();
+        assert_eq!(cl, b"rustar-aligner");
+    }
+
+    #[test]
+    fn test_build_sam_header_pg_line_cl_flattens_tabs_and_newlines() {
+        let genome = make_test_genome();
+        let mut params = Parameters::parse_from(vec!["rustar-aligner", "--readFilesIn", "test.fq"]);
+        params.command_line = Some("rustar-aligner\t--readFilesIn\ntest.fq".to_string());
+
+        let header = build_sam_header(&genome, &params).unwrap();
+        let programs = header.programs().as_ref();
+        let pg = programs.get(&b"rustar-aligner"[..]).unwrap();
+        let cl: &[u8] = pg
+            .other_fields()
+            .get(&program_tag::COMMAND_LINE)
+            .expect("CL field must be present")
+            .as_ref();
+        assert_eq!(cl, b"rustar-aligner --readFilesIn test.fq");
+    }
+
     #[test]
     fn test_build_sam_header_with_rg() {
         let genome = make_test_genome();
diff --git a/src/main.rs b/src/main.rs
index 8979239..4cb56fd 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -8,6 +8,12 @@ fn main() -> anyhow::Result<()> {
 
     cpu::check_cpu_compat()?;
 
-    let params = Parameters::parse();
+    // `args_os` + lossy conversion: `std::env::args` panics on non-Unicode arguments.
+    let command_line = std::env::args_os()
+        .map(|arg| arg.to_string_lossy().into_owned())
+        .collect::<Vec<_>>()
+        .join(" ");
+    let mut params = Parameters::parse();
+    params.command_line = Some(command_line);
     rustar_aligner::run(&params)
 }
diff --git a/src/params.rs b/src/params.rs
index 9c124b8..01c975f 100644
--- a/src/params.rs
+++ b/src/params.rs
@@ -701,6 +701,12 @@ pub struct Parameters {
     /// Chimeric output type
     #[arg(long = "chimOutType", num_args = 1..=2, default_values_t = vec!["Junctions".to_string()])]
     pub chim_out_type: Vec<String>,
+
+    /// Full command line as invoked, embedded in the SAM/BAM header `@PG` `CL:` field.
+    /// Not a CLI option (`#[arg(skip)]`); `main` fills it in after parsing. When `None`,
+    /// the header's `CL:` falls back to the program name.
+    #[arg(skip)]
+    pub command_line: Option<String>,
 }
 
 impl Parameters {