WGLab
diff --git a/‎Makefile‎
Lines changed: 8 additions & 0 deletions b/‎Makefile‎
Lines changed: 8 additions & 0 deletions
diff --git a/‎README.md‎
Lines changed: 14 additions & 0 deletions b/‎README.md‎
Lines changed: 14 additions & 0 deletions
diff --git a/‎include/input_parameters.h‎
Lines changed: 4 additions & 1 deletion b/‎include/input_parameters.h‎
Lines changed: 4 additions & 1 deletion
diff --git a/‎include/tin.h‎
Lines changed: 1 addition & 1 deletion b/‎include/tin.h‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎include/version.h‎
Lines changed: 2 additions & 0 deletions b/‎include/version.h‎
Lines changed: 2 additions & 0 deletions
diff --git a/‎src/bam_module.cpp‎
Lines changed: 7 additions & 7 deletions b/‎src/bam_module.cpp‎
Lines changed: 7 additions & 7 deletions
diff --git a/‎src/cli.py‎
Lines changed: 22 additions & 39 deletions b/‎src/cli.py‎
Lines changed: 22 additions & 39 deletions
@@ -2,6 +2,9 @@ INCL_DIR := $(CURDIR)/include
 SRC_DIR := $(CURDIR)/src
 LIB_DIR := $(CURDIR)/lib
 
+VERSION := $(shell git describe --tags --always)
+VERSION_HEADER := $(INCL_DIR)/version.h
+
 # Set the library paths for the compiler
 CONDA_PREFIX ?= $(shell echo $$CONDA_PREFIX)
 LIBRARY_PATHS := -L$(LIB_DIR) -L$(CONDA_PREFIX)/lib
@@ -10,6 +13,11 @@ INCLUDE_PATHS := -I$(INCL_DIR) -I$(CONDA_PREFIX)/include
 # All targets
 all: swig_build compile
 
+# Rule to generate version.h
+$(VERSION_HEADER):
+	@echo "#pragma once" > $@
+	@echo "#define VERSION \"$(VERSION)\"" >> $@
+
 # Generate the SWIG Python/C++ wrappers
 swig_build:
 	mkdir -p $(LIB_DIR)
 
@@ -10,6 +10,7 @@ LongReadSum supports FASTA, FASTQ, BAM, FAST5, and sequencing_summary.txt file f
 - [Installation using Anaconda (recommended)](#installation-using-anaconda)
 - [Installation using Docker](#installation-using-anaconda)
 - [Building from source](#building-from-source)
+- [MultiQC support](#multiqc-support)
 - General usage for common filetypes:
   - [Common parameters](#common-parameters)
   - [WGS BAM](#wgs-bam)
@@ -78,6 +79,19 @@ conda activate longreadsum
 make
 ```
 
+# MultiQC support
+[MultiQC](https://seqera.io/multiqc/) is a widely used open-source tool for
+aggregating bioinformatics analyses results from many tools across samples.
+
+To run MultiQC, input the LongReadSum directory containing the output JSON
+summary file, and specify the _longreadsum_ module:
+
+```
+multiqc $INPUT_DIRECTORY --module longreadsum --outdir $OUTPUT_DIRECTORY/multiqc
+```
+
+Example report:
+<img width="1707" height="761" alt="image" src="https://github.com/user-attachments/assets/adbcacf7-44f8-48bd-9135-9293379d65d2" />
 
 ## Running
 Activate the conda environment and then run with arguments:
 
@@ -21,7 +21,6 @@ class Input_Para{
     // Parameters
     int threads;
     size_t num_input_files;
-    std::string out_prefix;
     int64_t other_flags;
     int32_t user_defined_fastq_base_qual_offset;
     std::string output_folder;  // Output folder
@@ -36,10 +35,14 @@ class Input_Para{
     bool mod_analysis;  // Perform base modification analysis on BAM file
     int tin_sample_size;  // Number of equally spaced samples for TIN calculation
     int tin_min_coverage;  // Minimum coverage for TIN calculation
+    std::string sample_name;  // Sample name
+    std::string version_str = "";  // Version string for the program
 
     // Functions
     std::string add_input_file(const std::string& input_filepath);
 
+    const std::string &getVersion() const;
+
     Input_Para();
 
     ~Input_Para();
 
@@ -15,7 +15,7 @@ typedef std::unordered_map<std::string, std::tuple<std::string, int, int, double
 
 // Calculate the TIN score for each transcript in the gene BED file
 // (Reference: https://bmcbioinformatics.biomedcentral.com/articles/10.1186/s12859-016-0922-z#Sec11)
-void calculateTIN(TINStats* tin_stats, const std::string& gene_bed, const std::string& bam_filepath, int min_cov, int sample_size, const std::string& output_folder, int thread_count);
+void calculateTIN(TINStats& tin_stats, const std::string& gene_bed, const std::string& bam_filepath, int min_cov, int sample_size, const std::string& sample_name, const std::string& output_folder, int thread_count);
 
 std::unordered_map<int, int> getReadDepths(htsFile* bam_file, hts_idx_t* idx, bam_hdr_t* header, std::string chr, int start, int end, bool transcript_strand);
 
 
@@ -0,0 +1,2 @@
+#pragma once
+#define VERSION "v1.4.0-5-g9e0d2c4"
@@ -80,7 +80,7 @@ int BAM_Module::calculateStatistics(Input_Para &input_params, Output_BAM &final_
             std::cout << "Calculating TIN scores for file: " << filepath << std::endl;
 
             TINStats tin_stats;
-            calculateTIN(&tin_stats, gene_bed, input_params.input_files[i], min_cov, sample_size, input_params.output_folder, input_params.threads);
+            calculateTIN(tin_stats, gene_bed, input_params.input_files[i], min_cov, sample_size, input_params.sample_name, input_params.output_folder, input_params.threads);
 
             // Print the TIN stats
             std::cout << "Number of transcripts: " << tin_stats.num_transcripts << std::endl;
@@ -154,8 +154,6 @@ int BAM_Module::calculateStatistics(Input_Para &input_params, Output_BAM &final_
 
          while (reader.hasNextRecord()){
             // Read the next batch of records
-            // std::cout << "Generating " << thread_count << " thread(s)..." <<
-            // std::endl;
             printMessage("Generating " + std::to_string(thread_count) + " thread(s)...");
             std::vector<std::thread> thread_vector;
             for (int thread_index=0; thread_index<thread_count; thread_index++){
@@ -165,7 +163,6 @@ int BAM_Module::calculateStatistics(Input_Para &input_params, Output_BAM &final_
             }
 
             // Join the threads in thread_vector
-            // std::cout<<"Joining threads..."<<std::endl;
             int thread_index = 0;
             for (auto& t : thread_vector){
                 if (t.joinable()){
@@ -222,11 +219,14 @@ int BAM_Module::calculateStatistics(Input_Para &input_params, Output_BAM &final_
     std::cout << "Saving summary statistics to file..." << std::endl;
 
     // If in RRMS mode, append RRMS accepted/rejected to the output prefix
-    std::string output_prefix = "bam";
+    // std::string output_prefix = "bam";
+    std::string output_prefix = input_params.sample_name;
     if (input_params.rrms_csv != ""){
-        output_prefix += input_params.rrms_filter ? "_rrms_accepted" : "_rrms_rejected";
+        output_prefix += input_params.rrms_filter ? "_accepted" : "_rejected";
     }
-    std::string summary_filepath = input_params.output_folder + "/" + output_prefix + "_summary.txt";
+
+    std::string summary_filepath = input_params.output_folder + "/" + output_prefix + "_summary.bam.json";
+
     final_output.save_summary(summary_filepath, input_params, final_output);
     std::cout << "Saved file: " << summary_filepath << std::endl;
 
 
@@ -65,9 +65,6 @@ def get_common_param(margs):
             pat_split = margs.pattern.split("*")
             param_dict["input_files"].extend(
                 glob(os.path.join("*".join(pat_split[:-1]), "*" + pat_split[-1])))
-
-        # read_count = int(margs.read_count)
-        # param_dict["read_count"] = read_count
 
         if len(param_dict["input_files"]) == 0:
             parsing_error_msg += "No input file(s) were provided.\n"
@@ -88,8 +85,6 @@ def get_common_param(margs):
         except OSError:
             parsing_error_msg += "Cannot create folder for " + param_dict["output_folder"] + " \n"
 
-    param_dict["out_prefix"] = margs.outprefix
-
     # Set up logging to file and stdout
     if margs.log is None or margs.log == "":
         parsing_error_msg += "No log file was provided.\n"
@@ -113,6 +108,7 @@ def get_common_param(margs):
         param_dict["log_level"] = margs.log_level
 
     param_dict["threads"] = margs.threads
+    param_dict["sample_name"] = margs.sample
 
     # Reset the param_dict if there are parsing errors
     if parsing_error_msg != "":
@@ -133,17 +129,15 @@ def fq_module(margs):
 
     else:
         logging.info('Input file(s) are:\n%s', '\n'.join(param_dict["input_files"]))
-        param_dict["out_prefix"] += "fastq"
 
         # Import the SWIG Python wrapper for our C++ module
         input_para = lrst.Input_Para()
         input_para.threads = param_dict["threads"]
 
         input_para.other_flags = 0
         input_para.user_defined_fastq_base_qual_offset = margs.udqual
-
         input_para.output_folder = str(param_dict["output_folder"])
-        input_para.out_prefix = str(param_dict["out_prefix"])
+        input_para.sample_name = param_dict["sample_name"]
 
         for _ipf in param_dict["input_files"]:
             input_para.add_input_file(str(_ipf))
@@ -157,7 +151,7 @@ def fq_module(margs):
             fq_html_gen = generate_html.ST_HTML_Generator(
                 [["basic_st", "read_length_bar", "read_length_hist", "gc_content_hist", "base_counts", "base_quality",
                   "read_avg_base_quality"], "FASTQ QC", param_dict], plot_filepaths, static=False)
-            fq_html_gen.generate_html()
+            fq_html_gen.generate_html(input_para.sample_name, "fastq")
 
             logging.info("Done. Output files are in %s", param_dict["output_folder"])
         else:
@@ -176,12 +170,11 @@ def fa_module(margs):
     else:
         # If there are no parse errors, run the filetype-specific module
         logging.info('Input file(s) are:\n%s', '\n'.join(param_dict["input_files"]))
-        param_dict["out_prefix"] += "fasta"
         input_para = lrst.Input_Para()
         input_para.threads = param_dict["threads"]
         input_para.other_flags = 0
         input_para.output_folder = str(param_dict["output_folder"])
-        input_para.out_prefix = str(param_dict["out_prefix"])
+        input_para.sample_name = param_dict["sample_name"]
 
         for _ipf in param_dict["input_files"]:
             input_para.add_input_file(str(_ipf))
@@ -195,7 +188,7 @@ def fa_module(margs):
             fa_html_gen = generate_html.ST_HTML_Generator(
                 [["basic_st", "read_length_bar", "read_length_hist", "gc_content_hist", "base_counts"], "FASTA QC",
                  param_dict], plot_filepaths, static=True)
-            fa_html_gen.generate_html()
+            fa_html_gen.generate_html(input_para.sample_name, "fasta")
             logging.info("Done. Output files are in %s", param_dict["output_folder"])
 
         else:
@@ -211,11 +204,10 @@ def bam_module(margs):
 
     else:
         logging.info('Input file(s) are:\n%s', '\n'.join(param_dict["input_files"]))
-        param_dict["out_prefix"] += "bam";
         input_para = lrst.Input_Para()
         input_para.threads = param_dict["threads"]
         input_para.output_folder = str(param_dict["output_folder"])
-        input_para.out_prefix = str(param_dict["out_prefix"])
+        input_para.sample_name = param_dict["sample_name"]
 
         # Set the reference genome file and base modification threshold
         ref_genome = margs.ref if margs.ref != "" or margs.ref is not None else ""
@@ -269,7 +261,7 @@ def bam_module(margs):
             # Generate the HTML report
             bam_html_gen = generate_html.ST_HTML_Generator(
                 [qc_info_list, "BAM QC", param_dict], plot_filepaths, static=False)
-            bam_html_gen.generate_html()
+            bam_html_gen.generate_html(input_para.sample_name, "bam")
             logging.info("Done. Output files are in %s", param_dict["output_folder"])
 
         else:
@@ -288,26 +280,19 @@ def rrms_module(margs):
         input_para = lrst.Input_Para()
         input_para.threads = param_dict["threads"]
         input_para.output_folder = str(param_dict["output_folder"])
-        input_para.out_prefix = str(param_dict["out_prefix"])
+        input_para.sample_name = param_dict["sample_name"]
+
         for _ipf in param_dict["input_files"]:
             input_para.add_input_file(str(_ipf))
 
         # Set the RRMS input CSV file
         input_para.rrms_csv = margs.csv
         logging.info("RRMS CSV file is " + input_para.rrms_csv)
 
-        # Get the output prefix
-        output_prefix = param_dict["out_prefix"]
-
         # Run QC for both accepted and rejected reads
         rrms_filter = [True, False]
         for filter_type in rrms_filter:
-
-            # Set the RRMS filter type
-            input_para.rrms_filter = filter_type
-
-            # Set the output prefix
-            param_dict["out_prefix"] = output_prefix + "rrms_" + ("accepted" if filter_type else "rejected")
+            input_para.rrms_filter = filter_type  # True for accepted reads, False for rejected reads
             param_dict["mod"] = input_para.mod_analysis = False  # Disable base modification analysis for RRMS (use BAM module for this)
 
             # Run the QC module
@@ -332,7 +317,7 @@ def rrms_module(margs):
                 # Generate the HTML report
                 bam_html_gen = generate_html.ST_HTML_Generator(
                     [qc_info_list, "BAM QC", param_dict], plot_filepaths, static=False)
-                bam_html_gen.generate_html()
+                bam_html_gen.generate_html(input_para.sample_name, "rrms_bam_" + ("accepted" if filter_type else "rejected"))
                 logging.info("Done. Output files are in %s", param_dict["output_folder"])
 
             else:
@@ -349,11 +334,10 @@ def seqtxt_module(margs):
 
     else:
         logging.info('Input file(s) are:\n%s', '\n'.join(param_dict["input_files"]))
-        param_dict["out_prefix"] += "seqtxt"
         input_para = lrst.Input_Para()
         input_para.threads = param_dict["threads"]
         input_para.output_folder = str(param_dict["output_folder"])
-        input_para.out_prefix = str(param_dict["out_prefix"])
+        input_para.sample_name = param_dict["sample_name"]
 
         for _ipf in param_dict["input_files"]:
             input_para.add_input_file(str(_ipf))
@@ -370,7 +354,7 @@ def seqtxt_module(margs):
                 [["basic_st", "read_length_bar", "read_length_hist"],
                     report_title, param_dict], plot_filepaths, static=False)
 
-            seqtxt_html_gen.generate_html()
+            seqtxt_html_gen.generate_html(input_para.sample_name, "basecall_summary")
             logging.info("Done. Output files are in %s", param_dict["output_folder"])
         else:
             logging.error("QC did not generate.")
@@ -386,11 +370,10 @@ def fast5_module(margs):
 
     else:
         # logging.info('Input file(s) are:\n%s', '\n'.join(param_dict["input_files"]))
-        param_dict["out_prefix"] += "FAST5"
         input_para = lrst.Input_Para()
         input_para.threads = param_dict["threads"]
         input_para.output_folder = str(param_dict["output_folder"])
-        input_para.out_prefix = str(param_dict["out_prefix"])
+        input_para.sample_name = param_dict["sample_name"]
         input_para.other_flags = 0  # 0 for normal QC, 1 for signal statistics output
 
         for _ipf in param_dict["input_files"]:
@@ -405,7 +388,7 @@ def fast5_module(margs):
             fast5_html_obj = generate_html.ST_HTML_Generator(
                 [["basic_st", "read_length_bar", "read_length_hist", "gc_content_hist", "base_counts", "base_quality"], 
                  "FAST5 QC", param_dict], plot_filepaths, static=False)
-            fast5_html_obj.generate_html()
+            fast5_html_obj.generate_html(input_para.sample_name, "fast5")
             logging.info("Done. Output files are in %s", param_dict["output_folder"])
 
         else:
@@ -421,11 +404,11 @@ def fast5_signal_module(margs):
 
     else:
         # logging.info('Input file(s) are:\n%s', '\n'.join(param_dict["input_files"]))
-        param_dict["out_prefix"] += "fast5_signal"
         input_para = lrst.Input_Para()
         input_para.threads = param_dict["threads"]
         input_para.output_folder = str(param_dict["output_folder"])
-        input_para.out_prefix = str(param_dict["out_prefix"])
+        input_para.sample_name = param_dict["sample_name"]
+        
         input_para.other_flags = 1  # 0 for normal QC, 1 for signal statistics output
 
         # Get the read count if specified
@@ -450,7 +433,7 @@ def fast5_signal_module(margs):
             plot_filepaths = plot(fast5_output, param_dict, 'FAST5s')
             fast5_html_obj = generate_html.ST_HTML_Generator(
                 [["basic_st", "read_length_bar", "read_length_hist", "gc_content_hist", "base_counts", "ont_signal"], "FAST5 QC", param_dict], plot_filepaths, static=False)
-            fast5_html_obj.generate_html(signal_plots=True)
+            fast5_html_obj.generate_html(input_para.sample_name, "fast5_signal", signal_plots=True)
             logging.info("Done. Output files are in %s", param_dict["output_folder"])
 
         else:
@@ -467,11 +450,10 @@ def pod5_module(margs):
 
     else:
         # logging.info('Input file(s) are:\n%s', '\n'.join(param_dict["input_files"]))
-        param_dict["out_prefix"] += "POD5"
         input_para = {}
         input_para['threads'] = param_dict["threads"]
         input_para['output_folder'] = str(param_dict["output_folder"])
-        input_para['out_prefix'] = str(param_dict["out_prefix"])
+        input_para['sample_name'] = param_dict["sample_name"]
         input_para['other_flags'] = 0  # 0 for normal QC, 1 for signal statistics output
         input_para['input_files'] = []
         for input_file in param_dict["input_files"]:
@@ -496,7 +478,6 @@ def pod5_module(margs):
             basecalls_input = lrst.Input_Para()
             basecalls_input.threads = param_dict["threads"]
             basecalls_input.output_folder = str(param_dict["output_folder"])
-            basecalls_input.out_prefix = str(param_dict["out_prefix"])
             basecalls_input.add_input_file(basecalls)
             bam_output = lrst.Output_BAM()
             exit_code = lrst.callBAMModule(basecalls_input, bam_output)
@@ -518,7 +499,7 @@ def pod5_module(margs):
             webpage_title = "POD5 QC"
             fast5_html_obj = generate_html.ST_HTML_Generator(
                 [["basic_st", "read_length_bar", "read_length_hist", "gc_content_hist", "base_counts", "ont_signal"], webpage_title, param_dict], plot_filepaths, static=False)
-            fast5_html_obj.generate_html(signal_plots=True)
+            fast5_html_obj.generate_html(input_para.sample_name, "pod5_signal", signal_plots=True)
             logging.info("Done. Output files are in %s", param_dict["output_folder"])
 
         else:
@@ -543,6 +524,8 @@ def set_file_parser_defaults(file_parser):
                         help="The number of threads used. Default: 1.")
     file_parser.add_argument("-Q", "--outprefix", type=str, default="QC_",
                         help="The prefix for output filenames. Default: `QC_`.")
+    file_parser.add_argument("-s", "--sample", type=str, default="Sample",
+                        help="Sample name. Default: `Sample`")
 
 
 # Set up the argument parser
Original file line number	Diff line number	Diff line change
`@@ -0,0 +1,2 @@`
	`1`	`+#pragma once`
	`2`	`+#define VERSION "v1.4.0-5-g9e0d2c4"`