3737#include < string>
3838#include < vector>
3939
40- // NOLINTBEGIN(*-avoid-magic-numbers,*-narrowing-conversions)
40+ // NOLINTBEGIN(*-narrowing-conversions)
41+
42+ static auto
43+ write_output (const std::string &outfile, const std::uint32_t bin_size,
44+ const std::uint32_t base_step_size,
45+ const std::vector<double > &coverage_estimates) {
46+ std::ofstream out (outfile);
47+ if (!out)
48+ throw std::runtime_error (" failed to open output file: " + outfile);
49+
50+ out << " TOTAL_BASES\t EXPECTED_DISTINCT\n " ;
51+
52+ out << 0 << ' \t ' << 0 << ' \n ' ;
53+ for (auto i = 0u ; i < std::size (coverage_estimates); ++i)
54+ out << (i + 1 ) * base_step_size << ' \t ' << coverage_estimates[i] * bin_size
55+ << ' \n ' ;
56+ }
4157
4258// ADS: functions same, header different (above and this one)
43- static void
59+ static auto
4460write_predicted_coverage_curve (
4561 const std::string &outfile, const double c_level, const double base_step_size,
46- const std::size_t bin_size, const std::vector<double > &cvrg_estimates,
62+ const std::uint32_t bin_size, const std::vector<double > &cvrg_estimates,
4763 const std::vector<double > &cvrg_lower_ci_lognorm,
4864 const std::vector<double > &cvrg_upper_ci_lognorm) {
4965 static constexpr double one_hundred = 100.0 ;
50- std::ofstream of;
51- if (!outfile.empty ())
52- of.open (outfile);
53- std::ostream out (outfile.empty () ? std::cout.rdbuf () : of.rdbuf ());
66+ std::ofstream out (outfile);
67+ if (!out)
68+ throw std::runtime_error (" failed to open output file: " + outfile);
5469
5570 const double percentile = one_hundred * c_level;
5671 // clang-format off
5772 out << " TOTAL_BASES" << ' \t '
5873 << " EXPECTED_COVERED_BASES" << ' \t '
59- << " LOWER_" << percentile << " %CI" << ' \t '
60- << " UPPER_" << percentile << " %CI"
61- << ' \n ' ;
74+ << " LOWER_" << percentile << " _CI" << ' \t '
75+ << " UPPER_" << percentile << " _CI\n " ;
6276 // clang-format on
6377
64- out.setf (std::ios_base::fixed, std::ios_base::floatfield);
65- out.precision (1 );
66-
6778 out << 0 << ' \t ' << 0 << ' \t ' << 0 << ' \t ' << 0 << ' \n ' ;
68- for (std:: size_t i = 0 ; i < std::size (cvrg_estimates); ++i) {
79+ for (auto i = 0u ; i < std::size (cvrg_estimates); ++i) {
6980 // clang-format off
7081 out << (i + 1 ) * base_step_size << ' \t '
7182 << cvrg_estimates[i] * bin_size << ' \t '
@@ -78,29 +89,30 @@ write_predicted_coverage_curve(
7889auto
7990gc_extrap::main (int argc, char *argv[]) -> int { // NOLINT(*-avoid-c-arrays)
8091 try {
81- static constexpr auto MIN_REQUIRED_COUNTS = 4 ;
92+ static constexpr auto min_required_counts = 4 ;
93+ static constexpr auto max_iter_per_bootstrap = 10 ;
8294
8395 std::string outfile;
8496 std::string infile;
8597 std::string histogram_outfile;
8698
99+ // NOLINTBEGIN(*-avoid-magic-numbers)
87100 int diagonal = 0 ;
88- std::size_t orig_max_terms = 100 ;
89- std::size_t bin_size = 10 ;
90- bool verbose = false ;
101+ std::uint32_t orig_max_terms = 100 ;
102+ std::uint32_t bin_size = 10 ;
91103 double base_step_size = 1.0e8 ;
92- std::size_t max_width = 10000 ;
93- bool SINGLE_ESTIMATE = false ;
104+ std::uint32_t max_width = 10000 ;
94105 double max_extrap = 1.0e12 ;
95- std::size_t n_bootstraps = 100 ;
106+ std::uint32_t n_bootstraps = 100 ;
96107 std::uint32_t seed = 408 ;
97- bool allow_defects = false ;
98-
99108 double c_level = 0.95 ;
100- bool BAM_FORMAT_INPUT = false ;
101- #ifdef HAVE_HTSLIB
102109 std::uint32_t n_threads{1 };
103- #endif
110+ // NOLINTEND(*-avoid-magic-numbers)
111+
112+ bool allow_defects{false };
113+ bool verbose{false };
114+ bool single_estimate{false };
115+
104116 CLI::App app{rlstrip (about_msg)};
105117 argv = app.ensure_utf8 (argv);
106118 app.usage (" \n Usage: preseq gc_extrap [OPTIONS]" );
@@ -124,11 +136,8 @@ gc_extrap::main(int argc, char *argv[]) -> int { // NOLINT(*-avoid-c-arrays)
124136 app.add_option (" -n,--bootstraps" , n_bootstraps, " number of bootstraps" );
125137 app.add_option (" -c,--cval" , c_level, " level for confidence intervals" );
126138 app.add_option (" -x,--terms" , orig_max_terms, " maximum number of terms" );
127- #ifdef HAVE_HTSLIB
128- app.add_flag (" -B,--bam" , BAM_FORMAT_INPUT, " input is in BAM format" );
129- #endif
130139 app.add_option (" -r,--seed" , seed, " seed for random number generator" );
131- app.add_flag (" -Q,--quick" , SINGLE_ESTIMATE ,
140+ app.add_flag (" -Q,--quick" , single_estimate ,
132141 " quick mode: run gc_extrap without bootstrapping for confidence intervals" );
133142 app.add_flag (" -D,--defects" , allow_defects,
134143 " defects mode to extrapolate without testing for defects" );
@@ -142,30 +151,30 @@ gc_extrap::main(int argc, char *argv[]) -> int { // NOLINT(*-avoid-c-arrays)
142151 }
143152 CLI11_PARSE (app, argc, argv);
144153
145- const auto input_format = BAM_FORMAT_INPUT ? " BAM" : " BED" ;
154+ const auto bam_format_input = is_sam_or_bam_format (infile);
155+
156+ const auto input_format = bam_format_input ? " BAM" : " BED" ;
146157 if (verbose)
147158 std::cerr << " LOADING READS (" << input_format << " format)\n " ;
148159
149- const auto [n_reads, coverage_hist] = [&] {
160+ const auto [n_reads, coverage_hist] =
150161#ifdef HAVE_HTSLIB
151- if (BAM_FORMAT_INPUT)
152- return load_coverage_counts_BAM (n_threads, infile, seed, bin_size,
153- max_width);
154- else
162+ bam_format_input
163+ ? load_coverage_counts_BAM (n_threads, infile, seed, bin_size, max_width)
164+ :
155165#endif
156- return load_coverage_counts (infile, seed, bin_size, max_width);
157- }();
166+ load_coverage_counts (infile, seed, bin_size, max_width);
158167
159168 const auto total_bins = get_counts_from_hist (coverage_hist);
160169 const auto distinct_bins = std::accumulate (std::cbegin (coverage_hist),
161170 std::cend (coverage_hist), 0.0 );
162171 const double avg_bins_per_read = total_bins / n_reads;
163172 const double bin_step_size = base_step_size / bin_size;
164173
165- const std::size_t max_observed_count = std::size (coverage_hist) - 1 ;
174+ const std::uint32_t max_observed_count = std::size (coverage_hist) - 1 ;
166175
167176 // ENSURE THAT THE MAX TERMS ARE ACCEPTABLE
168- std::size_t first_zero{1 };
177+ std::uint32_t first_zero{1 };
169178 while (first_zero < std::size (coverage_hist) &&
170179 coverage_hist[first_zero] > 0 )
171180 ++first_zero;
@@ -188,7 +197,7 @@ gc_extrap::main(int argc, char *argv[]) -> int { // NOLINT(*-avoid-c-arrays)
188197 report_histogram (histogram_outfile, coverage_hist);
189198
190199 // catch if all reads are distinct
191- if (orig_max_terms < MIN_REQUIRED_COUNTS )
200+ if (orig_max_terms < min_required_counts )
192201 throw std::runtime_error (" max count before zero is les than min required "
193202 " count (4), sample not sufficiently deep or "
194203 " duplicates removed" );
@@ -204,35 +213,22 @@ gc_extrap::main(int argc, char *argv[]) -> int { // NOLINT(*-avoid-c-arrays)
204213
205214 std::vector<double > coverage_estimates;
206215
207- if (SINGLE_ESTIMATE ) {
208- bool SINGLE_ESTIMATE_SUCCESS = extrap_single_estimate (
216+ if (single_estimate ) {
217+ const auto success = extrap_single_estimate (
209218 verbose, allow_defects, coverage_hist, orig_max_terms, diagonal,
210219 bin_step_size, max_extrap / bin_size, coverage_estimates);
211220 // IF FAILURE, EXIT
212- if (!SINGLE_ESTIMATE_SUCCESS)
213- throw std::runtime_error (" SINGLE ESTIMATE FAILED, NEED TO RUN IN "
214- " FULL MODE FOR ESTIMATES" );
215-
216- std::ofstream of;
217- if (!outfile.empty ())
218- of.open (outfile);
219- std::ostream out (outfile.empty () ? std::cout.rdbuf () : of.rdbuf ());
220-
221- out << " TOTAL_BASES\t EXPECTED_DISTINCT\n " ;
222-
223- out.setf (std::ios_base::fixed, std::ios_base::floatfield);
224- out.precision (1 );
221+ if (!success)
222+ throw std::runtime_error (
223+ " Single estimate failed. Run in full mode for estimates" );
225224
226- out << 0 << ' \t ' << 0 << ' \n ' ;
227- for (std::size_t i = 0 ; i < std::size (coverage_estimates); ++i)
228- out << (i + 1 ) * base_step_size << ' \t '
229- << coverage_estimates[i] * bin_size << ' \n ' ;
225+ write_output (outfile, bin_size, base_step_size, coverage_estimates);
230226 }
231227 else {
232228 if (verbose)
233229 std::cerr << " [BOOTSTRAPPING HISTOGRAM]\n " ;
234230
235- const std::size_t max_iter = 10 * n_bootstraps;
231+ const std::uint32_t max_iter = max_iter_per_bootstrap * n_bootstraps;
236232
237233 std::vector<std::vector<double >> bootstrap_estimates;
238234 extrap_bootstrap (verbose, allow_defects, seed, coverage_hist,
@@ -261,4 +257,4 @@ gc_extrap::main(int argc, char *argv[]) -> int { // NOLINT(*-avoid-c-arrays)
261257 return EXIT_SUCCESS;
262258}
263259
264- // NOLINTEND(*-avoid-magic-numbers,*- narrowing-conversions)
260+ // NOLINTEND(*-narrowing-conversions)
0 commit comments