Skip to content

Commit 8f7bc67

Browse files
src/load_data_for_complexity.cpp: modernizing
1 parent 5004a5e commit 8f7bc67

1 file changed

Lines changed: 57 additions & 58 deletions

File tree

src/load_data_for_complexity.cpp

Lines changed: 57 additions & 58 deletions
Original file line numberDiff line numberDiff line change
@@ -49,10 +49,10 @@ width(const T &x) -> std::uint32_t {
4949
return x.stop - x.start;
5050
}
5151

52-
static bool
52+
static auto
5353
update_pe_duplicate_counts_hist(const Interval6 &curr, const Interval6 &prev,
5454
std::vector<double> &counts_hist,
55-
std::size_t &current_count) {
55+
std::size_t &current_count) -> bool {
5656
// check if reads are sorted
5757
if (curr.chrom == prev.chrom && curr.start < prev.start &&
5858
curr.stop < prev.stop)
@@ -95,27 +95,26 @@ update_se_duplicate_counts_hist(const Interval6 &curr, const Interval6 &prev,
9595
++current_count;
9696
}
9797

98-
struct Interval6OrderChecker {
99-
bool
100-
operator()(const Interval6 &prev, const Interval6 &curr) const {
98+
struct interval_greater {
99+
auto
100+
operator()(const Interval6 &prev, const Interval6 &curr) const -> bool {
101101
return curr < prev; // prev > curr
102102
}
103103
};
104104

105-
typedef std::priority_queue<Interval6, std::vector<Interval6>,
106-
Interval6OrderChecker>
107-
ReadPQ;
105+
using read_pq =
106+
std::priority_queue<Interval6, std::vector<Interval6>, interval_greater>;
108107

109-
static bool
110-
is_ready_to_pop(const ReadPQ &pq, const Interval6 &interval,
111-
const std::size_t max_width) {
108+
static auto
109+
is_ready_to_pop(const read_pq &pq, const Interval6 &interval,
110+
const std::size_t max_width) -> bool {
112111
return pq.top().chrom != interval.chrom ||
113112
pq.top().stop + max_width < interval.start;
114113
}
115114

116115
static void
117116
empty_pq(Interval6 &prev, std::size_t &current_count,
118-
std::vector<double> &counts_hist, ReadPQ &read_pq,
117+
std::vector<double> &counts_hist, read_pq &read_pq,
119118
const std::string &input_file_name) {
120119
const auto curr = read_pq.top();
121120
read_pq.pop();
@@ -134,14 +133,12 @@ empty_pq(Interval6 &prev, std::size_t &current_count,
134133
prev = curr;
135134
}
136135

137-
/* this code is for BED file input */
136+
// for BED file input
138137

139-
std::size_t
138+
auto
140139
load_counts_bed_se(const std::string &input_file_name,
141-
std::vector<double> &counts_hist) {
142-
// resize vals_hist
143-
counts_hist.clear();
144-
counts_hist.resize(2, 0.0);
140+
std::vector<double> &counts_hist) -> std::size_t {
141+
counts_hist = std::vector<double>(2, 0.0);
145142

146143
std::ifstream in(input_file_name);
147144
if (!in)
@@ -168,9 +165,9 @@ load_counts_bed_se(const std::string &input_file_name,
168165
return n_reads;
169166
}
170167

171-
std::size_t
168+
auto
172169
load_counts_bed_pe(const std::string &input_file_name,
173-
std::vector<double> &counts_hist) {
170+
std::vector<double> &counts_hist) -> std::size_t {
174171
// reset counts_hist to two zero-valued entries
175172
counts_hist.clear();
176173
counts_hist.resize(2, 0.0);
@@ -205,8 +202,9 @@ load_counts_bed_pe(const std::string &input_file_name,
205202
return n_reads;
206203
}
207204

208-
std::size_t
209-
load_counts(const std::string &infile, std::vector<double> &counts_hist) {
205+
auto
206+
load_counts(const std::string &infile,
207+
std::vector<double> &counts_hist) -> std::size_t {
210208
std::ifstream in(infile);
211209
if (!in)
212210
throw std::runtime_error("failed to open file: " + infile);
@@ -228,8 +226,9 @@ load_counts(const std::string &infile, std::vector<double> &counts_hist) {
228226
}
229227

230228
// returns number of reads from file containing counts histogram
231-
std::size_t
232-
load_histogram(const std::string &filename, std::vector<double> &counts_hist) {
229+
auto
230+
load_histogram(const std::string &filename,
231+
std::vector<double> &counts_hist) -> std::size_t {
233232
counts_hist.clear();
234233

235234
std::ifstream in(filename);
@@ -305,7 +304,7 @@ load_coverage_counts(const std::string &infile, const std::uint32_t seed,
305304
throw std::runtime_error("problem opening file: " + infile);
306305

307306
// priority queue to reorder the split reads
308-
ReadPQ PQ;
307+
read_pq pq;
309308

310309
Interval6 prev;
311310
std::size_t n_reads{};
@@ -318,19 +317,19 @@ load_coverage_counts(const std::string &infile, const std::uint32_t seed,
318317

319318
// add split intervals to the priority queue
320319
for (const auto &i : splits)
321-
PQ.push(i);
320+
pq.push(i);
322321

323322
if (std::size(splits) > 0) {
324323
// remove intervals from the priority queue
325-
while (!PQ.empty() && is_ready_to_pop(PQ, splits.back(), max_width))
326-
empty_pq(prev, current_count, coverage_hist, PQ, infile);
324+
while (!pq.empty() && is_ready_to_pop(pq, splits.back(), max_width))
325+
empty_pq(prev, current_count, coverage_hist, pq, infile);
327326
}
328327
++n_reads;
329328
}
330329

331330
// done adding reads, now spit the rest out
332-
while (!PQ.empty())
333-
empty_pq(prev, current_count, coverage_hist, PQ, infile);
331+
while (!pq.empty())
332+
empty_pq(prev, current_count, coverage_hist, pq, infile);
334333

335334
return n_reads;
336335
}
@@ -341,8 +340,8 @@ struct genomic_interval {
341340
std::int32_t tid{-1}; // indicates uninitialized
342341
hts_pos_t start{};
343342
hts_pos_t stop{};
344-
bool
345-
operator<(const genomic_interval &rhs) const {
343+
auto
344+
operator<(const genomic_interval &rhs) const -> bool {
346345
// clang-format off
347346
return (tid < rhs.tid ||
348347
(tid == rhs.tid &&
@@ -360,16 +359,16 @@ struct aln_pos {
360359
aln_pos(const std::int32_t tid, const hts_pos_t pos) : tid{tid}, pos{pos} {}
361360
explicit aln_pos(const bamxx::bam_rec &a) :
362361
tid{get_tid(a)}, pos{get_pos(a)} {}
363-
bool
364-
operator<(const aln_pos &rhs) const {
362+
auto
363+
operator<(const aln_pos &rhs) const -> bool {
365364
return tid < rhs.tid || (tid == rhs.tid && pos < rhs.pos);
366365
}
367-
bool
368-
operator>(const aln_pos &rhs) const {
366+
auto
367+
operator>(const aln_pos &rhs) const -> bool {
369368
return tid > rhs.tid || (tid == rhs.tid && pos > rhs.pos);
370369
}
371-
bool
372-
operator!=(const aln_pos &rhs) const {
370+
auto
371+
operator!=(const aln_pos &rhs) const -> bool {
373372
// ADS: ordered to check pos first
374373
return pos != rhs.pos || tid != rhs.tid;
375374
}
@@ -382,22 +381,23 @@ struct aln_pos_pair {
382381
hts_pos_t mpos{};
383382
explicit aln_pos_pair(const bamxx::bam_rec &a) :
384383
tid{get_tid(a)}, pos{get_pos(a)}, mtid{get_mtid(a)}, mpos{get_mpos(a)} {}
385-
bool
386-
operator<(const aln_pos_pair &rhs) const {
384+
auto
385+
operator<(const aln_pos_pair &rhs) const -> bool {
387386
// ADS: only compares on tid and pos, NOT mtid or mpos
388387
return tid < rhs.tid || (tid == rhs.tid && pos < rhs.pos);
389388
}
390-
bool
391-
operator!=(const aln_pos_pair &rhs) const {
389+
auto
390+
operator!=(const aln_pos_pair &rhs) const -> bool {
392391
// ADS: ordered to check pos first
393392
return pos != rhs.pos || tid != rhs.tid || mtid != rhs.mtid ||
394393
mpos != rhs.mpos;
395394
}
396395
};
397396

398397
template <typename T>
399-
[[nodiscard]] static inline T
400-
round_position(const T x, const std::uint32_t bin_size, const double frac) {
398+
[[nodiscard]] static inline auto
399+
round_position(const T x, const std::uint32_t bin_size,
400+
const double frac) -> T {
401401
// probabilistically round read ends so they are at bin boundaries
402402
const double lo = (x / bin_size) * bin_size;
403403
const double hi = ((x + bin_size - 1) / bin_size) * bin_size;
@@ -420,8 +420,8 @@ split_genomic_interval(const genomic_interval &gi, std::mt19937 &generator,
420420
output.emplace_back(gi.tid, pos);
421421
}
422422

423-
static inline bool
424-
not_mapped(const bamxx::bam_rec &aln) {
423+
static inline auto
424+
not_mapped(const bamxx::bam_rec &aln) -> bool {
425425
return get_tid(aln) == -1;
426426
}
427427

@@ -444,9 +444,9 @@ update_duplicate_counts_hist_BAM(const T &curr, const T &prev,
444444
}
445445

446446
template <typename aln_pos_t>
447-
std::size_t
447+
auto
448448
load_counts_BAM(const std::uint32_t n_threads, const std::string &inputfile,
449-
std::vector<double> &counts_hist) {
449+
std::vector<double> &counts_hist) -> std::size_t {
450450
bamxx::bam_tpool tp(n_threads);
451451

452452
bamxx::bam_in hts(inputfile); // assume already checked
@@ -506,15 +506,15 @@ load_counts_BAM(const std::uint32_t n_threads, const std::string &inputfile,
506506
return n_reads;
507507
}
508508

509-
std::size_t
509+
auto
510510
load_counts_BAM_se(const std::uint32_t n_threads, const std::string &inputfile,
511-
std::vector<double> &counts_hist) {
511+
std::vector<double> &counts_hist) -> std::size_t {
512512
return load_counts_BAM<aln_pos>(n_threads, inputfile, counts_hist);
513513
}
514514

515-
std::size_t
515+
auto
516516
load_counts_BAM_pe(const std::uint32_t n_threads, const std::string &inputfile,
517-
std::vector<double> &counts_hist) {
517+
std::vector<double> &counts_hist) -> std::size_t {
518518
return load_counts_BAM<aln_pos_pair>(n_threads, inputfile, counts_hist);
519519
}
520520

@@ -535,12 +535,12 @@ update_coverage_hist(const T &curr, const T &prev,
535535

536536
// ADS: don't care if mapped reads are SE or PE, we only need the first mate
537537
// for each mapped read
538-
std::size_t
538+
auto
539539
load_coverage_counts_BAM(const std::uint32_t n_threads,
540540
const std::string &inputfile, const std::uint32_t seed,
541541
const std::size_t bin_size,
542542
const std::size_t max_width,
543-
std::vector<double> &coverage_hist) {
543+
std::vector<double> &coverage_hist) -> std::size_t {
544544
std::mt19937 generator(seed);
545545

546546
bamxx::bam_tpool tp(n_threads);
@@ -568,14 +568,13 @@ load_coverage_counts_BAM(const std::uint32_t n_threads,
568568
std::size_t current_count = 1;
569569

570570
// initialize priority queue to reorder the split reads
571-
std::priority_queue<aln_pos, std::vector<aln_pos>, std::greater<aln_pos>> pq;
571+
std::priority_queue<aln_pos, std::vector<aln_pos>, std::greater<>> pq;
572572
std::vector<aln_pos> parts; // reuse allocated space
573573
aln_pos prev_part;
574574
genomic_interval prev;
575575

576-
// max_dist indicates when we think we can assume the read parts
577-
// will be sorted and can be processed; this is not the same as the
578-
// full reads being sorted
576+
// max_dist indicates when we think we can assume the read parts will be
577+
// sorted and can be processed; not the same as the full reads being sorted
579578
const hts_pos_t max_dist = bin_size + max_width;
580579

581580
const auto can_pop = [&](const auto &last) {

0 commit comments

Comments
 (0)