Skip to content

Commit 4ecf656

Browse files
Removed the option to disable the sort check, as it did nothing for speed. Added an option to use more threads for compression/decompression.
1 parent 59db7a3 commit 4ecf656

1 file changed

Lines changed: 22 additions & 15 deletions

File tree

src/utils/uniq.cpp

Lines changed: 22 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -184,7 +184,7 @@ get_read(samFile *hts, sam_hdr_t *hdr) {
184184

185185

186186
static void
187-
uniq(const bool VERBOSE, const bool NO_SORT_TEST,
187+
uniq(const bool VERBOSE, const size_t n_threads,
188188
const string &cmd, const string &infile,
189189
const string &statfile, const string &histfile,
190190
const bool bam_format, const string &outfile) {
@@ -193,6 +193,9 @@ uniq(const bool VERBOSE, const bool NO_SORT_TEST,
193193
if (!hts || errno)
194194
throw runtime_error("bad htslib file: " + infile);
195195

196+
if (n_threads > 1 && hts_set_threads(hts, n_threads/2) < 0)
197+
throw runtime_error("error setting threads");
198+
196199
if (hts_get_format(hts)->category != sequence_data)
197200
throw runtime_error("bad file format: " + infile);
198201

@@ -204,6 +207,9 @@ uniq(const bool VERBOSE, const bool NO_SORT_TEST,
204207
samFile *out = bam_format ? hts_open(outfile.c_str(), "wb") :
205208
hts_open(outfile.c_str(), "w");
206209

210+
if (n_threads > 1 && hts_set_threads(out, (n_threads + 1)/2) < 0)
211+
throw runtime_error("error setting threads");
212+
207213
// take care of the output file's header
208214
sam_hdr_t *hdr_out = bam_hdr_dup(hdr);
209215
if (sam_hdr_add_line(hdr_out, "PG", "ID",
@@ -235,18 +241,18 @@ uniq(const bool VERBOSE, const bool NO_SORT_TEST,
235241
while (aln = get_read(hts, hdr)) {
236242
++reads_in;
237243
good_bases_in += qlen(aln);
238-
if (!NO_SORT_TEST) {
239-
// below works because buffer is reset every chrom
240-
if (precedes_by_start(aln, buffer.front()))
241-
throw runtime_error("input not properly sorted:\n" +
242-
read_name(buffer[0]) + "\n" + read_name(aln));
243-
const int32_t chrom = get_tid(aln);
244-
if (chrom != cur_chrom) {
245-
if (chroms_seen[chrom]) throw runtime_error("input not sorted");
246-
chroms_seen[chrom] = true;
247-
cur_chrom = chrom;
248-
}
244+
245+
// below works because buffer is reset every chrom
246+
if (precedes_by_start(aln, buffer.front()))
247+
throw runtime_error("input not properly sorted:\n" +
248+
read_name(buffer[0]) + "\n" + read_name(aln));
249+
const int32_t chrom = get_tid(aln);
250+
if (chrom != cur_chrom) {
251+
if (chroms_seen[chrom]) throw runtime_error("input not sorted");
252+
chroms_seen[chrom] = true;
253+
cur_chrom = chrom;
249254
}
255+
250256
if (!equivalent_chrom_and_start(buffer.front(), aln))
251257
process_buffer(reads_out, good_bases_out, reads_with_dups,
252258
hist, buffer, hdr, out);
@@ -273,7 +279,6 @@ main_uniq(int argc, const char **argv) {
273279
try {
274280

275281
bool VERBOSE = false;
276-
bool NO_SORT_TEST = false;
277282

278283
bool bam_format = false;
279284
bool use_stdout = false;
@@ -284,18 +289,19 @@ main_uniq(int argc, const char **argv) {
284289
string outfile;
285290
string statfile;
286291
string histfile;
292+
size_t n_threads = 1;
287293

288294
/****************** COMMAND LINE OPTIONS ********************/
289295
OptionParser opt_parse(strip_path(argv[0]), "program to remove "
290296
"duplicate reads from sorted mapped reads",
291297
"<in-file> [out-file]", 2);
298+
opt_parse.add_opt("threads", 't', "number of threads", false, n_threads);
292299
opt_parse.add_opt("stats", 'S', "statistics output file", false, statfile);
293300
opt_parse.add_opt("hist", '\0', "histogram output file for library"
294301
" complexity analysis", false, histfile);
295302
opt_parse.add_opt("bam", 'B', "output in BAM format", false, bam_format);
296303
opt_parse.add_opt("stdout", '\0',
297304
"write to standard output", false, use_stdout);
298-
opt_parse.add_opt("disable", 'D', "disable sort test", false, NO_SORT_TEST);
299305
opt_parse.add_opt("seed", 's', "random seed", false, the_seed);
300306
opt_parse.add_opt("verbose", 'v', "print more run info", false, VERBOSE);
301307
opt_parse.set_show_defaults();
@@ -341,10 +347,11 @@ main_uniq(int argc, const char **argv) {
341347
if (VERBOSE)
342348
cerr << "[output file: " << outfile << "]" << endl
343349
<< "[output format: " << (bam_format ? "B" : "S") << "AM]" << endl
350+
<< "[threads requested: " << n_threads << "]" << endl
344351
<< "[command line: \"" << cmd.str() << "\"]" << endl
345352
<< "[random number seed: " << the_seed << "]" << endl;
346353

347-
uniq(VERBOSE, NO_SORT_TEST, cmd.str(),
354+
uniq(VERBOSE, n_threads, cmd.str(),
348355
infile, statfile, histfile, bam_format, outfile);
349356
}
350357
catch (const runtime_error &e) {

0 commit comments

Comments
 (0)