@@ -123,7 +123,7 @@ write_hist_output(const vector<size_t> &hist, const string &histfile) {
123123static void
124124process_inner_buffer (const vector<bam1_t *>::const_iterator it,
125125 const vector<bam1_t *>::const_iterator jt,
126- sam_hdr_t *hdr, htsFile *out,
126+ sam_hdr_t *hdr, samFile *out,
127127 size_t &reads_out,
128128 size_t &good_bases_out,
129129 size_t &reads_with_dups,
@@ -146,7 +146,7 @@ process_inner_buffer(const vector<bam1_t*>::const_iterator it,
146146static void
147147process_buffer (size_t &reads_out, size_t &good_bases_out,
148148 size_t &reads_with_dups, vector<size_t > &hist,
149- vector<bam1_t *> &buffer, sam_hdr_t *hdr, htsFile *out) {
149+ vector<bam1_t *> &buffer, sam_hdr_t *hdr, samFile *out) {
150150 sort (begin (buffer), end (buffer), precedes_by_end_and_strand);
151151 auto it (begin (buffer));
152152 auto jt = it + 1 ;
@@ -170,41 +170,42 @@ process_buffer(size_t &reads_out, size_t &good_bases_out,
170170
171171
172172static bam1_t *
173- get_read (htsFile *hts, bam_hdr_t *hdr) {
173+ get_read (samFile *hts, sam_hdr_t *hdr) {
174174 bam1_t *b = bam_init1 ();
175175 const int result = sam_read1 (hts, hdr, b);
176- if (result > 0 ) return b;
176+ if (result >= 0 ) return b;
177177
178178 if (result < -1 )
179179 throw runtime_error (" error reading file: " + string (hts->fn ));
180- else // means EOF, so we free this read
180+ else // -1 should mean EOF, so we free this read
181181 bam_destroy1 (b);
182182 return 0 ;
183183}
184184
185185
186186static void
187187uniq (const bool VERBOSE, const bool NO_SORT_TEST,
188- const string &cmd, const string &infile,
189- const string &statfile, const string &histfile,
190- const string &outfile) {
188+ const string &cmd, const string &infile,
189+ const string &statfile, const string &histfile,
190+ const bool bam_format, const string &outfile) {
191191
192- htsFile * hts = hts_open (infile.c_str (), " r" );
192+ samFile * hts = hts_open (infile.c_str (), " r" );
193193 if (!hts || errno)
194194 throw runtime_error (" bad htslib file: " + infile);
195195
196196 if (hts_get_format (hts)->category != sequence_data)
197197 throw runtime_error (" bad file format: " + infile);
198198
199- bam_hdr_t *hdr = sam_hdr_read (hts);
199+ sam_hdr_t *hdr = sam_hdr_read (hts);
200200 if (!hdr)
201201 throw runtime_error (" failed to read header: " + infile);
202202
203203 // open the output file
204- htsFile *out = hts_open (outfile.c_str (), " wb" );
204+ samFile *out = bam_format ? hts_open (outfile.c_str (), " wb" ) :
205+ hts_open (outfile.c_str (), " w" );
205206
206207 // take care of the output file's header
207- bam_hdr_t *hdr_out = bam_hdr_dup (hdr);
208+ sam_hdr_t *hdr_out = bam_hdr_dup (hdr);
208209 if (sam_hdr_add_line (hdr_out, " PG" , " ID" ,
209210 " DNMTOOLS" , " VN" , VERSION, " CL" , cmd.c_str (), NULL ))
210211 throw runtime_error (" failed to format header" );
@@ -274,6 +275,9 @@ main_uniq(int argc, const char **argv) {
274275 bool VERBOSE = false ;
275276 bool NO_SORT_TEST = false ;
276277
278+ bool bam_format = false ;
279+ bool use_stdout = false ;
280+
277281 // ADS: Not recommended to change this seed. It shouldn't matter
278282 // at all, and we want results to behave as deterministic.
279283 size_t the_seed = 408 ;
@@ -288,8 +292,10 @@ main_uniq(int argc, const char **argv) {
288292 opt_parse.add_opt (" stats" , ' S' , " statistics output file" , false , statfile);
289293 opt_parse.add_opt (" hist" , ' \0 ' , " histogram output file for library"
290294 " complexity analysis" , false , histfile);
291- opt_parse.add_opt (" disable" , ' D' , " disable sort test" ,
292- false , NO_SORT_TEST);
295+ opt_parse.add_opt (" bam" , ' B' , " output in BAM format" , false , bam_format);
296+ opt_parse.add_opt (" stdout" , ' \0 ' ,
297+ " write to standard output" , false , use_stdout);
298+ opt_parse.add_opt (" disable" , ' D' , " disable sort test" , false , NO_SORT_TEST);
293299 opt_parse.add_opt (" seed" , ' s' , " random seed" , false , the_seed);
294300 opt_parse.add_opt (" verbose" , ' v' , " print more run info" , false , VERBOSE);
295301 opt_parse.set_show_defaults ();
@@ -308,14 +314,21 @@ main_uniq(int argc, const char **argv) {
308314 cerr << opt_parse.option_missing_message () << endl;
309315 return EXIT_SUCCESS;
310316 }
311- if (leftover_args.size () != 1 && leftover_args.size () != 2 ) {
317+ if (leftover_args.size () == 1 && !use_stdout) {
318+ cerr << opt_parse.help_message () << endl
319+ << opt_parse.about_message () << endl;
320+ return EXIT_SUCCESS;
321+ }
322+ if (leftover_args.size () == 2 && use_stdout) {
312323 cerr << opt_parse.help_message () << endl
313324 << opt_parse.about_message () << endl;
314325 return EXIT_SUCCESS;
315326 }
316327 const string infile (leftover_args.front ());
317- if (leftover_args.size () == 2 )
328+ if (leftover_args.size () == 2 && !use_stdout )
318329 outfile = leftover_args.back ();
330+ else
331+ outfile = string (" -" ); // so htslib can write to stdout
319332 /* ***************** END COMMAND LINE OPTIONS *****************/
320333
321334 // ADS: Random here is because we choose randomly when keeping one
@@ -327,11 +340,12 @@ main_uniq(int argc, const char **argv) {
327340
328341 if (VERBOSE)
329342 cerr << " [output file: " << outfile << " ]" << endl
343+ << " [output format: " << (bam_format ? " B" : " S" ) << " AM]" << endl
330344 << " [command line: \" " << cmd.str () << " \" ]" << endl
331- << " [random number seed: " << the_seed << " ]" << endl
332- << endl;
345+ << " [random number seed: " << the_seed << " ]" << endl;
333346
334- uniq (VERBOSE, NO_SORT_TEST, cmd.str (), infile, statfile, histfile, outfile);
347+ uniq (VERBOSE, NO_SORT_TEST, cmd.str (),
348+ infile, statfile, histfile, bam_format, outfile);
335349 }
336350 catch (const runtime_error &e) {
337351 cerr << e.what () << endl;
0 commit comments