2323#include < numeric>
2424#include < stdexcept>
2525
26+ #include < bamxx.hpp>
27+
2628#include " GenomicRegion.hpp"
2729#include " OptionParser.hpp"
2830#include " smithlab_utils.hpp"
@@ -38,6 +40,36 @@ using std::endl;
3840using std::unordered_map;
3941using std::runtime_error;
4042
43+ using bamxx::bgzf_file;
44+
45+ #include < bamxx.hpp>
46+ #include < sstream>
47+
48+ static inline bamxx::bgzf_file &
49+ read_epiread (bamxx::bgzf_file &f, epiread &er) {
50+ std::string line;
51+ if (getline (f, line))
52+ er = epiread (line);
53+ return f;
54+ }
55+
56+ static inline bool
57+ validate_epiread_bgzf_file (const string &filename) {
58+ constexpr size_t max_lines_to_validate = 10000 ;
59+ bgzf_file in (filename, " r" );
60+ if (!in) throw std::runtime_error (" failed to open file: " + filename);
61+
62+ string c, s, other;
63+ size_t p = 0 ;
64+
65+ size_t n_lines = 0 ;
66+ string line;
67+ while (getline (in, line) && n_lines++ < max_lines_to_validate) {
68+ std::istringstream iss (line);
69+ if (!(iss >> c >> p >> s) || iss >> other) return false ;
70+ }
71+ return true ;
72+ }
4173
4274// //////////////////////////////////////////////////////////////////////
4375// / CODE BELOW HERE IS FOR FILTERING THE AMR WINDOWS AND MERGING THEM
@@ -413,7 +445,7 @@ main_amrfinder(int argc, const char **argv) {
413445 const string reads_file (leftover_args.front ());
414446 /* ***************** END COMMAND LINE OPTIONS *****************/
415447
416- if (!validate_epiread_file (reads_file))
448+ if (!validate_epiread_bgzf_file (reads_file))
417449 throw runtime_error (" invalid states file: " + reads_file);
418450
419451 if (VERBOSE)
@@ -424,17 +456,16 @@ main_amrfinder(int argc, const char **argv) {
424456 const EpireadStats epistat (low_prob, high_prob,
425457 critical_value, max_itr, use_bic);
426458
427- std::ifstream in (reads_file);
428- if (!in)
429- throw runtime_error (" cannot open input file: " + reads_file);
459+ bgzf_file in (reads_file, " r" );
460+ if (!in) throw runtime_error (" failed to open input file: " + reads_file);
430461
431462 vector<GenomicRegion> amrs;
432463 size_t windows_tested = 0 ;
433464 epiread er;
434465 vector<epiread> epireads;
435466 string prev_chrom, curr_chrom, tmp_states;
436467
437- while (in >> er ) {
468+ while (read_epiread (in, er) ) {
438469 curr_chrom = er.chr ;
439470 if (!epireads.empty () && curr_chrom != prev_chrom) {
440471 windows_tested +=
0 commit comments