Skip to content

Commit 8827bab

Browse files
amrfinder: updates to allow for zipped input files
1 parent 487b679 commit 8827bab

1 file changed

Lines changed: 36 additions & 5 deletions

File tree

src/amrfinder/amrfinder.cpp

Lines changed: 36 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,8 @@
2323
#include <numeric>
2424
#include <stdexcept>
2525

26+
#include <bamxx.hpp>
27+
2628
#include "GenomicRegion.hpp"
2729
#include "OptionParser.hpp"
2830
#include "smithlab_utils.hpp"
@@ -38,6 +40,36 @@ using std::endl;
3840
using std::unordered_map;
3941
using std::runtime_error;
4042

43+
using bamxx::bgzf_file;
44+
45+
#include <bamxx.hpp>
46+
#include <sstream>
47+
48+
static inline bamxx::bgzf_file &
49+
read_epiread(bamxx::bgzf_file &f, epiread &er) {
50+
std::string line;
51+
if (getline(f, line))
52+
er = epiread(line);
53+
return f;
54+
}
55+
56+
static inline bool
57+
validate_epiread_bgzf_file(const string &filename) {
58+
constexpr size_t max_lines_to_validate = 10000;
59+
bgzf_file in(filename, "r");
60+
if (!in) throw std::runtime_error("failed to open file: " + filename);
61+
62+
string c, s, other;
63+
size_t p = 0;
64+
65+
size_t n_lines = 0;
66+
string line;
67+
while (getline(in, line) && n_lines++ < max_lines_to_validate) {
68+
std::istringstream iss(line);
69+
if (!(iss >> c >> p >> s) || iss >> other) return false;
70+
}
71+
return true;
72+
}
4173

4274
////////////////////////////////////////////////////////////////////////
4375
/// CODE BELOW HERE IS FOR FILTERING THE AMR WINDOWS AND MERGING THEM
@@ -413,7 +445,7 @@ main_amrfinder(int argc, const char **argv) {
413445
const string reads_file(leftover_args.front());
414446
/****************** END COMMAND LINE OPTIONS *****************/
415447

416-
if (!validate_epiread_file(reads_file))
448+
if (!validate_epiread_bgzf_file(reads_file))
417449
throw runtime_error("invalid states file: " + reads_file);
418450

419451
if (VERBOSE)
@@ -424,17 +456,16 @@ main_amrfinder(int argc, const char **argv) {
424456
const EpireadStats epistat(low_prob, high_prob,
425457
critical_value, max_itr, use_bic);
426458

427-
std::ifstream in(reads_file);
428-
if (!in)
429-
throw runtime_error("cannot open input file: " + reads_file);
459+
bgzf_file in(reads_file, "r");
460+
if (!in) throw runtime_error("failed to open input file: " + reads_file);
430461

431462
vector<GenomicRegion> amrs;
432463
size_t windows_tested = 0;
433464
epiread er;
434465
vector<epiread> epireads;
435466
string prev_chrom, curr_chrom, tmp_states;
436467

437-
while (in >> er) {
468+
while (read_epiread(in, er)) {
438469
curr_chrom = er.chr;
439470
if (!epireads.empty() && curr_chrom != prev_chrom) {
440471
windows_tested +=

0 commit comments

Comments
 (0)