4141/* HTSlib */
4242#include < htslib/sam.h>
4343
44- using bamxx::bam_header;
45- using bamxx::bam_rec;
46- using bamxx::bgzf_file;
47-
4844[[nodiscard]] inline bool
4945is_cytosine (const char c) {
5046 return c == ' c' || c == ' C' ;
@@ -73,7 +69,7 @@ is_cpg(const std::string &s, const std::size_t i) {
7369static void
7470read_fasta_file (const std::string &filename, std::vector<std::string> &names,
7571 std::vector<std::string> &sequences) {
76- bgzf_file in (filename, " r" );
72+ bamxx:: bgzf_file in (filename, " r" );
7773 if (!in)
7874 throw std::runtime_error (" error opening genome file: " + filename);
7975 names.clear ();
@@ -96,7 +92,7 @@ read_fasta_file(const std::string &filename, std::vector<std::string> &names,
9692}
9793
9894[[nodiscard]] static std::string
99- get_basecall_model (const bam_header &hdr) {
95+ get_basecall_model (const bamxx:: bam_header &hdr) {
10096 kstring_t ks{};
10197
10298 ks = {0 , 0 , nullptr };
@@ -583,7 +579,7 @@ struct match_counter {
583579};
584580
585581static void
586- count_states_fwd (const bam_rec &aln, std::vector<CountSet> &counts,
582+ count_states_fwd (const bamxx:: bam_rec &aln, std::vector<CountSet> &counts,
587583 mod_prob_buffer &mod_buf, const std::string &chrom,
588584 match_counter &mc) {
589585 /* Move through cigar, reference and read positions without
@@ -612,9 +608,12 @@ count_states_fwd(const bam_rec &aln, std::vector<CountSet> &counts,
612608 const decltype (qpos) end_qpos = qpos + n;
613609 for (; qpos < end_qpos; ++qpos) {
614610 const auto r_nuc = *ref_itr++;
615- mc.add_fwd (r_nuc, ref_itr == end_ref ? ' N' : *ref_itr,
616- seq_nt16_str[bam_seqi (seq, qpos)],
617- qpos == q_lim ? ' N' : seq_nt16_str[bam_seqi (seq, qpos + 1 )]);
611+ if (qpos + 1 < end_qpos) {
612+ mc.add_fwd (r_nuc, ref_itr == end_ref ? ' N' : *ref_itr,
613+ seq_nt16_str[bam_seqi (seq, qpos)],
614+ qpos == q_lim ? ' N'
615+ : seq_nt16_str[bam_seqi (seq, qpos + 1 )]);
616+ }
618617 counts[rpos++].add_count_fwd (*hydroxy_prob_itr, *methyl_prob_itr);
619618 ++methyl_prob_itr;
620619 ++hydroxy_prob_itr;
@@ -638,7 +637,7 @@ count_states_fwd(const bam_rec &aln, std::vector<CountSet> &counts,
638637}
639638
640639static void
641- count_states_rev (const bam_rec &aln, std::vector<CountSet> &counts,
640+ count_states_rev (const bamxx:: bam_rec &aln, std::vector<CountSet> &counts,
642641 mod_prob_buffer &mod_buf, const std::string &chrom,
643642 match_counter &mc) {
644643 /* Move through cigar, reference and (*backward*) through read
@@ -672,8 +671,10 @@ count_states_rev(const bam_rec &aln, std::vector<CountSet> &counts,
672671 const auto r_nuc = *ref_itr++;
673672 const auto q_nuc = seq_nt16_str[bam_seqi (seq, q_idx)];
674673 ++q_idx;
675- mc.add_rev (r_nuc, ref_itr == end_ref ? ' N' : *ref_itr, q_nuc,
676- q_idx == l_qseq ? ' N' : seq_nt16_str[bam_seqi (seq, q_idx)]);
674+ if (end_qpos + 1 < qpos)
675+ mc.add_rev (r_nuc, ref_itr == end_ref ? ' N' : *ref_itr, q_nuc,
676+ q_idx == l_qseq ? ' N'
677+ : seq_nt16_str[bam_seqi (seq, q_idx)]);
677678 --methyl_prob_itr;
678679 --hydroxy_prob_itr;
679680 counts[rpos++].add_count_rev (*hydroxy_prob_itr, *methyl_prob_itr);
@@ -699,7 +700,7 @@ count_states_rev(const bam_rec &aln, std::vector<CountSet> &counts,
699700[[nodiscard]] static std::tuple<std::map<std::int32_t , std::size_t >,
700701 std::set<std::int32_t >>
701702get_tid_to_idx (
702- const bam_header &hdr,
703+ const bamxx:: bam_header &hdr,
703704 const std::unordered_map<std::string, std::size_t > &name_to_idx) {
704705 std::set<std::int32_t > missing_tids;
705706 std::map<std::int32_t , std::size_t > tid_to_idx;
@@ -718,7 +719,7 @@ get_tid_to_idx(
718719}
719720
720721[[nodiscard]] static bool
721- consistent_targets (const bam_header &hdr,
722+ consistent_targets (const bamxx:: bam_header &hdr,
722723 const std::map<std::int32_t , std::size_t > &tid_to_idx,
723724 const std::vector<std::string> &names,
724725 const std::vector<std::size_t > &sizes) {
@@ -741,7 +742,8 @@ consistent_targets(const bam_header &hdr,
741742
742743[[nodiscard]] static bool
743744consistent_existing_targets (
744- const bam_header &hdr, const std::map<std::int32_t , std::size_t > &tid_to_idx,
745+ const bamxx::bam_header &hdr,
746+ const std::map<std::int32_t , std::size_t > &tid_to_idx,
745747 const std::vector<std::string> &names,
746748 const std::vector<std::size_t > &sizes) {
747749 const std::size_t n_targets = hdr.h ->n_targets ;
@@ -805,10 +807,10 @@ struct read_processor {
805807 output_skipped_chromosome (
806808 const std::int32_t tid,
807809 const std::map<std::int32_t , std::size_t > &tid_to_idx,
808- const bam_header &hdr,
810+ const bamxx:: bam_header &hdr,
809811 const std::vector<std::string>::const_iterator chroms_beg,
810812 const std::vector<std::size_t > &chrom_sizes, std::vector<CountSet> &counts,
811- bgzf_file &out) const {
813+ bamxx:: bgzf_file &out) const {
812814
813815 // get the index of the next chrom sequence
814816 const auto chrom_idx = tid_to_idx.find (tid);
@@ -831,7 +833,7 @@ struct read_processor {
831833 }
832834
833835 void
834- write_output_all (const bam_header &hdr, bgzf_file &out,
836+ write_output_all (const bamxx:: bam_header &hdr, bamxx:: bgzf_file &out,
835837 const std::int32_t tid, const std::string &chrom,
836838 const std::vector<CountSet> &counts) const {
837839 static constexpr auto out_fmt = " %ld\t %c\t %s\t %.6g\t %d\t %.6g\t %.6g\n " ;
@@ -886,7 +888,7 @@ struct read_processor {
886888 }
887889
888890 void
889- write_output_sym (const bam_header &hdr, bgzf_file &out,
891+ write_output_sym (const bamxx:: bam_header &hdr, bamxx:: bgzf_file &out,
890892 const std::int32_t tid, const std::string &chrom,
891893 const std::vector<CountSet> &counts) const {
892894 static constexpr auto out_fmt = " %ld\t +\t CpG\t %.6g\t %d\t %.6g\t %.6g\n " ;
@@ -952,8 +954,8 @@ struct read_processor {
952954 }
953955
954956 void
955- write_output (const bam_header &hdr, bgzf_file &out, const std:: int32_t tid ,
956- const std::string &chrom,
957+ write_output (const bamxx:: bam_header &hdr, bamxx:: bgzf_file &out,
958+ const std::int32_t tid, const std:: string &chrom,
957959 const std::vector<CountSet> &counts) const {
958960 if (symmetric)
959961 write_output_sym (hdr, out, tid, chrom, counts);
@@ -989,7 +991,7 @@ struct read_processor {
989991 if (!hts)
990992 throw std::runtime_error (" failed to open input file" );
991993 // load the input file's header
992- bam_header hdr (hts);
994+ bamxx:: bam_header hdr (hts);
993995 if (!hdr)
994996 throw std::runtime_error (" failed to read header" );
995997
@@ -1011,7 +1013,7 @@ struct read_processor {
10111013
10121014 // open the output file
10131015 const std::string output_mode = compress_output ? " w" : " wu" ;
1014- bgzf_file out (outfile, output_mode);
1016+ bamxx:: bgzf_file out (outfile, output_mode);
10151017 if (!out)
10161018 throw std::runtime_error (" error opening output file: " + outfile);
10171019
@@ -1044,7 +1046,7 @@ struct read_processor {
10441046
10451047 // now iterate over the reads, switching chromosomes and writing
10461048 // output as needed
1047- bam_rec aln;
1049+ bamxx:: bam_rec aln;
10481050 std::int32_t prev_tid = -1 ;
10491051
10501052 std::vector<std::string>::const_iterator chrom_itr{};
@@ -1190,7 +1192,7 @@ main_nanocount(int argc, const char **argv) {
11901192 throw std::runtime_error (" thread count cannot be negative" );
11911193
11921194 std::ostringstream cmd;
1193- copy (argv, argv + argc, std::ostream_iterator<const char *>(cmd, " " ));
1195+ std:: copy (argv, argv + argc, std::ostream_iterator<const char *>(cmd, " " ));
11941196
11951197 // file types from HTSlib use "-" for the filename to go to stdout
11961198 if (outfile.empty ())
0 commit comments