Skip to content

Commit a1b609b

Browse files
src/utils/xcounts.cpp: Adding a check for chrom order being consistent between the header or reference and the internals of the counts file
1 parent 85d3dce commit a1b609b

1 file changed

Lines changed: 82 additions & 14 deletions

File tree

src/utils/xcounts.cpp

Lines changed: 82 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323
#include <stdexcept>
2424
#include <string>
2525
#include <system_error>
26+
#include <type_traits> // std::underlying_type_t
2627
#include <vector>
2728

2829
// from smithlab_cpp
@@ -45,6 +46,49 @@ using std::vector;
4546

4647
using bamxx::bgzf_file;
4748

49+
enum class xcounts_err {
50+
// clang-format off
51+
ok = 0,
52+
open_failure = 1,
53+
header_failure = 2,
54+
chromosome_not_found = 3,
55+
inconsistent_chromosome_order = 4,
56+
incorrect_chromosome_size = 5,
57+
failed_to_write_file = 6,
58+
failed_to_parse_site = 7,
59+
// clang-format on
60+
};
61+
62+
struct xcounts_err_cat : std::error_category {
63+
auto name() const noexcept -> const char * override {
64+
return "xcounts error";
65+
}
66+
auto message(const int condition) const -> std::string override {
67+
using std::string_literals::operator""s;
68+
switch (condition) {
69+
case 0: return "ok"s;
70+
case 1: return "failed to open methylome file"s;
71+
case 2: return "failed to parse xcounts header"s;
72+
case 3: return "failed to find chromosome in xcounts header"s;
73+
case 4: return "inconsistent chromosome order"s;
74+
case 5: return "incorrect chromosome size"s;
75+
case 6: return "failed to write to file"s;
76+
case 7: return "failed to parse site"s;
77+
}
78+
std::abort(); // unreacheable
79+
}
80+
};
81+
82+
template <>
83+
struct std::is_error_code_enum<xcounts_err> : public std::true_type {};
84+
85+
std::error_code
86+
make_error_code(xcounts_err e) {
87+
static auto category = xcounts_err_cat{};
88+
return std::error_code(static_cast<std::underlying_type_t<xcounts_err>>(e),
89+
category);
90+
}
91+
4892
template <typename T>
4993
static inline uint32_t
5094
fill_output_buffer(const uint32_t offset, const MSite &s, T &buf) {
@@ -137,10 +181,12 @@ main_xcounts(int argc, const char **argv) {
137181
tpool.set_io(out);
138182
}
139183

184+
std::unordered_map<string, uint32_t> chrom_order;
140185
if (!header_file.empty())
141-
write_counts_header_from_file(header_file, out);
186+
chrom_order = write_counts_header_from_file(header_file, out);
142187
else if (!genome_file.empty())
143-
write_counts_header_from_chrom_sizes(chrom_names, chrom_sizes, out);
188+
chrom_order =
189+
write_counts_header_from_chrom_sizes(chrom_names, chrom_sizes, out);
144190

145191
// use the kstring_t type to more directly use the BGZF file
146192
kstring_t line{0, 0, nullptr};
@@ -152,11 +198,14 @@ main_xcounts(int argc, const char **argv) {
152198

153199
uint32_t offset = 0;
154200
string prev_chrom;
155-
bool status_ok = true;
156201
bool found_header = (!genome_file.empty() || !header_file.empty());
157202

203+
std::error_code ec{};
204+
205+
uint32_t chrom_counter = 0;
206+
158207
MSite site;
159-
while (status_ok && bamxx::getline(in, line)) {
208+
while (ec == std::errc{} && bamxx::getline(in, line)) {
160209
if (is_counts_header_line(line.s)) {
161210
if (!genome_file.empty() || !header_file.empty())
162211
continue;
@@ -166,34 +215,53 @@ main_xcounts(int argc, const char **argv) {
166215
continue;
167216
}
168217

169-
status_ok = site.initialize(line.s, line.s + line.l);
170-
if (!status_ok || !found_header)
218+
if (!site.initialize(line.s, line.s + line.l)) {
219+
ec = xcounts_err::failed_to_parse_site;
220+
break;
221+
}
222+
if (!found_header) {
223+
ec = xcounts_err::header_failure;
171224
break;
225+
}
172226

173227
if (site.chrom != prev_chrom) {
174228
if (verbose)
175229
cerr << "processing: " << site.chrom << endl;
230+
231+
if (!chrom_order.empty()) {
232+
const auto expected_chrom_counter = chrom_order.find(site.chrom);
233+
if (expected_chrom_counter == std::cend(chrom_order)) {
234+
ec = xcounts_err::chromosome_not_found;
235+
break;
236+
}
237+
if (expected_chrom_counter->second != chrom_counter) {
238+
ec = xcounts_err::inconsistent_chromosome_order;
239+
break;
240+
}
241+
}
242+
243+
chrom_counter++;
244+
176245
prev_chrom = site.chrom;
177246
offset = 0;
178247

179248
site.chrom += '\n';
180249
const int64_t sz = size(site.chrom);
181-
status_ok = bgzf_write(out.f, site.chrom.data(), sz) == sz;
250+
if (bgzf_write(out.f, site.chrom.data(), sz) != sz)
251+
ec = xcounts_err::failed_to_write_file;
182252
}
183253
if (site.n_reads > 0) {
184254
const int64_t sz = fill_output_buffer(offset, site, buf);
185-
status_ok = bgzf_write(out.f, buf.data(), sz) == sz;
255+
if (bgzf_write(out.f, buf.data(), sz) != sz)
256+
ec = xcounts_err::failed_to_write_file;
186257
offset = site.pos;
187258
}
188259
}
189260
ks_free(&line);
190261

191-
if (!status_ok) {
192-
cerr << "failed converting " << filename << " to " << outfile << endl;
193-
return EXIT_FAILURE;
194-
}
195-
if (!found_header) {
196-
cerr << "no header provided or found" << endl;
262+
if (ec) {
263+
cerr << "failed converting " << filename << " to " << outfile << endl
264+
<< ec << endl;
197265
return EXIT_FAILURE;
198266
}
199267
}

0 commit comments

Comments
 (0)