Skip to content

Commit f780944

Browse files
src/radmeth/radmeth.cpp: Chaned what is output so that for each group, an estimate of the methylation level is provided for each site and an estimate of the shared dispersion parameter. Also removed old code that assigned chunks of data to threads
1 parent bd94e85 commit f780944

1 file changed

Lines changed: 33 additions & 68 deletions

File tree

src/radmeth/radmeth.cpp

Lines changed: 33 additions & 68 deletions
Original file line numberDiff line numberDiff line change
@@ -176,40 +176,11 @@ enum class row_status : std::uint8_t {
176176
na_extreme_cnt,
177177
};
178178

179-
[[nodiscard]] static std::vector<double>
180-
drop_idx(const std::vector<double> &v, const std::size_t idx_to_drop) {
181-
std::vector<double> u;
182-
u.reserve(std::size(v) - 1);
183-
for (auto i = 0u; i < std::size(v); ++i)
184-
if (i != idx_to_drop)
185-
u.push_back(v[i]);
186-
return u;
187-
}
188-
189-
/// ADS: this function is not currently used, as the threads do not operate in
190-
/// "chunks"
191-
/*
192-
static inline void
193-
get_chunk_bounds(const std::uint32_t n_elements, const std::uint32_t n_chunks,
194-
std::vector<std::pair<std::uint32_t, std::uint32_t>> &chunks) {
195-
const std::uint32_t q = n_elements / n_chunks;
196-
const std::uint32_t r = n_elements - q * n_chunks;
197-
std::uint32_t block_start{};
198-
for (std::size_t i = 0; i < n_chunks; ++i) {
199-
const auto sz = (i < r) ? q + 1 : q;
200-
const auto block_end = block_start + sz;
201-
chunks[i] = {block_start, block_end};
202-
block_start = block_end;
203-
}
204-
}
205-
*/
206-
207179
static void
208180
radmeth(const bool show_progress, const bool more_na_info,
209181
const std::uint32_t n_threads, const std::string &table_filename,
210182
const std::string &outfile, const Regression &alt_model,
211183
const Regression &null_model, const std::uint32_t test_factor_idx) {
212-
static constexpr auto suffix_fmt = "\t%ld\t%ld\t%ld\t%ld\n";
213184
static constexpr auto buf_size = 1024;
214185
static constexpr auto max_lines = 16384;
215186

@@ -251,7 +222,7 @@ that the design matrix and the proportion table are correctly formatted.
251222
std::vector<Regression> alt_models(n_threads, alt_model);
252223
std::vector<Regression> null_models(n_threads, null_model);
253224

254-
// std::vector<std::pair<std::uint32_t, std::uint32_t>> chunks(n_threads);
225+
const auto n_groups = alt_model.n_groups();
255226

256227
// Iterate over rows in the file and do llr test on proportions from
257228
// each. Do this in sets of rows to avoid having to spawn too many threads.
@@ -262,19 +233,20 @@ that the design matrix and the proportion table are correctly formatted.
262233
if (n_lines == 0)
263234
break;
264235

265-
/// ADS: chunks not used
266-
// get_chunk_bounds(n_lines, n_threads, chunks);
267-
268236
std::vector<std::thread> threads;
269237
for (auto thread_id = 0u; thread_id < n_threads; ++thread_id) {
270238
threads.emplace_back([&, thread_id] {
271-
/// ADS: chunks not used
272-
// const auto &[chunk_beg, chunk_end] = chunks[thread_id];
239+
std::vector<double> p_estim_alt;
240+
std::vector<double> p_estim_null;
241+
double phi_estim_alt{};
242+
double phi_estim_null{};
243+
273244
auto &t_alt_model = alt_models[thread_id];
274245
auto &t_null_model = null_models[thread_id];
275-
/// ADS: chunks not used
276-
// for (auto b = chunk_beg; b != chunk_end; ++b) {
277246
for (auto b = 0u; b < n_lines; ++b) {
247+
// ADS: rows done by different threads are interleaved because the
248+
// difficult (e.g., high-coverage) rows can be consecutive and this
249+
// balances work better.
278250
if (b % n_threads != thread_id)
279251
continue;
280252
t_alt_model.props.parse(lines[b]);
@@ -291,37 +263,19 @@ that the design matrix and the proportion table are correctly formatted.
291263
if (has_extreme_counts(t_alt_model))
292264
return std::tuple{1.0, row_status::na_extreme_cnt};
293265

294-
std::vector<double> alternate_params;
295-
fit_regression_model(t_alt_model, alternate_params);
266+
fit_regression_model(t_alt_model, p_estim_alt, phi_estim_alt);
267+
296268
t_null_model.props = t_alt_model.props;
297269

298-
auto null_params = drop_idx(alternate_params, test_factor_idx);
270+
fit_regression_model(t_null_model, p_estim_null, phi_estim_null);
299271

300-
fit_regression_model(t_null_model, null_params);
301272
const double p_value =
302273
llr_test(t_null_model.max_loglik, t_alt_model.max_loglik);
303274

304275
return (p_value != p_value) ? std::tuple{1.0, row_status::na}
305276
: std::tuple{p_value, row_status::ok};
306277
}();
307278

308-
std::size_t n_reads_factor = 0;
309-
std::size_t n_reads_others = 0;
310-
std::size_t n_meth_factor = 0;
311-
std::size_t n_meth_others = 0;
312-
313-
const auto &mc = t_alt_model.props.mc;
314-
const auto &vec = t_alt_model.design.tmatrix[test_factor_idx];
315-
for (std::size_t s = 0; s < n_samples; ++s)
316-
if (vec[s] != 0) {
317-
n_reads_factor += mc[s].n_reads;
318-
n_meth_factor += mc[s].n_meth;
319-
}
320-
else {
321-
n_reads_others += mc[s].n_reads;
322-
n_meth_others += mc[s].n_meth;
323-
}
324-
325279
n_bytes[b] = [&] {
326280
// clang-format off
327281
const int n_prefix_bytes =
@@ -349,17 +303,26 @@ that the design matrix and the proportion table are correctly formatted.
349303

350304
cursor += n_pval_bytes;
351305

352-
// clang-format off
353-
const int n_suffix_bytes = std::sprintf(cursor, suffix_fmt,
354-
n_reads_factor,
355-
n_meth_factor,
356-
n_reads_others,
357-
n_meth_others);
358-
// clang-format on
359-
if (n_suffix_bytes < 0)
360-
return n_suffix_bytes;
306+
const int n_param_bytes = [&] {
307+
std::int32_t n_bytes = 0;
308+
for (auto g_idx = 0u; g_idx < n_groups; ++g_idx) {
309+
const int n = std::sprintf(cursor, "\t%f", p_estim_alt[g_idx]);
310+
cursor += n;
311+
if (n < 0)
312+
return n;
313+
n_bytes += n;
314+
}
315+
const int n = std::sprintf(cursor, "\t%f\n", phi_estim_alt);
316+
if (n < 0)
317+
return n;
318+
n_bytes += n;
319+
return n_bytes;
320+
}();
321+
322+
if (n_param_bytes < 0)
323+
return n_param_bytes;
361324

362-
return n_prefix_bytes + n_pval_bytes + n_suffix_bytes;
325+
return n_prefix_bytes + n_pval_bytes + n_param_bytes;
363326
}();
364327
}
365328
});
@@ -450,6 +413,8 @@ main_radmeth(int argc, char *argv[]) {
450413
// initialize full design matrix from file
451414
Regression alt_model;
452415
alt_model.design = read_design(design_filename);
416+
const auto n_samples = alt_model.n_samples();
417+
453418
if (verbose)
454419
std::cerr << "Alternate model:\n" << alt_model.design << '\n';
455420

0 commit comments

Comments
 (0)