Skip to content

Commit fcf9ade

Browse files
Merge pull request #87 from smithlabcode/dnmtools-commands
Dnmtools commands
2 parents ca6589c + e4c6bd8 commit fcf9ade

1 file changed

Lines changed: 185 additions & 140 deletions

File tree

src/dnmtools.cpp

Lines changed: 185 additions & 140 deletions
Original file line numberDiff line numberDiff line change
@@ -13,157 +13,202 @@
1313
* General Public License for more details.
1414
*/
1515

16+
#include <config.h>
17+
18+
#include <algorithm>
19+
#include <functional>
20+
#include <iomanip>
1621
#include <iostream>
22+
#include <sstream>
1723
#include <string>
18-
#include <cstring>
19-
20-
#include <config.h>
24+
#include <vector>
2125

22-
using std::string;
23-
using std::to_string;
24-
using std::cerr;
26+
using std::begin;
27+
using std::end;
2528
using std::cout;
2629
using std::endl;
30+
using std::pair;
31+
using std::string;
32+
using std::to_string;
33+
using std::vector;
34+
35+
static const string PROGRAM_NAME = "dnmtools";
36+
37+
struct dnmtools_command {
38+
string tag;
39+
string description;
40+
std::function<int(const int, const char **)> fun;
41+
42+
auto operator()(const int argc, const char **argv) const -> int {
43+
return fun(argc - 1, argv + 1);
44+
}
45+
};
46+
47+
auto
48+
operator<<(std::ostream &out, const dnmtools_command &cmd) -> std::ostream & {
49+
static const size_t pad_size = 4;
50+
static const size_t offset = 15;
51+
static const string pad(pad_size, ' ');
52+
return out << pad << std::left << std::setw(offset) << (cmd.tag + ":")
53+
<< cmd.description;
54+
}
2755

28-
#define PROGRAM_NAME "dnmtools"
29-
30-
int abismal(int argc, const char **argv);
31-
int abismalidx(int argc, const char **argv);
32-
int simreads(int argc, const char **argv);
33-
int main_counts(int argc, const char **argv);
34-
int main_allelicmeth(int argc, const char **argv);
35-
int main_amrfinder(int argc, const char **argv);
36-
int main_amrtester(int argc, const char **argv);
37-
int main_bsrate(int argc, const char **argv);
38-
int main_hmr(int argc, const char **argv);
39-
int main_hmr_rep(int argc, const char **argv);
40-
int main_hypermr(int argc, const char **argv);
41-
int main_levels(int argc, const char **argv);
42-
int main_methentropy(int argc, const char **argv);
43-
int main_methstates(int argc, const char **argv);
44-
int main_multimethstat(int argc, const char **argv);
45-
int main_pmd(int argc, const char **argv);
46-
int main_roimethstat(int argc, const char **argv);
47-
int main_mlml(int argc, const char **argv);
48-
int main_dmr(int argc, const char **argv);
49-
int main_methdiff(int argc, const char **argv);
50-
int main_radmeth_adjust(int argc, const char **argv);
51-
int main_radmeth(int argc, const char **argv);
52-
int main_radmeth_merge(int argc, const char **argv);
53-
int main_clean_hairpins(int argc, const char **argv);
54-
int main_uniq(int argc, const char **argv);
55-
int main_fast_liftover(int argc, const char **argv);
56-
int main_format(int argc, const char **argv);
57-
int main_guessprotocol(int argc, const char **argv);
58-
int main_lc_approx(int argc, const char **argv);
59-
int main_lift_filter(int argc, const char **argv);
60-
int main_merge_bsrate(int argc, const char **argv);
61-
int main_merge_methcounts(int argc, const char **argv);
62-
int main_selectsites(int argc, const char **argv);
63-
int main_symmetric_cpgs(int argc, const char **argv);
56+
// ADS: not sure of best way to acquire these below beyond simply
57+
// declaring them here
58+
int
59+
abismal(int argc, const char **argv);
60+
int
61+
abismalidx(int argc, const char **argv);
62+
int
63+
simreads(int argc, const char **argv);
64+
int
65+
main_counts(int argc, const char **argv);
66+
int
67+
main_allelicmeth(int argc, const char **argv);
68+
int
69+
main_amrfinder(int argc, const char **argv);
70+
int
71+
main_amrtester(int argc, const char **argv);
72+
int
73+
main_bsrate(int argc, const char **argv);
74+
int
75+
main_hmr(int argc, const char **argv);
76+
int
77+
main_hmr_rep(int argc, const char **argv);
78+
int
79+
main_hypermr(int argc, const char **argv);
80+
int
81+
main_levels(int argc, const char **argv);
82+
int
83+
main_methentropy(int argc, const char **argv);
84+
int
85+
main_methstates(int argc, const char **argv);
86+
int
87+
main_multimethstat(int argc, const char **argv);
88+
int
89+
main_pmd(int argc, const char **argv);
90+
int
91+
main_roimethstat(int argc, const char **argv);
92+
int
93+
main_mlml(int argc, const char **argv);
94+
int
95+
main_dmr(int argc, const char **argv);
96+
int
97+
main_methdiff(int argc, const char **argv);
98+
int
99+
main_radmeth_adjust(int argc, const char **argv);
100+
int
101+
main_radmeth(int argc, const char **argv);
102+
int
103+
main_radmeth_merge(int argc, const char **argv);
104+
int
105+
main_clean_hairpins(int argc, const char **argv);
106+
int
107+
main_uniq(int argc, const char **argv);
108+
int
109+
main_fast_liftover(int argc, const char **argv);
110+
int
111+
main_format(int argc, const char **argv);
112+
int
113+
main_guessprotocol(int argc, const char **argv);
114+
int
115+
main_lc_approx(int argc, const char **argv);
116+
int
117+
main_lift_filter(int argc, const char **argv);
118+
int
119+
main_merge_bsrate(int argc, const char **argv);
120+
int
121+
main_merge_methcounts(int argc, const char **argv);
122+
int
123+
main_selectsites(int argc, const char **argv);
124+
int
125+
main_symmetric_cpgs(int argc, const char **argv);
64126

65127
void
66-
print_help() {
67-
static const string sep = " ";
68-
cout << "Program: " << PROGRAM_NAME << "\n";
69-
cout << "Version: " << VERSION << "\n";
70-
cout << "Usage: " << PROGRAM_NAME << " <command> [options]\n";
71-
cout << "Commands:";
72-
73-
cout << "\n" << sep << "read mapping:\n";
74-
cout << sep+sep << "abismal: map FASTQ reads to a FASTA reference genome or an index\n";
75-
cout << sep+sep << "abismalidx: convert a FASTA reference genome to an abismal index\n";
76-
cout << sep+sep << "simreads: simulate reads in a FASTA reference genome\n";
77-
78-
cout << "\n" << sep << "methylome construction:\n";
79-
cout << sep+sep << "format: convert SAM/BAM mapped bs-seq reads to standard dnmtools format\n";
80-
cout << sep+sep << "uniq: remove duplicate reads from sorted mapped reads\n";
81-
cout << sep+sep << "bsrate: compute the BS conversion rate from BS-seq reads mapped to a genome\n";
82-
cout << sep+sep << "counts: get methylation levels from mapped WGBS reads\n";
83-
cout << sep+sep << "sym: get CpG sites and make methylation levels symmetric.\n";
84-
cout << sep+sep << "levels: compute methylation summary statistics from a counts file\n";
85-
86-
cout << "\n" << sep << "methylome analysis:\n";
87-
cout << sep+sep << "hmr: identify hypomethylated regions.\n";
88-
cout << sep+sep << "hmr-rep: identify hypomethylated regions in a set of replicate methylomes\n";
89-
cout << sep+sep << "entropy: compute methylation entropy in sliding window\n";
90-
cout << sep+sep << "multistat: summarize methylation from to genomic intervals in a BED file.\n";
91-
cout << sep+sep << "pmd: identify partially methylated domains\n";
92-
cout << sep+sep << "roi: compute average CpG methylation in each of a set of genomic interval\n";
93-
cout << sep+sep << "mlml: program to estimate hydroxymethylation levels\n";
94-
95-
cout << "\n" << sep << "allele-specific methylation:\n";
96-
cout << sep+sep << "states: convert read sequences in SAM format to methylation states at CpGs covered by those reads\n";
97-
cout << sep+sep << "allelic: computes probability of allele-specific methylation at each tuple of CpGs\n";
98-
cout << sep+sep << "amrfinder: identify regions of allele-specific methylation\n";
99-
cout << sep+sep << "amrtester: resolve epi-alleles\n";
100-
101-
cout << "\n" << sep << "differential methylation:\n";
102-
cout << sep+sep << "dmr: computes DMRs based on HMRs and probability of differences at individual CpGs\n";
103-
cout << sep+sep << "diff: compute probability that site has higher methylation in file A than B\n";
104-
cout << sep+sep << "radmeth: computes differentially methylated CpGs\n";
105-
cout << sep+sep << "radadjust: adjust p-values of radmeth output\n";
106-
cout << sep+sep << "radmerge: merge significant CpGs in radmeth output\n";
107-
108-
cout << "\n" << sep << "methylation visualization:\n";
109-
cout << sep+sep << "fastlift: liftover methylation levels between species\n";
110-
cout << sep+sep << "liftfilter: filter CpG regions that do not exist in resulting genome\n";
111-
112-
cout << "\n" << sep << "general-purpose tools:\n";
113-
cout << sep+sep << "cleanhp: fix and stat invdup/hairping reads\n";
114-
cout << sep+sep << "guessprotocol: guess whether protocol is ordinary, pbat or random\n";
115-
cout << sep+sep << "lc: approximate line counts in a file\n";
116-
cout << sep+sep << "merge-bsrate: merge the BS conversion rate from two sets of BS-seq reads mapped to a genome\n";
117-
cout << sep+sep << "merge: merge multiple counts files\n";
118-
cout << sep+sep << "selectsites: sites inside a set of genomic intervals\n";
119-
120-
cout << "\n";
128+
print_help(
129+
const vector<pair<string, vector<dnmtools_command>>> &command_groups) {
130+
cout << "Program: " << PROGRAM_NAME << "\n"
131+
<< "Version: " << VERSION << "\n"
132+
<< "Usage: " << PROGRAM_NAME << " <command> [options]\n"
133+
<< "Commands:" << endl;
134+
for (auto &&g : command_groups) {
135+
cout << " " << g.first << ":" << endl;
136+
for (auto &&c : g.second) cout << c << endl;
137+
cout << endl;
138+
}
121139
}
122140

123141
int
124142
main(int argc, const char **argv) {
125-
int ret = 0;
126-
if (argc < 2) { print_help(); return ret; }
127-
128-
if (strcmp(argv[1], "abismal") == 0) ret = abismal(argc - 1, argv + 1);
129-
else if (strcmp(argv[1], "abismalidx") == 0) ret = abismalidx(argc - 1, argv + 1);
130-
else if (strcmp(argv[1], "simreads") == 0) ret = simreads(argc - 1, argv + 1);
131-
else if (strcmp(argv[1], "counts") == 0) ret = main_counts(argc - 1, argv + 1);
132-
else if (strcmp(argv[1], "allelic") == 0) ret = main_allelicmeth(argc - 1, argv + 1);
133-
else if (strcmp(argv[1], "amrfinder") == 0) ret = main_amrfinder(argc - 1, argv + 1);
134-
else if (strcmp(argv[1], "amrtester") == 0) ret = main_amrtester(argc - 1, argv + 1);
135-
else if (strcmp(argv[1], "bsrate") == 0) ret = main_bsrate(argc - 1, argv + 1);
136-
else if (strcmp(argv[1], "hmr") == 0) ret = main_hmr(argc - 1, argv + 1);
137-
else if (strcmp(argv[1], "hmr-rep") == 0) ret = main_hmr_rep(argc - 1, argv + 1);
138-
else if (strcmp(argv[1], "hypermr") == 0) ret = main_hypermr(argc - 1, argv + 1);
139-
else if (strcmp(argv[1], "levels") == 0) ret = main_levels(argc - 1, argv + 1);
140-
else if (strcmp(argv[1], "entropy") == 0) ret = main_methentropy(argc - 1, argv + 1);
141-
else if (strcmp(argv[1], "states") == 0) ret = main_methstates(argc - 1, argv + 1);
142-
else if (strcmp(argv[1], "multistat") == 0) ret = main_multimethstat(argc - 1, argv + 1);
143-
else if (strcmp(argv[1], "pmd") == 0) ret = main_pmd(argc - 1, argv + 1);
144-
else if (strcmp(argv[1], "roi") == 0) ret = main_roimethstat(argc - 1, argv + 1);
145-
else if (strcmp(argv[1], "mlml") == 0) ret = main_mlml(argc - 1, argv + 1);
146-
else if (strcmp(argv[1], "dmr") == 0) ret = main_dmr(argc - 1, argv + 1);
147-
else if (strcmp(argv[1], "diff") == 0) ret = main_methdiff(argc - 1, argv + 1);
148-
else if (strcmp(argv[1], "radmeth") == 0) ret = main_radmeth(argc - 1, argv + 1);
149-
else if (strcmp(argv[1], "radadjust") == 0) ret = main_radmeth_adjust(argc - 1, argv + 1);
150-
else if (strcmp(argv[1], "radmerge") == 0) ret = main_radmeth_merge(argc - 1, argv + 1);
151-
else if (strcmp(argv[1], "cleanhp") == 0) ret = main_clean_hairpins(argc - 1, argv + 1);
152-
else if (strcmp(argv[1], "uniq") == 0) ret = main_uniq(argc - 1, argv + 1);
153-
else if (strcmp(argv[1], "fastlift") == 0) ret = main_fast_liftover(argc - 1, argv + 1);
154-
else if (strcmp(argv[1], "format") == 0) ret = main_format(argc - 1, argv + 1);
155-
else if (strcmp(argv[1], "guessprotocol") == 0) ret = main_guessprotocol(argc - 1, argv + 1);
156-
else if (strcmp(argv[1], "lc") == 0) ret = main_lc_approx(argc - 1, argv + 1);
157-
else if (strcmp(argv[1], "liftfilter") == 0) ret = main_lift_filter(argc - 1, argv + 1);
158-
else if (strcmp(argv[1], "merge-bsrate") == 0) ret = main_merge_bsrate(argc - 1, argv + 1);
159-
else if (strcmp(argv[1], "merge") == 0) ret = main_merge_methcounts(argc - 1, argv + 1);
160-
else if (strcmp(argv[1], "selectsites") == 0) ret = main_selectsites(argc - 1, argv + 1);
161-
else if (strcmp(argv[1], "sym") == 0) ret = main_symmetric_cpgs(argc - 1, argv + 1);
162-
163-
else {
164-
cerr << "command not found: " << argv[1] << endl;
165-
ret = 1;
143+
try {
144+
vector<pair<string, vector<dnmtools_command>>> command_groups = {
145+
// clang-format off
146+
{{"mapping",
147+
{{{"abismal", "map FASTQ reads to a FASTA reference genome or an index", abismal},
148+
{"abismalidx", "convert a FASTA reference genome to an abismal index", abismalidx},
149+
{"simreads", "simulate reads in a FASTA reference genome", simreads}}}},
150+
151+
{"methylome construction",
152+
{{{"format", "convert SAM/BAM mapped bs-seq reads to standard dnmtools format", main_format},
153+
{"uniq", "remove duplicate reads from sorted mapped reads", main_uniq},
154+
{"bsrate", "compute the BS conversion rate from BS-seq reads mapped to a genome", main_bsrate},
155+
{"counts", "get methylation levels from mapped WGBS reads", main_counts},
156+
{"sym", "get CpG sites and make methylation levels symmetric", main_symmetric_cpgs},
157+
{"levels", "compute methylation summary statistics from a counts file", main_levels}}}},
158+
159+
{"methylome analysis",
160+
{{{"hmr", "identify hypomethylated regions", main_hmr},
161+
{"hmr-rep", "identify hypomethylated regions in a set of replicate methylomes", main_hmr_rep},
162+
{"hypermr", "identify hypermethylated regions in plant methylomes", main_hypermr},
163+
{"entropy", "compute methylation entropy in sliding window", main_methentropy},
164+
{"multistat", "summarize methylation from to genomic intervals in a BED file", main_multimethstat},
165+
{"pmd", "identify partially methylated domains", main_pmd},
166+
{"roi", "compute average CpG methylation in each of a set of genomic interval", main_roimethstat},
167+
{"mlml", "program to estimate hydroxymethylation levels", main_mlml}}}},
168+
169+
{"allele-specific methylation (ASM)",
170+
{{{"states", "convert reads in SAM format into methylation states at CpGs", main_methstates},
171+
{"allelic", "get probability of ASM for each pair of neighboring CpGs", main_allelicmeth},
172+
{"amrfinder", "identify regions of ASM in the genome", main_amrfinder},
173+
{"amrtester", "test a set of genomic intervals for ASM", main_amrtester}}}},
174+
175+
{"differential methylation (DM)",
176+
{{{"dmr", "identify DMRs from genomic intervals and single-CpG DM probabilities", main_dmr},
177+
{"diff", "compute single-CpG DM probability between two methylomes", main_methdiff},
178+
{"radmeth", "compute DM probabilities for each CpG using multiple methylomes", main_radmeth},
179+
{"radadjust", "adjust p-values from radmeth output", main_radmeth_adjust},
180+
{"radmerge", "merge significant CpGs in radmeth output", main_radmeth_merge}}}},
181+
182+
{"methylation visualization",
183+
{{{"fastlift", "liftover methylation levels between species", main_fast_liftover},
184+
{"liftfilter", "filter CpGs that are not CpGs in the target genome", main_lift_filter}}}},
185+
186+
{"utilities",
187+
{{{"cleanhp", "fix and stat invdup/hairping reads", main_clean_hairpins},
188+
{"guessprotocol", "guess whether protocol is ordinary, pbat or random", main_guessprotocol},
189+
{"lc", "approximate line counts in a file", main_lc_approx},
190+
{"merge-bsrate", "merge bisulfite conversion rates files from bsrate", main_merge_bsrate},
191+
{"merge", "merge multiple counts files into a counts file or a table", main_merge_methcounts},
192+
{"selectsites", "sites inside a set of genomic intervals", main_selectsites}}}}}};
193+
// clang-format on
194+
195+
if (argc < 2) {
196+
print_help(command_groups);
197+
return EXIT_SUCCESS;
198+
}
199+
200+
const auto has_tag = [&](const dnmtools_command &a) {
201+
return a.tag == argv[1];
202+
};
203+
204+
for (auto &&g : command_groups) {
205+
const auto the_cmd = find_if(begin(g.second), end(g.second), has_tag);
206+
if (the_cmd != end(g.second)) return (*the_cmd)(argc, argv);
207+
}
166208
}
167-
168-
return ret;
209+
catch (const std::exception &e) {
210+
std::cerr << "ERROR:\t" << e.what() << endl;
211+
return EXIT_FAILURE;
212+
}
213+
return EXIT_SUCCESS;
169214
}

0 commit comments

Comments
 (0)