|
13 | 13 | * General Public License for more details. |
14 | 14 | */ |
15 | 15 |
|
| 16 | +#include <config.h> |
| 17 | + |
| 18 | +#include <algorithm> |
| 19 | +#include <functional> |
| 20 | +#include <iomanip> |
16 | 21 | #include <iostream> |
| 22 | +#include <sstream> |
17 | 23 | #include <string> |
18 | | -#include <cstring> |
19 | | - |
20 | | -#include <config.h> |
| 24 | +#include <vector> |
21 | 25 |
|
22 | | -using std::string; |
23 | | -using std::to_string; |
24 | | -using std::cerr; |
| 26 | +using std::begin; |
| 27 | +using std::end; |
25 | 28 | using std::cout; |
26 | 29 | using std::endl; |
| 30 | +using std::pair; |
| 31 | +using std::string; |
| 32 | +using std::to_string; |
| 33 | +using std::vector; |
| 34 | + |
| 35 | +static const string PROGRAM_NAME = "dnmtools"; |
| 36 | + |
| 37 | +struct dnmtools_command { |
| 38 | + string tag; |
| 39 | + string description; |
| 40 | + std::function<int(const int, const char **)> fun; |
| 41 | + |
| 42 | + auto operator()(const int argc, const char **argv) const -> int { |
| 43 | + return fun(argc - 1, argv + 1); |
| 44 | + } |
| 45 | +}; |
| 46 | + |
| 47 | +auto |
| 48 | +operator<<(std::ostream &out, const dnmtools_command &cmd) -> std::ostream & { |
| 49 | + static const size_t pad_size = 4; |
| 50 | + static const size_t offset = 15; |
| 51 | + static const string pad(pad_size, ' '); |
| 52 | + return out << pad << std::left << std::setw(offset) << (cmd.tag + ":") |
| 53 | + << cmd.description; |
| 54 | +} |
27 | 55 |
|
28 | | -#define PROGRAM_NAME "dnmtools" |
29 | | - |
30 | | -int abismal(int argc, const char **argv); |
31 | | -int abismalidx(int argc, const char **argv); |
32 | | -int simreads(int argc, const char **argv); |
33 | | -int main_counts(int argc, const char **argv); |
34 | | -int main_allelicmeth(int argc, const char **argv); |
35 | | -int main_amrfinder(int argc, const char **argv); |
36 | | -int main_amrtester(int argc, const char **argv); |
37 | | -int main_bsrate(int argc, const char **argv); |
38 | | -int main_hmr(int argc, const char **argv); |
39 | | -int main_hmr_rep(int argc, const char **argv); |
40 | | -int main_hypermr(int argc, const char **argv); |
41 | | -int main_levels(int argc, const char **argv); |
42 | | -int main_methentropy(int argc, const char **argv); |
43 | | -int main_methstates(int argc, const char **argv); |
44 | | -int main_multimethstat(int argc, const char **argv); |
45 | | -int main_pmd(int argc, const char **argv); |
46 | | -int main_roimethstat(int argc, const char **argv); |
47 | | -int main_mlml(int argc, const char **argv); |
48 | | -int main_dmr(int argc, const char **argv); |
49 | | -int main_methdiff(int argc, const char **argv); |
50 | | -int main_radmeth_adjust(int argc, const char **argv); |
51 | | -int main_radmeth(int argc, const char **argv); |
52 | | -int main_radmeth_merge(int argc, const char **argv); |
53 | | -int main_clean_hairpins(int argc, const char **argv); |
54 | | -int main_uniq(int argc, const char **argv); |
55 | | -int main_fast_liftover(int argc, const char **argv); |
56 | | -int main_format(int argc, const char **argv); |
57 | | -int main_guessprotocol(int argc, const char **argv); |
58 | | -int main_lc_approx(int argc, const char **argv); |
59 | | -int main_lift_filter(int argc, const char **argv); |
60 | | -int main_merge_bsrate(int argc, const char **argv); |
61 | | -int main_merge_methcounts(int argc, const char **argv); |
62 | | -int main_selectsites(int argc, const char **argv); |
63 | | -int main_symmetric_cpgs(int argc, const char **argv); |
| 56 | +// ADS: not sure of best way to acquire these below beyond simply |
| 57 | +// declaring them here |
| 58 | +int |
| 59 | +abismal(int argc, const char **argv); |
| 60 | +int |
| 61 | +abismalidx(int argc, const char **argv); |
| 62 | +int |
| 63 | +simreads(int argc, const char **argv); |
| 64 | +int |
| 65 | +main_counts(int argc, const char **argv); |
| 66 | +int |
| 67 | +main_allelicmeth(int argc, const char **argv); |
| 68 | +int |
| 69 | +main_amrfinder(int argc, const char **argv); |
| 70 | +int |
| 71 | +main_amrtester(int argc, const char **argv); |
| 72 | +int |
| 73 | +main_bsrate(int argc, const char **argv); |
| 74 | +int |
| 75 | +main_hmr(int argc, const char **argv); |
| 76 | +int |
| 77 | +main_hmr_rep(int argc, const char **argv); |
| 78 | +int |
| 79 | +main_hypermr(int argc, const char **argv); |
| 80 | +int |
| 81 | +main_levels(int argc, const char **argv); |
| 82 | +int |
| 83 | +main_methentropy(int argc, const char **argv); |
| 84 | +int |
| 85 | +main_methstates(int argc, const char **argv); |
| 86 | +int |
| 87 | +main_multimethstat(int argc, const char **argv); |
| 88 | +int |
| 89 | +main_pmd(int argc, const char **argv); |
| 90 | +int |
| 91 | +main_roimethstat(int argc, const char **argv); |
| 92 | +int |
| 93 | +main_mlml(int argc, const char **argv); |
| 94 | +int |
| 95 | +main_dmr(int argc, const char **argv); |
| 96 | +int |
| 97 | +main_methdiff(int argc, const char **argv); |
| 98 | +int |
| 99 | +main_radmeth_adjust(int argc, const char **argv); |
| 100 | +int |
| 101 | +main_radmeth(int argc, const char **argv); |
| 102 | +int |
| 103 | +main_radmeth_merge(int argc, const char **argv); |
| 104 | +int |
| 105 | +main_clean_hairpins(int argc, const char **argv); |
| 106 | +int |
| 107 | +main_uniq(int argc, const char **argv); |
| 108 | +int |
| 109 | +main_fast_liftover(int argc, const char **argv); |
| 110 | +int |
| 111 | +main_format(int argc, const char **argv); |
| 112 | +int |
| 113 | +main_guessprotocol(int argc, const char **argv); |
| 114 | +int |
| 115 | +main_lc_approx(int argc, const char **argv); |
| 116 | +int |
| 117 | +main_lift_filter(int argc, const char **argv); |
| 118 | +int |
| 119 | +main_merge_bsrate(int argc, const char **argv); |
| 120 | +int |
| 121 | +main_merge_methcounts(int argc, const char **argv); |
| 122 | +int |
| 123 | +main_selectsites(int argc, const char **argv); |
| 124 | +int |
| 125 | +main_symmetric_cpgs(int argc, const char **argv); |
64 | 126 |
|
65 | 127 | void |
66 | | -print_help() { |
67 | | - static const string sep = " "; |
68 | | - cout << "Program: " << PROGRAM_NAME << "\n"; |
69 | | - cout << "Version: " << VERSION << "\n"; |
70 | | - cout << "Usage: " << PROGRAM_NAME << " <command> [options]\n"; |
71 | | - cout << "Commands:"; |
72 | | - |
73 | | - cout << "\n" << sep << "read mapping:\n"; |
74 | | - cout << sep+sep << "abismal: map FASTQ reads to a FASTA reference genome or an index\n"; |
75 | | - cout << sep+sep << "abismalidx: convert a FASTA reference genome to an abismal index\n"; |
76 | | - cout << sep+sep << "simreads: simulate reads in a FASTA reference genome\n"; |
77 | | - |
78 | | - cout << "\n" << sep << "methylome construction:\n"; |
79 | | - cout << sep+sep << "format: convert SAM/BAM mapped bs-seq reads to standard dnmtools format\n"; |
80 | | - cout << sep+sep << "uniq: remove duplicate reads from sorted mapped reads\n"; |
81 | | - cout << sep+sep << "bsrate: compute the BS conversion rate from BS-seq reads mapped to a genome\n"; |
82 | | - cout << sep+sep << "counts: get methylation levels from mapped WGBS reads\n"; |
83 | | - cout << sep+sep << "sym: get CpG sites and make methylation levels symmetric.\n"; |
84 | | - cout << sep+sep << "levels: compute methylation summary statistics from a counts file\n"; |
85 | | - |
86 | | - cout << "\n" << sep << "methylome analysis:\n"; |
87 | | - cout << sep+sep << "hmr: identify hypomethylated regions.\n"; |
88 | | - cout << sep+sep << "hmr-rep: identify hypomethylated regions in a set of replicate methylomes\n"; |
89 | | - cout << sep+sep << "entropy: compute methylation entropy in sliding window\n"; |
90 | | - cout << sep+sep << "multistat: summarize methylation from to genomic intervals in a BED file.\n"; |
91 | | - cout << sep+sep << "pmd: identify partially methylated domains\n"; |
92 | | - cout << sep+sep << "roi: compute average CpG methylation in each of a set of genomic interval\n"; |
93 | | - cout << sep+sep << "mlml: program to estimate hydroxymethylation levels\n"; |
94 | | - |
95 | | - cout << "\n" << sep << "allele-specific methylation:\n"; |
96 | | - cout << sep+sep << "states: convert read sequences in SAM format to methylation states at CpGs covered by those reads\n"; |
97 | | - cout << sep+sep << "allelic: computes probability of allele-specific methylation at each tuple of CpGs\n"; |
98 | | - cout << sep+sep << "amrfinder: identify regions of allele-specific methylation\n"; |
99 | | - cout << sep+sep << "amrtester: resolve epi-alleles\n"; |
100 | | - |
101 | | - cout << "\n" << sep << "differential methylation:\n"; |
102 | | - cout << sep+sep << "dmr: computes DMRs based on HMRs and probability of differences at individual CpGs\n"; |
103 | | - cout << sep+sep << "diff: compute probability that site has higher methylation in file A than B\n"; |
104 | | - cout << sep+sep << "radmeth: computes differentially methylated CpGs\n"; |
105 | | - cout << sep+sep << "radadjust: adjust p-values of radmeth output\n"; |
106 | | - cout << sep+sep << "radmerge: merge significant CpGs in radmeth output\n"; |
107 | | - |
108 | | - cout << "\n" << sep << "methylation visualization:\n"; |
109 | | - cout << sep+sep << "fastlift: liftover methylation levels between species\n"; |
110 | | - cout << sep+sep << "liftfilter: filter CpG regions that do not exist in resulting genome\n"; |
111 | | - |
112 | | - cout << "\n" << sep << "general-purpose tools:\n"; |
113 | | - cout << sep+sep << "cleanhp: fix and stat invdup/hairping reads\n"; |
114 | | - cout << sep+sep << "guessprotocol: guess whether protocol is ordinary, pbat or random\n"; |
115 | | - cout << sep+sep << "lc: approximate line counts in a file\n"; |
116 | | - cout << sep+sep << "merge-bsrate: merge the BS conversion rate from two sets of BS-seq reads mapped to a genome\n"; |
117 | | - cout << sep+sep << "merge: merge multiple counts files\n"; |
118 | | - cout << sep+sep << "selectsites: sites inside a set of genomic intervals\n"; |
119 | | - |
120 | | - cout << "\n"; |
| 128 | +print_help( |
| 129 | + const vector<pair<string, vector<dnmtools_command>>> &command_groups) { |
| 130 | + cout << "Program: " << PROGRAM_NAME << "\n" |
| 131 | + << "Version: " << VERSION << "\n" |
| 132 | + << "Usage: " << PROGRAM_NAME << " <command> [options]\n" |
| 133 | + << "Commands:" << endl; |
| 134 | + for (auto &&g : command_groups) { |
| 135 | + cout << " " << g.first << ":" << endl; |
| 136 | + for (auto &&c : g.second) cout << c << endl; |
| 137 | + cout << endl; |
| 138 | + } |
121 | 139 | } |
122 | 140 |
|
123 | 141 | int |
124 | 142 | main(int argc, const char **argv) { |
125 | | - int ret = 0; |
126 | | - if (argc < 2) { print_help(); return ret; } |
127 | | - |
128 | | - if (strcmp(argv[1], "abismal") == 0) ret = abismal(argc - 1, argv + 1); |
129 | | - else if (strcmp(argv[1], "abismalidx") == 0) ret = abismalidx(argc - 1, argv + 1); |
130 | | - else if (strcmp(argv[1], "simreads") == 0) ret = simreads(argc - 1, argv + 1); |
131 | | - else if (strcmp(argv[1], "counts") == 0) ret = main_counts(argc - 1, argv + 1); |
132 | | - else if (strcmp(argv[1], "allelic") == 0) ret = main_allelicmeth(argc - 1, argv + 1); |
133 | | - else if (strcmp(argv[1], "amrfinder") == 0) ret = main_amrfinder(argc - 1, argv + 1); |
134 | | - else if (strcmp(argv[1], "amrtester") == 0) ret = main_amrtester(argc - 1, argv + 1); |
135 | | - else if (strcmp(argv[1], "bsrate") == 0) ret = main_bsrate(argc - 1, argv + 1); |
136 | | - else if (strcmp(argv[1], "hmr") == 0) ret = main_hmr(argc - 1, argv + 1); |
137 | | - else if (strcmp(argv[1], "hmr-rep") == 0) ret = main_hmr_rep(argc - 1, argv + 1); |
138 | | - else if (strcmp(argv[1], "hypermr") == 0) ret = main_hypermr(argc - 1, argv + 1); |
139 | | - else if (strcmp(argv[1], "levels") == 0) ret = main_levels(argc - 1, argv + 1); |
140 | | - else if (strcmp(argv[1], "entropy") == 0) ret = main_methentropy(argc - 1, argv + 1); |
141 | | - else if (strcmp(argv[1], "states") == 0) ret = main_methstates(argc - 1, argv + 1); |
142 | | - else if (strcmp(argv[1], "multistat") == 0) ret = main_multimethstat(argc - 1, argv + 1); |
143 | | - else if (strcmp(argv[1], "pmd") == 0) ret = main_pmd(argc - 1, argv + 1); |
144 | | - else if (strcmp(argv[1], "roi") == 0) ret = main_roimethstat(argc - 1, argv + 1); |
145 | | - else if (strcmp(argv[1], "mlml") == 0) ret = main_mlml(argc - 1, argv + 1); |
146 | | - else if (strcmp(argv[1], "dmr") == 0) ret = main_dmr(argc - 1, argv + 1); |
147 | | - else if (strcmp(argv[1], "diff") == 0) ret = main_methdiff(argc - 1, argv + 1); |
148 | | - else if (strcmp(argv[1], "radmeth") == 0) ret = main_radmeth(argc - 1, argv + 1); |
149 | | - else if (strcmp(argv[1], "radadjust") == 0) ret = main_radmeth_adjust(argc - 1, argv + 1); |
150 | | - else if (strcmp(argv[1], "radmerge") == 0) ret = main_radmeth_merge(argc - 1, argv + 1); |
151 | | - else if (strcmp(argv[1], "cleanhp") == 0) ret = main_clean_hairpins(argc - 1, argv + 1); |
152 | | - else if (strcmp(argv[1], "uniq") == 0) ret = main_uniq(argc - 1, argv + 1); |
153 | | - else if (strcmp(argv[1], "fastlift") == 0) ret = main_fast_liftover(argc - 1, argv + 1); |
154 | | - else if (strcmp(argv[1], "format") == 0) ret = main_format(argc - 1, argv + 1); |
155 | | - else if (strcmp(argv[1], "guessprotocol") == 0) ret = main_guessprotocol(argc - 1, argv + 1); |
156 | | - else if (strcmp(argv[1], "lc") == 0) ret = main_lc_approx(argc - 1, argv + 1); |
157 | | - else if (strcmp(argv[1], "liftfilter") == 0) ret = main_lift_filter(argc - 1, argv + 1); |
158 | | - else if (strcmp(argv[1], "merge-bsrate") == 0) ret = main_merge_bsrate(argc - 1, argv + 1); |
159 | | - else if (strcmp(argv[1], "merge") == 0) ret = main_merge_methcounts(argc - 1, argv + 1); |
160 | | - else if (strcmp(argv[1], "selectsites") == 0) ret = main_selectsites(argc - 1, argv + 1); |
161 | | - else if (strcmp(argv[1], "sym") == 0) ret = main_symmetric_cpgs(argc - 1, argv + 1); |
162 | | - |
163 | | - else { |
164 | | - cerr << "command not found: " << argv[1] << endl; |
165 | | - ret = 1; |
| 143 | + try { |
| 144 | + vector<pair<string, vector<dnmtools_command>>> command_groups = { |
| 145 | +// clang-format off |
| 146 | +{{"mapping", |
| 147 | + {{{"abismal", "map FASTQ reads to a FASTA reference genome or an index", abismal}, |
| 148 | + {"abismalidx", "convert a FASTA reference genome to an abismal index", abismalidx}, |
| 149 | + {"simreads", "simulate reads in a FASTA reference genome", simreads}}}}, |
| 150 | + |
| 151 | +{"methylome construction", |
| 152 | + {{{"format", "convert SAM/BAM mapped bs-seq reads to standard dnmtools format", main_format}, |
| 153 | + {"uniq", "remove duplicate reads from sorted mapped reads", main_uniq}, |
| 154 | + {"bsrate", "compute the BS conversion rate from BS-seq reads mapped to a genome", main_bsrate}, |
| 155 | + {"counts", "get methylation levels from mapped WGBS reads", main_counts}, |
| 156 | + {"sym", "get CpG sites and make methylation levels symmetric", main_symmetric_cpgs}, |
| 157 | + {"levels", "compute methylation summary statistics from a counts file", main_levels}}}}, |
| 158 | + |
| 159 | +{"methylome analysis", |
| 160 | + {{{"hmr", "identify hypomethylated regions", main_hmr}, |
| 161 | + {"hmr-rep", "identify hypomethylated regions in a set of replicate methylomes", main_hmr_rep}, |
| 162 | + {"hypermr", "identify hypermethylated regions in plant methylomes", main_hypermr}, |
| 163 | + {"entropy", "compute methylation entropy in sliding window", main_methentropy}, |
| 164 | + {"multistat", "summarize methylation from to genomic intervals in a BED file", main_multimethstat}, |
| 165 | + {"pmd", "identify partially methylated domains", main_pmd}, |
| 166 | + {"roi", "compute average CpG methylation in each of a set of genomic interval", main_roimethstat}, |
| 167 | + {"mlml", "program to estimate hydroxymethylation levels", main_mlml}}}}, |
| 168 | + |
| 169 | +{"allele-specific methylation (ASM)", |
| 170 | + {{{"states", "convert reads in SAM format into methylation states at CpGs", main_methstates}, |
| 171 | + {"allelic", "get probability of ASM for each pair of neighboring CpGs", main_allelicmeth}, |
| 172 | + {"amrfinder", "identify regions of ASM in the genome", main_amrfinder}, |
| 173 | + {"amrtester", "test a set of genomic intervals for ASM", main_amrtester}}}}, |
| 174 | + |
| 175 | +{"differential methylation (DM)", |
| 176 | + {{{"dmr", "identify DMRs from genomic intervals and single-CpG DM probabilities", main_dmr}, |
| 177 | + {"diff", "compute single-CpG DM probability between two methylomes", main_methdiff}, |
| 178 | + {"radmeth", "compute DM probabilities for each CpG using multiple methylomes", main_radmeth}, |
| 179 | + {"radadjust", "adjust p-values from radmeth output", main_radmeth_adjust}, |
| 180 | + {"radmerge", "merge significant CpGs in radmeth output", main_radmeth_merge}}}}, |
| 181 | + |
| 182 | +{"methylation visualization", |
| 183 | + {{{"fastlift", "liftover methylation levels between species", main_fast_liftover}, |
| 184 | + {"liftfilter", "filter CpGs that are not CpGs in the target genome", main_lift_filter}}}}, |
| 185 | + |
| 186 | +{"utilities", |
| 187 | + {{{"cleanhp", "fix and stat invdup/hairping reads", main_clean_hairpins}, |
| 188 | + {"guessprotocol", "guess whether protocol is ordinary, pbat or random", main_guessprotocol}, |
| 189 | + {"lc", "approximate line counts in a file", main_lc_approx}, |
| 190 | + {"merge-bsrate", "merge bisulfite conversion rates files from bsrate", main_merge_bsrate}, |
| 191 | + {"merge", "merge multiple counts files into a counts file or a table", main_merge_methcounts}, |
| 192 | + {"selectsites", "sites inside a set of genomic intervals", main_selectsites}}}}}}; |
| 193 | +// clang-format on |
| 194 | + |
| 195 | + if (argc < 2) { |
| 196 | + print_help(command_groups); |
| 197 | + return EXIT_SUCCESS; |
| 198 | + } |
| 199 | + |
| 200 | + const auto has_tag = [&](const dnmtools_command &a) { |
| 201 | + return a.tag == argv[1]; |
| 202 | + }; |
| 203 | + |
| 204 | + for (auto &&g : command_groups) { |
| 205 | + const auto the_cmd = find_if(begin(g.second), end(g.second), has_tag); |
| 206 | + if (the_cmd != end(g.second)) return (*the_cmd)(argc, argv); |
| 207 | + } |
166 | 208 | } |
167 | | - |
168 | | - return ret; |
| 209 | + catch (const std::exception &e) { |
| 210 | + std::cerr << "ERROR:\t" << e.what() << endl; |
| 211 | + return EXIT_FAILURE; |
| 212 | + } |
| 213 | + return EXIT_SUCCESS; |
169 | 214 | } |
0 commit comments