Skip to content

Commit eac0f9f

Browse files
Merge pull request #120 from smithlabcode/msite-input-validation
Msite input validation
2 parents 0f27104 + f582982 commit eac0f9f

7 files changed

Lines changed: 38 additions & 15 deletions

File tree

src/analysis/hmr-rep.cpp

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -415,6 +415,10 @@ main_hmr_rep(int argc, const char **argv) {
415415
const vector<string> cpgs_files(leftover_args);
416416
/****************** END COMMAND LINE OPTIONS *****************/
417417

418+
for (auto &filename : cpgs_files)
419+
if (!is_msite_file(filename))
420+
throw runtime_error("malformed counts file: " + filename);
421+
418422
vector<string> params_in_file;
419423
if (!params_in_files.empty()) {
420424
params_in_file = split_comma(params_in_files);

src/analysis/hmr.cpp

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -442,6 +442,9 @@ main_hmr(int argc, const char **argv) {
442442
const string cpgs_file = leftover_args.front();
443443
/****************** END COMMAND LINE OPTIONS *****************/
444444

445+
if (!is_msite_file(cpgs_file))
446+
throw runtime_error("malformed counts file: " + cpgs_file);
447+
445448
// separate the regions by chrom and by desert
446449
vector<MSite> cpgs;
447450
vector<pair<double, double> > meth;

src/analysis/hypermr.cpp

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -278,6 +278,9 @@ main_hypermr(int argc, const char **argv) {
278278
const string cpgs_file = leftover_args.front();
279279
/****************** END COMMAND LINE OPTIONS *****************/
280280

281+
if (!is_msite_file(cpgs_file))
282+
throw runtime_error("malformed counts file: " + cpgs_file);
283+
281284
if (VERBOSE) cerr << "[loading_data]" << endl;
282285
vector<MSite> cpgs;
283286
vector<pair<double, double>> meth;

src/analysis/levels.cpp

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -120,6 +120,9 @@ main_levels(int argc, const char **argv) {
120120
const string meth_file = leftover_args.front();
121121
/****************** END COMMAND LINE OPTIONS *****************/
122122

123+
if (!is_msite_file(meth_file))
124+
throw runtime_error("malformed counts file: " + meth_file);
125+
123126
const counts_file_format guessed_format =
124127
guess_counts_file_format(meth_file);
125128
if (guessed_format != counts_file_format::ordinary) {

src/analysis/pmd.cpp

Lines changed: 4 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1173,7 +1173,10 @@ main_pmd(int argc, const char **argv) {
11731173
assert(cpgs_file.size() == params_in_file.size());
11741174
}
11751175

1176-
size_t n_replicates = cpgs_file.size();
1176+
const size_t n_replicates = cpgs_file.size();
1177+
for (auto &filename : cpgs_file)
1178+
if (!is_msite_file(filename))
1179+
throw runtime_error("malformed counts file: " + filename);
11771180

11781181
bool insufficient_data = false; // ADS: this is used now to detect
11791182
// when the counts files have

src/analysis/roimethstat.cpp

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -470,6 +470,9 @@ Columns (beyond the first 6) in the BED format output:
470470
const string cpgs_file = leftover_args.back();
471471
/****************** END COMMAND LINE OPTIONS *****************/
472472

473+
if (!is_msite_file(cpgs_file))
474+
throw runtime_error("malformed counts file: " + cpgs_file);
475+
473476
if (VERBOSE)
474477
cerr << "loading regions" << endl;
475478

src/common/MSite.cpp

Lines changed: 18 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -156,43 +156,47 @@ find_offset_for_msite(const std::string &chr,
156156
}
157157

158158

159-
160159
bool
161-
is_msite_file(const string &file) {
162-
ifstream in(file);
163-
if (!in)
164-
throw runtime_error("cannot open file: " + file);
165-
166-
string line;
167-
if(!getline(in, line)) return false;
160+
is_msite_line(const string &line) {
168161

169162
std::istringstream iss(line);
170163

171164
string chrom;
172165
if (!(iss >> chrom)) return false;
173-
166+
174167
long int pos = 0;
175168
if (!(iss >> pos)) return false;
176169

177170
string strand;
178-
if (!(iss >> strand) ||
179-
(strand.size() != 1) ||
180-
((strand != "+") && (strand != "-")) )
171+
if (!(iss >> strand) ||
172+
(strand.size() != 1) ||
173+
((strand != "+") && (strand != "-")) )
181174
return false;
182175

183176
string context;
184177
std::regex pattern("^C[pHWX][GH]$");
185178
if (!(iss >> context) || !regex_match(context, pattern)) return false;
186179

187180
double level = 0.0;
188-
if (!(iss >> level) || level < 0 || level > 1) return false;
181+
if (!(iss >> level) || level < 0.0 || level > 1.0) return false;
189182

190183
long int n_reads = 0;
191-
if (!(iss >> n_reads)) return false;
184+
if (!(iss >> n_reads) || n_reads < 0) return false;
192185

193186
string temp;
194187
if (iss >> temp) return false;
195188
else return true;
196189

197190
}
198191

192+
bool
193+
is_msite_file(const string &file) {
194+
bamxx::bgzf_file in(file, "r");
195+
if (!in)
196+
throw runtime_error("cannot open file: " + file);
197+
198+
string line;
199+
if (!getline(in, line)) return false;
200+
201+
return is_msite_line(line);
202+
}

0 commit comments

Comments
 (0)