Skip to content

Commit 504a212

Browse files
committed
adding rng seed option from preseq_dev
1 parent 0213136 commit 504a212

1 file changed

Lines changed: 38 additions & 6 deletions

File tree

preseq.cpp

Lines changed: 38 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@
4545
#include <RNG.hpp>
4646
#include <smithlab_os.hpp>
4747

48-
#define PRESEQ_VERSION "2.0.2"
48+
#define PRESEQ_VERSION "2.0.3"
4949

5050
// AS: might not be good to depend on mapped read here
5151
// TD: if we're including gc_extrap, we need the dependence
@@ -73,13 +73,15 @@ using std::tr1::unordered_map;
7373
/////////////////////////////////////////////////////////
7474
// Confidence interval stuff
7575

76+
/*
7677
static inline double
7778
alpha_log_confint_multiplier(const double estimate,
7879
const double variance, const double alpha) {
7980
const double inv_norm_alpha = gsl_cdf_ugaussian_Qinv(alpha/2.0);
8081
return exp(inv_norm_alpha*
8182
sqrt(log(1.0 + variance/pow(estimate, 2))));
8283
}
84+
*/
8385

8486

8587
static void
@@ -267,6 +269,7 @@ check_yield_estimates(const vector<double> &estimates) {
267269

268270
void
269271
extrap_bootstrap(const bool VERBOSE, const bool DEFECTS,
272+
const unsigned long int seed,
270273
const vector<double> &orig_hist,
271274
const size_t bootstraps, const size_t orig_max_terms,
272275
const int diagonal, const double bin_step_size,
@@ -279,7 +282,7 @@ extrap_bootstrap(const bool VERBOSE, const bool DEFECTS,
279282
srand(time(0) + getpid());
280283
gsl_rng_env_setup();
281284
gsl_rng *rng = gsl_rng_alloc(gsl_rng_default);
282-
gsl_rng_set(rng, rand());
285+
gsl_rng_set(rng, seed);
283286

284287
double vals_sum = 0.0;
285288
for(size_t i = 0; i < orig_hist.size(); i++)
@@ -588,6 +591,7 @@ lc_extrap(const int argc, const char **argv) {
588591
size_t bootstraps = 100;
589592
int diagonal = 0;
590593
double c_level = 0.95;
594+
unsigned long int seed = 0;
591595

592596
/* FLAGS */
593597
bool VERBOSE = false;
@@ -644,6 +648,8 @@ lc_extrap(const int argc, const char **argv) {
644648
opt_parse.add_opt("defects", 'D',
645649
"defects mode to extrapolate without testing for defects",
646650
false, DEFECTS);
651+
opt_parse.add_opt("seed", 'r', "seed for random number generator",
652+
false, seed);
647653

648654
vector<string> leftover_args;
649655
opt_parse.parse(argc-1, argv+1, leftover_args);
@@ -666,6 +672,10 @@ lc_extrap(const int argc, const char **argv) {
666672
const string input_file_name = leftover_args.front();
667673
/******************************************************************/
668674

675+
// if seed is not set, make it random
676+
if(seed == 0){
677+
seed = rand();
678+
}
669679

670680
vector<double> counts_hist;
671681
size_t n_reads = 0;
@@ -809,7 +819,7 @@ lc_extrap(const int argc, const char **argv) {
809819
const size_t max_iter = 10*bootstraps;
810820

811821
vector<vector <double> > bootstrap_estimates;
812-
extrap_bootstrap(VERBOSE, DEFECTS, counts_hist, bootstraps,
822+
extrap_bootstrap(VERBOSE, DEFECTS, seed, counts_hist, bootstraps,
813823
orig_max_terms, diagonal, step_size, max_extrapolation,
814824
max_iter, bootstrap_estimates);
815825

@@ -867,6 +877,7 @@ gc_extrap(const int argc, const char **argv) {
867877
bool SINGLE_ESTIMATE = false;
868878
double max_extrapolation = 1.0e12;
869879
size_t bootstraps = 100;
880+
unsigned long int seed = 0;
870881
bool DEFECTS = false;
871882

872883
bool NO_SEQUENCE = false;
@@ -908,6 +919,9 @@ gc_extrap(const int argc, const char **argv) {
908919
opt_parse.add_opt("defects", 'D',
909920
"defects mode to extrapolate without testing for defects",
910921
false, DEFECTS);
922+
opt_parse.add_opt("seed", 'r', "seed for random number generator",
923+
false, seed);
924+
911925

912926

913927
vector<string> leftover_args;
@@ -931,6 +945,11 @@ gc_extrap(const int argc, const char **argv) {
931945
const string input_file_name = leftover_args.front();
932946
// ****************************************************************
933947

948+
// if seed is not set, set it to random
949+
if(seed == 0){
950+
seed = rand();
951+
}
952+
934953
vector<double> coverage_hist;
935954
size_t n_reads = 0;
936955
if(VERBOSE)
@@ -1047,7 +1066,7 @@ gc_extrap(const int argc, const char **argv) {
10471066
const size_t max_iter = 10*bootstraps;
10481067

10491068
vector<vector <double> > bootstrap_estimates;
1050-
extrap_bootstrap(VERBOSE, DEFECTS, coverage_hist, bootstraps, orig_max_terms,
1069+
extrap_bootstrap(VERBOSE, DEFECTS, seed, coverage_hist, bootstraps, orig_max_terms,
10511070
diagonal, bin_step_size, max_extrapolation/bin_size,
10521071
max_iter, bootstrap_estimates);
10531072

@@ -1100,6 +1119,7 @@ c_curve(const int argc, const char **argv) {
11001119
bool PAIRED_END = false;
11011120
bool HIST_INPUT = false;
11021121
bool VALS_INPUT = false;
1122+
unsigned long int seed = 0;
11031123

11041124
string outfile;
11051125

@@ -1137,6 +1157,9 @@ c_curve(const int argc, const char **argv) {
11371157
+ toa(MAX_SEGMENT_LENGTH) + ")",
11381158
false, MAX_SEGMENT_LENGTH);
11391159
#endif
1160+
opt_parse.add_opt("seed", 'r', "seed for random number generator",
1161+
false, seed);
1162+
11401163

11411164
vector<string> leftover_args;
11421165
opt_parse.parse(argc-1, argv+1, leftover_args);
@@ -1159,11 +1182,14 @@ c_curve(const int argc, const char **argv) {
11591182
const string input_file_name = leftover_args.front();
11601183
/******************************************************************/
11611184

1185+
if(seed == 0){
1186+
seed = rand();
1187+
}
11621188
// Setup the random number generator
11631189
gsl_rng_env_setup();
11641190
gsl_rng *rng = gsl_rng_alloc(gsl_rng_default); // use default type
11651191
srand(time(0) + getpid()); //give the random fxn a new seed
1166-
gsl_rng_set(rng, rand()); //initialize random number generator with the seed
1192+
gsl_rng_set(rng, seed); //initialize random number generator with the seed
11671193

11681194
vector<double> counts_hist;
11691195
size_t n_reads = 0;
@@ -1299,6 +1325,7 @@ bound_pop(const int argc, const char **argv) {
12991325
size_t bootstraps = 500;
13001326
double c_level = 0.95;
13011327
size_t max_iter = 100;
1328+
unsigned long int seed = 0;
13021329

13031330

13041331
/********** GET COMMAND LINE ARGUMENTS FOR C_CURVE ***********/
@@ -1337,6 +1364,8 @@ bound_pop(const int argc, const char **argv) {
13371364
#endif
13381365
opt_parse.add_opt("quick", 'Q', "quick mode, estimate without bootstrapping",
13391366
false, QUICK_MODE);
1367+
opt_parse.add_opt("seed", 'r', "seed for random number generator",
1368+
false, seed);
13401369

13411370

13421371
vector<string> leftover_args;
@@ -1521,10 +1550,13 @@ bound_pop(const int argc, const char **argv) {
15211550
vector<double> quad_estimates;
15221551

15231552
//setup rng
1553+
if(seed == 0){
1554+
seed = rand();
1555+
}
15241556
srand(time(0) + getpid());
15251557
gsl_rng_env_setup();
15261558
gsl_rng *rng = gsl_rng_alloc(gsl_rng_default);
1527-
gsl_rng_set(rng, rand());
1559+
gsl_rng_set(rng, seed);
15281560

15291561
// hist may be sparse, to speed up bootstrapping
15301562
// sample only from positive entries

0 commit comments

Comments
 (0)