2121
2222__author__ = 'Markus Englund'
2323__license__ = 'MIT'
24- __version__ = '0.6 .0'
24+ __version__ = '0.7 .0'
2525
2626
2727def main (args = None ):
@@ -41,7 +41,7 @@ def main(args=None):
4141
4242 result_iterator = iter_seqgen_results (
4343 simulation_input , seq_len = parser .length , gamma_cats = parser .gamma_cats ,
44- seqgen_path = parser .sg_filepath )
44+ basefreqs = parser . basefreqs , seqgen_path = parser .sg_filepath )
4545
4646 if parser .out_format == 'nexus' :
4747 schema_kwargs = {'schema' : 'nexus' , 'simple' : False }
@@ -75,6 +75,12 @@ def parse_args(args):
7575 parser .add_argument (
7676 '-l' , '--length' , action = 'store' , default = 1000 , type = int ,
7777 help = 'sequence lenght (default: 1000)' , metavar = 'N' , dest = 'length' )
78+ parser .add_argument (
79+ '-f' , '--freqs' , action = 'store' , type = float , nargs = 4 ,
80+ help = (
81+ 'base frequences (overrides any base frequences '
82+ 'in MrBayes\' output)' ),
83+ metavar = ('#A' , '#C' , '#G' , '#T' ), dest = 'basefreqs' )
7884 parser .add_argument (
7985 '-g' , '--gamma-cats' , action = 'store' , type = int ,
8086 help = 'number of gamma rate categories (default: continuous)' ,
@@ -88,7 +94,7 @@ def parse_args(args):
8894 help = 'number of records (trees) to use in the simulation' ,
8995 metavar = 'N' , dest = 'num_records' )
9096 parser .add_argument (
91- '-f ' , '--format' , default = 'nexus' , choices = ['nexus' , 'phylip' ],
97+ '-o ' , '--out -format' , default = 'nexus' , choices = ['nexus' , 'phylip' ],
9298 help = 'output format (default: "nexus")' , dest = 'out_format' )
9399 parser .add_argument (
94100 '-p' , '--seqgen-path' , default = 'seq-gen' , type = str ,
@@ -111,7 +117,7 @@ def parse_args(args):
111117 help = 'path to a MrBayes p-file' , metavar = 'pfile' )
112118 parser .add_argument (
113119 'tfile_path' , action = StoreExpandedPath , type = is_file ,
114- help = 'path to a MrBayes t-file' , metavar = 'tfile' , )
120+ help = 'path to a MrBayes t-file' , metavar = 'tfile' )
115121
116122 return parser .parse_args (args )
117123
@@ -200,35 +206,41 @@ def kappa_to_titv(kappa, piA, piC, piG, piT):
200206 return titv
201207
202208
203- def get_seqgen_params (mrbayes_params ):
209+ def get_seqgen_params (mrbayes_params , basefreqs = None ):
204210 """
205211 Adapt MrBayes parameter values for use with Seq-Gen.
206212
207213 Parameters
208214 ----------
209- mrbayes_prams : dict
215+ mrbayes_params : dict
210216 Parameter values from a single row in a MrBayes p-file.
217+ basefreqs : list of floats
218+ Frequencies for the four nucleotides A, C, G, and T. If given,
219+ they are used instead of the frequencies in the MrBayes output.
211220
212221 Returns
213222 -------
214223 seqgen_params : dict
215224 """
216- seqgen_params = {}
217- try :
218- seqgen_params ['state_freqs' ] = (
219- str (mrbayes_params ['pi(A)' ]) + ',' +
220- str (mrbayes_params ['pi(C)' ]) + ',' +
221- str (mrbayes_params ['pi(G)' ]) + ',' +
222- str (mrbayes_params ['pi(T)' ]))
223- except KeyError :
224- seqgen_params ['state_freqs' ] = '0.25,0.25,0.25,0.25'
225+
226+ if basefreqs is None :
227+ try :
228+ basefreqs = [
229+ float (mrbayes_params ['pi(A)' ]),
230+ float (mrbayes_params ['pi(C)' ]),
231+ float (mrbayes_params ['pi(G)' ]),
232+ float (mrbayes_params ['pi(T)' ])]
233+ except KeyError as exc :
234+ msg = (
235+ 'Base frequences must be provided since they '
236+ 'are not present in MrBayes\' output.' )
237+ raise KeyError (msg ) from exc
238+
239+ seqgen_params = {'state_freqs' : ',' .join ([str (v ) for v in basefreqs ])}
240+
225241 try :
226242 seqgen_params ['ti_tv' ] = kappa_to_titv (
227- float (mrbayes_params ['kappa' ]),
228- float (mrbayes_params ['pi(A)' ]),
229- float (mrbayes_params ['pi(C)' ]),
230- float (mrbayes_params ['pi(G)' ]),
231- float (mrbayes_params ['pi(T)' ]))
243+ float (mrbayes_params ['kappa' ]), * basefreqs )
232244 except KeyError :
233245 pass
234246 try :
@@ -267,11 +279,11 @@ def combine_simulation_input(tree_list, p_dicts, rng_seeds=None):
267279
268280
269281def iter_seqgen_results (
270- simulation_input , seq_len = 1000 , gamma_cats = None ,
282+ simulation_input , seq_len = 1000 , gamma_cats = None , basefreqs = None ,
271283 seqgen_path = 'seq-gen' ):
272284 """Iterate over multiple simulations."""
273285 for tree , p_dict , rng_seed in simulation_input :
274- seqgen_params = get_seqgen_params (p_dict )
286+ seqgen_params = get_seqgen_params (p_dict , basefreqs = basefreqs )
275287 result = simulate_matrix (
276288 tree , seq_len = seq_len , rng_seed = rng_seed ,
277289 seqgen_path = seqgen_path , ** seqgen_params )
0 commit comments