1- '''This program will run CodeML on a directory of single gene alignments.
2- It will generate a unique control file and tree file for each input gene
3- before invoking CodeML using the number of CPUs specified by the user
4- (default = 1).
1+ '''This program will call the CodeML cython script on a directory of
2+ single gene alignments. It will generate a unique control file and tree
3+ file for each input gene before invoking CodeML using the number of CPUs
4+ specified by the user (default = 1).
55
66 Copyright 2016 by Shawn Rupp'''
77
8+ from __future__ import division
89from datetime import datetime
9- from sys import stdout
1010from glob import glob
11- from subprocess import Popen
12- from shlex import split
13- from functools import partial
1411from multiprocessing import Pool , cpu_count
12+ from functools import partial
1513import argparse
1614import shutil
15+ import sys
1716import math
1817import os
19- import re
20- from callPhyML import phyml
18+ from parallelCodeML import parallelize
2119
22- # Define max number of threads and devnull for capturing stdout
2320MAXCPU = cpu_count ()
24- DEVNULL = open (os .devnull , "w" )
2521
2622#-----------------------------------------------------------------------------
2723
@@ -44,47 +40,42 @@ def outputFiles(outdir):
4440
def controlFiles(indir, outdir, forward, cpu):
    '''Creates the temporary directory and reads the CodeML control file.

    The control file is searched for with the pattern "*.ctl" in the
    directory that contains outdir, so outdir must end with "/".

    Parameters:
        indir, forward, cpu: not used by this function; kept so existing
            callers keep working.
        outdir: path of the output directory, ending in "/". A "tmp/"
            subdirectory is created inside it if it does not exist yet.

    Returns:
        A tuple (ctl, multiple). ctl is the list of lines read from the
        control file. multiple is True when the control file sets runmode
        to 0 or 1, False otherwise.

    Raises:
        SystemExit: if no control file, or more than one, is found.
    '''
    # Make temp directory
    tmp = outdir + "tmp/"
    try:
        os.mkdir(tmp)
    except FileExistsError:
        # A tmp directory left over from an earlier run is reused
        pass
    # Reconstruct output path: drop the last directory name and the empty
    # string that follows the trailing "/"
    parent = "".join(part + "/" for part in outdir.split("/")[:-2])
    control = glob(parent + "*.ctl")
    if len(control) != 1:
        if control:
            # Quit if multiple .ctl files are present
            print("\n\tPlease provide only one control file for CodeML.\n")
        else:
            # Quit with a message instead of failing on control[0]
            print(("\n\tNo control file (*.ctl) found in {0!s}\n"
                    ).format(parent or "./"))
        raise SystemExit(1)
    with open(control[0], "r") as infile:
        ctl = infile.readlines()
    # Determine if a phylogenic tree is needed
    multiple = False
    for line in ctl:
        # Only the text before "*" is parsed, so a commented-out or
        # explanatory "runmode = 0" does not count as the active setting
        key, _, value = line.split("*")[0].partition("=")
        if key.strip() == "runmode" and value.split()[:1] in (["0"], ["1"]):
            multiple = True
            break
    return ctl, multiple
8167
8268#-----------------------------------------------------------------------------
8369
8470def main ():
8571 starttime = datetime .now ()
8672 # Save path to the AlignmentProcessor directory
8773 ap = os .getcwd () + "/"
74+ if " " in ap :
75+ # Change to warning ########################################################
76+ print ("\t WARNING: AlignmentProcessor will not run properly if there \
77+ is a space in its PATH name." )
78+ ap = ap .replace (" (ASU)" , "" )
8879 run = False
8980 # Parse command
9081 parser = argparse .ArgumentParser (description = "Runs CodeML on all files \
@@ -95,7 +86,8 @@ def main():
9586 parser .add_argument ("-f" , default = "" ,
9687help = "Forward species (name must be the same as it appears in input files." )
9788 parser .add_argument ("--cleanUp" , action = "store_true" ,
98- help = "Remove temporary files (it may be useful to retain phylogenic trees for future use)." )
89+ help = "Remove temporary files (it may be useful to retain phylogenic trees \
90+ for future use)." )
9991 args = parser .parse_args ()
10092 # Assign arguments
10193 indir = args .i
@@ -108,29 +100,27 @@ def main():
108100 if cpu > MAXCPU :
109101 cpu = MAXCPU
110102 forward = args .f
111- cleanup = args .cleanUp
112103 # Reads in required data
113104 finished , completed = outputFiles (outdir )
114- run , multiple = controlFiles (indir , outdir , forward , cpu )
115- if run == True :
116- # Call CodeML after PhyML completes.
105+ ctl , multiple = controlFiles (indir , outdir , forward , cpu )
106+ # Call PhyML and CodeML in parallel.
107+ if ctl :
108+ # Call CodeML and PhyML
117109 genes = glob (indir + "*.phylip" )
118110 l = int (len (genes ))
119- # Determine chunksize
120- if l <= cpu :
121- chunk = 1
122- elif l > cpu :
123- chunk = int (math .ceil (l / cpu ))
111+ func = partial (parallelize , ap , outdir , finished , completed , multiple ,
112+ cpu , ctl , forward )
113+ print (("\t Running CodeML on {0!s} genes with {1!s} threads...."
114+ ).format (l , cpu ))
124115 pool = Pool (processes = cpu )
125- func = partial (runCodeml , ap , outdir , finished , completed , multiple )
126- print (("\t Running CodeML with {0!s} threads...." ).format (cpu ))
127- rcml = pool .imap_unordered (func , genes , chunksize = chunk )
116+ for i , _ in enumerate (pool .imap_unordered (func , genes ), 1 ):
117+ sys .stderr .write ("\r \t {0:%} of genes have finished" .format (i / l ))
128118 pool .close ()
129- pool .join ()
119+ pool .join ()
130120 # Remove tmp directory
131- if cleanup == True :
121+ if args . cleanUp == True :
132122 shutil .rmtree (outdir + "tmp/" )
133- print (("\t CodeML runtime: {0!s}" ).format (datetime .now () - starttime ))
123+ print (("\n \ t CodeML runtime: {0!s}" ).format (datetime .now () - starttime ))
134124
135125if __name__ == "__main__" :
136126 main ()
0 commit comments