44
55 Copyright 2016 by Shawn Rupp'''
66
7- import argparse
7+ from datetime import datetime
8+ from sys import stdout
9+ from glob import glob
810from subprocess import Popen
911from shlex import split
10- from glob import glob
12+ from functools import partial
13+ from multiprocessing import Pool , cpu_count
14+ import argparse
1115import os
1216
17+ # Define max number of threads and devnull for capturing stdout
18+ MAXCPU = cpu_count ()
1319DEVNULL = open (os .devnull , "w" )
1420
def calculateKaKs(indir, outdir, method, axt):
    '''Calculates substitution rates for a single .axt alignment file.

    Runs bin/KaKs_Calculator on one input file and writes the result to
    <outdir><basename>.kaks. The function takes exactly one file per call so
    it can be mapped over a list of files by a multiprocessing pool.

    Parameters:
        indir:  Input directory. Unused here; retained so the positional
                signature stays compatible with existing callers.
        outdir: Output directory prefix. It is concatenated directly with
                the file name, so it must already end with a path separator.
        method: Value passed to KaKs_Calculator's -m option.
        axt:    Path to the .axt file to analyze.

    Returns:
        True if KaKs_Calculator exited with status 0, False otherwise.
    '''
    # Output name is the input's base name up to the first ".", plus ".kaks"
    filename = os.path.basename(axt)
    outfile = outdir + filename.split(".")[0] + ".kaks"
    # Pass the arguments as a list so paths containing spaces or shell
    # metacharacters reach the program intact.
    cmd = ["bin/KaKs_Calculator", "-i", axt, "-o", outfile, "-m", method]
    ck = Popen(cmd, stdout=DEVNULL)
    # Popen.returncode is an integer attribute, not a method; wait() blocks
    # until the process exits and returns that same exit status.
    return ck.wait() == 0
2832
2933def compileKsKs (outdir ):
30- '''Prints Ka/Ks output as a single csv file.'''
34+ '''Prints Ka/Ks output as a single tsv file.'''
3135 print ("\t Compiling KaKs_Calculator output..." )
3236 # Set counter so the header is only printed once
3337 count = 0
@@ -56,13 +60,17 @@ def compileKsKs(outdir):
5660 output .write ("GeneID\t " + line .replace ("\t " ,"," ))
5761 count += 1
5862
63+ #-----------------------------------------------------------------------------
64+
5965def main ():
66+ starttime = datetime .now ()
6067 concatenate = False
6168 parser = argparse .ArgumentParser (description = "This program will run \
6269 KaKs_Calculator on a directory." )
63- parser .add_argument ("-i" , help = "Path to input file." )
64- parser .add_argument ("-o" , help = "Path to output file." )
65- parser .add_argument ("-m" , help = "Method for calculating Ka/Ks." )
70+ parser .add_argument ("-i" , help = "Path to input file." )
71+ parser .add_argument ("-o" , help = "Path to output file." )
72+ parser .add_argument ("-m" , help = "Method for calculating Ka/Ks." )
73+ parser .add_argument ("-t" , type = int , help = "Number of threads." )
6674 # Parse arguments and assign to variables
6775 args = parser .parse_args ()
6876 indir = args .i
@@ -72,9 +80,21 @@ def main():
7280 if outdir != "/" :
7381 outdir += "/"
7482 method = args .m
75- concatenate = calculateKaKs (indir , outdir , method )
76- if concatenate == True :
77- compileKsKs (outdir )
83+ cpu = args .t
84+ if cpu > MAXCPU :
85+ cpu = MAXCPU
86+ # Call Ka/Ks_Calculator in parallel.
87+ genes = glob (indir + "*.axt" )
88+ l = int (len (genes ))
89+ pool = Pool (processes = cpu )
90+ func = partial (calculateKaKs , indir , outdir , method )
91+ print ("\t Running CodeML with" , str (cpu ), "threads...." )
92+ rcml = pool .imap (func , genes )
93+ pool .close ()
94+ pool .join ()
95+ # Compile output
96+ compileKsKs (outdir )
97+ print ("\t KaKs_Calculator runtime: " , datetime .now () - starttime )
7898
7999if __name__ == "__main__" :
80100 main ()
0 commit comments