Skip to content

Commit c7ee5bc

Browse files
committed
Added capability to run CodeML on multiple threads.
1 parent e35359e commit c7ee5bc

10 files changed

Lines changed: 199 additions & 93 deletions

AlignmentProcessor.py

Lines changed: 21 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -112,10 +112,14 @@ def splitFasta(fasta, outdir):
112112
if sf.returncode == 0:
113113
return True
114114

115-
def rmHeader(outdir):
115+
def rmHeader(outdir, commonNames):
116116
'''Removes header information and changes genome build to common name.'''
117117
print("Changing header...")
118-
rh = Popen(split("python bin/02_RemoveHeader.py " + outdir))
118+
if commonNames == False:
119+
rh = Popen(split("python bin/02_RemoveHeader.py " + outdir))
120+
elif commonNames == True:
121+
rh = Popen(split("python bin/02_RemoveHeader.py " + outdir +
122+
"--changeNames"))
119123
rh.wait()
120124
if rh.returncode == 0:
121125
return True
@@ -193,14 +197,15 @@ def phylipConvert(outdir, starttime, codeml):
193197
print("Total runtime: ", elapsedtime)
194198
return True
195199

196-
def runcodeml(outdir, retainStops, starttime):
200+
def runcodeml(cpu, outdir, retainStops, starttime):
197201
'''Runs codeml on a directory.'''
198202
print("Running codeml...")
199203
if retainStops == False:
200-
cm = Popen(split("python bin/07_CodeMLonDir.py " + outdir))
204+
cm = Popen(split("python bin/07_CodeMLonDir.py -i " + outdir
205+
+ " -n " + cpu))
201206
elif retainStops == True:
202-
cm = Popen(split("python bin/07_CodeMLonDir.py " + outdir +
203-
"--retainStops"))
207+
cm = Popen(split("python bin/07_CodeMLonDir.py -i " + outdir +
208+
" --retainStops -n " + cpu))
204209
cm.wait()
205210
if cm.returncode == 0:
206211
elapsedtime = datetime.now() - starttime
@@ -240,12 +245,15 @@ def helplist():
240245
def main():
241246
starttime = datetime.now()
242247
# Set optional parameters to False:
243-
axt = False
248+
commonNames = False
244249
retainStops = False
250+
axt = False
245251
phylip = False
246252
kaks = False
247253
codeml = False
248254
conv = False
255+
# Popen requires string input, so the following are typecast as strings
256+
cpu = "1"
249257
percent = "0.5"
250258
ref = "void"
251259
# Extract values from command line:
@@ -283,6 +291,10 @@ def main():
283291
codeml = True
284292
elif i == "--ucsc":
285293
conv = True
294+
elif i == "-n":
295+
cpu = str(argv[argv.index(i) + 1])
296+
elif i == "--changeNames":
297+
commonNames = True
286298
elif i == "--retainStops":
287299
retainStops = True
288300
# Check input commands prior to running:
@@ -316,7 +328,7 @@ def main():
316328
else:
317329
sf = splitFasta(fasta, outdir)
318330
if sf == True:
319-
rh = rmHeader(outdir)
331+
rh = rmHeader(outdir, commonNames)
320332
if rh == True:
321333
cf = checkFrame(outdir, ref)
322334
if cf == True:
@@ -340,7 +352,7 @@ def main():
340352
# Run codeml
341353
if codeml == True and kaks == False:
342354
if pc == True:
343-
runcodeml(outdir, retainStops, starttime)
355+
runcodeml(cpu, outdir, retainStops, starttime)
344356

345357
if __name__ == "__main__":
346358
main()

AlignmentProcessorReadMe.txt

Lines changed: 22 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -162,14 +162,29 @@ in the bin/ directory.
162162

163163
-i the path to your input fasta alignment.
164164
-o the path to your working/output directory.
165-
-r the common name of your reference species (more below).
165+
-r the build or common name of your reference species (more below).
166166

167167
# Optional Arguments:
168168

169169
--ucsc This will invoke 00_convertHeader.py, which will convert the
170170
headers from UCSC fasta files so they only contain build
171171
names and gene IDs. This does not need to be run on Stitch Gene
172172
Blocks output.
173+
174+
--retainStops This will tell AlignmentProcessor to retain sequences
175+
that contain internal stop codons. By default,
176+
sequences with internal stop codons will be removed
177+
from the analysis as they may bias the results.
178+
179+
--changeNames Tells the program to change genome build names to
180+
common names (more below).
181+
182+
-% a decimal value specifying the minimum percentage of reads
183+
that must remain after replacing unknown codons with gaps
184+
(Default = 0.5). You may wish to use a lower threshold
185+
for highly diverged species or for low quality genomes.
186+
187+
# Ka/Ks_Calculator/CodeML related arguments
173188

174189
--axt/phylip Specifies which format to convert the files into
175190
(axt format for KaKs_Calculator; phylip for CodeML
@@ -186,15 +201,10 @@ in the bin/ directory.
186201
This file must be titled "codeml.ctl" (the default
187202
name given by PAML).
188203

189-
--retainStops This will tell AlignmentProcessor to retain sequences
190-
that contain internal stop codons. By default,
191-
sequences with internal stop codons will be removed
192-
from the analysis as they may bias the results.
193-
194-
-% a decimal value specifying the minimum percentage of reads
195-
that must remain after replacing unknown codons with gaps
196-
(Default = 0.5). You may wish to use a lower threshold
197-
for highly diverged species or for low quality genomes.
204+
-n if "--codeml" is selected, you may specify the number of CPUs
205+
to run CodeML. CodeML itself cannot be parallelized, but
206+
AlignmentProcessor can call multiple instances of CodeML to
207+
shorten overall run time. (Default = 1)
198208

199209
# Additional Commands
200210

@@ -431,7 +441,7 @@ memory.
431441
Change directory into the AlignmentProcessor folder. Paste the following into
432442
a terminal:
433443

434-
python AlignmentProcessor.py --axt --kaks --ucsc -r Green_anole \
444+
python AlignmentProcessor.py --axt --kaks --ucsc -r anoCar2 \
435445
-i kaksTest.fa -o test/
436446

437447
This will return a text file with 11 lines.
@@ -441,7 +451,7 @@ The test directory already contains sample CodeML control and tree files, so
441451
all you need to do is change into the AlignmentProcessor directory and paste
442452
the following:
443453

444-
python AlignmentProcessor.py --phylip --codeml --ucsc -r Green_anole \
454+
python AlignmentProcessor.py --phylip --codeml --ucsc -n 2 -r anoCar2 \
445455
-i codemlTest.fa -o test/
446456

447457
There should be 8 .mlc files in the 07_codeml directory.

bin/02_RemoveHeader.py

Lines changed: 13 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
from sys import argv
1111
from glob import glob
1212

13-
def removeHeader(path):
13+
def removeHeader(path, commonNames):
1414
# Open all input files in the directory
1515
inpath = path + "01_splitFastaFiles/" + "*.fa"
1616
files = glob(inpath)
@@ -25,7 +25,11 @@ def removeHeader(path):
2525
if line[0] == ">":
2626
# Split header to get build name and remove extra info
2727
header = line.split(".")
28-
name = changeNames(header[0])
28+
# Replace build name with common name
29+
if commonNames == True:
30+
name = changeNames(header[0])
31+
elif commonNames == False:
32+
name = header[0][1:]
2933
# Write common name to file
3034
output.write(">" + str(name) + "\n")
3135
else:
@@ -56,9 +60,15 @@ def main():
5660
<path to inut and output directories>")
5761
quit()
5862
else:
63+
commonNames = False
5964
# Set directory names and add a trailing "/" if necessary
6065
path = argv[1]
61-
removeHeader(path)
66+
try:
67+
if argv[2] == "--changeNames":
68+
commonNames = True
69+
except IndexError:
70+
pass
71+
removeHeader(path, commonNames)
6272

6373
if __name__ == "__main__":
6474
main()

bin/02_nameList.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
Aapl1 Anolis_apletophallus
1+
Aapl1 Slender_anole
22
Aaur1 Grass_anole
33
Afre1 Bridled_anole
44
ailMel1 Panda

0 commit comments

Comments
 (0)