Skip to content

Commit 21596d6

Browse files
committed
Fixed bug causing CodeML to hang
1 parent b191ea9 commit 21596d6

2 files changed

Lines changed: 30 additions & 54 deletions

File tree

bin/04_CallCodeML.py

Lines changed: 25 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
import math
1818
import os
1919
import re
20+
from callPhyML import phyml
2021

2122
# Define max number of threads and devnull for capturing stdout
2223
MAXCPU = cpu_count()
@@ -49,39 +50,34 @@ def controlFiles(indir, outdir, forward, cpu):
4950
os.mkdir(tmp)
5051
except FileExistsError:
5152
pass
52-
cmd = ("python bin/04_callPhyML.py -i" + indir + " -o " + tmp +
53-
" -t " + str(cpu))
54-
if forward:
55-
cmd += " -f " + forward
56-
phyml = Popen(split(cmd))
57-
phyml.wait()
58-
#if phyml.returncode() == 0:
59-
return True
53+
multiple = phyml(indir, tmp, cpu, forward)
54+
return True, multiple
6055

6156
#-----------------------------------------------------------------------------
6257

63-
def runCodeml(ap, outdir, finished, completed, gene):
58+
def runCodeml(ap, outdir, finished, completed, multiple, gene):
6459
'''Creates temporary control and tree files and runs CodeML.'''
6560
filename = gene.split("/")[-1]
6661
geneid = filename.split(".")[0]
6762
wd = outdir + "tmp/" + geneid + "/"
68-
if filename.split(".")[1] == "2":
69-
if len(glob(wd + "*")) > 1:
70-
# Skip pairwise genes if tree files are present
71-
pass
72-
elif (geneid + "\n") in completed:
63+
if (geneid + "\n") in completed:
7364
pass
7465
else:
75-
tempctl = wd + geneid + ".ctl"
76-
# Calls CodeML
66+
tempctl = wd + "codeml.ctl"
7767
os.chdir(wd)
78-
cm = Popen(split(ap + "/paml/bin/codeml " + tempctl),
79-
stdout = DEVNULL)
80-
cm.wait()
81-
if cm.returncode == 0:
82-
# Append gene ID to list of finishedCodeML.txt
83-
with open(finished, "a") as fin:
84-
fin.write(geneid + "\n")
68+
if multiple == True:
69+
if filename.split(".")[1] == "2":
70+
pass
71+
else:
72+
# Calls CodeML if 3 or more sequences are present
73+
cm = Popen(split(ap + "paml/bin/codeml " + tempctl),
74+
stdout = DEVNULL)
75+
elif multiple == False:
76+
# Call CodeML for all files
77+
cm = Popen(split(ap + "paml/bin/codeml " + tempctl),
78+
stdout = DEVNULL)
79+
with open(finished, "a") as fin:
80+
fin.write(geneid + "\n")
8581

8682
#-----------------------------------------------------------------------------
8783

@@ -115,7 +111,7 @@ def main():
115111
cleanup = args.cleanUp
116112
# Reads in required data
117113
finished, completed = outputFiles(outdir)
118-
run = controlFiles(indir, outdir, forward, cpu)
114+
run, multiple = controlFiles(indir, outdir, forward, cpu)
119115
if run == True:
120116
# Call CodeML after PhyML completes.
121117
genes = glob(indir + "*.phylip")
@@ -126,15 +122,15 @@ def main():
126122
elif l > cpu:
127123
chunk = int(math.ceil(l/cpu))
128124
pool = Pool(processes = cpu)
129-
func = partial(runCodeml, ap, outdir, finished, completed)
130-
print("\tRunning CodeML with", str(cpu), "threads....")
131-
rcml = pool.imap(func, genes, chunksize = chunk)
125+
func = partial(runCodeml, ap, outdir, finished, completed, multiple)
126+
print(("\tRunning CodeML with {0!s} threads....").format(cpu))
127+
rcml = pool.imap_unordered(func, genes, chunksize = chunk)
132128
pool.close()
133-
pool.join()
129+
pool.join()
134130
# Remove tmp directory
135131
if cleanup == True:
136132
shutil.rmtree(outdir + "tmp/")
137-
print("\tCodeML runtime: ", datetime.now() - starttime, "\n")
133+
print(("\tCodeML runtime: {0!s}").format(datetime.now() - starttime))
138134

139135
if __name__ == "__main__":
140136
main()
Lines changed: 5 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -67,7 +67,7 @@ def makeTree(indir, outdir, forward, completed, ctl, gene):
6767
pass
6868
# Set unique file names
6969
outfile = (out + geneid + "." + filename.split(".")[1] + ".mlc")
70-
tempctl = wd + geneid + ".ctl"
70+
tempctl = wd + "codeml.ctl"
7171
treefile = wd + filename + "_phyml_tree.txt"
7272
# Make unique control file
7373
makeCtl(gene, outfile, tempctl, treefile, ctl)
@@ -130,7 +130,7 @@ def pairwiseControl(indir, outdir, ctl, gene):
130130
out += i + "/"
131131
# Set unique file names
132132
outfile = (out + geneid +"."+ filename.split(".")[1] + ".mlc")
133-
tempctl = wd + geneid + ".ctl"
133+
tempctl = wd + "codeml.ctl"
134134
treefile = ""
135135
# Make unique control file
136136
makeCtl(gene, outfile, tempctl, treefile, ctl)
@@ -150,27 +150,9 @@ def makeCtl(gene, outfile, tempctl, treefile, ctl):
150150

151151
#-----------------------------------------------------------------------------
152152

153-
def main():
154-
# Parse command
155-
parser = argparse.ArgumentParser(description="Runs CodeML on all files \
156-
in a directory.")
157-
parser.add_argument("-i", help="Path to input directory.")
158-
parser.add_argument("-o", help="Path to temp directory.")
159-
parser.add_argument("-t", type=int, default=1, help="Number of threads.")
160-
parser.add_argument("-f", default="",
161-
help="Forward species (name must be the same as it appears in input files.")
162-
args = parser.parse_args()
163-
# Assign arguments
164-
indir = args.i
165-
if indir[-1] != "/":
166-
indir += "/"
167-
outdir = args.o
168-
if outdir[-1] != "/":
169-
outdir += "/"
170-
cpu = args.t
171-
forward = args.f
153+
def phyml(indir, outdir, cpu, forward=""):
172154
# Reads in required data
173-
completed, ctl, multiple = outputFiles( outdir)
155+
completed, ctl, multiple = outputFiles(outdir)
174156
# Call PhyML for multiple alignments or write pairwise control files.
175157
genes = glob(indir + "*.phylip")
176158
l = int(len(genes))
@@ -196,6 +178,4 @@ def main():
196178
rpml = pool.imap(func, genes, chunksize = chunk)
197179
pool.close()
198180
pool.join()
199-
200-
if __name__ == "__main__":
201-
main()
181+
return multiple

0 commit comments

Comments
 (0)