Skip to content

Commit 953a769

Browse files
committed
Added status for CodeML, forces nucleotides into capital letters.
1 parent 61e8cf5 commit 953a769

5 files changed

Lines changed: 163 additions & 238 deletions

File tree

AlignmentProcessorReadMe.pdf

622 Bytes
Binary file not shown.

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
1111
GNU General Public License for more details.
1212

13-
# AlignmentProcessor1.2 Package
13+
# AlignmentProcessor1.3 Package
1414

1515
# Dependencies:
1616
Python 3

bin/04_CallCodeML.py

Lines changed: 46 additions & 56 deletions
Original file line numberDiff line numberDiff line change
@@ -1,27 +1,23 @@
1-
'''This program will run CodeML on a directory of single gene alignments.
2-
It will generate a unique control file and tree file for each input gene
3-
before invoking CodeML using the number of CPUs specified by the user
4-
(default = 1).
1+
'''This program will call the CodeML Cython script on a directory of
2+
single gene alignments. It will generate a unique control file and tree
3+
file for each input gene before invoking CodeML using the number of CPUs
4+
specified by the user (default = 1).
55
66
Copyright 2016 by Shawn Rupp'''
77

8+
from __future__ import division
89
from datetime import datetime
9-
from sys import stdout
1010
from glob import glob
11-
from subprocess import Popen
12-
from shlex import split
13-
from functools import partial
1411
from multiprocessing import Pool, cpu_count
12+
from functools import partial
1513
import argparse
1614
import shutil
15+
import sys
1716
import math
1817
import os
19-
import re
20-
from callPhyML import phyml
18+
from parallelCodeML import parallelize
2119

22-
# Define max number of threads and devnull for capturing stdout
2320
MAXCPU = cpu_count()
24-
DEVNULL = open(os.devnull, "w")
2521

2622
#-----------------------------------------------------------------------------
2723

@@ -44,47 +40,42 @@ def outputFiles(outdir):
4440

4541
def controlFiles(indir, outdir, forward, cpu):
4642
'''Reads input files and stores them in memory'''
43+
multiple = False
4744
# Make temp directory
4845
tmp = outdir + "tmp/"
4946
try:
5047
os.mkdir(tmp)
5148
except FileExistsError:
5249
pass
53-
multiple = phyml(indir, tmp, cpu, forward)
54-
return True, multiple
55-
56-
#-----------------------------------------------------------------------------
57-
58-
def runCodeml(ap, outdir, finished, completed, multiple, gene):
59-
'''Creates temporary control and tree files and runs CodeML.'''
60-
filename = gene.split("/")[-1]
61-
geneid = filename.split(".")[0]
62-
wd = outdir + "tmp/" + geneid + "/"
63-
if (geneid + "\n") in completed:
64-
pass
65-
else:
66-
tempctl = wd + "codeml.ctl"
67-
os.chdir(wd)
68-
if multiple == True:
69-
if filename.split(".")[1] == "2":
70-
pass
71-
else:
72-
# Calls CodeML if 3 or more sequences are present
73-
cm = Popen(split(ap + "paml/bin/codeml " + tempctl),
74-
stdout = DEVNULL)
75-
elif multiple == False:
76-
# Call CodeML for all files
77-
cm = Popen(split(ap + "paml/bin/codeml " + tempctl),
78-
stdout = DEVNULL)
79-
with open(finished, "a") as fin:
80-
fin.write(geneid + "\n")
50+
# Reconstruct output path
51+
path = outdir.split("/")[:-2]
52+
out = ""
53+
for i in path:
54+
out += i + "/"
55+
control = glob(out + "*.ctl")
56+
if len(control) > 1:
57+
# Quit if multiple .ctl files are present
58+
print("\n\tPlease provide only one control file for CodeML.\n")
59+
quit()
60+
with open(control[0], "r") as infile:
61+
ctl = infile.readlines()
62+
for line in ctl:
63+
# Determine if a phylogenetic tree is needed
64+
if "runmode = 0" in line or "runmode = 1" in line:
65+
multiple = True
66+
return ctl, multiple
8167

8268
#-----------------------------------------------------------------------------
8369

8470
def main():
8571
starttime = datetime.now()
8672
# Save path to the AlignmentProcessor directory
8773
ap = os.getcwd() + "/"
74+
if " " in ap:
75+
# Change to warning ########################################################
76+
print("\tWARNING: AlignmentProcessor will not run properly if there \
77+
is a space in its PATH name.")
78+
ap = ap.replace(" (ASU)", "")
8879
run = False
8980
# Parse command
9081
parser = argparse.ArgumentParser(description="Runs CodeML on all files \
@@ -95,7 +86,8 @@ def main():
9586
parser.add_argument("-f", default="",
9687
help="Forward species (name must be the same as it appears in input files.")
9788
parser.add_argument("--cleanUp", action="store_true",
98-
help="Remove temporary files (it may be useful to retain phylogenic trees for future use).")
89+
help="Remove temporary files (it may be useful to retain phylogenic trees \
90+
for future use).")
9991
args = parser.parse_args()
10092
# Assign arguments
10193
indir = args.i
@@ -108,29 +100,27 @@ def main():
108100
if cpu > MAXCPU:
109101
cpu = MAXCPU
110102
forward = args.f
111-
cleanup = args.cleanUp
112103
# Reads in required data
113104
finished, completed = outputFiles(outdir)
114-
run, multiple = controlFiles(indir, outdir, forward, cpu)
115-
if run == True:
116-
# Call CodeML after PhyML completes.
105+
ctl, multiple = controlFiles(indir, outdir, forward, cpu)
106+
# Call PhyML and CodeML in parallel.
107+
if ctl:
108+
# Call CodeML and PhyML
117109
genes = glob(indir + "*.phylip")
118110
l = int(len(genes))
119-
# Determine chunksize
120-
if l <= cpu:
121-
chunk = 1
122-
elif l > cpu:
123-
chunk = int(math.ceil(l/cpu))
111+
func = partial(parallelize, ap, outdir, finished, completed, multiple,
112+
cpu, ctl, forward)
113+
print(("\tRunning CodeML on {0!s} genes with {1!s} threads...."
114+
).format(l, cpu))
124115
pool = Pool(processes = cpu)
125-
func = partial(runCodeml, ap, outdir, finished, completed, multiple)
126-
print(("\tRunning CodeML with {0!s} threads....").format(cpu))
127-
rcml = pool.imap_unordered(func, genes, chunksize = chunk)
116+
for i, _ in enumerate(pool.imap_unordered(func, genes), 1):
117+
sys.stderr.write("\r\t{0:%} of genes have finished".format(i/l))
128118
pool.close()
129-
pool.join()
119+
pool.join()
130120
# Remove tmp directory
131-
if cleanup == True:
121+
if args.cleanUp == True:
132122
shutil.rmtree(outdir + "tmp/")
133-
print(("\tCodeML runtime: {0!s}").format(datetime.now() - starttime))
123+
print(("\n\tCodeML runtime: {0!s}").format(datetime.now() - starttime))
134124

135125
if __name__ == "__main__":
136126
main()

bin/callPhyML.py

Lines changed: 0 additions & 181 deletions
This file was deleted.

0 commit comments

Comments
 (0)