Skip to content

Commit 9db5239

Browse files
committed
Added Windows and Mac binaries of KaKs_Calculator
1 parent 99f253e commit 9db5239

228 files changed

Lines changed: 59958 additions & 136 deletions

File tree

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

AlignmentProcessor.py

Lines changed: 274 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,274 @@
1+
'''AlignmentProcessor will run the subsituion rate pipeline to produce trimmed
2+
axt or phylip files for use with KaKs_calculator or PhyMl.
3+
4+
5+
Copyright 2016 by Shawn Rupp
6+
7+
This package is free software: you can redistribute it and/or modify
8+
it under the terms of the GNU General Public License as published by
9+
the Free Software Foundation, either version 3 of the License, or
10+
(at your option) any later version.
11+
12+
This program is distributed in the hope that it will be useful,
13+
but WITHOUT ANY WARRANTY; without even the implied warranty of
14+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15+
GNU General Public License for more details.'''
16+
17+
from datetime import datetime
18+
from sys import argv
19+
from subprocess import Popen
20+
from shlex import split
21+
from glob import glob
22+
import os
23+
24+
def makeDir(path, outdir, axt, phylip):
25+
'''Makes all sub-directories used by program.'''
26+
os.chdir(outdir)
27+
for i in ["01_splitFastaFiles", "02_rmHeader", "03_checkFrame",
28+
"04_countBasesPercent", "05_ReplaceStopCodons"]:
29+
try:
30+
os.mkdir(i)
31+
except FileExistsError:
32+
pass
33+
if axt == True:
34+
for i in ["06_axtFiles", "KaKsOutput"]:
35+
try:
36+
os.mkdir(i)
37+
except FileExistsError:
38+
pass
39+
if phylip == True:
40+
for i in ["06_phylipFiles", "07_codeml"]:
41+
try:
42+
os.mkdir(i)
43+
except FileExistsError:
44+
pass
45+
os.chdir(path)
46+
47+
def convertHeaders(fasta):
48+
'''Changes headers of UCSC CDS fasta files to includ eonly build name and
49+
gene ID'''
50+
print("Converting fasta headers...")
51+
ch = Popen(split("python bin/00_ConvertHeader.py " + fasta))
52+
ch.wait()
53+
# Extcact converted file name
54+
path = fasta.split("/")[:-1]
55+
outpath = ""
56+
for i in path:
57+
outpath += i + "/"
58+
newfasta = outpath + "newHeader." + fasta.split("/")[-1]
59+
if ch.returncode == 0:
60+
return True, newfasta
61+
62+
def splitFasta(fasta, outdir):
63+
'''Splits fasta alignment into one file per gene.'''
64+
print("Splitting fasta file into separate files by gene...")
65+
sf = Popen(split("python bin/01_SplitFastaFiles.py " + " "
66+
+ fasta + " " + outdir))
67+
sf.wait()
68+
if sf.returncode == 0:
69+
return True
70+
71+
def rmHeader(outdir):
72+
'''Removes header information and changes genome build to common name.'''
73+
print("Changing header...")
74+
rh = Popen(split("python bin/02_RemoveHeader.py " + outdir))
75+
rh.wait()
76+
if rh.returncode == 0:
77+
return True
78+
79+
def checkFrame(outdir, ref):
80+
'''Checks nucleotide frame of secondary species against the reference
81+
species'''
82+
print("Checking nucleotide frames...")
83+
cf = Popen(split("python bin/03_CheckFrame.py " + " " + outdir
84+
+ " " + ref))
85+
cf.wait()
86+
if cf.returncode == 0:
87+
return True
88+
89+
def countBases(outdir, percent):
90+
'''Checks that each species has at least 50% of it's nucleotide content.'''
91+
print("Checking nucleotide content...")
92+
cb = Popen(split("python bin/04_CountBases.py " + " " + percent +
93+
" " + outdir))
94+
cb.wait()
95+
if cb.returncode == 0:
96+
return True
97+
98+
def replaceStop(outdir):
99+
'''Removes stop codons for downstream analysis.'''
100+
print("Removing stop codons...")
101+
rs = Popen(split("python bin/05_ReplaceStopCodons.py " + " "
102+
+ outdir))
103+
rs.wait()
104+
if rs.returncode == 0:
105+
return True
106+
107+
def axtConvert(outdir, kaks, starttime):
108+
'''Open all input files in the directory and convert to axt script for
109+
use with KaKs_Calculator'''
110+
print("Converting fasta files into axt files...")
111+
ac = Popen(split("python bin/06_FASTAtoAXT.py " + outdir))
112+
ac.wait()
113+
if ac.returncode == 0:
114+
if kaks == True:
115+
return True
116+
elif kaks == False:
117+
print("Finished converting files.")
118+
elapsedtime = datetime.now() - starttime
119+
print("Total runtime: ", elapsedtime)
120+
121+
def calculateKaKs(outdir):
122+
'''Calculates substition rates.'''
123+
print("Calculating Ka/Ks values...")
124+
ck = Popen(split("python bin/07_KaKsonDir.py " + outdir))
125+
ck.wait()
126+
if ck.returncode == 0:
127+
return True
128+
129+
def printCSV(outdir, starttime):
130+
'''Prints Ka/Ks output as a single csv file.'''
131+
print("Printing Ka/Ks output as a single text file...")
132+
Popen(split("python bin/08_compileKaKs.py " + outdir))
133+
print("Finished calculating Ka/Ks values.")
134+
elapsedtime = datetime.now() - starttime
135+
print("Total runtime: ", elapsedtime)
136+
137+
def phylipConvert(outdir, starttime, codeml):
138+
'''Converts fasta files to phylip format'''
139+
print("Converting fasta files to phylip...")
140+
pc = Popen(split("python bin/06_FASTAtoPhylip.py " + " " + outdir))
141+
pc.wait()
142+
if pc.returncode == 0:
143+
if codeml == False:
144+
elapsedtime = datetime.now() - starttime
145+
print("Total runtime: ", elapsedtime)
146+
return True
147+
148+
def runcodeml(outdir, starttime):
149+
'''Runs codeml on a directory.'''
150+
print("Running codeml...")
151+
cm = Popen(split("python bin/07_CodeMLonDir.py " + outdir + "codeml.ctl "
152+
+ outdir))
153+
cm.wait()
154+
if cm.returncode == 0:
155+
elapsedtime = datetime.now() - starttime
156+
print("Total runtime: ", elapsedtime)
157+
158+
def helplist():
159+
print()
160+
print("### AlignmentProcessor will run the subsituion rate pipeline to \
161+
produce trimmed axt files for use with KaKs_calculator. ###")
162+
print(" example usage: python AlignmentProcessor.py -% <decimal> \
163+
--axt/phylip --kaks/codeml -i <input fasta file> -o \
164+
<path to output directory> -r <reference species>")
165+
print(" --axt Converts files to axt for use in KaKs_Calcuator.")
166+
print(" --phylip Converts files to phylip for use in PhyML.")
167+
print(" --kaks Runs KaKs_Calcuator if --axt is also specified")
168+
print(" --codeml Runs codeml if --phylip is also specified")
169+
print(" --ucsc converts headers of CDS fasta files obtained from \
170+
the UCSC genome browser")
171+
print(" -r the name of the reference species which will be used to \
172+
determine the open reading frame")
173+
print(" -% Sets the percentage cutoff for the countBases step (50% \
174+
by default).")
175+
print(" -i path to input file containing a multifasta alignment")
176+
print(" -o AlignmentProcessor will use this as its working \
177+
directory and print output to this directory")
178+
print(" -v prints coyright and version information to the screen")
179+
print("### Please note that you must be in the substituionManager \
180+
directory to run the program.###")
181+
print()
182+
183+
def main():
184+
starttime = datetime.now()
185+
# Set optional parameters to False:
186+
axt = False
187+
phylip = False
188+
kaks = False
189+
codeml = False
190+
conv = False
191+
percent = "0.5"
192+
# Extract values from command line:
193+
for i in argv:
194+
if i == "-h" or i == "--help":
195+
helplist()
196+
quit()
197+
elif i == "-v" or i == "--version":
198+
print("\nAlignmentProcessor Copyright 2016 by Shawn Rupp\n")
199+
print("This program comes with ABSOLUTELY NO WARRANTY")
200+
print("This is free software, and you are welcome to redistribute\
201+
it under certain conditions\n")
202+
quit()
203+
elif i == "-i":
204+
fasta = argv[argv.index(i) + 1]
205+
elif i == "-o":
206+
outdir = argv[argv.index(i) + 1]
207+
if outdir[-1] != "/":
208+
outdir = outdir + "/"
209+
elif i == "-r":
210+
ref = argv[argv.index(i) + 1]
211+
elif i == "-%":
212+
percent = argv[argv.index(i) + 1]
213+
elif i == "--axt":
214+
axt = True
215+
elif i == "--phylip":
216+
phylip = True
217+
elif i == "--kaks":
218+
kaks = True
219+
elif i == "--codeml":
220+
codeml = True
221+
elif i == "--ucsc":
222+
conv = True
223+
# Save working directory to variable to call other scripts:
224+
path = os.getcwd()
225+
path = path + "/"
226+
# Run program if the user did not ask for help:
227+
if "-h" not in argv and "--help" not in argv:
228+
# Set checkpoint variables to False:
229+
ch = False
230+
sf = False
231+
rh= False
232+
cf = False
233+
cb = False
234+
rs = False
235+
ac = False
236+
ck = False
237+
pc = False
238+
# Begin pipeline
239+
makeDir(path, outdir, axt, phylip)
240+
if conv == True:
241+
ch, newfasta = convertHeaders(fasta)
242+
if ch == True:
243+
sf = splitFasta(newfasta, outdir)
244+
else:
245+
sf = splitFasta(fasta, outdir)
246+
if sf == True:
247+
rh = rmHeader(outdir)
248+
if rh == True:
249+
cf = checkFrame(outdir, ref)
250+
if cf == True:
251+
cb =countBases(outdir, percent)
252+
if cb == True:
253+
rs = replaceStop(outdir)
254+
# Optionally covert files to axt format:
255+
if axt == True:
256+
if rs == True:
257+
ac = axtConvert(outdir, kaks, starttime)
258+
# Run KaKs_Calculator:
259+
if kaks == True and codeml == False:
260+
if ac == True:
261+
ck = calculateKaKs(outdir)
262+
if ck == True:
263+
printCSV(outdir, starttime)
264+
# Optionally covert files to phylip format:
265+
if phylip == True:
266+
if rs == True:
267+
pc = phylipConvert(outdir, starttime, codeml)
268+
# Run codeml
269+
if codeml == True and kaks == False:
270+
if pc == True:
271+
runcodeml(outdir, starttime)
272+
273+
if __name__ == "__main__":
274+
main()

AlignmentProcessorReadMe.txt

Lines changed: 24 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -12,11 +12,12 @@
1212

1313

1414
###############################################################
15-
# AlignmentProcessor0.6 Package
15+
# AlignmentProcessor0.7 Package
1616
#
1717
# Dependencies: Python 3
1818
# Python 3 version of Biopython
1919
# Perl
20+
# PAML (comes packaged with Linux binaries)
2021
###############################################################
2122

2223
### Contents ###
@@ -52,6 +53,21 @@ a terminal and Anaconda will install Biopython for you:
5253

5354
conda install -c https://conda.anaconda.org/anaconda biopython
5455

56+
# KaKs_Calculator
57+
58+
AlignmentProcessor0.7 is packaged with KaKs_Calculator2.0 binaries for Linux
59+
and Windows, and a KaKs_Calculator1.2 binary for Mac (there is no 2.0 binary
60+
available for OSX). Before using, copy or move the appropriate binary for your
61+
system into the AlignmentProcessor bin which contains the python scipts.
62+
63+
# PAML 4.8
64+
65+
AlignmentProcessor0.7 comes packaged with Linux binaries of PAML 4.8. If you
66+
are using Mac or Windows, you will have to replace the Linux binaries with the
67+
appropriate package for you system
68+
(http://abacus.gene.ucl.ac.uk/software/paml.html). Make sure that it is
69+
titled "paml".
70+
5571
#-------------------------------
5672
# 1. Obtaining a fasta alignment
5773
#-------------------------------
@@ -120,7 +136,7 @@ in the bin/ directory.
120136

121137
# Example Usage:
122138

123-
python substituionManager0.3.py -% <decimal> \
139+
python AlignmentProcessor.py -% <decimal> \
124140
--axt/phylip --kaks/codeml --ucsc -i <input fasta file> \
125141
-o <path to output directory> -r <reference species>
126142

@@ -184,14 +200,17 @@ in the bin/ directory.
184200
that they are unique for each file. You may also need to provide a
185201
tree file for codeml (see PAML manual above).
186202

203+
The control file must be titled titled “codeml.ctl”, and it must be
204+
locatedin the output directory.
205+
187206
# Invoking the Ka/Ks pipeline with a UCSC alignment:
188207

189-
python AlignmentProcessor0.6.py --axt --kaks --ucsc -r green_anole \
208+
python AlignmentProcessor0.7.py --axt --kaks --ucsc -r green_anole \
190209
-i anolis_gallus.fa -o pairwiseKaKs/
191210

192211
# Invoking the CodeML pipeline with a de novo alignment:
193212

194-
python AlignmentProcessor0.6.py --phylip --codeml -% 0.6 \
213+
python AlignmentProcessor0.7.py --phylip --codeml -% 0.6 \
195214
-r green_anole -i anolis_gallus.fa -o codemlOutput/
196215

197216
#-------------------------------
@@ -346,7 +365,7 @@ simultaneously, as this could require too much memory.
346365
Change directory into the AlignmentProcessor folder. Paste the followig into
347366
a terminal (change the output directory to the desired loaction):
348367

349-
python AlignmentProcessor0.6.py --axt --kaks --ucsc -r green_anole \
368+
python AlignmentProcessor.py --axt --kaks --ucsc -r green_anole \
350369
-i test.fa -o test/
351370

352371
It should procude a text file with 17 lines.

bin/00_ConvertHeader.py

Lines changed: 1 addition & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -2,17 +2,7 @@
22
UCSC into headers which are compliant with AlignmentProcessor.
33
44
5-
AlignmentProcessor0.6 Copyright 2016 by Shawn Rupp
6-
7-
This program is free software: you can redistribute it and/or modify
8-
it under the terms of the GNU General Public License as published by
9-
the Free Software Foundation, either version 3 of the License, or
10-
(at your option) any later version.
11-
12-
This program is distributed in the hope that it will be useful,
13-
but WITHOUT ANY WARRANTY; without even the implied warranty of
14-
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15-
GNU General Public License (GPL3.txt) for more details.'''
5+
Copyright 2016 by Shawn Rupp'''
166

177
from sys import argv
188

bin/01_SplitFastaFiles.py

Lines changed: 1 addition & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -2,17 +2,7 @@
22
and create individual FASTA alignment files for each gene.
33
44
5-
AlignmentProcessor0.6 Copyright 2016 by Shawn Rupp
6-
7-
This program is free software: you can redistribute it and/or modify
8-
it under the terms of the GNU General Public License as published by
9-
the Free Software Foundation, either version 3 of the License, or
10-
(at your option) any later version.
11-
12-
This program is distributed in the hope that it will be useful,
13-
but WITHOUT ANY WARRANTY; without even the implied warranty of
14-
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15-
GNU General Public License (GPL3.txt) for more details.'''
5+
Copyright 2016 by Shawn Rupp'''
166

177
from sys import argv
188

0 commit comments

Comments
 (0)