|
| 1 | +'''AlignmentProcessor will run the subsituion rate pipeline to produce trimmed |
| 2 | +axt or phylip files for use with KaKs_calculator or PhyMl. |
| 3 | +
|
| 4 | +
|
| 5 | + Copyright 2016 by Shawn Rupp |
| 6 | +
|
| 7 | + This package is free software: you can redistribute it and/or modify |
| 8 | + it under the terms of the GNU General Public License as published by |
| 9 | + the Free Software Foundation, either version 3 of the License, or |
| 10 | + (at your option) any later version. |
| 11 | +
|
| 12 | + This program is distributed in the hope that it will be useful, |
| 13 | + but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 14 | + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| 15 | + GNU General Public License for more details.''' |
| 16 | + |
| 17 | +from datetime import datetime |
| 18 | +from sys import argv |
| 19 | +from subprocess import Popen |
| 20 | +from shlex import split |
| 21 | +from glob import glob |
| 22 | +import os |
| 23 | + |
| 24 | +def makeDir(path, outdir, axt, phylip): |
| 25 | + '''Makes all sub-directories used by program.''' |
| 26 | + os.chdir(outdir) |
| 27 | + for i in ["01_splitFastaFiles", "02_rmHeader", "03_checkFrame", |
| 28 | + "04_countBasesPercent", "05_ReplaceStopCodons"]: |
| 29 | + try: |
| 30 | + os.mkdir(i) |
| 31 | + except FileExistsError: |
| 32 | + pass |
| 33 | + if axt == True: |
| 34 | + for i in ["06_axtFiles", "KaKsOutput"]: |
| 35 | + try: |
| 36 | + os.mkdir(i) |
| 37 | + except FileExistsError: |
| 38 | + pass |
| 39 | + if phylip == True: |
| 40 | + for i in ["06_phylipFiles", "07_codeml"]: |
| 41 | + try: |
| 42 | + os.mkdir(i) |
| 43 | + except FileExistsError: |
| 44 | + pass |
| 45 | + os.chdir(path) |
| 46 | + |
| 47 | +def convertHeaders(fasta): |
| 48 | + '''Changes headers of UCSC CDS fasta files to includ eonly build name and |
| 49 | +gene ID''' |
| 50 | + print("Converting fasta headers...") |
| 51 | + ch = Popen(split("python bin/00_ConvertHeader.py " + fasta)) |
| 52 | + ch.wait() |
| 53 | + # Extcact converted file name |
| 54 | + path = fasta.split("/")[:-1] |
| 55 | + outpath = "" |
| 56 | + for i in path: |
| 57 | + outpath += i + "/" |
| 58 | + newfasta = outpath + "newHeader." + fasta.split("/")[-1] |
| 59 | + if ch.returncode == 0: |
| 60 | + return True, newfasta |
| 61 | + |
| 62 | +def splitFasta(fasta, outdir): |
| 63 | + '''Splits fasta alignment into one file per gene.''' |
| 64 | + print("Splitting fasta file into separate files by gene...") |
| 65 | + sf = Popen(split("python bin/01_SplitFastaFiles.py " + " " |
| 66 | + + fasta + " " + outdir)) |
| 67 | + sf.wait() |
| 68 | + if sf.returncode == 0: |
| 69 | + return True |
| 70 | + |
| 71 | +def rmHeader(outdir): |
| 72 | + '''Removes header information and changes genome build to common name.''' |
| 73 | + print("Changing header...") |
| 74 | + rh = Popen(split("python bin/02_RemoveHeader.py " + outdir)) |
| 75 | + rh.wait() |
| 76 | + if rh.returncode == 0: |
| 77 | + return True |
| 78 | + |
| 79 | +def checkFrame(outdir, ref): |
| 80 | + '''Checks nucleotide frame of secondary species against the reference |
| 81 | +species''' |
| 82 | + print("Checking nucleotide frames...") |
| 83 | + cf = Popen(split("python bin/03_CheckFrame.py " + " " + outdir |
| 84 | + + " " + ref)) |
| 85 | + cf.wait() |
| 86 | + if cf.returncode == 0: |
| 87 | + return True |
| 88 | + |
| 89 | +def countBases(outdir, percent): |
| 90 | + '''Checks that each species has at least 50% of it's nucleotide content.''' |
| 91 | + print("Checking nucleotide content...") |
| 92 | + cb = Popen(split("python bin/04_CountBases.py " + " " + percent + |
| 93 | + " " + outdir)) |
| 94 | + cb.wait() |
| 95 | + if cb.returncode == 0: |
| 96 | + return True |
| 97 | + |
| 98 | +def replaceStop(outdir): |
| 99 | + '''Removes stop codons for downstream analysis.''' |
| 100 | + print("Removing stop codons...") |
| 101 | + rs = Popen(split("python bin/05_ReplaceStopCodons.py " + " " |
| 102 | + + outdir)) |
| 103 | + rs.wait() |
| 104 | + if rs.returncode == 0: |
| 105 | + return True |
| 106 | + |
| 107 | +def axtConvert(outdir, kaks, starttime): |
| 108 | + '''Open all input files in the directory and convert to axt script for |
| 109 | + use with KaKs_Calculator''' |
| 110 | + print("Converting fasta files into axt files...") |
| 111 | + ac = Popen(split("python bin/06_FASTAtoAXT.py " + outdir)) |
| 112 | + ac.wait() |
| 113 | + if ac.returncode == 0: |
| 114 | + if kaks == True: |
| 115 | + return True |
| 116 | + elif kaks == False: |
| 117 | + print("Finished converting files.") |
| 118 | + elapsedtime = datetime.now() - starttime |
| 119 | + print("Total runtime: ", elapsedtime) |
| 120 | + |
| 121 | +def calculateKaKs(outdir): |
| 122 | + '''Calculates substition rates.''' |
| 123 | + print("Calculating Ka/Ks values...") |
| 124 | + ck = Popen(split("python bin/07_KaKsonDir.py " + outdir)) |
| 125 | + ck.wait() |
| 126 | + if ck.returncode == 0: |
| 127 | + return True |
| 128 | + |
| 129 | +def printCSV(outdir, starttime): |
| 130 | + '''Prints Ka/Ks output as a single csv file.''' |
| 131 | + print("Printing Ka/Ks output as a single text file...") |
| 132 | + Popen(split("python bin/08_compileKaKs.py " + outdir)) |
| 133 | + print("Finished calculating Ka/Ks values.") |
| 134 | + elapsedtime = datetime.now() - starttime |
| 135 | + print("Total runtime: ", elapsedtime) |
| 136 | + |
| 137 | +def phylipConvert(outdir, starttime, codeml): |
| 138 | + '''Converts fasta files to phylip format''' |
| 139 | + print("Converting fasta files to phylip...") |
| 140 | + pc = Popen(split("python bin/06_FASTAtoPhylip.py " + " " + outdir)) |
| 141 | + pc.wait() |
| 142 | + if pc.returncode == 0: |
| 143 | + if codeml == False: |
| 144 | + elapsedtime = datetime.now() - starttime |
| 145 | + print("Total runtime: ", elapsedtime) |
| 146 | + return True |
| 147 | + |
| 148 | +def runcodeml(outdir, starttime): |
| 149 | + '''Runs codeml on a directory.''' |
| 150 | + print("Running codeml...") |
| 151 | + cm = Popen(split("python bin/07_CodeMLonDir.py " + outdir + "codeml.ctl " |
| 152 | + + outdir)) |
| 153 | + cm.wait() |
| 154 | + if cm.returncode == 0: |
| 155 | + elapsedtime = datetime.now() - starttime |
| 156 | + print("Total runtime: ", elapsedtime) |
| 157 | + |
| 158 | +def helplist(): |
| 159 | + print() |
| 160 | + print("### AlignmentProcessor will run the subsituion rate pipeline to \ |
| 161 | +produce trimmed axt files for use with KaKs_calculator. ###") |
| 162 | + print(" example usage: python AlignmentProcessor.py -% <decimal> \ |
| 163 | +--axt/phylip --kaks/codeml -i <input fasta file> -o \ |
| 164 | +<path to output directory> -r <reference species>") |
| 165 | + print(" --axt Converts files to axt for use in KaKs_Calcuator.") |
| 166 | + print(" --phylip Converts files to phylip for use in PhyML.") |
| 167 | + print(" --kaks Runs KaKs_Calcuator if --axt is also specified") |
| 168 | + print(" --codeml Runs codeml if --phylip is also specified") |
| 169 | + print(" --ucsc converts headers of CDS fasta files obtained from \ |
| 170 | +the UCSC genome browser") |
| 171 | + print(" -r the name of the reference species which will be used to \ |
| 172 | +determine the open reading frame") |
| 173 | + print(" -% Sets the percentage cutoff for the countBases step (50% \ |
| 174 | +by default).") |
| 175 | + print(" -i path to input file containing a multifasta alignment") |
| 176 | + print(" -o AlignmentProcessor will use this as its working \ |
| 177 | +directory and print output to this directory") |
| 178 | + print(" -v prints coyright and version information to the screen") |
| 179 | + print("### Please note that you must be in the substituionManager \ |
| 180 | +directory to run the program.###") |
| 181 | + print() |
| 182 | + |
| 183 | +def main(): |
| 184 | + starttime = datetime.now() |
| 185 | + # Set optional parameters to False: |
| 186 | + axt = False |
| 187 | + phylip = False |
| 188 | + kaks = False |
| 189 | + codeml = False |
| 190 | + conv = False |
| 191 | + percent = "0.5" |
| 192 | + # Extract values from command line: |
| 193 | + for i in argv: |
| 194 | + if i == "-h" or i == "--help": |
| 195 | + helplist() |
| 196 | + quit() |
| 197 | + elif i == "-v" or i == "--version": |
| 198 | + print("\nAlignmentProcessor Copyright 2016 by Shawn Rupp\n") |
| 199 | + print("This program comes with ABSOLUTELY NO WARRANTY") |
| 200 | + print("This is free software, and you are welcome to redistribute\ |
| 201 | + it under certain conditions\n") |
| 202 | + quit() |
| 203 | + elif i == "-i": |
| 204 | + fasta = argv[argv.index(i) + 1] |
| 205 | + elif i == "-o": |
| 206 | + outdir = argv[argv.index(i) + 1] |
| 207 | + if outdir[-1] != "/": |
| 208 | + outdir = outdir + "/" |
| 209 | + elif i == "-r": |
| 210 | + ref = argv[argv.index(i) + 1] |
| 211 | + elif i == "-%": |
| 212 | + percent = argv[argv.index(i) + 1] |
| 213 | + elif i == "--axt": |
| 214 | + axt = True |
| 215 | + elif i == "--phylip": |
| 216 | + phylip = True |
| 217 | + elif i == "--kaks": |
| 218 | + kaks = True |
| 219 | + elif i == "--codeml": |
| 220 | + codeml = True |
| 221 | + elif i == "--ucsc": |
| 222 | + conv = True |
| 223 | + # Save working directory to variable to call other scripts: |
| 224 | + path = os.getcwd() |
| 225 | + path = path + "/" |
| 226 | + # Run program if the user did not ask for help: |
| 227 | + if "-h" not in argv and "--help" not in argv: |
| 228 | + # Set checkpoint variables to False: |
| 229 | + ch = False |
| 230 | + sf = False |
| 231 | + rh= False |
| 232 | + cf = False |
| 233 | + cb = False |
| 234 | + rs = False |
| 235 | + ac = False |
| 236 | + ck = False |
| 237 | + pc = False |
| 238 | + # Begin pipeline |
| 239 | + makeDir(path, outdir, axt, phylip) |
| 240 | + if conv == True: |
| 241 | + ch, newfasta = convertHeaders(fasta) |
| 242 | + if ch == True: |
| 243 | + sf = splitFasta(newfasta, outdir) |
| 244 | + else: |
| 245 | + sf = splitFasta(fasta, outdir) |
| 246 | + if sf == True: |
| 247 | + rh = rmHeader(outdir) |
| 248 | + if rh == True: |
| 249 | + cf = checkFrame(outdir, ref) |
| 250 | + if cf == True: |
| 251 | + cb =countBases(outdir, percent) |
| 252 | + if cb == True: |
| 253 | + rs = replaceStop(outdir) |
| 254 | + # Optionally covert files to axt format: |
| 255 | + if axt == True: |
| 256 | + if rs == True: |
| 257 | + ac = axtConvert(outdir, kaks, starttime) |
| 258 | + # Run KaKs_Calculator: |
| 259 | + if kaks == True and codeml == False: |
| 260 | + if ac == True: |
| 261 | + ck = calculateKaKs(outdir) |
| 262 | + if ck == True: |
| 263 | + printCSV(outdir, starttime) |
| 264 | + # Optionally covert files to phylip format: |
| 265 | + if phylip == True: |
| 266 | + if rs == True: |
| 267 | + pc = phylipConvert(outdir, starttime, codeml) |
| 268 | + # Run codeml |
| 269 | + if codeml == True and kaks == False: |
| 270 | + if pc == True: |
| 271 | + runcodeml(outdir, starttime) |
| 272 | + |
| 273 | +if __name__ == "__main__": |
| 274 | + main() |
0 commit comments