1010import re
1111import os
1212
13- def readIn (path ):
13+ def controlFiles (path ):
1414 '''Reads input files and stores them in memory'''
1515 usertree = False
1616 with open (path + "codeml.ctl" , "r" ) as control :
@@ -23,9 +23,8 @@ def readIn(path):
2323 newline = "\t treefile = " + path + "07_codeml/tmp.tree \n "
2424 ctl .insert (i , newline )
2525 # Determine if a phylogenic tree is needed
26- elif "runmode" in line :
27- if "0" in line or "1" in line :
28- usertree = True
26+ if "runmode = 0" in line or "runmode = 1" in line :
27+ usertree = True
2928 # Only run if a tree is required
3029 if usertree == True :
3130 with open (path + "codeml.tree" , "r" ) as intree :
@@ -37,6 +36,11 @@ def readIn(path):
3736 i = i .replace ("(" , "" )
3837 i = i .replace (")" , "" )
3938 i = i .replace (";" , "" )
39+ if "$" in i :
40+ print ()
41+ print ("\t Please only use the pound sign (#) to indicate nades." )
42+ print ()
43+ quit ()
4044 if "#" in i or "$" in i :
4145 node = i .split ()
4246 nodes [node [0 ]] = node [1 ]
@@ -56,8 +60,12 @@ def readIn(path):
5660 reftree .write (tree )
5761 elif usertree == False :
5862 pass
63+ return usertree , nodes , ctl
64+
65+ def rmSeqs (path ):
66+ '''Creates a dictionary of sequences removed from specific genes'''
5967 with open (path + "internalStops.txt" , "r" ) as stops :
60- # Creates a dictionary of sequences removed from specific genes
68+ # Add sequences with internal stop codons
6169 rmseqs = {}
6270 for line in stops :
6371 splt = line .split ("\t " )
@@ -67,7 +75,17 @@ def readIn(path):
6775 # Must typecast dictionary values in order to create a list of
6876 # strings
6977 rmseqs [splt [0 ]] = [str (splt [1 ]).rstrip (), ]
70- return usertree , nodes , ctl , rmseqs
78+ with open (path + "Logs/04_CountBasesLog.txt" , "r" ) as countlog :
79+ # Add sequences which were removed due to low content to the dict.
80+ for line in stops :
81+ splt = line .split ("\t " )
82+ if splt [0 ] in rmseqs :
83+ rmseqs [splt [0 ]].append (str (splt [1 ]).rstrip ())
84+ else :
85+ rmseqs [splt [0 ]] = [str (splt [1 ]).rstrip (), ]
86+ with open (path + "Logs/04_CountBasesLog.txt" , "r" ) as countlog :
87+
88+ return rmseqs
7189
7290def runCodeML (usertree , path , ctl , rmseqs , nodes , retainStops ):
7391 '''Runs CodeML on all files in a directory using temporary controlfiles.'''
@@ -79,7 +97,7 @@ def runCodeML(usertree, path, ctl, rmseqs, nodes, retainStops):
7997 geneid = filename .split ("." )[0 ]
8098 outfile = path + "07_codeml/" + geneid
8199 tempctl = path + "07_codeml/tmp.ctl"
82- if retainStops == False and usertree == True :
100+ if usertree == True :
83101 # Prune tree only if it will be used
84102 pruneTree (path , geneid , rmseqs , nodes )
85103 with open (tempctl , "w" ) as temp :
@@ -155,7 +173,10 @@ def main():
155173 retainStops = True
156174 except IndexError :
157175 pass
158- usertree , nodes , ctl , rmseqs = readIn (path )
176+ # Reads in required data
177+ usertree , nodes , ctl = controlFiles (path )
178+ rmseqs = rmSeqs (path )
179+ # Calls CodeML for each input file
159180 runCodeML (usertree , path , ctl , rmseqs , nodes , retainStops )
160181
161182if __name__ == "__main__" :
0 commit comments