Skip to content

Commit e35359e

Browse files
committed
Added warning when using $ node symbol in CodeML trees.
1 parent 1a15f2e commit e35359e

2 files changed

Lines changed: 34 additions & 11 deletions

File tree

AlignmentProcessorReadMe.txt

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -243,9 +243,11 @@ in the bin/ directory.
243243
but trimmed to ten characters (some programs still set a ten character
244244
limit on the length of names, so AlignmentProcessor trims the names).
245245
The 07_CodeMLonDir.py script will save any nodes you have specified
246-
before sending a plain Newick tree to ape (which will not work if
247-
there are PAML node symbols). It will then add any nodes back into the
248-
tree after it has been trimmed.
246+
with a "#" before sending a plain Newick tree to ape (which will not
247+
work if there are PAML node symbols). It will then add any nodes back
248+
into the tree after it has been trimmed. AlignmentProcessor will not
249+
currently save nodes specified with "$" since it is difficult to
250+
determine where a nested clade begins and ends.
249251

250252
# Invoking the Ka/Ks pipeline with a UCSC alignment:
251253

bin/07_CodeMLonDir.py

Lines changed: 29 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
import re
1111
import os
1212

13-
def readIn(path):
13+
def controlFiles(path):
1414
'''Reads input files and stores them in memory'''
1515
usertree = False
1616
with open(path + "codeml.ctl", "r") as control:
@@ -23,9 +23,8 @@ def readIn(path):
2323
newline = "\ttreefile = " + path + "07_codeml/tmp.tree \n"
2424
ctl.insert(i, newline)
2525
# Determine if a phylogenic tree is needed
26-
elif "runmode" in line:
27-
if "0" in line or "1" in line:
28-
usertree = True
26+
if "runmode = 0" in line or "runmode = 1" in line:
27+
usertree = True
2928
# Only run if a tree is required
3029
if usertree == True:
3130
with open(path + "codeml.tree", "r") as intree:
@@ -37,6 +36,11 @@ def readIn(path):
3736
i = i.replace("(", "")
3837
i = i.replace(")", "")
3938
i = i .replace(";", "")
39+
if "$" in i:
40+
print()
41+
print("\tPlease only use the pound sign (#) to indicate nodes.")
42+
print()
43+
quit()
4044
if "#" in i or "$" in i:
4145
node = i.split()
4246
nodes[node[0]] = node[1]
@@ -56,8 +60,12 @@ def readIn(path):
5660
reftree.write(tree)
5761
elif usertree == False:
5862
pass
63+
return usertree, nodes, ctl
64+
65+
def rmSeqs(path):
66+
'''Creates a dictionary of sequences removed from specific genes'''
5967
with open(path + "internalStops.txt", "r") as stops:
60-
# Creates a dictionary of sequences removed from specific genes
68+
# Add sequences with internal stop codons
6169
rmseqs = {}
6270
for line in stops:
6371
splt = line.split("\t")
@@ -67,7 +75,17 @@ def readIn(path):
6775
# Must typecast dictionary values in order to create a list of
6876
# strings
6977
rmseqs[splt[0]] = [str(splt[1]).rstrip(), ]
70-
return usertree, nodes, ctl, rmseqs
78+
with open(path + "Logs/04_CountBasesLog.txt", "r") as countlog:
79+
# Add sequences which were removed due to low content to the dict.
80+
for line in stops:
81+
splt = line.split("\t")
82+
if splt[0] in rmseqs:
83+
rmseqs[splt[0]].append(str(splt[1]).rstrip())
84+
else:
85+
rmseqs[splt[0]] = [str(splt[1]).rstrip(), ]
86+
with open(path + "Logs/04_CountBasesLog.txt", "r") as countlog:
87+
88+
return rmseqs
7189

7290
def runCodeML(usertree, path, ctl, rmseqs, nodes, retainStops):
7391
'''Runs CodeML on all files in a directory using temporary controlfiles.'''
@@ -79,7 +97,7 @@ def runCodeML(usertree, path, ctl, rmseqs, nodes, retainStops):
7997
geneid = filename.split(".")[0]
8098
outfile = path + "07_codeml/" + geneid
8199
tempctl = path + "07_codeml/tmp.ctl"
82-
if retainStops == False and usertree == True:
100+
if usertree == True:
83101
# Prune tree only if it will be used
84102
pruneTree(path, geneid, rmseqs, nodes)
85103
with open(tempctl, "w") as temp:
@@ -155,7 +173,10 @@ def main():
155173
retainStops = True
156174
except IndexError:
157175
pass
158-
usertree, nodes, ctl, rmseqs = readIn(path)
176+
# Reads in required data
177+
usertree, nodes, ctl = controlFiles(path)
178+
rmseqs = rmSeqs(path)
179+
# Calls CodeML for each input file
159180
runCodeML(usertree, path, ctl, rmseqs, nodes, retainStops)
160181

161182
if __name__ == "__main__":

0 commit comments

Comments
 (0)