Skip to content

Commit ca3f970

Browse files
committed
Added ability to ignore directories
1 parent 21596d6 commit ca3f970

1 file changed

Lines changed: 58 additions & 52 deletions

File tree

bin/ConcatenateCodeML.py

Lines changed: 58 additions & 52 deletions
Original file line numberDiff line numberDiff line change
@@ -22,68 +22,74 @@ def readMultiple(indir, outfile):
2222
output.write("TranscriptID,dN,dS,dN/dS,TreeLength,lnL,Species\n")
2323
infiles = glob(indir + "*")
2424
for infile in infiles:
25-
# Get gene id and number of species remaining for each file
26-
filename = infile.split("/")[-1]
27-
transcript = filename.split(".")[0]
28-
species = filename.split(".")[1]
29-
if int(species) > 2:
25+
try:
26+
# Get gene id and number of species remaining for each file
27+
filename = infile.split("/")[-1]
28+
transcript = filename.split(".")[0]
29+
species = filename.split(".")[1]
30+
if int(species) > 2:
31+
with open(infile, "r") as mlc:
32+
for line in mlc:
33+
# Get substitution rates, tree lengths, etc
34+
if "tree length =" in line:
35+
length = line.split("=")[1].strip()
36+
elif "tree length for dN" in line:
37+
dn = line.split(":")[1].strip()
38+
elif "tree length for dS" in line:
39+
ds = line.split(":")[1].strip()
40+
elif "lnL" in line:
41+
lnl = line.split("):")[1]
42+
lnl = lnl.split()[0].strip()
43+
# Calculate dN/dS and save as a string
44+
try:
45+
dnds = str(float(dn)/float(ds))
46+
except ZeroDivisionError:
47+
dnds = "NA"
48+
# Append data to list and convert into string
49+
data = [dn, ds, dnds, length, lnl, species]
50+
transcript += ","
51+
for i in data:
52+
transcript += str(i) + ","
53+
transcript = transcript[:-1]
54+
output.write(transcript + "\n")
55+
else:
56+
# Skip files with only two sequences
57+
pass
58+
except IsADirectoryError:
59+
pass
60+
61+
def readPairwise(indir, outfile):
62+
with open(outfile, "w") as output:
63+
# Open output file and write header
64+
output.write("TranscriptID,dN,dS,dN/dS,lnL\n")
65+
infiles = glob(indir + "*")
66+
for infile in infiles:
67+
try:
68+
# Get gene id and number of species remaining for each file
69+
filename = infile.split("/")[-1]
70+
transcript = filename.split(".")[0]
3071
with open(infile, "r") as mlc:
3172
for line in mlc:
32-
# Get substitution rates, tree lengths, etc
33-
if "tree length =" in line:
34-
length = line.split("=")[1].strip()
35-
elif "tree length for dN" in line:
36-
dn = line.split(":")[1].strip()
37-
elif "tree length for dS" in line:
38-
ds = line.split(":")[1].strip()
73+
if line[:2] == "t=":
74+
# Extract dN, dS, and dN/dS
75+
i = line.index("dN/dS")
76+
j = line.index("dN ")
77+
k = line.index("dS ")
78+
dnds = line[i:j].split("=")[1].strip()
79+
dn = line[j:k].split("=")[1].strip()
80+
ds = line[k:].split("=")[1].strip()
3981
elif "lnL" in line:
40-
lnl = line.split("):")[1]
41-
lnl = lnl.split()[0].strip()
42-
# Calculate dN/dS and save as a string
43-
try:
44-
dnds = str(float(dn)/float(ds))
45-
except ZeroDivisionError:
46-
dnds = "NA"
82+
lnl = line.split("=")[1].strip()
4783
# Append data to list and convert into string
48-
data = [dn, ds, dnds, length, lnl, species]
84+
data = [dn, ds, dnds, lnl]
4985
transcript += ","
5086
for i in data:
5187
transcript += str(i) + ","
5288
transcript = transcript[:-1]
53-
output.write(transcript + "\n")
54-
else:
55-
# Skip files with only two sequences
89+
output.write(transcript + "\n")
90+
except IsADirectoryError:
5691
pass
5792

58-
def readPairwise(indir, outfile):
59-
with open(outfile, "w") as output:
60-
# Open output file and write header
61-
output.write("TranscriptID,dN,dS,dN/dS,lnL\n")
62-
infiles = glob(indir + "*")
63-
for infile in infiles:
64-
# Get gene id and number of species remaining for each file
65-
filename = infile.split("/")[-1]
66-
transcript = filename.split(".")[0]
67-
with open(infile, "r") as mlc:
68-
for line in mlc:
69-
if line[:2] == "t=":
70-
# Extract dN, dS, and dN/dS
71-
i = line.index("dN/dS")
72-
j = line.index("dN ")
73-
k = line.index("dS ")
74-
dnds = line[i:j].split("=")[1].strip()
75-
dn = line[j:k].split("=")[1].strip()
76-
ds = line[k:].split("=")[1].strip()
77-
elif "lnL" in line:
78-
lnl = line.split("=")[1].strip()
79-
# Append data to list and convert into string
80-
data = [dn, ds, dnds, lnl]
81-
transcript += ","
82-
for i in data:
83-
transcript += str(i) + ","
84-
transcript = transcript[:-1]
85-
output.write(transcript + "\n")
86-
8793
def main():
8894
parser = argparse.ArgumentParser(description="This script will \
8995
concatenate CodeML output and print them in a single file.")

0 commit comments

Comments
 (0)