@@ -22,68 +22,74 @@ def readMultiple(indir, outfile):
2222 output .write ("TranscriptID,dN,dS,dN/dS,TreeLength,lnL,Species\n " )
2323 infiles = glob (indir + "*" )
2424 for infile in infiles :
25- # Get gene id and number of species remaining for each file
26- filename = infile .split ("/" )[- 1 ]
27- transcript = filename .split ("." )[0 ]
28- species = filename .split ("." )[1 ]
29- if int (species ) > 2 :
25+ try :
26+ # Get gene id and number of species remaining for each file
27+ filename = infile .split ("/" )[- 1 ]
28+ transcript = filename .split ("." )[0 ]
29+ species = filename .split ("." )[1 ]
30+ if int (species ) > 2 :
31+ with open (infile , "r" ) as mlc :
32+ for line in mlc :
33+ # Get substitution rates, tree lengths, etc
34+ if "tree length =" in line :
35+ length = line .split ("=" )[1 ].strip ()
36+ elif "tree length for dN" in line :
37+ dn = line .split (":" )[1 ].strip ()
38+ elif "tree length for dS" in line :
39+ ds = line .split (":" )[1 ].strip ()
40+ elif "lnL" in line :
41+ lnl = line .split ("):" )[1 ]
42+ lnl = lnl .split ()[0 ].strip ()
43+ # Calculate dN/dS and save as a string
44+ try :
45+ dnds = str (float (dn )/ float (ds ))
46+ except ZeroDivisionError :
47+ dnds = "NA"
48+ # Append data to list and convert into string
49+ data = [dn , ds , dnds , length , lnl , species ]
50+ transcript += ","
51+ for i in data :
52+ transcript += str (i ) + ","
53+ transcript = transcript [:- 1 ]
54+ output .write (transcript + "\n " )
55+ else :
56+ # Skip files with only two sequences
57+ pass
58+ except IsADirectoryError :
59+ pass
60+
61+ def readPairwise (indir , outfile ):
62+ with open (outfile , "w" ) as output :
63+ # Open output file and write header
64+ output .write ("TranscriptID,dN,dS,dN/dS,lnL\n " )
65+ infiles = glob (indir + "*" )
66+ for infile in infiles :
67+ try :
68+ # Get gene id and number of species remaining for each file
69+ filename = infile .split ("/" )[- 1 ]
70+ transcript = filename .split ("." )[0 ]
3071 with open (infile , "r" ) as mlc :
3172 for line in mlc :
32- # Get substitution rates, tree lengths, etc
33- if "tree length =" in line :
34- length = line .split ("=" )[1 ].strip ()
35- elif "tree length for dN" in line :
36- dn = line .split (":" )[1 ].strip ()
37- elif "tree length for dS" in line :
38- ds = line .split (":" )[1 ].strip ()
73+ if line [:2 ] == "t=" :
74+ # Extract dN, dS, and dN/dS
75+ i = line .index ("dN/dS" )
76+ j = line .index ("dN " )
77+ k = line .index ("dS " )
78+ dnds = line [i :j ].split ("=" )[1 ].strip ()
79+ dn = line [j :k ].split ("=" )[1 ].strip ()
80+ ds = line [k :].split ("=" )[1 ].strip ()
3981 elif "lnL" in line :
40- lnl = line .split ("):" )[1 ]
41- lnl = lnl .split ()[0 ].strip ()
42- # Calculate dN/dS and save as a string
43- try :
44- dnds = str (float (dn )/ float (ds ))
45- except ZeroDivisionError :
46- dnds = "NA"
82+ lnl = line .split ("=" )[1 ].strip ()
4783 # Append data to list and convert into string
48- data = [dn , ds , dnds , length , lnl , species ]
84+ data = [dn , ds , dnds , lnl ]
4985 transcript += ","
5086 for i in data :
5187 transcript += str (i ) + ","
5288 transcript = transcript [:- 1 ]
53- output .write (transcript + "\n " )
54- else :
55- # Skip files with only two sequences
89+ output .write (transcript + "\n " )
90+ except IsADirectoryError :
5691 pass
5792
58- def readPairwise (indir , outfile ):
59- with open (outfile , "w" ) as output :
60- # Open output file and write header
61- output .write ("TranscriptID,dN,dS,dN/dS,lnL\n " )
62- infiles = glob (indir + "*" )
63- for infile in infiles :
64- # Get gene id and number of species remaining for each file
65- filename = infile .split ("/" )[- 1 ]
66- transcript = filename .split ("." )[0 ]
67- with open (infile , "r" ) as mlc :
68- for line in mlc :
69- if line [:2 ] == "t=" :
70- # Extract dN, dS, and dN/dS
71- i = line .index ("dN/dS" )
72- j = line .index ("dN " )
73- k = line .index ("dS " )
74- dnds = line [i :j ].split ("=" )[1 ].strip ()
75- dn = line [j :k ].split ("=" )[1 ].strip ()
76- ds = line [k :].split ("=" )[1 ].strip ()
77- elif "lnL" in line :
78- lnl = line .split ("=" )[1 ].strip ()
79- # Append data to list and convert into string
80- data = [dn , ds , dnds , lnl ]
81- transcript += ","
82- for i in data :
83- transcript += str (i ) + ","
84- transcript = transcript [:- 1 ]
85- output .write (transcript + "\n " )
86-
8793def main ():
8894 parser = argparse .ArgumentParser (description = "This script will \
8995 concatenate CodeML output and print them in a single file." )
0 commit comments