Skip to content

Commit fb1e106

Browse files
authored
Add files via upload
1 parent 0dc1ff7 commit fb1e106

1 file changed

Lines changed: 79 additions & 0 deletions

File tree

makePhylipMatrix.py

Lines changed: 79 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,79 @@
1+
#
2+
# Author: Hani Z. Girgis, PhD
3+
#
4+
#
5+
# Purpose: This program converts all-versus-all identity scores produced
6+
# by Identity into a Phylip distance matrix.
7+
# Requirements: Python 3 with NumPy.
8+
#
9+
10+
import numpy as np
11+
import sys
12+
13+
14+
def make_matrix(file_in, file_out):
    """Convert an all-versus-all identity score file into a Phylip matrix.

    Every non-blank line of ``file_in`` is read as tab-separated fields:
    first label, second label, identity score. The distance stored for a
    pair is ``1.0 - score`` clamped to the range [0.0, 1.0]; it is written
    symmetrically. Pairs that never appear keep the default distance 1.0
    and the diagonal is 0.0.

    The output starts with the number of labels, followed by one row per
    label (in order of first appearance): the label without its first
    character, a tab, and the distances formatted with eight decimals and
    separated by single spaces.

    Args:
        file_in: Path of the tab-separated score file to read.
        file_out: Path of the Phylip matrix file to write (overwritten).

    Raises:
        IndexError: If a non-blank line has fewer than three fields.
        ValueError: If the third field cannot be parsed as a float.
        OSError: If either file cannot be opened.
    """
    # Maps each label to its row/column index, assigned on first appearance.
    label_dict = {}

    # The context manager guarantees the input is closed even when a
    # malformed line raises half-way through parsing.
    with open(file_in, 'r') as file:
        # First pass: collect the labels so the matrix size is known.
        for line in file:
            line = line.strip()
            if not line:
                # Blank lines (e.g. a trailing newline) carry no data.
                continue
            token_list = line.split("\t")
            for label in (token_list[0], token_list[1]):
                if label not in label_dict:
                    label_dict[label] = len(label_dict)

        label_count = len(label_dict)

        # Unreported pairs default to the maximum distance of 1.0.
        matrix = np.ones((label_count, label_count))

        # The input does not report a sequence versus itself.
        np.fill_diagonal(matrix, 0.0)

        # Second pass: go back to the beginning of the file and fill in
        # the distances. Reading twice avoids holding every pair in memory.
        file.seek(0)
        for line in file:
            line = line.strip()
            if not line:
                continue
            token_list = line.split("\t")
            distance = 1.0 - float(token_list[2])

            # Clamp to [0, 1]; scores slightly outside that range would
            # otherwise produce negative or oversized distances.
            if distance < 0.0:
                distance = 0.0
            elif distance > 1.0:
                distance = 1.0

            id_1 = label_dict[token_list[0]]
            id_2 = label_dict[token_list[1]]
            matrix[id_1, id_2] = distance
            matrix[id_2, id_1] = distance

    # Write Phylip matrix, rows ordered by matrix index.
    sorted_key_list = sorted(label_dict, key=label_dict.get)
    with open(file_out, 'w') as file_object:
        file_object.write(str(label_count) + '\n')
        for key in sorted_key_list:
            row = matrix[label_dict[key]]
            values = ' '.join('{0:.8f}'.format(value) for value in row)
            # The first character of the label is dropped (presumably the
            # '>' of a FASTA header -- confirm against Identity's output).
            # End a species name with \t
            file_object.write(key[1:] + '\t' + values + '\n')
70+
71+
# Command-line entry point. The guard keeps the module importable: the
# argument check and the conversion only run when the file is executed as
# a script, not when make_matrix is imported from another module.
if __name__ == "__main__":
    if len(sys.argv) != 3:
        # Wrong number of arguments: show how to call the program.
        print("Use: python3 ", sys.argv[0], "allVsAllIdentityFile outputFileName")
        print()
        print("Please provide an all-versus-all file produced by Identity and an output file name.")
        print()
        print("Example: python3 " + sys.argv[0] + " all_vs_all_identity_scores matrix.phylip")
        print()
    else:
        # argv[1] is the score file to read, argv[2] the matrix to write.
        make_matrix(sys.argv[1], sys.argv[2])

0 commit comments

Comments
 (0)