Skip to content

Commit acdf642

Browse files
committed
ortholog pair
1 parent 2ac5a28 commit acdf642

1 file changed

Lines changed: 48 additions & 0 deletions

File tree

utils/orthoxml2pairs.py

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
2+
from FastOMA.zoo.hog import transform
3+
4+
#from zoo.tree_utils import collapse, gene_species, transform, HOG_coverages
5+
6+
import io
7+
import lxml.etree
8+
import sys
9+
# Convert an OrthoXML file into a two-column TSV of pairwise relations.
#
# Usage: python orthoxml2pairs.py <orthoxml_file>
# The result is written next to the input as "<orthoxml_file>_pairs.tsv".
# Example input path left by the original author:
# "/work/FAC/FBM/DBC/cdessim2/default/smajidi1/gethog3_qfo/benchmark-webservice3/orthoxml/euk_omamer200.dev8_13oct.orthoxml"


def load_gene_names(lines):
    """Build a lookup from the raw text lines of an OrthoXML file.

    Every line containing the text ``gene id`` is split on double quotes.
    The first quoted value becomes the key and the second quoted value
    becomes the mapped name.

    NOTE(review): this relies on the ``id`` attribute being written first
    and the name attribute (presumably ``protId``) second, both in double
    quotes, on a single line -- confirm against the files being converted.

    Args:
        lines: iterable of text lines from the OrthoXML file.

    Returns:
        dict mapping the first quoted value to the second quoted value.

    Raises:
        IndexError: if a matching line has fewer than two quoted values.
    """
    gene_names = {}
    for line in lines:
        if "gene id" in line:
            fields = line.split('"')
            gene_names[fields[1]] = fields[3]
    return gene_names


def pairs_to_gene_names(pairs, gene_names):
    """Translate pairs of gene ids into pairs of gene names.

    Args:
        pairs: iterable of sequences whose first two items are keys of
            ``gene_names``.
        gene_names: lookup as returned by ``load_gene_names``.

    Returns:
        list of ``(name_a, name_b)`` tuples in the same order as ``pairs``.

    Raises:
        KeyError: if an id in ``pairs`` is missing from ``gene_names``.
    """
    return [(gene_names[pair[0]], gene_names[pair[1]]) for pair in pairs]


def write_pairs(pairs, output_path):
    """Write one tab-separated pair per line to ``output_path``.

    The file is opened in a ``with`` block so it is closed even if a write
    fails part-way through.
    """
    with open(output_path, "w") as output_file:
        output_file.writelines(
            pair[0] + "\t" + pair[1] + "\n" for pair in pairs
        )


def main(orthoxml_file):
    """Run the conversion for ``orthoxml_file``, printing progress counts."""
    with open(orthoxml_file, "r") as handle:
        orthxml_str = handle.readlines()
    print(len(orthxml_str))
    dic_gene_integer = load_gene_names(orthxml_str)

    orthoxml_etree = lxml.etree.parse(orthoxml_file)

    # Original author's note: iter_pairwise_relations(obj, rel_type=None)
    # defaults to 'ortholog', but 'paralog' is possible too.
    pw_orthologs_integer = sorted(
        transform.iter_pairwise_relations(orthoxml_etree)
    )
    print(len(pw_orthologs_integer))
    print(pw_orthologs_integer[:2])

    pw_orthologs_gene = pairs_to_gene_names(
        pw_orthologs_integer, dic_gene_integer
    )
    print(len(pw_orthologs_gene))
    print(pw_orthologs_gene[:2])

    write_pairs(pw_orthologs_gene, orthoxml_file + "_pairs.tsv")


if __name__ == "__main__":
    if len(sys.argv) < 2:
        sys.exit("usage: " + sys.argv[0] + " <orthoxml_file>")
    main(sys.argv[1])

0 commit comments

Comments
 (0)