File tree Expand file tree Collapse file tree
Expand file tree Collapse file tree Original file line number Diff line number Diff line change 1+
"""Export the pairwise relations of an OrthoXML file as a two-column TSV.

Usage: pass the path of the OrthoXML file as the first command-line argument.
The pairs are written next to the input, to ``<input path>_pairs.tsv``, one
tab-separated pair per line. Progress counts and the first two entries of each
intermediate list are printed to stdout.
"""

import io
import sys

import lxml.etree

from FastOMA.zoo.hog import transform

# from zoo.tree_utils import collapse, gene_species, transform, HOG_coverages

if len(sys.argv) < 2:
    sys.exit("usage: " + sys.argv[0] + " <orthoxml_file>")

orthoxml_file = sys.argv[1]
# Example input:
# "/work/FAC/FBM/DBC/cdessim2/default/smajidi1/gethog3_qfo/benchmark-webservice3/orthoxml/euk_omamer200.dev8_13oct.orthoxml"

# Map the identifier used inside the OrthoXML (first quoted value on a
# "gene id" line) to the gene name (second quoted value on that line).
# NOTE(review): this relies on each gene element sitting on its own line with
# `id` as the first attribute and the name (presumably `protId`) as the
# second -- confirm against the files this script is run on.
dic_gene_integer = {}
num_lines = 0
with open(orthoxml_file, "r") as f:
    for line in f:
        num_lines += 1
        if "gene id" in line:
            quoted = line.split('"')
            dic_gene_integer[quoted[1]] = quoted[3]
print(num_lines)

orthoxml_etree = lxml.etree.parse(orthoxml_file)

# iter_pairwise_relations(obj, rel_type=None (def:'ortholog' , but possible to use 'paralog')
pw_orthologs_integer = sorted(transform.iter_pairwise_relations(orthoxml_etree))
print(len(pw_orthologs_integer))
print(pw_orthologs_integer[:2])

# Translate every pair of internal identifiers into a pair of gene names.
# A KeyError here means a pair references an id that has no "gene id" line.
pw_orthologs_gene = [
    (dic_gene_integer[pair[0]], dic_gene_integer[pair[1]])
    for pair in pw_orthologs_integer
]

print(len(pw_orthologs_gene))
print(pw_orthologs_gene[:2])

# The context manager guarantees the file is flushed and closed even if a
# write fails part-way through.
with open(orthoxml_file + "_pairs.tsv", "w") as output_file:
    output_file.writelines(
        pair[0] + "\t" + pair[1] + "\n" for pair in pw_orthologs_gene
    )
You can’t perform that action at this time.
0 commit comments