|
8 | 8 |
|
9 | 9 | # --input-rhog-folder ./bb/ --parrallel True --species-tree species_tree.nwk |
10 | 10 |
|
11 | | -a=2 |
12 | | -infer_subhogs() |
| 11 | +#a=2 |
| 12 | +#infer_subhogs() |
13 | 13 | #infer_roothogs() |
14 | 14 |
|
15 | 15 |
|
| 16 | +from FastOMA.zoo.hog import transform |
| 17 | + |
| 18 | +#from zoo.tree_utils import collapse, gene_species, transform, HOG_coverages |
| 19 | + |
| 20 | +import io |
| 21 | +import lxml.etree |
| 22 | +orthoxml_file = "/work/FAC/FBM/DBC/cdessim2/default/smajidi1/gethog3_qfo/benchmark-webservice3/orthoxml/euk_omamer200.dev8_13oct.orthoxml" |
| 23 | + |
| 24 | + |
| 25 | +orthxml_str = [] |
| 26 | +with open(orthoxml_file, "r") as f: |
| 27 | + for i in f: |
| 28 | + orthxml_str.append(i) |
| 29 | +print(len(orthxml_str)) |
| 30 | +dic_gene_integer={} |
| 31 | +for line in orthxml_str: |
| 32 | + if "gene id" in line: |
| 33 | + found=False |
| 34 | + gene_int= line.split("\"")[1] |
| 35 | + gene_name = line.split("\"")[3] |
| 36 | + dic_gene_integer[gene_int] = gene_name |
| 37 | + |
| 38 | + |
| 39 | + |
| 40 | +orthoxml_etree=lxml.etree.parse(orthoxml_file) |
| 41 | + |
| 42 | +pw_orthologs_integer = sorted(list(transform.iter_pairwise_relations(orthoxml_etree))) |
| 43 | +# iter_pairwise_relations(obj, rel_type=None (def:'ortholog' , but possible to use 'paralog') |
| 44 | +print(len(pw_orthologs_integer)) |
| 45 | +print(pw_orthologs_integer[:2]) |
| 46 | +pw_orthologs_gene =[] |
| 47 | +for pair in pw_orthologs_integer: |
| 48 | + pw_orthologs_gene.append((dic_gene_integer[pair[0]],dic_gene_integer[pair[1]])) |
| 49 | + |
| 50 | + |
| 51 | + |
| 52 | +print(len(pw_orthologs_gene)) |
| 53 | + |
| 54 | +output_file = open(orthoxml_file+"_pairs.tsv","w") |
| 55 | +for pair in pw_orthologs_gene: |
| 56 | + output_file.write(pair[0]+"\t"+pair[1]+"\n") |
| 57 | + |
| 58 | +output_file.close() |
| 59 | + |
| 60 | + |
| 61 | +# |
| 62 | +# |
| 63 | +# # orthoxml_handle= open(orthoxml_file,"r") |
| 64 | +# # orthoxml ="" |
| 65 | +# # for line in orthoxml_handle: |
| 66 | +# # orthoxml+=line |
| 67 | +# |
| 68 | +# |
| 69 | +# from xml.etree.ElementTree import XMLParser |
| 70 | +# |
| 71 | +# parser = XMLParser() |
| 72 | +# with open(orthoxml_file, 'rb') as xml: |
| 73 | +# for chunk in xml: |
| 74 | +# parser.feed(chunk) |
| 75 | +# parser.close() |
| 76 | +# |
| 77 | +# |
| 78 | +# lxml.etree.parse(oxml) |
| 79 | +# |
| 80 | +# orthoxm= lxml.etree.parse(orthoxml) |
| 81 | +# |
| 82 | +# # expected = [("1", "2"), ("1", "3"), ("1", "4"), ("1", "5"), ("1", "6"), |
| 83 | +# # ("2", "5"), ("2", "6"), ("3", "4"), ("3", "5"), ("3", "6"), |
| 84 | +# # ("4", "5"), ("4", "6"), ("5", "6")] |
| 85 | +# # self.assertEqual(expected, pw_orthologs) |
| 86 | +# |
| 87 | +# from xml.etree import ElementTree |
| 88 | +# tree = ElementTree.parse(orthoxml_file) |
| 89 | +# root = tree.getroot() |
0 commit comments