Skip to content

Commit 61b255e

Browse files
committed
update
1 parent 6d59a6c commit 61b255e

5 files changed

Lines changed: 82 additions & 7 deletions

File tree

FastOMA.nf

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -154,6 +154,7 @@ workflow {
154154
(pickle_rest_rhog, msas_out_rest, genetrees_out_test, ready_hog_rest) = hog_rest(rhogsrest_tree_ready)
155155

156156
(orthoxml_file, OrthologousGroupsFasta, OrthologousGroups_tsv, rootHOGs_tsv) = collect_subhogs(ready_hog_rest.collect(), ready_hog_big.collect(), pickles_temp, gene_id_dic_xml, omamer_rhogs)
157+
omamer_rhogs.view{" output omamer_rhogs ${it}"}
157158
orthoxml_file.view{" output orthoxml file ${it}"}
158159

159160
}

FastOMA/_config.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@
4040
keep_subhog_each_pickle = False
4141

4242
big_rhog_size = 60 * 1000
43-
omamer_family_threshold = 110
43+
omamer_family_threshold = 90
4444
#
4545
# omamer_fscore_treshold_big_rhog = 0.04 # 0.5 # means no threshold #0.2 #0.5 # to have more proteins in the ortho groups 0.05 considering for big rhogs
4646
# omamer_treshold_big_rhog_szie = 100 #9000 #100
@@ -49,7 +49,7 @@
4949
# omamer_treshold_big_rhog_szie2 = 50*1000
5050
# omamer_fscore_treshold_big_rhog2 = 0.6 #0.9
5151

52-
hogclass_max_num_seq = 20 # subsampling in msa # ver very 2
52+
hogclass_max_num_seq = 40 # subsampling in msa # ver very 2
5353
hogclass_min_cols_msa_to_filter = hogclass_max_num_seq * 50
5454
hogclass_tresh_ratio_gap_col = 0.6 # 0.8 for very very big
5555
# old code: after sampling, if there are 2 seqs sampled, then at least one nongap
@@ -70,7 +70,7 @@
7070
rooting_mad_executable_path = "mad" # /work/FAC/FBM/DBC/cdessim2/default/smajidi1/software/installers/mad/
7171

7272
##inferhog
73-
inferhog_tresh_ratio_gap_row =0.4 # 0.6 # to have more proteins in the ortho groups 0.1
73+
inferhog_tresh_ratio_gap_row =0.1 # 0.6 # to have more proteins in the ortho groups 0.1
7474
inferhog_tresh_ratio_gap_col =0.5 # 0.6 # ver very 0.8
7575
inferhog_min_cols_msa_to_filter = 50 #300 #50 # used for msa before gene tree inference and saving msa in hog class
7676

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -109,7 +109,7 @@ Then, cd to the `testdata` folder and download the omamer database and change it
109109
```
110110
cd FastOMA/testdata
111111
wget https://omabrowser.org/All/Primates-v2.0.0.h5 # 105MB
112-
mv Primates.h5 in_folder/omamerdb.h5
112+
mv Primates-v2.0.0.h5 in_folder/omamerdb.h5
113113
```
114114
(This is for the test; however, I would suggest downloading `LUCA-v2.0.0.h5` instead of `Primates-v2.0.0.h5` for your real analysis.) Check item 2 in the [input section](https://github.com/sinamajidian/FastOMA#input) for details.
115115

archive/test_curn.py

Lines changed: 76 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,8 +8,82 @@
88

99
# --input-rhog-folder ./bb/ --parrallel True --species-tree species_tree.nwk
1010

11-
a=2
12-
infer_subhogs()
11+
#a=2
12+
#infer_subhogs()
1313
#infer_roothogs()
1414

1515

16+
from FastOMA.zoo.hog import transform
17+
18+
#from zoo.tree_utils import collapse, gene_species, transform, HOG_coverages
19+
20+
import io
21+
import lxml.etree
22+
orthoxml_file = "/work/FAC/FBM/DBC/cdessim2/default/smajidi1/gethog3_qfo/benchmark-webservice3/orthoxml/euk_omamer200.dev8_13oct.orthoxml"
23+
24+
25+
orthxml_str = []
26+
with open(orthoxml_file, "r") as f:
27+
for i in f:
28+
orthxml_str.append(i)
29+
print(len(orthxml_str))
30+
dic_gene_integer={}
31+
for line in orthxml_str:
32+
if "gene id" in line:
33+
found=False
34+
gene_int= line.split("\"")[1]
35+
gene_name = line.split("\"")[3]
36+
dic_gene_integer[gene_int] = gene_name
37+
38+
39+
40+
orthoxml_etree=lxml.etree.parse(orthoxml_file)
41+
42+
pw_orthologs_integer = sorted(list(transform.iter_pairwise_relations(orthoxml_etree)))
43+
# iter_pairwise_relations(obj, rel_type=None): rel_type defaults to 'ortholog', but it is possible to use 'paralog'
44+
print(len(pw_orthologs_integer))
45+
print(pw_orthologs_integer[:2])
46+
pw_orthologs_gene =[]
47+
for pair in pw_orthologs_integer:
48+
pw_orthologs_gene.append((dic_gene_integer[pair[0]],dic_gene_integer[pair[1]]))
49+
50+
51+
52+
print(len(pw_orthologs_gene))
53+
54+
output_file = open(orthoxml_file+"_pairs.tsv","w")
55+
for pair in pw_orthologs_gene:
56+
output_file.write(pair[0]+"\t"+pair[1]+"\n")
57+
58+
output_file.close()
59+
60+
61+
#
62+
#
63+
# # orthoxml_handle= open(orthoxml_file,"r")
64+
# # orthoxml =""
65+
# # for line in orthoxml_handle:
66+
# # orthoxml+=line
67+
#
68+
#
69+
# from xml.etree.ElementTree import XMLParser
70+
#
71+
# parser = XMLParser()
72+
# with open(orthoxml_file, 'rb') as xml:
73+
# for chunk in xml:
74+
# parser.feed(chunk)
75+
# parser.close()
76+
#
77+
#
78+
# lxml.etree.parse(oxml)
79+
#
80+
# orthoxm= lxml.etree.parse(orthoxml)
81+
#
82+
# # expected = [("1", "2"), ("1", "3"), ("1", "4"), ("1", "5"), ("1", "6"),
83+
# # ("2", "5"), ("2", "6"), ("3", "4"), ("3", "5"), ("3", "6"),
84+
# # ("4", "5"), ("4", "6"), ("5", "6")]
85+
# # self.assertEqual(expected, pw_orthologs)
86+
#
87+
# from xml.etree import ElementTree
88+
# tree = ElementTree.parse(orthoxml_file)
89+
# root = tree.getroot()

setup.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
exec(line.rstrip())
1111

1212
# TODO
13-
requirements = ['biopython', 'ete3', 'omamer>=2.0.0.dev8', 'nextflow', 'pyparsing' , 'DendroPy', 'future', 'lxml','pyham']
13+
requirements = ['biopython', 'ete3', 'omamer>=2.0.0', 'nextflow', 'pyparsing' , 'DendroPy', 'future', 'lxml','pyham']
1414

1515
desc = 'FastOM - a package to infer orthology information '
1616

0 commit comments

Comments
 (0)