Skip to content

Commit 7398e33

Browse files
committed
put rhogs in folder, otherwise sbatch file very big
1 parent 1a923c8 commit 7398e33

3 files changed

Lines changed: 18 additions & 14 deletions

File tree

FastOMA/batch_roothogs.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -91,7 +91,7 @@ def folder_1h_rhog(address_rhogs_folder, output_folder_big, output_folder_rest):
9191

9292
def batch_roothogs():
9393

94-
input_rhog = "./" # rhogs_all
94+
input_rhog = "./rhogs_all/" #
9595
output_folder_big = "./rhogs_big/"
9696
output_folder_rest = "./rhogs_rest/"
9797
folder_1h_rhog(input_rhog, output_folder_big, output_folder_rest)

FastOMA/collect_subhogs.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,10 @@
66
# from . import _config
77
from ._config import logger_hog
88

9+
10+
# This code collects subhogs and writes the outputs.
11+
12+
913
def collect_subhogs():
1014

1115
logger_hog.info("started collecting pickle files ")
@@ -16,7 +20,7 @@ def collect_subhogs():
1620
# tr|A0A0N7KCI6|A0A0N7KCI6_ORYSJ
1721
# for qfo benchmark, the middle should be written in the file
1822

19-
pickle_folder = "./" # pickle_rhogs
23+
pickle_folder = "./" #pickle_rhogs
2024
output_xml_name = "./output_hog.orthoxml"
2125
gene_id_pickle_file = "./gene_id_dic_xml.pickle"
2226

@@ -130,7 +134,7 @@ def max_og_tree(tree):
130134
return og_prot_list
131135

132136
input_orthoxml = output_xml_name # sys.argv[1] # "out_folder/output_hog_.orthoxml"
133-
rhog_all_folder = "./" #sys.argv[2] + "/" # "out_folder/rhogs_all/"
137+
rhog_all_folder = "./rhogs_all/" #sys.argv[2] + "/" # "out_folder/rhogs_all/"
134138
fasta_format = "fa" # of the rhogs_all
135139

136140
output_file_og_tsv = "OrthologousGroups.tsv"

FastOMA_light.nf

Lines changed: 11 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@ params.input_folder = "./in_folder/"
33
params.output_folder = "./out_folder/"
44
params.proteome_folder = params.input_folder + "/proteome"
55
params.proteomes = params.proteome_folder + "/*"
6-
params.hogmap_input_folder = params.input_folder + "/hogmap_input_folder"
6+
params.hogmap_in = params.input_folder + "/hogmap_in"
77

88
params.hogmap_folder = params.output_folder + "/hogmap"
99
//params.rhogs_folder = params.output_folder + "/rhogs_all"
@@ -39,7 +39,7 @@ process infer_roothogs{ // publishDir params.rhogs_folder
3939
path hogmap_folder
4040
path proteome_folder
4141
output:
42-
path "rhogs_all/*"
42+
path "rhogs_all" // path "rhogs_all/*"
4343
path "gene_id_dic_xml.pickle"
4444
val true // nextflow-io.github.io/patterns/state-dependency/
4545
script:
@@ -52,7 +52,8 @@ process infer_roothogs{ // publishDir params.rhogs_folder
5252
process batch_roothogs{ // publishDir params.output_folder
5353
input:
5454
val ready_infer_roothogs
55-
path rhogs_folder
55+
//path rhogs_folder
56+
path "rhogs_all"
5657
output:
5758
path "rhogs_rest/*", optional: true
5859
path "rhogs_big/*" , optional: true
@@ -103,7 +104,7 @@ process collect_subhogs{
103104
val ready_hog_big // path pickle_rhogs // this is for depenedcy
104105
path all_pickles //"*.pickle" //path "pickle_rhogs" // this is the folder includes pickles_rhogs
105106
path "gene_id_dic_xml.pickle"
106-
path rhogs_folder
107+
path "rhogs_all"
107108
output:
108109
path "output_hog.orthoxml"
109110
path "OrthologousGroupsFasta"
@@ -123,21 +124,20 @@ workflow {
123124
// rhogs_folder = Channel.fromPath(params.rhogs_folder)
124125

125126
genetrees_folder = Channel.fromPath(params.genetrees_folder)
126-
hogmap_input_folder = Channel.fromPath(params.hogmap_input_folder)
127+
hogmap_in = Channel.fromPath(params.hogmap_in)
127128

128129
// pickles_rhogs_folder = Channel.fromPath(params.pickles_rhogs_folder)
129130
omamerdb = Channel.fromPath(params.input_folder+"/omamerdb.h5") // proteomes.view{"prot ${it}"}
130131
proteomes_omamerdb = proteomes.combine(omamerdb)
131-
proteomes_omamerdb_inputhog = proteomes_omamerdb.combine(hogmap_input_folder) // proteomes_omamerdb_inputhog.view{" rhogsbig ${it}"}
132+
proteomes_omamerdb_inputhog = proteomes_omamerdb.combine(hogmap_in) // proteomes_omamerdb_inputhog.view{" rhogsbig ${it}"}
132133
(hogmap, ready_omamer_run)= omamer_run(proteomes_omamerdb_inputhog)
133134
ready_omamer_run_c = ready_omamer_run.collect()
134135

135-
(rhogs, gene_id_dic_xml, ready_infer_roothogs) = infer_roothogs(ready_omamer_run_c, hogmap_folder, proteome_folder)
136+
(rhogs_folder, gene_id_dic_xml, ready_infer_roothogs) = infer_roothogs(ready_omamer_run_c, hogmap_folder, proteome_folder)
137+
// rhogs_folder and "rhogs_all" are the same
136138
ready_infer_roothogs_c = ready_infer_roothogs.collect()
137139

138-
// old : (rhogs_rest_list, rhogs_big_list, ready_batch_roothogs) = batch_roothogs(ready_infer_roothogs_c, rhogs_folder)
139-
140-
(rhogs_rest_list, rhogs_big_list, ready_batch_roothogs) = batch_roothogs(ready_infer_roothogs_c, rhogs)
140+
(rhogs_rest_list, rhogs_big_list, ready_batch_roothogs) = batch_roothogs(ready_infer_roothogs_c, rhogs_folder)
141141
ready_batch_roothogs_c = ready_batch_roothogs.collect()
142142

143143
species_tree = Channel.fromPath(params.species_tree)
@@ -152,7 +152,7 @@ workflow {
152152
(pickle_rest_rhog, msas_out_rest, genetrees_out_test, ready_hog_rest) = hog_rest(rhogsrest_tree_ready)
153153
all_pickles = pickle_big_rhog.mix(pickle_rest_rhog).collect() // all_pickles.view() // pickle_rhogs_folder = Channel.fromPath(params.output_folder+"/pickle_rhogs")
154154

155-
(orthoxml_file, OrthologousGroupsFasta, OrthologousGroups_tsv, rootHOGs_tsv) = collect_subhogs(ready_hog_rest.collect(), ready_hog_big.collect(), all_pickles, gene_id_dic_xml, rhogs) // pickles_rhogs_folder
155+
(orthoxml_file, OrthologousGroupsFasta, OrthologousGroups_tsv, rootHOGs_tsv) = collect_subhogs(ready_hog_rest.collect(), ready_hog_big.collect(), all_pickles, gene_id_dic_xml, rhogs_folder) // pickles_rhogs_folder
156156
orthoxml_file.view{" output orthoxml file ${it}"}
157157

158158
}

0 commit comments

Comments
 (0)