@@ -8,15 +8,13 @@ params.proteomes = params.proteome_folder + "/*"
88params. hogmap_in = params. input_folder + " /hogmap_in"
99
1010params. hogmap_folder = params. output_folder + " /hogmap"
11- // params.rhogs_folder = params.output_folder + "/rhogs_all"
1211params. species_tree = params. input_folder + " /species_tree.nwk"
13- // params.pickles_rhogs_folder = params.output_folder + "/pickle_rhogs "
12+ params. pickles_temp = params. output_folder + " /pickles_temp "
1413params. genetrees_folder = params. output_folder + " /genetrees"
1514
1615
1716process omamer_run{
1817 time {4. h}
19- memory {4. GB }
2018 publishDir params. hogmap_folder
2119 input:
2220 path proteomes_omamerdb_inputhog
@@ -35,13 +33,13 @@ process omamer_run{
3533}
3634
3735
38- process infer_roothogs{ // publishDir params.rhogs_folder
36+ process infer_roothogs{
3937 input:
4038 val ready_omamer_run
4139 path hogmap_folder
4240 path proteome_folder
4341 output:
44- path " rhogs_all " // path "rhogs_all/* "
42+ path " omamer_rhogs "
4543 path " gene_id_dic_xml.pickle"
4644 val true // nextflow-io.github.io/patterns/state-dependency/
4745 script:
@@ -51,11 +49,10 @@ process infer_roothogs{ // publishDir params.rhogs_folder
5149}
5250
5351
54- process batch_roothogs{ // publishDir params.output_folder
52+ process batch_roothogs{
5553 input:
5654 val ready_infer_roothogs
57- // path rhogs_folder
58- path " rhogs_all"
55+ path " omamer_rhogs"
5956 output:
6057 path " rhogs_rest/*" , optional: true
6158 path " rhogs_big/*" , optional: true
@@ -66,15 +63,14 @@ process batch_roothogs{ // publishDir params.output_folder
6663 """
6764}
6865
69- process hog_big{ // publishDir params.pickles_rhogs_folder
66+ process hog_big{
67+ publishDir params. pickles_temp
7068 cpus 2
7169 time {20. h} // for very big rhog it might need more, or you could re-run and add `-resume`
72- memory {4. GB }
7370 input:
7471 val rhogsbig_tree_ready
7572 output:
76- path " pickle_rhogs/*.pickle"
77- // path "*.pickle"
73+ path " *.pickle"
7874 path " *.fa" , optional: true // msa if write True
7975 path " *.nwk" , optional: true // gene trees if write True
8076 val true
@@ -84,13 +80,12 @@ process hog_big{ //publishDir params.pickles_rhogs_folder
8480 """
8581}
8682
87- process hog_rest{ // publishDir params.pickles_rhogs_folder
83+ process hog_rest{
84+ publishDir params. pickles_temp
8885 input:
8986 val rhogsrest_tree_ready
9087 output:
91- path " pickle_rhogs/*.pickle"
92- // path "*.pickle"
93- // path "pickle_rhogs/*.pickle"
88+ path " *.pickle"
9489 path " *.fa" , optional: true // msa if write True
9590 path " *.nwk" , optional: true // gene trees if write True
9691 val true
@@ -102,14 +97,13 @@ process hog_rest{ //publishDir params.pickles_rhogs_folder
10297
10398
10499process collect_subhogs{
105- memory {4. GB }
106100 publishDir params. output_folder, mode: ' copy'
107101 input:
108102 val ready_hog_rest
109- val ready_hog_big // path pickle_rhogs // this is for depenedcy
110- path " pickle_rhogs/ " // "*.pickle" // path "pickle_rhogs " // this is the folder includes pickles_rhogs
103+ val ready_hog_big
104+ path " pickles_temp " // this is the folder includes pickles_rhogs
111105 path " gene_id_dic_xml.pickle"
112- path " rhogs_all "
106+ path " omamer_rhogs "
113107 output:
114108 path " output_hog.orthoxml"
115109 path " OrthologousGroupsFasta"
@@ -121,28 +115,25 @@ process collect_subhogs{
121115 """
122116}
123117
124-
125118workflow {
126119 proteomes = Channel . fromPath(params. proteomes, type :' any' ,checkIfExists :true )
127120 proteome_folder = Channel . fromPath(params. proteome_folder)
128121 hogmap_folder = Channel . fromPath(params. hogmap_folder)
129- // rhogs_folder = Channel.fromPath(params.rhogs_folder)
130122
131123 genetrees_folder = Channel . fromPath(params. genetrees_folder)
132124 hogmap_in = Channel . fromPath(params. hogmap_in)
133125
134- // pickles_rhogs_folder = Channel.fromPath(params.pickles_rhogs_folder )
135- omamerdb = Channel . fromPath(params. input_folder+ " /omamerdb.h5" ) // proteomes.view{"prot ${it}"}
126+ pickles_temp = Channel . fromPath(params. pickles_temp )
127+ omamerdb = Channel . fromPath(params. input_folder+ " /omamerdb.h5" )
136128 proteomes_omamerdb = proteomes. combine(omamerdb)
137129 proteomes_omamerdb_inputhog = proteomes_omamerdb. combine(hogmap_in) // proteomes_omamerdb_inputhog.view{" rhogsbig ${it}"}
138130 (hogmap, ready_omamer_run)= omamer_run(proteomes_omamerdb_inputhog)
139131 ready_omamer_run_c = ready_omamer_run. collect()
140132
141- (rhogs_folder, gene_id_dic_xml, ready_infer_roothogs) = infer_roothogs(ready_omamer_run_c, hogmap_folder, proteome_folder)
142- // rhogs_folder and "rhogs_all" are the same
133+ (omamer_rhogs, gene_id_dic_xml, ready_infer_roothogs) = infer_roothogs(ready_omamer_run_c, hogmap_folder, proteome_folder)
143134 ready_infer_roothogs_c = ready_infer_roothogs. collect()
144135
145- (rhogs_rest_list, rhogs_big_list, ready_batch_roothogs) = batch_roothogs(ready_infer_roothogs_c, rhogs_folder )
136+ (rhogs_rest_list, rhogs_big_list, ready_batch_roothogs) = batch_roothogs(ready_infer_roothogs_c, omamer_rhogs )
146137 ready_batch_roothogs_c = ready_batch_roothogs. collect()
147138
148139 species_tree = Channel . fromPath(params. species_tree)
@@ -155,16 +146,8 @@ workflow {
155146 rhogsrest_tree = rhogsrest. combine(species_tree)
156147 rhogsrest_tree_ready = rhogsrest_tree. combine(ready_batch_roothogs_c)
157148 (pickle_rest_rhog, msas_out_rest, genetrees_out_test, ready_hog_rest) = hog_rest(rhogsrest_tree_ready)
158- all_pickles = pickle_big_rhog. mix(pickle_rest_rhog). collect() // all_pickles.view() // pickle_rhogs_folder = Channel.fromPath(params.output_folder+"/pickle_rhogs")
159149
160- (orthoxml_file, OrthologousGroupsFasta , OrthologousGroups_tsv , rootHOGs_tsv) = collect_subhogs(ready_hog_rest. collect(), ready_hog_big. collect(), all_pickles , gene_id_dic_xml, rhogs_folder) // pickles_rhogs_folder
150+ (orthoxml_file, OrthologousGroupsFasta , OrthologousGroups_tsv , rootHOGs_tsv) = collect_subhogs(ready_hog_rest. collect(), ready_hog_big. collect(), pickles_temp , gene_id_dic_xml, omamer_rhogs)
161151 orthoxml_file. view{" output orthoxml file ${ it} " }
162152
163153}
164-
165- // memory {12.GB * (2*task.attempt - 1)}
166- // time {24.hour}
167- // errorStrategy {
168- // task.exitStatus in [1,99,143,137,104,134,139,145,140] ? ‘retry’ : ‘terminate’
169- // }
170- // maxRetries 4
0 commit comments