|
1 | 1 |
|
2 | | - |
| 2 | +// NXF_WRAPPER_STAGE_FILE_THRESHOLD='50000' |
3 | 3 |
|
4 | 4 | params.input_folder = "./in_folder/" |
5 | 5 | params.output_folder = "./out_folder/" |
6 | 6 | params.proteome_folder = params.input_folder + "/proteome" |
7 | 7 | params.proteomes = params.proteome_folder + "/*" |
8 | | -params.hogmap_input_folder = params.input_folder + "/hogmap_input_folder" |
9 | | - |
| 8 | +params.hogmap_in = params.input_folder + "/hogmap_in" |
10 | 9 |
|
11 | 10 | params.hogmap_folder = params.output_folder + "/hogmap" |
12 | | -params.rhogs_folder = params.output_folder + "/rhogs_all" |
| 11 | +//params.rhogs_folder = params.output_folder + "/rhogs_all" |
13 | 12 | params.species_tree = params.input_folder + "/species_tree.nwk" |
14 | | -params.pickles_rhogs_folder = params.output_folder + "/pickle_rhogs" |
| 13 | +//params.pickles_rhogs_folder = params.output_folder + "/pickle_rhogs" |
15 | 14 | params.genetrees_folder = params.output_folder + "/genetrees" |
16 | 15 |
|
17 | 16 |
|
18 | | -// https://github.com/nextflow-io/nextflow/issues/1629 |
19 | | -// https://www.nextflow.io/docs/latest/process.html?highlight=cache#cache |
20 | | - |
21 | | -// todo clean up this file |
22 | | - |
23 | 17 | process omamer_run{ |
24 | 18 | time {4.h} |
25 | | - memory {50.GB} |
26 | | - cpus 10 |
| 19 | + memory {16.GB} |
27 | 20 | publishDir params.hogmap_folder |
28 | 21 | input: |
29 | | - path proteomes_omamerdb_inputhog |
| 22 | + path proteomes_omamerdb_inputhog |
30 | 23 | output: |
31 | | - path "*.hogmap" |
32 | | - val true // ready_omamer_run |
| 24 | + path "*.hogmap" |
| 25 | + val true |
33 | 26 | script: |
34 | | - // omamer search --db ${proteomes_omamerdb[1]} --query ${proteomes_omamerdb[0]} --nthreads 1 --out ${proteomes_omamerdb[0]}.hogmap |
35 | | - // cp /work/FAC/FBM/DBC/cdessim2/default/smajidi1/qfo_hogmap/${proteomes_omamerdb[0]}.hogmap . |
36 | 27 | """ |
37 | 28 | if [ -f ${proteomes_omamerdb_inputhog[2]}/${proteomes_omamerdb_inputhog[0]}.hogmap ] |
38 | 29 | then |
39 | 30 | cp ${proteomes_omamerdb_inputhog[2]}/${proteomes_omamerdb_inputhog[0]}.hogmap ${proteomes_omamerdb_inputhog[0]}.hogmap |
40 | 31 | else |
41 | | - omamer search --db ${proteomes_omamerdb_inputhog[1]} --query ${proteomes_omamerdb_inputhog[0]} --nthreads 10 --out ${proteomes_omamerdb_inputhog[0]}.hogmap |
| 32 | + omamer search --db ${proteomes_omamerdb_inputhog[1]} --query ${proteomes_omamerdb_inputhog[0]} --out ${proteomes_omamerdb_inputhog[0]}.hogmap |
42 | 33 | fi |
43 | | - """ |
44 | | - |
| 34 | + """ // --nthreads 10 |
45 | 35 | } |
46 | 36 |
|
47 | 37 |
|
48 | | -process infer_roothogs{ |
49 | | - publishDir params.rhogs_folder // "${params.output_folder}/rhogs_all" |
| 38 | +process infer_roothogs{ // publishDir params.rhogs_folder |
50 | 39 | input: |
51 | | - val ready_omamer_run |
52 | | - path hogmap_folder |
53 | | - path proteome_folder |
| 40 | + val ready_omamer_run |
| 41 | + path hogmap_folder |
| 42 | + path proteome_folder |
54 | 43 | output: |
55 | | - path "*.fa" |
56 | | - path "gene_id_dic_xml.pickle" |
57 | | - val true // ready_infer_roothogs nextflow-io.github.io/patterns/state-dependency/ |
| 44 | + path "rhogs_all" // path "rhogs_all/*" |
| 45 | + path "gene_id_dic_xml.pickle" |
| 46 | + val true // nextflow-io.github.io/patterns/state-dependency/ |
58 | 47 | script: |
59 | | - """ |
60 | | - infer-roothogs --logger-level DEBUG |
61 | | - """ |
| 48 | + """ |
| 49 | + infer-roothogs --logger-level DEBUG |
| 50 | + """ |
62 | 51 | } |
63 | 52 |
|
64 | | -process batch_roothogs{ |
65 | | - publishDir params.output_folder |
| 53 | + |
| 54 | +process batch_roothogs{ // publishDir params.output_folder |
66 | 55 | input: |
67 | | - val ready_infer_roothogs |
68 | | - path rhogs_folder //"${params.output_folder}/rhogs_all" |
| 56 | + val ready_infer_roothogs |
| 57 | + //path rhogs_folder |
| 58 | + path "rhogs_all" |
69 | 59 | output: |
70 | | - path "rhogs_rest/*", optional: true |
71 | | - path "rhogs_big/*" , optional: true |
72 | | - val true |
| 60 | + path "rhogs_rest/*", optional: true |
| 61 | + path "rhogs_big/*" , optional: true |
| 62 | + val true |
73 | 63 | script: |
74 | | - """ |
75 | | - batch-roothogs |
76 | | - """ |
| 64 | + """ |
| 65 | + batch-roothogs |
| 66 | + """ |
77 | 67 | } |
78 | 68 |
|
79 | | -process hog_big{ |
| 69 | +process hog_big{ //publishDir params.pickles_rhogs_folder |
80 | 70 | cpus 6 |
81 | | - time {20.h} // for very big rhog it might need more, or you could re-run and add `-resume` |
82 | | - memory {80.GB} |
83 | | - |
84 | | - publishDir params.pickles_rhogs_folder |
85 | | - |
| 71 | + time {20.h} // for very big rhog it might need more, or you could re-run and add `-resume` |
| 72 | + memory {20.GB} |
86 | 73 | input: |
87 | | - // val ready_batch_roothogs |
88 | | - // path rhogsbig_tree // = rhogsbig.combine(species_tree) |
89 | | - // rhogs_big_i //"$rhogs_big/*.fa" |
90 | | - // path "species_tree.nwk" |
91 | | - val rhogsbig_tree_ready |
| 74 | + val rhogsbig_tree_ready |
92 | 75 | output: |
93 | | - path "*.pickle" |
94 | | - |
95 | | - path "*.fa", optional: true // msa if write True |
96 | | - path "*.nwk", optional: true // gene trees if write True |
97 | | - |
98 | | - val true |
99 | | - // path "pi_big_subhog/*" |
100 | | - // pi_big rhogs_big |
101 | | - // params.species_tree |
102 | | - |
| 76 | + path "pickle_rhogs/*.pickle" |
| 77 | + // path "*.pickle" |
| 78 | + path "*.fa", optional: true // msa if write True |
| 79 | + path "*.nwk", optional: true // gene trees if write True |
| 80 | + val true |
103 | 81 | script: |
104 | | - """ |
105 | | - infer-subhogs --input-rhog-folder ${rhogsbig_tree_ready[0]} --species-tree ${rhogsbig_tree_ready[1]} --parallel --fragment-detection --low-so-detection |
106 | | - """ |
| 82 | + """ |
| 83 | + infer-subhogs --input-rhog-folder ${rhogsbig_tree_ready[0]} --species-tree ${rhogsbig_tree_ready[1]} --parallel --fragment-detection --low-so-detection |
| 84 | + """ |
107 | 85 | } |
108 | 86 |
|
109 | | - |
110 | | -process hog_rest{ |
111 | | - |
112 | | - publishDir params.pickles_rhogs_folder |
113 | | -// publishDir( |
114 | | -// path: {params.pickles_rhogs_folder}, |
115 | | -// pattern: {"*.pickle"} |
116 | | -// ) |
117 | | -// publishDir( |
118 | | -// path: {params.genetrees_folder}, |
119 | | -// pattern: {"*.nwk"} |
120 | | -// ) |
121 | | - |
| 87 | +process hog_rest{ //publishDir params.pickles_rhogs_folder |
122 | 88 | input: |
123 | | - // val ready_batch_roothogs |
124 | | - //path rhogsrest_tree // = rhogsrest.combine(species_tree) |
125 | | - val rhogsrest_tree_ready |
126 | | - |
| 89 | + val rhogsrest_tree_ready |
127 | 90 | output: |
128 | | - path "*.pickle" |
129 | | - |
130 | | - path "*.fa" , optional: true // msa if write True |
131 | | - path "*.nwk" , optional: true // gene trees if write True |
132 | | - |
133 | | - val true |
| 91 | + path "pickle_rhogs/*.pickle" |
| 92 | + // path "*.pickle" |
| 93 | + //path "pickle_rhogs/*.pickle" |
| 94 | + path "*.fa" , optional: true // msa if write True |
| 95 | + path "*.nwk" , optional: true // gene trees if write True |
| 96 | + val true |
134 | 97 | script: |
135 | | - """ |
136 | | - infer-subhogs --input-rhog-folder ${rhogsrest_tree_ready[0]} --species-tree ${rhogsrest_tree_ready[1]} --fragment-detection --low-so-detection |
137 | | - """ // --parrallel False |
| 98 | + """ |
| 99 | + infer-subhogs --input-rhog-folder ${rhogsrest_tree_ready[0]} --species-tree ${rhogsrest_tree_ready[1]} --fragment-detection --low-so-detection |
| 100 | +    """ // --parallel False |
138 | 101 | } |
139 | 102 |
|
140 | | -process collect_subhogs{ |
141 | 103 |
|
| 104 | +process collect_subhogs{ |
142 | 105 | memory {50.GB} |
143 | 106 | publishDir params.output_folder, mode: 'copy' |
144 | 107 | input: |
145 | | - val ready_hog_rest |
146 | | - val ready_hog_big |
147 | | - // path pickle_rhogs // this is for depenedcy |
148 | | - path "pickle_rhogs" // this is the folder includes pickles_rhogs |
149 | | - path "gene_id_dic_xml.pickle" |
150 | | - |
| 108 | + val ready_hog_rest |
| 109 | +    val ready_hog_big // path pickle_rhogs // this is for dependency |
| 110 | +    path "pickle_rhogs/" // "*.pickle" // path "pickle_rhogs" // this is the folder that includes pickle_rhogs |
| 111 | + path "gene_id_dic_xml.pickle" |
| 112 | + path "rhogs_all" |
151 | 113 | output: |
152 | | - path "output_hog_.orthoxml" |
153 | | - |
| 114 | + path "output_hog.orthoxml" |
| 115 | + path "OrthologousGroupsFasta" |
| 116 | + path "OrthologousGroups.tsv" |
| 117 | + path "rootHOGs.tsv" |
154 | 118 | script: |
155 | | - """ |
156 | | - collect-subhogs |
157 | | - """ |
| 119 | + """ |
| 120 | + collect-subhogs |
| 121 | + """ |
158 | 122 | } |
159 | 123 |
|
160 | 124 |
|
161 | | - |
162 | 125 | workflow { |
163 | 126 | proteomes = Channel.fromPath(params.proteomes, type:'any' ,checkIfExists:true) |
164 | 127 | proteome_folder = Channel.fromPath(params.proteome_folder) |
165 | 128 | hogmap_folder = Channel.fromPath(params.hogmap_folder) |
166 | | - rhogs_folder = Channel.fromPath(params.rhogs_folder) |
| 129 | +// rhogs_folder = Channel.fromPath(params.rhogs_folder) |
167 | 130 |
|
168 | 131 | genetrees_folder = Channel.fromPath(params.genetrees_folder) |
169 | | - hogmap_input_folder = Channel.fromPath(params.hogmap_input_folder) |
| 132 | + hogmap_in = Channel.fromPath(params.hogmap_in) |
170 | 133 |
|
171 | | - pickles_rhogs_folder = Channel.fromPath(params.pickles_rhogs_folder) |
172 | | - omamerdb = Channel.fromPath(params.input_folder+"/omamerdb.h5") |
173 | | - // proteomes.view{"prot ${it}"} |
| 134 | +// pickles_rhogs_folder = Channel.fromPath(params.pickles_rhogs_folder) |
| 135 | + omamerdb = Channel.fromPath(params.input_folder+"/omamerdb.h5") // proteomes.view{"prot ${it}"} |
174 | 136 | proteomes_omamerdb = proteomes.combine(omamerdb) |
175 | | - proteomes_omamerdb_inputhog = proteomes_omamerdb.combine(hogmap_input_folder) |
176 | | - // proteomes_omamerdb_inputhog.view{" rhogsbig ${it}"} |
177 | | - |
| 137 | + proteomes_omamerdb_inputhog = proteomes_omamerdb.combine(hogmap_in) // proteomes_omamerdb_inputhog.view{" rhogsbig ${it}"} |
178 | 138 | (hogmap, ready_omamer_run)= omamer_run(proteomes_omamerdb_inputhog) |
179 | | - // (hogmap, ready_omamer_run)= omamer_run(proteomes_omamerdb) |
180 | 139 | ready_omamer_run_c = ready_omamer_run.collect() |
181 | | - // hogmaps.view{"hogmap ${it}"} |
182 | | - |
183 | | - // proteome_folder.view{"proteome_folder ${it} "} |
184 | | - // (rhogs, gene_id_dic_xml) = infer_roothogs(hogmaps, hogmap_folder, proteome_folder) |
185 | | - (rhogs, gene_id_dic_xml, ready_infer_roothogs) = infer_roothogs(ready_omamer_run_c, hogmap_folder, proteome_folder) |
186 | | - // rhogs.view{"rhogs ${it}"} |
187 | | - // rhogs_folder.view{"rhogs_folder xx ${it}"} |
188 | 140 |
|
| 141 | + (rhogs_folder, gene_id_dic_xml, ready_infer_roothogs) = infer_roothogs(ready_omamer_run_c, hogmap_folder, proteome_folder) |
| 142 | + // rhogs_folder and "rhogs_all" are the same |
189 | 143 | ready_infer_roothogs_c = ready_infer_roothogs.collect() |
| 144 | + |
190 | 145 | (rhogs_rest_list, rhogs_big_list, ready_batch_roothogs) = batch_roothogs(ready_infer_roothogs_c, rhogs_folder) |
191 | 146 | ready_batch_roothogs_c = ready_batch_roothogs.collect() |
192 | 147 |
|
193 | | - // ready_batch_roothogs_c.view{" ready_batch_roothogs_c 44 ${it}"} |
194 | | - |
195 | 148 | species_tree = Channel.fromPath(params.species_tree) |
196 | 149 | rhogsbig = rhogs_big_list.flatten() |
197 | | - // rhogsbig.view{" rhogsbig ${it}"} |
198 | 150 | rhogsbig_tree = rhogsbig.combine(species_tree) |
199 | | - rhogsbig_tree_ready = rhogsbig_tree.combine(ready_batch_roothogs) |
200 | | - rhogsbig_tree_ready.view{"rhogsbig_tree_ready ${it}"} |
| 151 | + rhogsbig_tree_ready = rhogsbig_tree.combine(ready_batch_roothogs) // rhogsbig_tree_ready.view{"rhogsbig_tree_ready ${it}"} |
201 | 152 | (pickle_big_rhog, msas_out, genetrees_out, ready_hog_big) = hog_big(rhogsbig_tree_ready) |
202 | 153 |
|
203 | 154 | rhogsrest = rhogs_rest_list.flatten() |
204 | | -// rhogsrest.view{" rhogs rest ${it}"} |
205 | 155 | rhogsrest_tree = rhogsrest.combine(species_tree) |
206 | | - |
207 | | - |
208 | 156 | rhogsrest_tree_ready = rhogsrest_tree.combine(ready_batch_roothogs_c) |
209 | | -// rhogsrest_tree_ready.view{"rhogsrest_tree_ready ${it}"} |
210 | | - |
211 | 157 | (pickle_rest_rhog, msas_out_rest, genetrees_out_test, ready_hog_rest) = hog_rest(rhogsrest_tree_ready) |
| 158 | + all_pickles = pickle_big_rhog.mix(pickle_rest_rhog).collect() // all_pickles.view() // pickle_rhogs_folder = Channel.fromPath(params.output_folder+"/pickle_rhogs") |
212 | 159 |
|
213 | | -// pickle_rest_rhog.flatten().view{" pickle_rest_rhog rest ${it}"} |
214 | | -// pickle_big_rhog.flatten().view{" pickle_big_rhog rest ${it}"} |
215 | | - prb = pickle_big_rhog.collect() |
216 | | - prr = pickle_rest_rhog.collect() |
217 | | - all_pickles = prb.mix(prr) |
218 | | -// gene_id_dic_xml = Channel.fromPath("gene_id_dic_xml.pickle") |
219 | | - pickle_rhogs_folder = Channel.fromPath(params.output_folder+"/pickle_rhogs") |
220 | | -// orthoxml_file = collect_subhogs(all_pickles.collect(), pickle_rhogs_folder, gene_id_dic_xml) |
221 | | - |
222 | | - orthoxml_file = collect_subhogs(ready_hog_rest.collect(), ready_hog_big.collect(), pickles_rhogs_folder, gene_id_dic_xml) |
| 160 | + (orthoxml_file, OrthologousGroupsFasta, OrthologousGroups_tsv, rootHOGs_tsv) = collect_subhogs(ready_hog_rest.collect(), ready_hog_big.collect(), all_pickles, gene_id_dic_xml, rhogs_folder) // pickles_rhogs_folder |
223 | 161 | orthoxml_file.view{" output orthoxml file ${it}"} |
224 | 162 |
|
225 | | - |
226 | | - |
227 | 163 | } |
228 | 164 |
|
229 | 165 | // memory {12.GB * (2*task.attempt - 1)} |
|
0 commit comments