Skip to content

Commit 87877e9

Browse files
committed
update nextflow
1 parent 7398e33 commit 87877e9

5 files changed

Lines changed: 180 additions & 106 deletions

File tree

FastOMA/collect_subhogs.py

Lines changed: 10 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,13 @@
66
# from . import _config
77
from ._config import logger_hog
88

9+
from FastOMA.zoo.hog import extract_flat_groups_at_level
10+
11+
from ete3 import Tree
12+
# import sys
13+
import os
14+
from FastOMA.zoo.hog.convert import orthoxml_to_newick
15+
from Bio import SeqIO
916

1017
# This code collects subhogs and writes outputs.
1118

@@ -20,7 +27,7 @@ def collect_subhogs():
2027
# tr|A0A0N7KCI6|A0A0N7KCI6_ORYSJ
2128
# for qfo benchmark, the middle should be written in the file
2229

23-
pickle_folder = "./" #pickle_rhogs
30+
pickle_folder = "./pickle_rhogs/" #pickle_rhogs
2431
output_xml_name = "./output_hog.orthoxml"
2532
gene_id_pickle_file = "./gene_id_dic_xml.pickle"
2633

@@ -85,11 +92,6 @@ def collect_subhogs():
8592

8693
logger_hog.info("Now writing OG fasta files ")
8794

88-
from ete3 import Tree
89-
# import sys
90-
import os
91-
from FastOMA.zoo.hog.convert import orthoxml_to_newick
92-
from Bio import SeqIO
9395

9496
def max_og_tree(tree):
9597
for node in tree.traverse("preorder"):
@@ -139,8 +141,7 @@ def max_og_tree(tree):
139141

140142
output_file_og_tsv = "OrthologousGroups.tsv"
141143

142-
trees, species_dic = orthoxml_to_newick(input_orthoxml,
143-
return_gene_to_species=True) # encode_levels_as_nhx=False, xref_tag="protId",
144+
trees, species_dic = orthoxml_to_newick(input_orthoxml, return_gene_to_species=True) # encode_levels_as_nhx=False, xref_tag="protId",
144145
print("We extracted " + str(len(trees)) + " trees in NHX format from the input HOG orthoxml" + input_orthoxml)
145146

146147
OGs = {}
@@ -184,11 +185,7 @@ def max_og_tree(tree):
184185

185186

186187

187-
188-
#import sys
189-
190-
from FastOMA.zoo.hog import extract_flat_groups_at_level
191-
188+
# import sys
192189
# input_orthoxml = output_xml_name
193190
output_file = "rootHOGs.tsv"
194191

FastOMA/infer_subhogs.py

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111
# import _config
1212

1313
from ._config import logger_hog
14-
14+
import os
1515

1616
# from ._utils import logger_hog
1717
# import ._utils_rhog
@@ -53,21 +53,22 @@ def infer_subhogs():
5353
if inferhog_concurrent_on:
5454
print("parallelization for subhog inference is on.")
5555

56-
pickles_rhog_folder = "./"
56+
pickles_rhog_folder = "./pickle_rhogs/"
57+
if not os.path.exists(pickles_rhog_folder):
58+
os.makedirs(pickles_rhog_folder)
59+
60+
# pickles_rhog_folder = "./"
5761
pickles_subhog_folder_all = "./"
5862

5963
# inferhog_concurrent_on = inferhog_concurrent_on_string == "True"
6064

61-
62-
6365
print("input is", address_rhogs_folder)
6466

6567
list_rhog_fastas_files = _utils_subhog.list_rhog_fastas(address_rhogs_folder)
6668
print("there are ", len(list_rhog_fastas_files), "rhogs in the input folder")
6769

6870
rhogs_fa_folder = address_rhogs_folder
6971

70-
7172
list_rhog_fastas_files_rem = _utils_subhog.list_rhog_fastas(address_rhogs_folder)
7273
print("there are ", len(list_rhog_fastas_files_rem), "rhogs remained in the input folder", list_rhog_fastas_files_rem[:5] )
7374

FastOMA/requirements

Lines changed: 155 additions & 85 deletions
Original file line numberDiff line numberDiff line change
@@ -1,95 +1,165 @@
11
$ conda list
2-
# packages in environment at /Users/sina/Installers/miniconda3/envs/gethog3:
2+
# packages in environment at /miniconda/envs/fastoma:
33
#
44
# Name Version Build Channel
5+
_libgcc_mutex 0.1 conda_forge conda-forge
6+
_openmp_mutex 4.5 2_gnu conda-forge
7+
anyio 3.7.1 pyhd8ed1ab_0 conda-forge
8+
argon2-cffi 23.1.0 pyhd8ed1ab_0 conda-forge
9+
argon2-cffi-bindings 21.2.0 py39hb9d737c_3 conda-forge
10+
arrow 1.2.3 pyhd8ed1ab_0 conda-forge
11+
asttokens 2.2.1 pyhd8ed1ab_0 conda-forge
12+
async-lru 2.0.4 pyhd8ed1ab_0 conda-forge
13+
attrs 23.1.0 pyh71513ae_1 conda-forge
14+
babel 2.12.1 pyhd8ed1ab_1 conda-forge
15+
backcall 0.2.0 pyh9f0ad1d_0 conda-forge
16+
backports 1.0 pyhd8ed1ab_3 conda-forge
17+
backports.functools_lru_cache 1.6.5 pyhd8ed1ab_0 conda-forge
18+
beautifulsoup4 4.12.2 pyha770c72_0 conda-forge
519
biopython 1.81 pypi_0 pypi
20+
bleach 6.0.0 pyhd8ed1ab_0 conda-forge
621
blosc2 2.0.0 pypi_0 pypi
7-
ca-certificates 2022.12.7 h033912b_0 conda-forge
8-
certifi 2022.12.7 pyhd8ed1ab_0 conda-forge
9-
cython 0.29.33 pypi_0 pypi
10-
dendropy 4.5.2 pypi_0 pypi
11-
ete3 3.1.2 pypi_0 pypi
12-
fasttree 2.1.11 hdcdfbac_1 bioconda
13-
gethog3 0.0.5 dev_0 <develop>
22+
brotli-python 1.0.9 py39h5a03fae_9 conda-forge
23+
bzip2 1.0.8 h7f98852_4 conda-forge
24+
ca-certificates 2023.7.22 hbcca054_0 conda-forge
25+
cached-property 1.5.2 hd8ed1ab_1 conda-forge
26+
cached_property 1.5.2 pyha770c72_1 conda-forge
27+
certifi 2023.7.22 pyhd8ed1ab_0 conda-forge
28+
cffi 1.15.1 py39he91dace_3 conda-forge
29+
charset-normalizer 3.2.0 pyhd8ed1ab_0 conda-forge
30+
comm 0.1.4 pyhd8ed1ab_0 conda-forge
31+
cython 3.0.1 pypi_0 pypi
32+
debugpy 1.6.8 py39h3d6467e_0 conda-forge
33+
decorator 5.1.1 pyhd8ed1ab_0 conda-forge
34+
defusedxml 0.7.1 pyhd8ed1ab_0 conda-forge
35+
dendropy 4.6.1 pypi_0 pypi
36+
entrypoints 0.4 pyhd8ed1ab_0 conda-forge
37+
ete3 3.1.3 pypi_0 pypi
38+
exceptiongroup 1.1.3 pyhd8ed1ab_0 conda-forge
39+
executing 1.2.0 pyhd8ed1ab_0 conda-forge
40+
fastoma 0.0.6 dev_0 <develop>
41+
fasttree 2.1.11 h031d066_2 bioconda
42+
fqdn 1.5.1 pyhd8ed1ab_0 conda-forge
43+
future 0.18.3 pypi_0 pypi
1444
humanfriendly 10.0 pypi_0 pypi
15-
libcxx 14.0.6 h9765a3e_0
16-
libffi 3.4.2 hecd8cb5_6
17-
llvm-openmp 14.0.6 h0dcd299_0
18-
llvmlite 0.39.1 pypi_0 pypi
19-
mafft 7.515 h1e3017b_0 bioconda
20-
msgpack 1.0.4 pypi_0 pypi
21-
ncurses 6.4 hcec6c5f_0
22-
nextflow 22.10.7 pypi_0 pypi
23-
numba 0.56.4 pypi_0 pypi
24-
numexpr 2.8.4 pypi_0 pypi
25-
numpy 1.23.5 pypi_0 pypi
26-
omamer 0.2.3 pypi_0 pypi
27-
openjdk 11.0.9.1 hcf210ce_1 conda-forge
28-
openssl 1.1.1t hfd90126_0 conda-forge
29-
packaging 23.0 pypi_0 pypi
30-
pandas 1.5.3 pypi_0 pypi
31-
pip 23.0.1 pypi_0 pypi
45+
idna 3.4 pyhd8ed1ab_0 conda-forge
46+
importlib-metadata 6.8.0 pyha770c72_0 conda-forge
47+
importlib_metadata 6.8.0 hd8ed1ab_0 conda-forge
48+
importlib_resources 6.0.1 pyhd8ed1ab_0 conda-forge
49+
ipykernel 6.25.1 pyh71e2992_0 conda-forge
50+
ipython 8.14.0 pyh41d4057_0 conda-forge
51+
iqtree 2.2.3 h21ec9f0_0 bioconda
52+
isoduration 20.11.0 pyhd8ed1ab_0 conda-forge
53+
jedi 0.19.0 pyhd8ed1ab_0 conda-forge
54+
jinja2 3.1.2 pyhd8ed1ab_1 conda-forge
55+
json5 0.9.14 pyhd8ed1ab_0 conda-forge
56+
jsonpointer 2.0 py_0 conda-forge
57+
jsonschema 4.19.0 pyhd8ed1ab_1 conda-forge
58+
jsonschema-specifications 2023.7.1 pyhd8ed1ab_0 conda-forge
59+
jsonschema-with-format-nongpl 4.19.0 pyhd8ed1ab_1 conda-forge
60+
jupyter-lsp 2.2.0 pyhd8ed1ab_0 conda-forge
61+
jupyter_client 8.3.0 pyhd8ed1ab_0 conda-forge
62+
jupyter_core 5.3.1 py39hf3d152e_0 conda-forge
63+
jupyter_events 0.7.0 pyhd8ed1ab_2 conda-forge
64+
jupyter_server 2.7.1 pyhd8ed1ab_0 conda-forge
65+
jupyter_server_terminals 0.4.4 pyhd8ed1ab_1 conda-forge
66+
jupyterlab 4.0.5 pyhd8ed1ab_0 conda-forge
67+
jupyterlab_pygments 0.2.2 pyhd8ed1ab_0 conda-forge
68+
jupyterlab_server 2.24.0 pyhd8ed1ab_0 conda-forge
69+
ld_impl_linux-64 2.40 h41732ed_0 conda-forge
70+
libffi 3.4.2 h7f98852_5 conda-forge
71+
libgcc-ng 13.1.0 he5830b7_0 conda-forge
72+
libgomp 13.1.0 he5830b7_0 conda-forge
73+
libnsl 2.0.0 h7f98852_0 conda-forge
74+
libsodium 1.0.18 h36c2ea0_1 conda-forge
75+
libsqlite 3.43.0 h2797004_0 conda-forge
76+
libstdcxx-ng 13.1.0 hfd8a6a1_0 conda-forge
77+
libuuid 2.38.1 h0b41bf4_0 conda-forge
78+
libzlib 1.2.13 hd590300_5 conda-forge
79+
llvmlite 0.40.1 pypi_0 pypi
80+
lxml 4.9.3 pypi_0 pypi
81+
mafft 7.520 h031d066_2 bioconda
82+
markupsafe 2.1.3 py39hd1e30aa_0 conda-forge
83+
matplotlib-inline 0.1.6 pyhd8ed1ab_0 conda-forge
84+
mistune 3.0.1 pyhd8ed1ab_0 conda-forge
85+
msgpack 1.0.5 pypi_0 pypi
86+
nbclient 0.8.0 pyhd8ed1ab_0 conda-forge
87+
nbconvert-core 7.7.4 pyhd8ed1ab_0 conda-forge
88+
nbformat 5.9.2 pyhd8ed1ab_0 conda-forge
89+
ncurses 6.4 hcb278e6_0 conda-forge
90+
nest-asyncio 1.5.6 pyhd8ed1ab_0 conda-forge
91+
nextflow 23.4.3 pypi_0 pypi
92+
notebook-shim 0.2.3 pyhd8ed1ab_0 conda-forge
93+
numba 0.57.1 pypi_0 pypi
94+
numexpr 2.8.5 pypi_0 pypi
95+
numpy 1.24.4 pypi_0 pypi
96+
omamer 0.2.6 pypi_0 pypi
97+
openssl 3.1.2 hd590300_0 conda-forge
98+
overrides 7.4.0 pyhd8ed1ab_0 conda-forge
99+
packaging 23.1 pyhd8ed1ab_0 conda-forge
100+
pandas 2.0.3 pypi_0 pypi
101+
pandocfilters 1.5.0 pyhd8ed1ab_0 conda-forge
102+
parso 0.8.3 pyhd8ed1ab_0 conda-forge
103+
pexpect 4.8.0 pyh1a96a4e_2 conda-forge
104+
pickleshare 0.7.5 py_1003 conda-forge
105+
pip 23.2.1 pyhd8ed1ab_0 conda-forge
106+
pkgutil-resolve-name 1.3.10 pyhd8ed1ab_0 conda-forge
107+
platformdirs 3.10.0 pyhd8ed1ab_0 conda-forge
108+
prometheus_client 0.17.1 pyhd8ed1ab_0 conda-forge
109+
prompt-toolkit 3.0.39 pyha770c72_0 conda-forge
110+
prompt_toolkit 3.0.39 hd8ed1ab_0 conda-forge
32111
property-manager 3.0 pypi_0 pypi
112+
psutil 5.9.5 py39h72bdee0_0 conda-forge
113+
ptyprocess 0.7.0 pyhd3deb0d_0 conda-forge
114+
pure_eval 0.2.2 pyhd8ed1ab_0 conda-forge
33115
py-cpuinfo 9.0.0 pypi_0 pypi
34-
pyparsing 3.0.9 pypi_0 pypi
35-
pysais 1.0.8 pypi_0 pypi
36-
python 3.9.16 h218abb5_0
37-
python-dateutil 2.8.2 pypi_0 pypi
38-
pytz 2022.7.1 pypi_0 pypi
39-
readline 8.2 hca72f7f_0
40-
scipy 1.10.1 pypi_0 pypi
41-
setuptools 65.6.3 py39hecd8cb5_0
42-
six 1.16.0 pypi_0 pypi
43-
sqlite 3.40.1 h880c91c_0
116+
pycparser 2.21 pyhd8ed1ab_0 conda-forge
117+
pygments 2.16.1 pyhd8ed1ab_0 conda-forge
118+
pyparsing 3.1.1 pypi_0 pypi
119+
pysais 1.1.0 pypi_0 pypi
120+
pysocks 1.7.1 pyha2e5f31_6 conda-forge
121+
python 3.9.17 h0755675_0_cpython conda-forge
122+
python-dateutil 2.8.2 pyhd8ed1ab_0 conda-forge
123+
python-fastjsonschema 2.18.0 pyhd8ed1ab_0 conda-forge
124+
python-json-logger 2.0.7 pyhd8ed1ab_0 conda-forge
125+
python_abi 3.9 3_cp39 conda-forge
126+
pytz 2023.3 pyhd8ed1ab_0 conda-forge
127+
pyyaml 6.0.1 py39hd1e30aa_0 conda-forge
128+
pyzmq 25.1.1 py39hb257651_0 conda-forge
129+
readline 8.2 h8228510_1 conda-forge
130+
referencing 0.30.2 pyhd8ed1ab_0 conda-forge
131+
requests 2.31.0 pyhd8ed1ab_0 conda-forge
132+
rfc3339-validator 0.1.4 pyhd8ed1ab_0 conda-forge
133+
rfc3986-validator 0.1.1 pyh9f0ad1d_0 conda-forge
134+
rpds-py 0.9.2 py39h9fdd4d6_0 conda-forge
135+
scipy 1.11.2 pypi_0 pypi
136+
send2trash 1.8.2 pyh41d4057_0 conda-forge
137+
setuptools 68.1.2 pyhd8ed1ab_0 conda-forge
138+
six 1.16.0 pyh6c4a22f_0 conda-forge
139+
sniffio 1.3.0 pyhd8ed1ab_0 conda-forge
140+
soupsieve 2.3.2.post1 pyhd8ed1ab_0 conda-forge
141+
stack_data 0.6.2 pyhd8ed1ab_0 conda-forge
44142
tables 3.8.0 pypi_0 pypi
45-
tk 8.6.12 h5d9f67b_0
46-
tqdm 4.64.1 pypi_0 pypi
47-
tzdata 2022g h04d1e81_0
143+
terminado 0.17.1 pyh41d4057_0 conda-forge
144+
tinycss2 1.2.1 pyhd8ed1ab_0 conda-forge
145+
tk 8.6.12 h27826a3_0 conda-forge
146+
tomli 2.0.1 pyhd8ed1ab_0 conda-forge
147+
tornado 6.3.3 py39hd1e30aa_0 conda-forge
148+
tqdm 4.66.1 pypi_0 pypi
149+
traitlets 5.9.0 pyhd8ed1ab_0 conda-forge
150+
typing-extensions 4.7.1 hd8ed1ab_0 conda-forge
151+
typing_extensions 4.7.1 pyha770c72_0 conda-forge
152+
typing_utils 0.1.0 pyhd8ed1ab_0 conda-forge
153+
tzdata 2023.3 pypi_0 pypi
154+
uri-template 1.3.0 pyhd8ed1ab_0 conda-forge
155+
urllib3 2.0.4 pyhd8ed1ab_0 conda-forge
48156
verboselogs 1.7 pypi_0 pypi
49-
wheel 0.38.4 py39hecd8cb5_0
50-
xz 5.2.10 h6c40b1e_1
51-
zlib 1.2.13 h4dc903c_0
52-
53-
54-
55-
56-
57-
58-
% python -m pip list
59-
Package Version Editable project location
60-
---------------- --------- -------------------------
61-
biopython 1.81
62-
blosc2 2.0.0
63-
certifi 2022.12.7
64-
Cython 0.29.33
65-
DendroPy 4.5.2
66-
ete3 3.1.2
67-
gethog3 0.0.5 /Volumes/work/a/gethog3
68-
humanfriendly 10.0
69-
llvmlite 0.39.1
70-
msgpack 1.0.4
71-
nextflow 22.10.7
72-
numba 0.56.4
73-
numexpr 2.8.4
74-
numpy 1.23.5
75-
omamer 0.2.3
76-
packaging 23.0
77-
pandas 1.5.3
78-
pip 23.0.1
79-
property-manager 3.0
80-
py-cpuinfo 9.0.0
81-
pyparsing 3.0.9
82-
PySAIS 1.0.8
83-
python-dateutil 2.8.2
84-
pytz 2022.7.1
85-
scipy 1.10.1
86-
setuptools 65.6.3
87-
six 1.16.0
88-
tables 3.8.0
89-
tqdm 4.64.1
90-
verboselogs 1.7
91-
wheel 0.38.4
92-
93-
94-
95-
157+
wcwidth 0.2.6 pyhd8ed1ab_0 conda-forge
158+
webcolors 1.13 pyhd8ed1ab_0 conda-forge
159+
webencodings 0.5.1 py_1 conda-forge
160+
websocket-client 1.6.2 pyhd8ed1ab_0 conda-forge
161+
wheel 0.41.2 pyhd8ed1ab_0 conda-forge
162+
xz 5.2.6 h166bdaf_0 conda-forge
163+
yaml 0.2.5 h7f98852_2 conda-forge
164+
zeromq 4.3.4 h9c3ff4c_1 conda-forge
165+
zipp 3.16.2 pyhd8ed1ab_0 conda-forge

FastOMA_light.nf

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,6 @@
11

2+
// NXF_WRAPPER_STAGE_FILE_THRESHOLD='50000'
3+
24
params.input_folder = "./in_folder/"
35
params.output_folder = "./out_folder/"
46
params.proteome_folder = params.input_folder + "/proteome"
@@ -71,7 +73,8 @@ process hog_big{ //publishDir params.pickles_rhogs_folder
7173
input:
7274
val rhogsbig_tree_ready
7375
output:
74-
path "*.pickle"
76+
path "pickle_rhogs/*.pickle"
77+
// path "*.pickle"
7578
path "*.fa", optional: true // msa if write True
7679
path "*.nwk", optional: true // gene trees if write True
7780
val true
@@ -85,7 +88,9 @@ process hog_rest{ //publishDir params.pickles_rhogs_folder
8588
input:
8689
val rhogsrest_tree_ready
8790
output:
88-
path "*.pickle"
91+
path "pickle_rhogs/*.pickle"
92+
// path "*.pickle"
93+
//path "pickle_rhogs/*.pickle"
8994
path "*.fa" , optional: true // msa if write True
9095
path "*.nwk" , optional: true // gene trees if write True
9196
val true
@@ -102,7 +107,7 @@ process collect_subhogs{
102107
input:
103108
val ready_hog_rest
104109
val ready_hog_big // path pickle_rhogs // this is for dependency
105-
path all_pickles //"*.pickle" //path "pickle_rhogs" // this is the folder includes pickles_rhogs
110+
path "pickle_rhogs/" // "*.pickle" // path "pickle_rhogs" // this is the folder that includes pickles_rhogs
106111
path "gene_id_dic_xml.pickle"
107112
path "rhogs_all"
108113
output:

nextflow_slurm.config

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11

22

33

4+
45
process.executor = "slurm"
56
process.queue = "cpu"
67
process.time = 3.h

0 commit comments

Comments
 (0)