Skip to content

Commit e52caa3

Browse files
committed
86: Slurm script template to run Nextflow workflows
1 parent ad47a81 commit e52caa3

3 files changed

Lines changed: 59 additions & 2 deletions

File tree

docs/source/generating_workflow_benchmarks.rst

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -141,6 +141,17 @@ workflow benchmark for running with Nextflow::
141141
that depend on another instance of the same abstract task. Thus, the translator
142142
fails when you try to translate a workflow with iterations.
143143

144+
.. note::
145+
146+
If you plan to run Nextflow on an HPC system using Slurm, we **strongly
147+
recommend** using the `HyperQueue <https://github.com/It4innovations/hyperqueue>`_
148+
executor. HyperQueue efficiently distributes workflow tasks across all allocated
149+
compute nodes, improving scalability and resource utilization.
150+
151+
The :class:`~wfcommons.wfbench.translator.nextflow.NextflowTranslator`
152+
class includes functionality to automatically generate a Slurm script
153+
template for running the workflow on HPC systems.
154+
144155
Pegasus
145156
+++++++
146157

wfcommons/wfbench/translator/nextflow.py

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -26,16 +26,19 @@ class NextflowTranslator(Translator):
2626
A WfFormat parser for creating Nextflow workflow applications.
2727
2828
:param workflow: Workflow benchmark object or path to the workflow benchmark JSON instance.
29-
:type workflow: Union[Workflow, pathlib.Path],
29+
:type workflow: Union[Workflow, pathlib.Path]
30+
:param slurm: Whether to generate a Slurm template script for workflow submission using :code:`sbatch`.
31+
:type slurm: bool
3032
:param logger: The logger where to log information/warning or errors (optional).
3133
:type logger: Logger
3234
"""
3335
def __init__(self,
             workflow: Union[Workflow, pathlib.Path],
             slurm: bool = False,
             logger: Optional[Logger] = None) -> None:
    """Create an object of the translator.

    :param workflow: Workflow benchmark object or path to the workflow
        benchmark JSON instance.
    :type workflow: Union[Workflow, pathlib.Path]
    :param slurm: Whether to generate a Slurm template script for workflow
        submission using :code:`sbatch`.
    :type slurm: bool
    :param logger: The logger where to log information/warning or errors
        (optional).
    :type logger: Logger
    """
    super().__init__(workflow, logger)
    # When True, translate() copies the Slurm/HyperQueue job template
    # alongside the generated workflow.
    self.slurm = slurm
    # Generated Nextflow script text, built up during translation.
    self.script = ""
    # Output-file names collected while translating tasks
    # (presumably used to wire task dependencies — confirm in translate()).
    self.out_files = set()

@@ -54,6 +57,9 @@ def translate(self, output_folder: pathlib.Path) -> None:
5457
self._copy_binary_files(output_folder)
5558
self._generate_input_files(output_folder)
5659

60+
if self.slurm:
61+
shutil.copy(this_dir.joinpath("templates/nextflow/nextflow_hyperqueue_job.sh"), output_folder)
62+
5763
if self.workflow.workflow_id:
5864
shutil.copy(this_dir.joinpath("templates/flowcept_agent.py"), output_folder.joinpath("bin"))
5965

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
#!/bin/bash
2+
#SBATCH --job-name=nextflowjob
3+
#SBATCH --partition=<PARTITION_NAME>
4+
#SBATCH --account=<ACCOUNT>
5+
#SBATCH --nodes=<NUM_NODES>
6+
#SBATCH --ntasks-per-node=1
7+
#SBATCH --cpus-per-task=<MAX_NUM_CPU_PER_TASK>
8+
#SBATCH --time=01:00:00
9+
10+
# Set variables
11+
export PATH=$PATH:/$PWD
12+
export NXF_DISABLE_CHECK_LATEST=true
13+
14+
# Set the directory which hyperqueue will use
15+
export HQ_SERVER_DIR=${PWD}/.hq-server
16+
mkdir -p ${HQ_SERVER_DIR}
17+
18+
# Start the server in the background (&) and wait until it has started
19+
hq server start &
20+
until hq job list &>/dev/null ; do sleep 1 ; done
21+
22+
# Start the workers in the background and wait for them to start
23+
srun --overlap --cpu-bind=none --mpi=none hq worker start --cpus=${SLURM_CPUS_PER_TASK} &
24+
hq worker wait "${SLURM_NTASKS}"
25+
26+
# Ensure Nextflow uses the right executor and knows how many jobs it can submit
27+
# The `queueSize` can be limited as needed.
28+
echo "executor {
29+
queueSize = $(( SLURM_CPUS_PER_TASK*SLURM_NNODES ))
30+
name = 'hq'
31+
cpus = $(( SLURM_CPUS_PER_TASK*SLURM_NNODES ))
32+
}" > ${PWD}/nextflow.config
33+
34+
# run the Nextflow pipeline
35+
nextflow run workflow.nf -c nextflow.config --pwd $PWD &> output.log
36+
37+
# Wait for all jobs to finish, then shut down the workers and server
38+
hq job wait all
39+
hq worker stop all
40+
hq server stop

0 commit comments

Comments
 (0)