Skip to content

Commit 17f4ee5

Browse files
authored
Merge pull request #82 from wfcommons/nextflow_improvements
Nextflow improvements
2 parents 654ea93 + 8d44fba commit 17f4ee5

2 files changed

Lines changed: 51 additions & 26 deletions

File tree

docs/source/generating_workflow_benchmarks.rst

Lines changed: 48 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -88,58 +88,58 @@ description in :ref:`json-format-label`.
8888
to execute memory-intensive threads. Therefore, it is crucial to ensure that
8989
:code:`stress-ng` is installed on all worker nodes.
9090

91-
Nextflow
91+
92+
Dask
9293
++++++++
93-
`Nextflow <https://www.nextflow.io/>`_ is a workflow management system that enables
94-
the development of portable and reproducible workflows. It supports deploying workflows
95-
on a variety of execution platforms including local, HPC schedulers, and cloud-based
94+
`Dask <https://www.dask.org/>`_ is an open-source library for parallel computing
95+
in Python. It makes it possible to easily implement and execute workflows on local machines, HPC cluster schedulers, and cloud-based
9696
and container-based environments. Below, we provide an example on how to generate
97-
workflow benchmark for running with Nextflow::
97+
a workflow benchmark for running with Dask::
9898

9999
import pathlib
100100

101101
from wfcommons import BlastRecipe
102-
from wfcommons.wfbench import WorkflowBenchmark, NextflowTranslator
102+
from wfcommons.wfbench import WorkflowBenchmark, DaskTranslator
103103

104104
# create a workflow benchmark object to generate specifications based on a recipe
105105
benchmark = WorkflowBenchmark(recipe=BlastRecipe, num_tasks=500)
106106

107107
# generate a specification based on performance characteristics
108108
benchmark.create_benchmark(pathlib.Path("/tmp/"), cpu_work=100, data=10, percent_cpu=0.6)
109109

110-
# generate a Nextflow workflow
111-
translator = NextflowTranslator(benchmark.workflow)
112-
translator.translate(output_folder=pathlib.Path("./nextflow-wf/""))
113-
114-
.. warning::
115-
116-
Nextflow's way of defining workflows does not support tasks with iterations i.e. tasks
117-
that depend on another instance of the same abstract task. Thus, the translator
118-
fails when you try to translate a workflow with iterations.
110+
# generate a Dask workflow
111+
translator = DaskTranslator(benchmark.workflow)
112+
translator.translate(output_folder=pathlib.Path("./dask-wf/"))
119113

120-
Dask
114+
Nextflow
121115
++++++++
122-
`Dask <https://www.dask.org/>`_ is an open-source library for parallel computing
123-
in Python. It makes it possible to easily implement and execute workflows local machines, HPC cluster schedulers, and cloud-based
116+
117+
`Nextflow <https://www.nextflow.io/>`_ is a workflow management system that enables
118+
the development of portable and reproducible workflows. It supports deploying workflows
119+
on a variety of execution platforms including local, HPC schedulers, and cloud-based
124120
and container-based environments. Below, we provide an example on how to generate
125-
workflow benchmark for running with Dask::
121+
a workflow benchmark for running with Nextflow::
126122

127123
import pathlib
128124

129125
from wfcommons import BlastRecipe
130-
from wfcommons.wfbench import WorkflowBenchmark, DaskTranslator
126+
from wfcommons.wfbench import WorkflowBenchmark, NextflowTranslator
131127

132128
# create a workflow benchmark object to generate specifications based on a recipe
133129
benchmark = WorkflowBenchmark(recipe=BlastRecipe, num_tasks=500)
134130

135131
# generate a specification based on performance characteristics
136132
benchmark.create_benchmark(pathlib.Path("/tmp/"), cpu_work=100, data=10, percent_cpu=0.6)
137133

138-
# generate a Dask workflow
139-
translator = DaskTranslator(benchmark.workflow)
140-
translator.translate(output_folder=pathlib.Path("./dask-wf/""))
134+
# generate a Nextflow workflow
135+
translator = NextflowTranslator(benchmark.workflow)
136+
translator.translate(output_folder=pathlib.Path("./nextflow-wf/"))
141137

138+
.. warning::
142139

140+
Nextflow's way of defining workflows does not support tasks with iterations i.e. tasks
141+
that depend on another instance of the same abstract task. Thus, the translator
142+
fails when you try to translate a workflow with iterations.
143143

144144
Pegasus
145145
+++++++
@@ -175,6 +175,31 @@ for running with Pegasus::
175175
the :code:`lock_files_folder` parameter when using
176176
:meth:`~wfcommons.wfbench.bench.WorkflowBenchmark.create_benchmark`.
177177

178+
PyCOMPSs
179+
++++++++
180+
181+
`PyCOMPSs <https://compss.bsc.es/>`_ is a programming model and runtime that
182+
enables the parallel execution of Python applications on distributed computing
183+
infrastructures. It allows developers to define tasks using simple Python
184+
decorators, automatically handling task scheduling, data dependencies, and
185+
resource management. Below, we provide an example on how to generate a workflow
186+
benchmark for running with PyCOMPSs::
187+
188+
import pathlib
189+
190+
from wfcommons import CyclesRecipe
191+
from wfcommons.wfbench import WorkflowBenchmark, PyCompssTranslator
192+
193+
# create a workflow benchmark object to generate specifications based on a recipe
194+
benchmark = WorkflowBenchmark(recipe=CyclesRecipe, num_tasks=200)
195+
196+
# generate a specification based on performance characteristics
197+
benchmark.create_benchmark(pathlib.Path("/tmp/"), cpu_work=500, data=1000, percent_cpu=0.8)
198+
199+
# generate a PyCOMPSs workflow
200+
translator = PyCompssTranslator(benchmark.workflow)
201+
translator.translate(output_folder=pathlib.Path("./pycompss-wf/"))
202+
178203
Swift/T
179204
+++++++
180205

wfcommons/wfbench/translator/nextflow.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -194,16 +194,16 @@ def _create_task_script(output_folder: pathlib.Path, task: Task):
194194
# Generate input spec
195195
input_spec = "'\\["
196196
for f in task.input_files:
197-
input_spec += "\"" + str(output_folder.joinpath(f"data/{f.file_id}")) + "\","
197+
input_spec += f"\"{output_folder.resolve()}/data/{f.file_id}\","
198198
input_spec = input_spec[:-1] + "\\]'"
199199

200200
# Generate output spec
201201
output_spec = "'\\{"
202202
for f in task.output_files:
203-
output_spec += "\"" + str(output_folder.joinpath(f"data/{f.file_id}")) + "\":" + str(f.size)+ ","
203+
output_spec += f"\"{output_folder.resolve()}/data/{f.file_id}\":{str(f.size)},"
204204
output_spec = output_spec[:-1] + "\\}'"
205205

206-
code += str(output_folder.joinpath(f"bin/{task.program} "))
206+
code += f"{output_folder.resolve()}/bin/{task.program} "
207207

208208
for a in task.args:
209209
if "--output-files" in a:

0 commit comments

Comments
 (0)