Merge branch 'main' into airflow-translator

henricasanova · henricasanova · commit c6deea803095 · 2025-04-21T13:58:29.000-10:00
diff --git a/bin/wfbench b/bin/wfbench
diff --git a/pyproject.toml b/pyproject.toml
@@ -36,6 +36,7 @@ dependencies = [
     "scipy",
     "pyyaml",
     "pandas",
+    "shortuuid",
     "stringcase",
     "filelock",
     "pathos",
diff --git a/wfcommons/wfbench/bench.py b/wfcommons/wfbench/bench.py
@@ -1,7 +1,7 @@
 #!/usr/bin/env python3
 # -*- coding: utf-8 -*-
 #
-# Copyright (c) 2021-2024 The WfCommons Team.
+# Copyright (c) 2021-2025 The WfCommons Team.
 #
 # This program is free software: you can redistribute it and/or modify
 # it under the terms of the GNU General Public License as published by
@@ -13,10 +13,9 @@
 import logging
 import os
 import pathlib
-import re
 import subprocess
 import time
-import uuid
+import shortuuid
 import sys
 
 from logging import Logger
@@ -39,14 +38,17 @@ class WorkflowBenchmark:
     :type recipe: Type[WfChefWorkflowRecipe]
     :param num_tasks: Total number of tasks in the benchmark workflow.
     :type num_tasks: int
+    :param with_flowcept: Whether to enable Flowcept support (a workflow ID is generated when the benchmark is created).
+    :type with_flowcept: bool
     :param logger: The logger where to log information/warning or errors.
     :type logger: Optional[Logger]
     """
 
     def __init__(self,
                  recipe: Type[WfChefWorkflowRecipe],
                  num_tasks: int,
-                 logger: Optional[Logger] = None, with_flowcept=False) -> None:
+                 with_flowcept: bool = False,
+                 logger: Optional[Logger] = None) -> None:
         """Create an object that represents a workflow benchmark generator."""
         self.logger: Logger = logging.getLogger(
             __name__) if logger is None else logger
@@ -296,7 +298,7 @@ def create_benchmark(self,
             f"{self.workflow.name.lower()}-{self.num_tasks}").with_suffix(".json")
 
         if self.with_flowcept:
-            self.workflow.workflow_id = str(uuid.uuid4())
+            self.workflow.workflow_id = str(shortuuid.uuid())
 
         cores, lock = self._creating_lock_files(lock_files_folder)
         for task in self.workflow.tasks.values():
diff --git a/wfcommons/wfbench/translator/abstract_translator.py b/wfcommons/wfbench/translator/abstract_translator.py
@@ -12,7 +12,7 @@
 import os
 import pathlib
 import shutil
-
+import textwrap
 from abc import ABC, abstractmethod
 from typing import Optional, Union
 
@@ -183,4 +183,30 @@ def _merge_codelines(self, template_file_path: str, wf_codelines: str) -> str:
         with open(this_dir.joinpath(template_file_path)) as fp:
             run_workflow_code = fp.read()
             return run_workflow_code.replace("# Generated code goes here", wf_codelines)
-    
+
+    def _flowcept_init_python(self, workflow_id: str, workflow_name: str) -> str:
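+        """Generate the Python code that imports Flowcept and starts a Flowcept agent.
+
+        :param workflow_id: Workflow ID passed to Flowcept (also used as the bundle execution ID).
+        :type workflow_id: str
+
+        :param workflow_name: Workflow name passed to Flowcept.
+        :type workflow_name: str
+
+        :return: Code that creates a ``flowcept_agent`` object and calls ``start()`` on it.
+        :rtype: str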
+        """
+        code = textwrap.dedent(f"""
+        from flowcept.flowcept_api.flowcept_controller import Flowcept
+        flowcept_agent = Flowcept(workflow_id="{workflow_id}", workflow_name="{workflow_name}", bundle_exec_id="{workflow_id}")
+        flowcept_agent.start()
+        """)
+        return code
+
+    def _flowcept_stop_python(self) -> str:
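+        """Generate the Python code that stops the Flowcept agent.
+
+        :return: The statement that calls ``stop()`` on the ``flowcept_agent`` object.
+        :rtype: str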
+        """
+        return "flowcept_agent.stop()"
diff --git a/wfcommons/wfbench/translator/bash.py b/wfcommons/wfbench/translator/bash.py
@@ -100,12 +100,14 @@ def _bash_wftasks_codelines(self) -> None:
                     if a.startswith("--output-files"):
                         flag, output_files_dict = a.split(" ", 1)
                         output_files_dict = {f"data/{key}": value for key, value in ast.literal_eval(output_files_dict).items()}
-                        a = f"{flag} '{json.dumps(output_files_dict).replace('"', '\\"')}'"
+                        output_files_dict = json.dumps(output_files_dict).replace('"', '\\"')
+                        a = f"{flag} '{output_files_dict}'"
 
                     if a.startswith("--input-files"):
                         flag, input_files_arr = a.split(" ", 1)
                         input_files_arr = [f"data/{file}" for file in ast.literal_eval(input_files_arr)]
-                        a = f"{flag} '{json.dumps(input_files_arr).replace('"', '\\"')}'"
+                        input_files_arr = json.dumps(input_files_arr).replace('"', '\\"')
+                        a = f"{flag} '{input_files_arr}'"
 
                     args.append(a)
 
diff --git a/wfcommons/wfbench/translator/cwl.py b/wfcommons/wfbench/translator/cwl.py
@@ -107,11 +107,13 @@ def _parse_steps(self) -> None:
                     if a.startswith("--output-files"):
                         flag, output_files_dict = a.split(" ", 1)
                         output_files_dict = {f"{key}": value for key, value in ast.literal_eval(output_files_dict).items()}
-                        a = f"{flag} '{json.dumps(output_files_dict).replace('"', '\\"')}'"
+                        output_files_dict = json.dumps(output_files_dict).replace('"', '\\"')
+                        a = f"{flag} '{output_files_dict}'"
                     if a.startswith("--input-files"):
                         flag, input_files_arr = a.split(" ", 1)
                         input_files_arr = [f"{file}" for file in ast.literal_eval(input_files_arr)]
-                        a = f"{flag} '{json.dumps(input_files_arr).replace('"', '\\"')}'"
+                        input_files_arr = json.dumps(input_files_arr).replace('"', '\\"')
+                        a = f"{flag} '{input_files_arr}'"
                     args_array.append(a)
 
 
diff --git a/wfcommons/wfbench/translator/parsl.py b/wfcommons/wfbench/translator/parsl.py
@@ -103,13 +103,13 @@ def _parsl_wftasks_codelines(self) -> None:
                 for a in task.args:
                     if a.startswith("--output-files"):
                         flag, output_files_dict = a.split(" ", 1)
-                        output_files_dict = ast.literal_eval(output_files_dict)
-                        a = f"{flag} '{json.dumps(output_files_dict).replace('"', '\\"')}'"
+                        output_files_dict = json.dumps(ast.literal_eval(output_files_dict)).replace('"', '\\"')
+                        a = f"{flag} '{output_files_dict}'"
 
                     if a.startswith("--input-files"):
                         flag, input_files_arr = a.split(" ", 1)
-                        input_files_arr = ast.literal_eval(input_files_arr)
-                        a = f"{flag} '{json.dumps(input_files_arr).replace('"', '\\"')}'"
+                        input_files_arr = json.dumps(ast.literal_eval(input_files_arr)).replace('"', '\\"')
+                        a = f"{flag} '{input_files_arr}'"
                     args.append(a)
 
                 args = " ".join(args)
diff --git a/wfcommons/wfbench/translator/pycompss.py b/wfcommons/wfbench/translator/pycompss.py
@@ -47,9 +47,6 @@ def translate(self, output_folder: pathlib.Path) -> None:
         """
         self.output_folder = output_folder
         self.script = ""
-        # IMPORT Flowcept
-        if self.workflow.workflow_id is not None:
-            self.script += "from flowcept.flowcept_api.flowcept_controller import Flowcept\n\n"
 
         # PyCOMPSs translator
         self.script += "\n# workflow tasks\n"
@@ -232,10 +229,10 @@ def _pycompss_code(self) -> None:
         self.script += f"\n\nif __name__ == \"__main__\":\n"
         # START Flowcept
         if self.workflow.workflow_id is not None:
-            self.script += f"\tf = Flowcept(workflow_id='{self.workflow.workflow_id}', workflow_name='{self.workflow.name}', bundle_exec_id='{self.workflow.workflow_id}')\n"
-            self.script += "\tf.start()\n"
+            flowcept_init_code = self._flowcept_init_python(self.workflow.workflow_id, self.workflow.name)
+            self.script += "".join("\t" + line + "\n" for line in flowcept_init_code.splitlines())
         # main
         self.script += f"\tmain_program()\n"
         # STOP Flowcept
         if self.workflow.workflow_id is not None:
-            self.script += "\tf.stop()\n"
+            self.script += f"\t{self._flowcept_stop_python()}\n"
diff --git a/wfcommons/wfbench/translator/swift_t.py b/wfcommons/wfbench/translator/swift_t.py
@@ -74,14 +74,18 @@ def translate(self, output_folder: pathlib.Path) -> None:
         # defining input files
         self.logger.debug("Defining input files")
         in_count = 0
-        self.script = f"string root_in_files[];\n"
+        self.output_folder = output_folder
+        self.cpu_benchmark = output_folder.joinpath("./bin/cpu-benchmark").absolute()
+        self.script = f"string fs = sprintf(flowcept_start, \"{self.workflow.workflow_id}\", \"{self.workflow.name}\");\nstring fss = python_persist(fs);\n\n" if self.workflow.workflow_id else ""
+        self.script += "string root_in_files[];\n"
 
         for task_name in self.root_task_names:
             task = self.tasks[task_name]
             for file in task.input_files:
                 if task.name not in self.categories_input.keys():
                     self.categories_input[task.name] = in_count
-                    self.script += f"root_in_files[{in_count}] = \"{file.file_id}\";\n"
+                    in_file = output_folder.joinpath(f"./data/{file.file_id}").absolute()
+                    self.script += f"root_in_files[{in_count}] = \"{in_file}\";\n"
                     in_count += 1
                 self.files_map[file.file_id] = f"ins[{in_count}]"
         
@@ -96,6 +100,10 @@ def translate(self, output_folder: pathlib.Path) -> None:
         for category in self.categories_list:
             self._add_tasks(category)
 
+        # flowcept stop (disabled: code below is intentionally commented out -- TODO confirm where the agent is stopped)
+        # if self.workflow.workflow_id:
+        #     self.script += "string fss = sprintf(flowcept_stop);\npython_persist(fss);"
+
         run_workflow_code = self._merge_codelines("templates/swift_t_templates/workflow.swift", self.script)
 
         # write benchmark files
@@ -197,27 +205,28 @@ def _add_tasks(self, category: str) -> None:
                 num_tasks += 1
 
         cats = " + ".join(f"{k}__out[{v - 1}]" for k, v in input_files_cat.items())
-        in_str = ", ".join(f"{k}__{v}" for k, v in input_files_cat.items())
+        in_str = ", ".join(f"{k}_{v - 1}_output.txt" for k, v in input_files_cat.items())
         if "ins[" in cats:
             cats = "0"
             in_str = ""
         self.script += f"int dep_{self.cmd_counter} = {cats};\n"
         args += f", dep_{self.cmd_counter}"
-        self.script += f"string {category}_in = \"{in_str}\";\n"
+        args += f", \"{self.workflow.workflow_id}\", fss" if self.workflow.workflow_id else ", \"\""
+        self.script += f"string {category}_in = \"{self.output_folder.absolute()}/data/{in_str}\";\n"
 
         if num_tasks > 1:
             self.script += f"foreach i in [0:{num_tasks - 1}] {{\n" \
-                f"  string of = sprintf(\"{category}_%i_output.txt\", i);\n" \
-                f"  string cmd_{self.cmd_counter} = sprintf(command, \"{category}\", {args});\n" \
+                f"  string of = sprintf(\"{self.output_folder.absolute()}/data/{category}_%i_output.txt\", i);\n" \
+                f"  string cmd_{self.cmd_counter} = sprintf(command, \"{self.cpu_benchmark}\", \"{category}\", {args});\n" \
                 f"  string co_{self.cmd_counter} = python_persist(cmd_{self.cmd_counter});\n" \
                 f"  string of_{self.cmd_counter} = sprintf(\"0%s\", co_{self.cmd_counter});\n" \
                 f"  {category}__out[i] = string2int(of_{self.cmd_counter});\n" \
                 "}\n\n"
             
         else:
             args = args.replace(
-                ", of", f", \"{category}_0_output.txt\"").replace("[i]", "[0]")
-            self.script += f"string cmd_{self.cmd_counter} = sprintf(command, \"{category}\", {args});\n" \
+                ", of", f", \"{self.output_folder.absolute()}/data/{category}_0_output.txt\"").replace("[i]", "[0]")
+            self.script += f"string cmd_{self.cmd_counter} = sprintf(command, \"{self.cpu_benchmark}\", \"{category}\", {args});\n" \
                 f"string co_{self.cmd_counter} = python_persist(cmd_{self.cmd_counter});\n" \
                 f"string of_{self.cmd_counter} = sprintf(\"0%s\", co_{self.cmd_counter});\n" \
                 f"{category}__out[0] = string2int(of_{self.cmd_counter});\n\n"
diff --git a/wfcommons/wfbench/translator/taskvine.py b/wfcommons/wfbench/translator/taskvine.py
@@ -51,11 +51,18 @@ def translate(self, output_folder: pathlib.Path) -> None:
         self.next_level = self.root_task_names.copy()
         while self.next_level:
             self.next_level = self._add_level_tasks(self.next_level)
-            self.script += "wait_for_tasks_completion()\n\n"
+            self.script += "wait_for_tasks_completion()\n"
 
         # generate code
         run_workflow_code = self._merge_codelines("templates/taskvine_template.py", self.script)
-    
+
+        # generate Flowcept code
+        if self.workflow.workflow_id is not None:
+            run_workflow_code = run_workflow_code.replace("# FLOWCEPT_INIT",
+                                                          self._flowcept_init_python(self.workflow.workflow_id,
+                                                                            self.workflow.name))
+            run_workflow_code = run_workflow_code.replace("# FLOWCEPT_END", self._flowcept_stop_python())
+
         # write benchmark files
         output_folder.mkdir(parents=True)
         with open(output_folder.joinpath("taskvine_workflow.py"), "w") as fp:
diff --git a/wfcommons/wfbench/translator/templates/pycompss_template.py b/wfcommons/wfbench/translator/templates/pycompss_template.py
@@ -1,3 +1,13 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+#
+# Copyright (c) 2024-2025 The WfCommons Team.
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+
 import os
 from pycompss.api.task import task
 from pycompss.api.constraint import constraint
diff --git a/wfcommons/wfbench/translator/templates/swift_t_templates/workflow.swift b/wfcommons/wfbench/translator/templates/swift_t_templates/workflow.swift