Skip to content

Commit f627804

Browse files
committed
Modified the swift-t translator to fork-exec wfbench (fork-exec is known to be
slow) instead of copy-pasting the wfbench code into the generated code. Later, we'll try to use python_exec
1 parent 9a77ef8 commit f627804

2 files changed

Lines changed: 27 additions & 136 deletions

File tree

wfcommons/wfbench/translator/swift_t.py

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -74,7 +74,7 @@ def translate(self, output_folder: pathlib.Path) -> None:
7474
self.logger.debug("Defining input files")
7575
in_count = 0
7676
self.output_folder = output_folder
77-
self.cpu_benchmark = output_folder.joinpath("./bin/cpu-benchmark").absolute()
77+
self.wfbench = output_folder.joinpath("./bin/wfbench").absolute()
7878
self.script = f"string fs = sprintf(flowcept_start, \"{self.workflow.workflow_id}\");\nstring fss = python_persist(fs);\n\n" if self.workflow.workflow_id else ""
7979
self.script += "string root_in_files[];\n"
8080

@@ -116,7 +116,7 @@ def translate(self, output_folder: pathlib.Path) -> None:
116116
self._copy_binary_files(output_folder)
117117
self._generate_input_files(output_folder)
118118

119-
# write README file
119+
# README file
120120
self._write_readme_file(output_folder)
121121

122122
def _find_categories_list(self, task_name: str, parent_task: Optional[str] = None) -> None:
@@ -221,7 +221,7 @@ def _add_tasks(self, category: str) -> None:
221221
self.script += f"foreach i in [0:{num_tasks - 1}] {{\n" \
222222
f" string of = sprintf(\"{self.output_folder.absolute()}/data/{category}_%i_output.txt\", i);\n" \
223223
f" string task_id = \"{category}_\" + i;\n" \
224-
f" string cmd_{self.cmd_counter} = sprintf(command, \"{self.cpu_benchmark}\", task_id, {args});\n" \
224+
f" string cmd_{self.cmd_counter} = sprintf(command, \"{self.wfbench}\", task_id, {args});\n" \
225225
f" string co_{self.cmd_counter} = python_persist(cmd_{self.cmd_counter});\n" \
226226
f" string of_{self.cmd_counter} = sprintf(\"0%s\", co_{self.cmd_counter});\n" \
227227
f" {category}__out[i] = string2int(of_{self.cmd_counter});\n" \
@@ -235,7 +235,7 @@ def _add_tasks(self, category: str) -> None:
235235
self.out_files.add(out_file)
236236
args = args.replace(
237237
", of", f", \"{out_file}\"").replace("[i]", "[0]")
238-
self.script += f"string cmd_{self.cmd_counter} = sprintf(command, \"{self.cpu_benchmark}\", \"{category}_{self.cmd_counter}\", {args});\n" \
238+
self.script += f"string cmd_{self.cmd_counter} = sprintf(command, \"{self.wfbench}\", \"{category}_{self.cmd_counter}\", {args});\n" \
239239
f"string co_{self.cmd_counter} = python_persist(cmd_{self.cmd_counter});\n" \
240240
f"string of_{self.cmd_counter} = sprintf(\"0%s\", co_{self.cmd_counter});\n" \
241241
f"{category}__out[0] = string2int(of_{self.cmd_counter});\n\n"
@@ -253,5 +253,4 @@ def _write_readme_file(self, output_folder: pathlib.Path) -> None:
253253
with open(readme_file_path, "w") as out:
254254
out.write(f"Start a REDIS server: redis-server\n")
255255
out.write(f"[Optional] Check that REDIS works: redis-cli ping (it should say \"PONG\")\n")
256-
out.write(f"Run the workflow: swift-t workflow.swift\n")
257-
256+
out.write(f"Run the workflow: swift-t workflow.swift\n")

wfcommons/wfbench/translator/templates/swift_t/workflow.swift

Lines changed: 22 additions & 130 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ logging.basicConfig(
2525
handlers=[logging.StreamHandler()]
2626
)
2727
28-
workflow_id = "%s"
28+
workflow_id = "%s"
2929
workflow_name = "%s"
3030
out_files = [%s]
3131
@@ -63,12 +63,12 @@ string command =
6363
"""
6464
import logging
6565
import os
66+
import sys
6667
import pathlib
6768
import signal
6869
import socket
6970
import subprocess
7071
import time
71-
from pathos.helpers import mp as multiprocessing
7272
7373
__import__("logging").basicConfig(
7474
level=logging.INFO,
@@ -77,14 +77,15 @@ __import__("logging").basicConfig(
7777
handlers=[logging.StreamHandler()]
7878
)
7979
80-
cpu_benchmark = "%s"
80+
wfbench = "%s"
8181
task_name = "%s"
82-
files_list = ["%s"]
82+
input_file = ["%s"]
8383
gpu_work = int(%i)
8484
cpu_work = int(%i)
8585
percent_cpu = %f
8686
cpu_threads = int(10 * percent_cpu)
87-
output_data = {"%s": int(%i)}
87+
output_file = "%s"
88+
output_file_size = int(%i)
8889
dep = %i
8990
workflow_id = "%s"
9091
task_id = f"{workflow_id}_{task_name}"
@@ -106,131 +107,22 @@ if 'workflow_id':
106107
107108
__import__("logging").info(f"Starting {task_name} Benchmark on {socket.gethostname()}")
108109
109-
procs = []
110-
cpu_queue = multiprocessing.Queue()
111-
__import__("logging").debug(f"Working directory: {os.getcwd()}")
112-
113-
__import__("logging").debug("Starting IO benchmark...")
114-
io_proc = None
115-
termination_event = multiprocessing.Event()
116-
117-
io_proc = multiprocessing.Process(
118-
target=lambda inputs=files_list, outputs=output_data, cpu_queue=cpu_queue,
119-
termination_event=termination_event: (
120-
memory_limit := 10 * 1024 * 1024,
121-
[open(name, "wb").close() for name in outputs],
122-
io_completed := 0,
123-
bytes_read := {name: 0 for name in inputs},
124-
bytes_written := {name: 0 for name in outputs},
125-
input_sizes := {name: __import__("os").path.getsize(name) for name in inputs},
126-
[
127-
(
128-
cpu_percent := cpu_queue.get(timeout=1.0),
129-
should_exit := termination_event.is_set(),
130-
(
131-
while_loop_var := True,
132-
[
133-
(
134-
new_val := (
135-
cpu_queue.get(timeout = 1.0)
136-
if not cpu_queue.empty() else None
137-
),
138-
cpu_percent := (
139-
max(cpu_percent, new_val)
140-
if new_val is not None else cpu_percent
141-
),
142-
while_loop_var := (
143-
new_val is not None and not cpu_queue.empty()
144-
)
145-
)
146-
for _ in range(100) if while_loop_var
147-
],
148-
bytes_to_read := {
149-
name: max(0, int(size * (cpu_percent / 100) - bytes_read[name]))
150-
for name, size in input_sizes.items()
151-
},
152-
bytes_to_write := {
153-
name: max(0, int(size * (cpu_percent / 100) - bytes_written[name]))
154-
for name, size in outputs.items()
155-
},
156-
__import__("logging").debug("Starting IO Read Benchmark..."),
157-
in_file := list(bytes_to_read.keys())[0],
158-
in_size := list(bytes_to_read.values())[0],
159-
open(in_file, "rb").read(int(in_size)),
160-
__import__("logging").debug("Completed IO Read Benchmark!"),
161-
out_file := list(outputs.keys())[0],
162-
out_size := list(outputs.values())[0],
163-
__import__("logging").debug(f"Writing output file '{out_file}'"),
164-
open(out_file, "ab").write(__import__("os").urandom(int(out_size))),
165-
bytes_read.update({
166-
name: bytes_read[name] + bytes_to_read[name]
167-
for name in bytes_to_read
168-
}),
169-
bytes_written.update({
170-
name: bytes_written[name] + bytes_to_write[name]
171-
for name in bytes_to_write
172-
}),
173-
174-
__import__("logging").debug(f"Bytes Read: {bytes_read}"),
175-
__import__("logging").debug(f"Bytes Written: {bytes_written}"),
176-
io_completed := cpu_percent,
177-
) if cpu_percent is not None else time.sleep(0.1),
178-
not (should_exit or io_completed >= 100)
179-
)
180-
for _ in range(1000000)
181-
if not (io_completed >= 100 or termination_event.is_set())
182-
],
183-
__import__("logging").info("IO benchmark completed")
184-
)
185-
)
186-
io_proc.start()
187-
procs.append(io_proc)
188-
189-
if cpu_work > 0:
190-
__import__("logging").info(f"Starting CPU and Memory Benchmarks for {task_name}...")
191-
192-
mem_threads = 10 - cpu_threads
193-
cpu_work_per_thread = int(cpu_work / cpu_threads)
194-
195-
cpu_procs = []
196-
mem_procs = []
197-
cpu_prog = [f"{cpu_benchmark}", f"{cpu_work_per_thread}"]
198-
mem_prog = ["stress-ng", "--vm", f"{mem_threads}",
199-
"--vm-bytes", "0.05%%", "--vm-keep"]
200-
201-
for i in range(cpu_threads):
202-
cpu_proc = subprocess.Popen(cpu_prog, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
203-
cpu_procs.append(cpu_proc)
204-
monitor_thread = multiprocessing.Process(
205-
target=lambda proc=cpu_proc, queue=cpu_queue:
206-
[
207-
queue.put(float(line.strip().split()[1].strip('%%')))
208-
for line in iter(proc.stdout.readline, "")
209-
if line.strip() and line.strip().startswith("Progress:")
210-
]
211-
)
212-
monitor_thread.start()
213-
214-
if mem_threads > 0:
215-
mem_proc = subprocess.Popen(mem_prog, preexec_fn=os.setsid)
216-
mem_procs.append(mem_proc)
217-
218-
procs.extend(cpu_procs)
219-
for proc in procs:
220-
if isinstance(proc, subprocess.Popen):
221-
proc.wait()
222-
if io_proc is not None and io_proc.is_alive():
223-
io_proc.join()
224-
225-
for mem_proc in mem_procs:
226-
try:
227-
os.kill(mem_proc.pid, signal.SIGKILL)
228-
except subprocess.TimeoutExpired:
229-
__import__("logging").debug("Memory process did not terminate; force-killing.")
230-
subprocess.Popen(["pkill", "-f", "stress-ng"]).wait()
231-
232-
__import__("logging").info("Completed CPU and Memory Benchmarks!")
233-
110+
cmd = [
111+
sys.executable, wfbench,
112+
"--name", task_name,
113+
"--workflow_id", workflow_id,
114+
"--percent-cpu", str(percent_cpu),
115+
"--cpu-work", str(cpu_work),
116+
"--output-files", f'{{"{output_file}": {output_file_size}}}',
117+
"--input-files", str(input_file).replace("'", '"'),
118+
"--with-flowcept",
119+
]
120+
if gpu_work:
121+
cmd += ["--gpu-work", str(gpu_work)]
122+
123+
logging.info(f"Launching wfbench for task {task_name}")
124+
proc = subprocess.run(cmd)
125+
234126
__import__("logging").info(f"Benchmark {task_name} completed!")
235127
236128
if 'workflow_id':

0 commit comments

Comments
 (0)