Merge pull request #71 from wfcommons/flowcept_improvements

rafaelfsilva · web-flow · commit a84a6c268060 · 2025-04-11T12:34:02.000-07:00
Flowcept support for Swift/T
diff --git a/bin/wfbench b/bin/wfbench
@@ -334,7 +334,7 @@ def get_parser() -> argparse.ArgumentParser:
     
 
 def begin_flowcept(args):
-    print("Running with Flowcept.")
+    log_info("Running with Flowcept.")
     from flowcept import Flowcept, FlowceptTask
     # TODO: parametrize to allow storing individual tasks
     f = Flowcept(workflow_id=args.workflow_id,
diff --git a/wfcommons/wfbench/translator/swift_t.py b/wfcommons/wfbench/translator/swift_t.py
@@ -74,14 +74,18 @@ def translate(self, output_folder: pathlib.Path) -> None:
         # defining input files
         self.logger.debug("Defining input files")
         in_count = 0
-        self.script = f"string root_in_files[];\n"
+        self.output_folder = output_folder
+        self.cpu_benchmark = output_folder.joinpath("./bin/cpu-benchmark").absolute()
+        self.script = f"string fs = sprintf(flowcept_start, \"{self.workflow.workflow_id}\", \"{self.workflow.name}\");\nstring fss = python_persist(fs);\n\n" if self.workflow.workflow_id else ""
+        self.script += "string root_in_files[];\n"
 
         for task_name in self.root_task_names:
             task = self.tasks[task_name]
             for file in task.input_files:
                 if task.name not in self.categories_input.keys():
                     self.categories_input[task.name] = in_count
-                    self.script += f"root_in_files[{in_count}] = \"{file.file_id}\";\n"
+                    in_file = output_folder.joinpath(f"./data/{file.file_id}").absolute()
+                    self.script += f"root_in_files[{in_count}] = \"{in_file}\";\n"
                     in_count += 1
                 self.files_map[file.file_id] = f"ins[{in_count}]"
         
@@ -96,6 +100,10 @@ def translate(self, output_folder: pathlib.Path) -> None:
         for category in self.categories_list:
             self._add_tasks(category)
 
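+        # flowcept stop (currently disabled)
+        # NOTE(review): the snippet below declares `fss`, which the flowcept start code
+        # emitted earlier in this method already declares -- confirm/rename before re-enabling.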
+        # if self.workflow.workflow_id:
+        #     self.script += "string fss = sprintf(flowcept_stop);\npython_persist(fss);"
+
         run_workflow_code = self._merge_codelines("templates/swift_t_templates/workflow.swift", self.script)
 
         # write benchmark files
@@ -197,27 +205,28 @@ def _add_tasks(self, category: str) -> None:
                 num_tasks += 1
 
         cats = " + ".join(f"{k}__out[{v - 1}]" for k, v in input_files_cat.items())
-        in_str = ", ".join(f"{k}__{v}" for k, v in input_files_cat.items())
+        in_str = ", ".join(f"{k}_{v - 1}_output.txt" for k, v in input_files_cat.items())
         if "ins[" in cats:
             cats = "0"
             in_str = ""
         self.script += f"int dep_{self.cmd_counter} = {cats};\n"
         args += f", dep_{self.cmd_counter}"
-        self.script += f"string {category}_in = \"{in_str}\";\n"
+        args += f", \"{self.workflow.workflow_id}\", fss" if self.workflow.workflow_id else ", \"\""
+        self.script += f"string {category}_in = \"{self.output_folder.absolute()}/data/{in_str}\";\n"
 
         if num_tasks > 1:
             self.script += f"foreach i in [0:{num_tasks - 1}] {{\n" \
-                f"  string of = sprintf(\"{category}_%i_output.txt\", i);\n" \
-                f"  string cmd_{self.cmd_counter} = sprintf(command, \"{category}\", {args});\n" \
+                f"  string of = sprintf(\"{self.output_folder.absolute()}/data/{category}_%i_output.txt\", i);\n" \
+                f"  string cmd_{self.cmd_counter} = sprintf(command, \"{self.cpu_benchmark}\", \"{category}\", {args});\n" \
                 f"  string co_{self.cmd_counter} = python_persist(cmd_{self.cmd_counter});\n" \
                 f"  string of_{self.cmd_counter} = sprintf(\"0%s\", co_{self.cmd_counter});\n" \
                 f"  {category}__out[i] = string2int(of_{self.cmd_counter});\n" \
                 "}\n\n"
             
         else:
             args = args.replace(
-                ", of", f", \"{category}_0_output.txt\"").replace("[i]", "[0]")
-            self.script += f"string cmd_{self.cmd_counter} = sprintf(command, \"{category}\", {args});\n" \
+                ", of", f", \"{self.output_folder.absolute()}/data/{category}_0_output.txt\"").replace("[i]", "[0]")
+            self.script += f"string cmd_{self.cmd_counter} = sprintf(command, \"{self.cpu_benchmark}\", \"{category}\", {args});\n" \
                 f"string co_{self.cmd_counter} = python_persist(cmd_{self.cmd_counter});\n" \
                 f"string of_{self.cmd_counter} = sprintf(\"0%s\", co_{self.cmd_counter});\n" \
                 f"{category}__out[0] = string2int(of_{self.cmd_counter});\n\n"
diff --git a/wfcommons/wfbench/translator/templates/swift_t_templates/workflow.swift b/wfcommons/wfbench/translator/templates/swift_t_templates/workflow.swift
@@ -4,99 +4,191 @@ import python;
 import string;
 import unix;
 
+// Python snippet template that creates and starts the Flowcept agent.
+// sprintf placeholders, in order: workflow id, workflow name. The translator
+// fills it with sprintf and runs the result through python_persist.
+// An exception raised by start() is printed with its traceback, not re-raised.
+global const string flowcept_start = 
+"""
+workflow_id = "%s"
+from flowcept.flowcept_api.flowcept_controller import Flowcept
+flowcept_agent = Flowcept(workflow_id=workflow_id, workflow_name="%s", bundle_exec_id=workflow_id)
+
+try:
+    flowcept_agent.start()
+except Exception:
+    import traceback
+    traceback.print_exc()
+""";
+
 string command = 
 """
+# WfBench task template. The placeholders below are filled positionally by
+# sprintf in the generated Swift/T script, so keep their order unchanged.
+import logging
 import os
 import pathlib
+import signal
 import socket
 import subprocess
 import time
+from pathos.helpers import mp as multiprocessing
 
-this_dir = pathlib.Path(".").absolute()
+logging.basicConfig(
+    level=logging.INFO,
+    format="[WfBench][%%(asctime)s][%%(levelname)s] %%(message)s",
+    datefmt="%%H:%%M:%%S",
+    handlers=[logging.StreamHandler()]
+)
 
+cpu_benchmark = "%s"
 task_name = "%s"
-files_list = "%s"
+files_list = ["%s"]
 gpu_work = int(%i)
+cpu_work = int(%i)
+cpu_threads = int(10 * %f)
+output_data = {"%s": int(%i)}
+dep = %i
+workflow_id = "%s"
 
-print(f"[WfBench] [{task_name}] Starting Benchmark on {socket.gethostname()}", flush=True)
-
-print(f"[WfBench] [{task_name}] Starting IO Read Benchmark...", flush=True)
-if "__" not in files_list:
-    with open(this_dir.joinpath(f"./data/{files_list}"), "rb") as fp:
-        start = time.perf_counter()
-        print(f"[WfBench]   Reading '{files_list}'", flush=True)
-        fp.readlines()
-        end = time.perf_counter()
-        data_size = this_dir.joinpath(f"./data/{files_list}").stat().st_size
-        print(f"[WfBench] [{task_name}] Metrics (read) [time,size]: {end - start},{data_size}", flush=True)
-else:
-    files = files_list.split(", ")
-    for file in files:
-        counter = 0
-        fd = file.split("__")
-        start = time.perf_counter()
-        file_size = 0
-        for f in this_dir.glob(f"./data/{fd[0]}_*_output.txt"):
-            if counter >= int(fd[1]):
-                break
-            file_size += os.stat(f).st_size
-            with open(f, "rb") as fp:
-                print(f"[WfBench]   Reading '{f}'", flush=True)
-                fp.readlines()
-            counter += 1
-        end = time.perf_counter()
-        print(f"[WfBench] [{task_name}] Metrics (read) [time,size]: {end - start},{file_size}", flush=True)
-print(f"[WfBench] [{task_name}] Completed IO Read Benchmark", flush=True)
-
-if gpu_work > 0:
-    print(f"[WfBench] [{task_name}] Starting GPU Benchmark...", flush=True)
-    gpu_prog = [f"CUDA_DEVICE_ORDER=PCI_BUS_ID {this_dir.joinpath('./bin/gpu-benchmark')} {gpu_work}"]
-    start = time.perf_counter()
-    gpu_proc = subprocess.Popen(gpu_prog, shell=True)
-    gpu_proc.wait()
-    end = time.perf_counter()
-    print(f"[WfBench] [{task_name}] Metrics (compute-gpu) [time,work]: {end - start},{gpu_work}", flush=True)
+# Flowcept is only used when the translator passed a non-empty workflow id.
+# Test the variable itself: a quoted 'workflow_id' literal is always truthy.
+if workflow_id:
+    logging.info("Running with Flowcept.")
+    from flowcept import Flowcept, FlowceptTask
+    fc = Flowcept(workflow_id=workflow_id,
+                bundle_exec_id=workflow_id,
+                start_persistence=False, save_workflow=False)
+    fc.start()
+    fc_task = FlowceptTask(workflow_id=workflow_id, used={
+      'workflow_id': workflow_id
+    })
+
+logging.info(f"Starting {task_name} Benchmark on {socket.gethostname()}")
+
+procs = []
+cpu_queue = multiprocessing.Queue()
+logging.debug(f"Working directory: {os.getcwd()}")
+
+logging.debug("Starting IO benchmark...")
+io_proc = None
+termination_event = multiprocessing.Event()
+
+io_proc = multiprocessing.Process(
+    target=lambda inputs=files_list, outputs=output_data, cpu_queue=cpu_queue, 
+           termination_event=termination_event: (
+        memory_limit := 10 * 1024 * 1024,
+        [open(name, "wb").close() for name in outputs],
+        io_completed := 0,
+        bytes_read := {name: 0 for name in inputs},
+        bytes_written := {name: 0 for name in outputs},
+        input_sizes := {name: __import__("os").path.getsize(name) for name in inputs},
+        [
+            (
+                cpu_percent := cpu_queue.get(timeout=1.0),                
+                should_exit := termination_event.is_set(),
+                (
+                    while_loop_var := True,
+                    [
+                        (
+                            new_val := (
+                                cpu_queue.get(timeout = 1.0)
+                                if not cpu_queue.empty() else None
+                            ),
+                            cpu_percent := (
+                                max(cpu_percent, new_val) 
+                                if new_val is not None else cpu_percent
+                            ),
+                            while_loop_var := (
+                                new_val is not None and not cpu_queue.empty()
+                            )
+                        )
+                        for _ in range(100) if while_loop_var
+                    ],
+                    bytes_to_read := {
+                        name: max(0, int(size * (cpu_percent / 100) - bytes_read[name]))
+                        for name, size in input_sizes.items()
+                    },
+                    bytes_to_write := {
+                        name: max(0, int(size * (cpu_percent / 100) - bytes_written[name]))
+                        for name, size in outputs.items()
+                    },
+                    logging.debug("Starting IO Read Benchmark..."),
+                    in_file := list(bytes_to_read.keys())[0],
+                    in_size := list(bytes_to_read.values())[0],
+                    open(in_file, "rb").read(int(in_size)),
+                    logging.debug("Completed IO Read Benchmark!"),
+                    out_file := list(output_data.keys())[0],
+                    out_size := list(output_data.values())[0],
+                    logging.debug(f"Writing output file '{out_file}'"),
+                    open(out_file, "ab").write(__import__("os").urandom(int(out_size))),
+                    bytes_read.update({
+                        name: bytes_read[name] + bytes_to_read[name]
+                        for name in bytes_to_read
+                    }),
+                    bytes_written.update({
+                        name: bytes_written[name] + bytes_to_write[name]
+                        for name in bytes_to_write
+                    }),
+                    
+                    logging.debug(f"Bytes Read: {bytes_read}"),
+                    logging.debug(f"Bytes Written: {bytes_written}"),
+                    io_completed := cpu_percent,
+                ) if cpu_percent is not None else time.sleep(0.1),
+                not (should_exit or io_completed >= 100)
+            )
+            for _ in range(1000000)
+            if not (io_completed >= 100 or termination_event.is_set())
+        ],
+        logging.info("IO benchmark completed")
+    )
+)
+io_proc.start()
+procs.append(io_proc)
 
-cpu_work = int(%i)
 if cpu_work > 0:
-    print(f"[WfBench] [{task_name}] Starting CPU and Memory Benchmarks...", flush=True)
-    cpu_threads=int(10 * %f)
-    mem_threads=10 - cpu_threads
-    total_mem_bytes = 0.05
+    logging.info(f"Starting CPU and Memory Benchmarks for {task_name}...")
+
+    mem_threads = 10 - cpu_threads
     cpu_work_per_thread = int(cpu_work / cpu_threads)
 
     cpu_procs = []
-    cpu_prog = [
-        f"{this_dir.joinpath('./bin/cpu-benchmark')}", f"{cpu_work_per_thread}"]
+    mem_procs = []
+    cpu_prog = [f"{cpu_benchmark}", f"{cpu_work_per_thread}"]
+    mem_prog = ["stress-ng", "--vm", f"{mem_threads}",
+                "--vm-bytes", "0.05%%", "--vm-keep"]
 
-    start = time.perf_counter()
     for i in range(cpu_threads):
-        cpu_proc = subprocess.Popen(cpu_prog)
+        cpu_proc = subprocess.Popen(cpu_prog, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
         cpu_procs.append(cpu_proc)
+        monitor_thread = multiprocessing.Process(
+            target=lambda proc=cpu_proc, queue=cpu_queue: 
+                [
+                    queue.put(float(line.strip().split()[1].strip('%%')))
+                    for line in iter(proc.stdout.readline, "") 
+                    if line.strip() and line.strip().startswith("Progress:")
+                ]
+        )
+        monitor_thread.start()
 
     if mem_threads > 0:
-        mem_prog = ["stress-ng", "--vm", f"{mem_threads}",
-                    "--vm-bytes", f"{total_mem_bytes}%%", "--vm-keep"]
-        mem_proc = subprocess.Popen(mem_prog, stderr=subprocess.DEVNULL)
-
-    for proc in cpu_procs:
-        proc.wait()
-    mem_kill = subprocess.Popen(["killall", "stress-ng"])
-    mem_kill.wait()
-    end = time.perf_counter()
-    print(f"[WfBench] [{task_name}] Metrics (compute) [time,work]: {end - start},{cpu_work}", flush=True)
-    print(f"[WfBench] [{task_name}] Completed CPU and Memory Benchmarks", flush=True)
-
-print(f"[WfBench] [{task_name}] Writing output file", flush=True)
-start = time.perf_counter()
-with open(this_dir.joinpath("./data/%s"), "wb") as fp:
-    file_size = int(%i)
-    fp.write(os.urandom(file_size))
-end = time.perf_counter()
-print(f"[WfBench] [{task_name}] Metrics (write) [time,size]: {end - start},{file_size}", flush=True)
-
-print(f"[WfBench] [{task_name}] Benchmark completed!", flush=True)
-dep = %i
+        mem_proc = subprocess.Popen(mem_prog, preexec_fn=os.setsid)
+        mem_procs.append(mem_proc)
+
+    procs.extend(cpu_procs)
+    for proc in procs:
+        if isinstance(proc, subprocess.Popen):
+            proc.wait()
+    if io_proc is not None and io_proc.is_alive():
+        io_proc.join()
+
+    for mem_proc in mem_procs:
+        try:
+            # stress-ng was started in its own session (os.setsid), so its pid is
+            # also its process-group id: signal the whole group, workers included.
+            os.killpg(mem_proc.pid, signal.SIGKILL)
+        except ProcessLookupError:
+            logging.debug("Memory process already exited.")
+        # Reap the child so it does not linger as a zombie.
+        mem_proc.wait()
+    subprocess.Popen(["pkill", "-f", "stress-ng"]).wait()
+
+    logging.debug("Completed CPU and Memory Benchmarks!")
+    
+logging.info(f"Benchmark {task_name} completed!")
+
+# Only tear down Flowcept if it was started above.
+if workflow_id:
+    fc_task.end()
+    fc.stop()
+    time.sleep(1)
 """;
 
 # Generated code goes here