Skip to content

Commit 54c212e

Browse files
committed
Insane race-condition bug fix in wfbench.py (having to deal with killing
child processes that are becoming orphans). Some API updates to specify num_chunks at the user level
1 parent 4da3f66 commit 54c212e

3 files changed

Lines changed: 29 additions & 315 deletions

File tree

bin/wfbench

Lines changed: 23 additions & 311 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ import os
1212
import pathlib
1313
import subprocess
1414
import time
15+
import signal
1516
import sys
1617
import argparse
1718
import re
@@ -80,26 +81,26 @@ class ProcessHandle:
8081
self._proc.terminate()
8182

8283
def terminate_along_with_children(self):
83-
# If it's a multiprocessing, just kill the parent and return
8484
if isinstance(self._proc, multiprocessing.Process):
8585
self._proc.terminate()
8686
return
87-
# If it's a Popen, then do the brute-force thing
8887
try:
89-
parent = psutil.Process(self._proc.pid)
90-
children = parent.children(recursive=True)
91-
for child in children:
92-
try:
93-
child.kill()
94-
except psutil.NoSuchProcess:
95-
pass # Process is already dead'
88+
pgid = os.getpgid(self._proc.pid)
89+
os.killpg(pgid, signal.SIGKILL)
90+
except ProcessLookupError:
91+
pass # group leader already gone, try children directly
92+
except PermissionError:
93+
pass
94+
finally:
95+
# Catch any re-parented children (ppid=1) that psutil can still see
9696
try:
97-
parent.kill()
97+
for child in psutil.Process(self._proc.pid).children(recursive=True):
98+
try:
99+
child.kill()
100+
except psutil.NoSuchProcess:
101+
pass
98102
except psutil.NoSuchProcess:
99-
pass # Nevermind
100-
except subprocess.TimeoutExpired:
101-
log_debug("Process did not terminate; force-killing.")
102-
subprocess.Popen(["pkill", "-f", "stress-ng"]).wait()
103+
pass
103104

104105
def wait(self):
105106
if isinstance(self._proc, multiprocessing.Process):
@@ -348,186 +349,6 @@ def unlock_core(path_locked: pathlib.Path,
348349
finally:
349350
lock.release()
350351

351-
# def monitor_progress(proc, cpu_queue):
352-
# """Monitor progress from the CPU benchmark process."""
353-
# for line in iter(proc.stdout.readline, ""): # No decode needed
354-
# line = line.strip()
355-
# if line.startswith("Progress:"):
356-
# try:
357-
# progress = float(line.split()[1].strip('%'))
358-
# cpu_queue.put(progress)
359-
# except (ValueError, IndexError):
360-
# pass
361-
#
362-
# def cpu_mem_benchmark(cpu_queue: multiprocessing.Queue,
363-
# cpu_threads: Optional[int] = 5,
364-
# mem_threads: Optional[int] = 5,
365-
# cpu_work: Optional[int] = 100,
366-
# core: Optional[int] = None,
367-
# total_mem: Optional[int] = None) -> List:
368-
# """
369-
# Run CPU and memory benchmark.
370-
#
371-
# :param cpu_queue: Queue to push CPU benchmark progress as a float.
372-
# :type cpu_queue: multiprocessing.Queue
373-
# :param cpu_threads: Number of threads for CPU benchmark.
374-
# :type cpu_threads: Optional[int]
375-
# :param mem_threads: Number of threads for memory benchmark.
376-
# :type mem_threads: Optional[int]
377-
# :param cpu_work: Total work units for CPU benchmark.
378-
# :type cpu_work: Optional[int]
379-
# :param core: Core to pin the benchmark processes to.
380-
# :type core: Optional[int]
381-
# :param total_mem: Total memory to use for memory benchmark.
382-
# :type total_mem: Optional[float]
383-
#
384-
# :return: Lists of CPU and memory subprocesses.
385-
# :rtype: List
386-
# """
387-
# total_mem = f"{total_mem}B" if total_mem else f"{100.0 / os.cpu_count()}%"
388-
# cpu_work_per_thread = int(1000000 * cpu_work / (16384 * cpu_threads)) if cpu_threads != 0 else int32_max**2
389-
# cpu_samples = min(cpu_work_per_thread, int32_max)
390-
# cpu_ops = (cpu_work_per_thread + int32_max - 1) // int32_max
391-
# if cpu_ops > int32_max:
392-
# log_info("Exceeded maximum allowed value of cpu work.")
393-
# cpu_ops = int32_max
394-
#
395-
# cpu_proc = None
396-
# mem_proc = None
397-
#
398-
# cpu_prog = ["stress-ng", "--monte-carlo", f"{cpu_threads}",
399-
# "--monte-carlo-method", "pi",
400-
# "--monte-carlo-rand", "lcg",
401-
# "--monte-carlo-samples", f"{cpu_samples}",
402-
# "--monte-carlo-ops", f"{cpu_ops}",
403-
# "--quiet"]
404-
# mem_prog = ["stress-ng", "--vm", f"{mem_threads}",
405-
# "--vm-bytes", f"{total_mem}", "--vm-keep", "--quiet"]
406-
#
407-
# if cpu_threads > 0:
408-
# cpu_proc = subprocess.Popen(cpu_prog, preexec_fn=os.setsid)
409-
#
410-
# # NOTE: might be a good idea to use psutil to set the affinity (works across platforms)
411-
# if core:
412-
# os.sched_setaffinity(cpu_proc.pid, {core})
413-
#
414-
# if mem_threads > 0:
415-
# # NOTE: add a check to use creationflags=subprocess.CREATE_NEW_PROCESS_GROUP for Windows
416-
# mem_proc = subprocess.Popen(mem_prog, preexec_fn=os.setsid)
417-
# if core:
418-
# os.sched_setaffinity(mem_proc.pid, {core})
419-
#
420-
# return [cpu_proc, mem_proc]
421-
#
422-
#
423-
# def io_read_benchmark_user_input_data_size(inputs,
424-
# rundir=None,
425-
# memory_limit=None):
426-
# if memory_limit is None:
427-
# memory_limit = -1
428-
# memory_limit = int(memory_limit)
429-
# log_debug("Starting IO Read Benchmark...")
430-
# for file, size in inputs.items():
431-
# with open(rundir.joinpath(file), "rb") as fp:
432-
# log_debug(f"Reading '{file}'")
433-
# chunk_size = min(size, memory_limit)
434-
# while fp.read(chunk_size):
435-
# pass
436-
# log_debug("Completed IO Read Benchmark!")
437-
#
438-
#
439-
# def io_write_benchmark_user_input_data_size(outputs,
440-
# rundir=None,
441-
# memory_limit=None):
442-
# if memory_limit is None:
443-
# memory_limit = sys.maxsize
444-
# memory_limit = int(memory_limit)
445-
# for file_name, file_size in outputs.items():
446-
# log_debug(f"Writing output file '{file_name}'")
447-
# file_size_todo = file_size
448-
# while file_size_todo > 0:
449-
# with open(rundir.joinpath(file_name), "ab") as fp:
450-
# chunk_size = min(file_size_todo, memory_limit)
451-
# file_size_todo -= fp.write(os.urandom(int(chunk_size)))
452-
#
453-
#
454-
# def io_alternate(inputs, outputs, cpu_queue: multiprocessing.Queue, memory_limit=None, rundir=None, event=None):
455-
# """Alternate between reading and writing to a file, ensuring read only happens after write."""
456-
#
457-
# if memory_limit is None:
458-
# memory_limit = 10 * 1024 * 1024 # sys.maxsize
459-
# memory_limit = int(memory_limit)
460-
#
461-
# # queue will have messages in the form (cpu_percent_completed)
462-
# # Get the last message and trash the rest
463-
#
464-
# # Create empty files
465-
# for name in outputs:
466-
# open(rundir.joinpath(name), "wb").close()
467-
#
468-
# io_completed = 1
469-
# bytes_read = {
470-
# name: 0
471-
# for name in inputs
472-
# }
473-
# bytes_written = {
474-
# name: 0
475-
# for name in outputs
476-
# }
477-
#
478-
# # get size of inputs
479-
# inputs = {
480-
# name: os.path.getsize(rundir.joinpath(name))
481-
# for name in inputs
482-
# }
483-
#
484-
# while io_completed < 100:
485-
# #cpu_percent = max(io_completed, cpu_queue.get())
486-
# #while True: # Get the last message
487-
# # try:
488-
# # cpu_percent = max(io_completed, cpu_queue.get_nowait())
489-
# # except queue.Empty:
490-
# # break
491-
#
492-
# log_debug(f"IO Percent: {io_completed}")
493-
# if True: #cpu_percent:
494-
# bytes_to_read = {
495-
# name: int(size * (io_completed / 100) - bytes_read[name])
496-
# for name, size in inputs.items()
497-
# }
498-
# bytes_to_write = {
499-
# name: int(size * (io_completed / 100) - bytes_written[name])
500-
# for name, size in outputs.items()
501-
# }
502-
# io_read_benchmark_user_input_data_size(bytes_to_read, rundir, memory_limit=memory_limit)
503-
# io_write_benchmark_user_input_data_size(bytes_to_write, rundir, memory_limit=memory_limit)
504-
#
505-
# bytes_read = {
506-
# name: bytes_read[name] + bytes_to_read[name]
507-
# for name in bytes_to_read
508-
# }
509-
# bytes_written = {
510-
# name: bytes_written[name] + bytes_to_write[name]
511-
# for name in bytes_to_write
512-
# }
513-
#
514-
# log_debug(f"Bytes Read: {bytes_read}")
515-
# log_debug(f"Bytes Written: {bytes_written}")
516-
#
517-
# io_completed = io_completed + 1
518-
#
519-
# if io_completed >= 100:
520-
# break
521-
522-
523-
# def gpu_benchmark(time: int = 100,
524-
# work: int = 100,
525-
# device: int = 0): #work, device
526-
#
527-
# gpu_prog = [f"CUDA_DEVICE_ORDER=PCI_BUS_ID CUDA_VISIBLE_DEVICES={device} {this_dir.joinpath('./gpu_benchmark')} {work} {time}"]
528-
# log_debug(f"Running GPU Benchmark: {gpu_prog}")
529-
# subprocess.Popen(gpu_prog, shell=True)
530-
531352

532353
def get_parser() -> argparse.ArgumentParser:
533354
parser = argparse.ArgumentParser()
@@ -550,7 +371,8 @@ def get_parser() -> argparse.ArgumentParser:
550371
"(e.g., --output-files {\\\"file1\\\": 1024, \\\"file2\\\": 2048}).")
551372
parser.add_argument("--input-files", help="Input files names as a JSON array "
552373
"(e.g., --input-files [\\\"file3\\\", \\\"file4\\\"]).")
553-
parser.add_argument("--debug", action="store_true", help="Enable debug messages.")
374+
parser.add_argument("--silent", action="store_true", help="Disable all log messages.")
375+
parser.add_argument("--debug", action="store_true", help="Enable debug log messages.")
554376
parser.add_argument("--with-flowcept", action="store_true", default=False, help="Enable Flowcept monitoring.")
555377
parser.add_argument("--workflow_id", default=None, help="Id to group tasks in a workflow.")
556378

@@ -612,6 +434,8 @@ def main():
612434
if args.with_flowcept:
613435
flowcept, flowcept_task = begin_flowcept(args)
614436

437+
if args.silent:
438+
logging.getLogger().setLevel(logging.NOTSET)
615439
if args.debug:
616440
logging.getLogger().setLevel(logging.DEBUG)
617441

@@ -625,9 +449,9 @@ def main():
625449
path_cores = pathlib.Path(args.path_cores)
626450
core = lock_core(path_locked, path_cores)
627451

628-
# if args.time and (not args.cpu_work and not args.gpu_work):
629-
# log_error("If --time is provided, at least one of --cpu-work and --gpu-work must also be provided.")
630-
# sys.exit(1)
452+
if not args.time and (not args.cpu_work and not args.gpu_work):
453+
log_error("At least one of --time, --cpu-work, or --gpu-work should be provided.")
454+
sys.exit(1)
631455

632456
# Compute the (feasible) number of chunks based on the arguments
633457
num_chunks = compute_num_chunks(args)
@@ -782,119 +606,7 @@ def main():
782606
if args.with_flowcept:
783607
end_flowcept(flowcept, flowcept_task)
784608

785-
log_debug(f"{args.name} Benchmark Completed")
786-
787-
# OLD CODE BELOW:
788-
#
789-
# procs = []
790-
# io_proc = None
791-
# outputs_dict = {}
792-
#
793-
# cpu_queue = multiprocessing.Queue()
794-
#
795-
# log_debug(f"Working directory: {os.getcwd()}")
796-
#
797-
# if cleaned_input or cleaned_output:
798-
# log_debug("Starting IO benchmark...")
799-
#
800-
# # Attempt to parse the cleaned string
801-
# try:
802-
# outputs_dict = json.loads(cleaned_output)
803-
# except json.JSONDecodeError as e:
804-
# log_error(f"Failed to decode --output-files JSON string argument: {e}")
805-
# sys.exit(1)
806-
#
807-
# try:
808-
# inputs_array = json.loads(cleaned_input)
809-
# except json.JSONDecodeError as e:
810-
# log_error(f"Failed to decode --input-files JSON string argument: {e}")
811-
# sys.exit(1)
812-
#
813-
# # print("OUTPUT", outputs_dict)
814-
# # print("INPUTS", inputs_array)
815-
#
816-
# # Create a multiprocessing event that in the first run is set to True
817-
# write_done_event = multiprocessing.Event()
818-
# # Set this to True to allow the first read to happen
819-
# write_done_event.set()
820-
# # Print the value of the event
821-
# # print("Event Value:", write_done_event.is_set())
822-
#
823-
# io_proc = multiprocessing.Process(
824-
# target=io_alternate,
825-
# args=(inputs_array, outputs_dict, cpu_queue, mem_bytes, rundir, write_done_event)
826-
# )
827-
# io_proc.start()
828-
# procs.append(io_proc)
829-
#
830-
# if args.gpu_work:
831-
# log_info(f"Starting GPU Benchmark for {args.name}...")
832-
# available_gpus = get_available_gpus() #checking for available GPUs
833-
#
834-
# if not available_gpus:
835-
# log_error("No GPU available")
836-
# sys.exit(1)
837-
# else:
838-
# device = available_gpus[0]
839-
# log_debug(f"Running on GPU {device}")
840-
#
841-
# if args.time:
842-
# log_debug(f" Time:{args.time}, Work:{args.gpu_work}, Device:{device}")
843-
# gpu_benchmark(time=int(args.time), work=int(args.gpu_work), device=device)
844-
# else:
845-
# gpu_benchmark(work=int(args.gpu_work), device=device)
846-
#
847-
# if args.cpu_work:
848-
# log_info(f"Starting CPU and Memory Benchmarks for {args.name}...")
849-
# if core:
850-
# log_debug(f"{args.name} acquired core {core}")
851-
#
852-
# mem_threads=int(10 - 10 * args.percent_cpu)
853-
# [cpu_proc, mem_proc] = cpu_mem_benchmark(cpu_queue=cpu_queue,
854-
# cpu_threads=int(10 * args.percent_cpu),
855-
# mem_threads=mem_threads,
856-
# cpu_work=int32_max**2 if args.time else int(args.cpu_work),
857-
# core=core,
858-
# total_mem=mem_bytes)
859-
# procs.append(cpu_proc)
860-
# if args.time:
861-
# time.sleep(int(args.time))
862-
# for proc in procs:
863-
# if isinstance(proc, multiprocessing.Process):
864-
# if proc.is_alive():
865-
# proc.terminate()
866-
# elif isinstance(proc, subprocess.Popen):
867-
# kill_process_and_children(proc)
868-
# else:
869-
# for proc in procs:
870-
# if isinstance(proc, subprocess.Popen):
871-
# proc.wait()
872-
# if io_proc is not None and io_proc.is_alive():
873-
# #io_proc.terminate()
874-
# io_proc.join()
875-
#
876-
# try:
877-
# kill_process_and_children(mem_proc)
878-
# except subprocess.TimeoutExpired:
879-
# log_debug("Memory process did not terminate; force-killing.")
880-
# # As a fallback, use pkill if any remaining instances are stuck
881-
# subprocess.Popen(["pkill", "-f", "stress-ng"]).wait()
882-
#
883-
# log_debug("Completed CPU and Memory Benchmarks!")
884-
#
885-
# NOTE: If you would like to run only IO add time.sleep(2)
886-
# Check if all procs are done, if not, kill them
887-
# log_debug("Checking if all processes are done...")
888-
# for proc in procs:
889-
# if isinstance(proc, multiprocessing.Process):
890-
# if proc.is_alive():
891-
# proc.terminate()
892-
# proc.join()
893-
# if isinstance(proc, subprocess.Popen):
894-
# proc.wait()
895-
#
896-
#
897-
# log_info(f"Benchmark {args.name} completed!")
609+
log_info(f"{args.name} benchmark completed")
898610

899611
if __name__ == "__main__":
900612
main()

0 commit comments

Comments (0)