|
| 1 | +#!/usr/bin/env python |
| 2 | +# -*- coding: utf-8 -*- |
| 3 | +# |
| 4 | +# Copyright (c) 2023 The WfCommons Team. |
| 5 | +# |
| 6 | +# This program is free software: you can redistribute it and/or modify |
| 7 | +# it under the terms of the GNU General Public License as published by |
| 8 | +# the Free Software Foundation, either version 3 of the License, or |
| 9 | +# (at your option) any later version. |
| 10 | + |
| 11 | +import pathlib |
| 12 | + |
| 13 | +from logging import Logger |
| 14 | +from typing import Optional, Union |
| 15 | + |
| 16 | +from .abstract_translator import Translator |
| 17 | +from ...common import FileLink, Workflow |
| 18 | + |
# Directory containing this module; translate() resolves the bundled Dask
# template ("templates/dask_template.py") relative to it.
this_dir = pathlib.Path(__file__).resolve().parent
| 20 | + |
| 21 | + |
class DaskTranslator(Translator):
    """
    A WfFormat parser for creating Dask workflow applications.

    :param workflow: Workflow benchmark object or path to the workflow benchmark JSON instance.
    :type workflow: Union[Workflow, pathlib.Path]
    :param logger: The logger where to log information/warning or errors (optional).
    :type logger: Logger
    """

    def __init__(self,
                 workflow: Union[Workflow, pathlib.Path],
                 logger: Optional[Logger] = None) -> None:
        """Create an object of the translator."""
        super().__init__(workflow, logger)
        # Names of the tasks already emitted, in submission order.
        self.parsed_tasks = []
        # Task name -> name of the future variable holding its submission.
        self.tasks_futures = {}
        # Index used to name the next future variable (fut_dv_<task_id>).
        self.task_id = 0

    def translate(self, output_file_name: pathlib.Path) -> None:
        """
        Translate a workflow benchmark description (WfFormat) into a Dask workflow application.

        The generated code is spliced into the bundled template in place of its
        ``# Generated code goes here`` marker and written to ``output_file_name``.
        After the call, ``parsed_tasks``, ``tasks_futures`` and ``task_id``
        describe the tasks emitted by this translation.

        :param output_file_name: The name of the output file (e.g., workflow.py).
        :type output_file_name: pathlib.Path
        """
        # Start from a clean slate so the same translator can be used more
        # than once without re-using state from a previous translation.
        self.parsed_tasks = []
        self.tasks_futures = {}
        self.task_id = 0

        noindent_python_codelines = self._dask_wftasks_codelines("randomizer")

        # root_task_names is not defined in this class; it is expected to be
        # provided by the Translator base class.
        for task_name in self.root_task_names:
            noindent_python_codelines.extend(self._parse_tasks(task_name))

        # generate results: resolve every future, last submitted first
        for task_name in reversed(self.parsed_tasks):
            noindent_python_codelines.append(
                f"TASKS[{task_name!r}] = {self.tasks_futures[task_name]}.result()")

        # generate code
        # NOTE(review): this width must match the body indentation used around
        # the marker in templates/dask_template.py -- confirm against the template.
        INDENT = " "
        wf_codelines = "\n".join(f"{INDENT}{codeline}" for codeline in noindent_python_codelines)
        with open(this_dir.joinpath("templates/dask_template.py"), encoding="utf-8") as fp:
            run_workflow_code = fp.read()
        run_workflow_code = run_workflow_code.replace("# Generated code goes here", wf_codelines)
        # Write to the file requested by the caller rather than a fixed name.
        with open(output_file_name, "w", encoding="utf-8") as fp:
            fp.write(run_workflow_code)

    def _dask_wftasks_codelines(self,
                                randomizer_varname: str,
                                simulate_minimum_execution_time: float = 0.1,
                                simulate_maximum_execution_time: float = 1.1) -> list[str]:
        """
        Build the code defining all tasks in the workflow, i.e. WorkflowTask instances.

        :param randomizer_varname: The name of the randomizer variable in the generated code.
        :type randomizer_varname: str
        :param simulate_minimum_execution_time: Value emitted for the
                                                ``simulate_minimum_execution_time`` argument of every task.
        :type simulate_minimum_execution_time: float
        :param simulate_maximum_execution_time: Value emitted for the
                                                ``simulate_maximum_execution_time`` argument of every task.
        :type simulate_maximum_execution_time: float

        :return: The non-indented Python lines of code used to instantiate the WorkflowTask instances.
        :rtype: list[str]
        """
        # The randomizer is declared under the same name the tasks refer to,
        # so any value of randomizer_varname yields self-consistent code.
        codelines = [f"{randomizer_varname} = random.Random(seed)",
                     "TASKS = {}"]
        # self.tasks is not defined in this class; it is expected to be
        # provided by the Translator base class.
        for task in self.tasks.values():
            input_files = [f.name for f in task.files if f.link == FileLink.INPUT]
            output_files = [f.name for f in task.files if f.link == FileLink.OUTPUT]
            # !r renders the name as a valid Python literal even when it
            # contains quotes or backslashes.
            codelines.extend([
                f"TASKS[{task.name!r}] = WorkflowTask(dag_id = {task.name!r},",
                f" name = {task.name!r},",
                f" command_arguments = {[task.program] + task.args},",
                f" inputs = {input_files},",
                f" outputs = {output_files},",
                " simulate = simulate,",
                f" randomizer = {randomizer_varname},",
                f" simulate_minimum_execution_time = {simulate_minimum_execution_time},",
                f" simulate_maximum_execution_time = {simulate_maximum_execution_time},",
                " )",
            ])
        return codelines

    def _parse_tasks(self, task_name: str) -> list[str]:
        """
        Recursively iterates over workflow tasks to generate submit command.

        A task is emitted only once, and only after all of its parents have
        been emitted; otherwise nothing is generated for this visit.

        :param task_name: The name of a task.
        :type task_name: str

        :return: The non-indented Python lines of code submitting the task and
                 every descendant that became ready, or an empty list when the
                 task was already submitted or still has unparsed parents.
        :rtype: list[str]
        """
        # tasks_futures has exactly the names in parsed_tasks; the dict gives
        # constant-time membership checks.
        if task_name in self.tasks_futures:
            # Reached again through another parent: nothing more to emit.
            return []

        # check for dependencies: the task is visited again from its last parent
        if any(parent not in self.tasks_futures for parent in self.task_parents[task_name]):
            return []

        future_name = f"fut_dv_{self.task_id}"
        self.parsed_tasks.append(task_name)
        self.tasks_futures[task_name] = future_name
        self.task_id += 1
        # NOTE(review): the last argument is always an empty list; if the
        # template's execute_task expects the parents' futures there, ordering
        # is not enforced by Dask -- confirm against the template.
        noindent_python_codelines = [
            f"{future_name} = client.submit(execute_task, TASKS[{task_name!r}], [])"]

        # parse children
        for child in self.task_children[task_name]:
            noindent_python_codelines.extend(self._parse_tasks(child))

        return noindent_python_codelines
0 commit comments