Skip to content

Commit 26c8552

Browse files
committed
#17: fixing data dependency issue
1 parent 564f732 commit 26c8552

3 files changed

Lines changed: 36 additions & 11 deletions

File tree

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ dist/
1616
# Installer logs
1717
pip-log.txt
1818
pip-delete-this-directory.txt
19+
build/
1920

2021
# Sphinx documentation
2122
docs/build/

wfcommons/wfchef/wfchef_abstract_recipe.py

Lines changed: 32 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717

1818
from logging import Logger
1919
from typing import Any, Dict, List, Optional, Set, Union
20+
from wfcommons.common.task import Task
2021
from wfcommons.common.workflow import Workflow
2122
from wfcommons.wfchef.duplicate import duplicate
2223
from wfcommons.wfgen.abstract_recipe import WorkflowRecipe
@@ -31,6 +32,7 @@ class BaseMethod(Enum):
3132
BIGGEST = 2
3233
RANDOM = 3
3334

35+
3436
this_dir = pathlib.Path(__file__).resolve().parent
3537

3638

@@ -116,11 +118,12 @@ def from_num_tasks(cls,
116118
:rtype: WfChefWorkflowRecipe
117119
118120
"""
119-
return cls(num_tasks=num_tasks, exclude_graphs=exclude_graphs, runtime_factor=runtime_factor,
121+
return cls(num_tasks=num_tasks,
122+
exclude_graphs=exclude_graphs,
123+
runtime_factor=runtime_factor,
120124
input_file_size_factor=input_file_size_factor,
121125
output_file_size_factor=output_file_size_factor)
122126

123-
124127
def generate_nx_graph(self) -> nx.DiGraph:
125128
summary_path = self.this_dir.joinpath("microstructures", "summary.json")
126129
summary = json.loads(summary_path.read_text())
@@ -135,24 +138,24 @@ def generate_nx_graph(self) -> nx.DiGraph:
135138
reference_orders = [summary["base_graphs"][col]["order"] for col in df.columns]
136139
idx = np.argmin([abs(self.num_tasks - ref_num_tasks) for ref_num_tasks in reference_orders])
137140
reference = df.columns[idx]
138-
141+
139142
if self.base_method == BaseMethod.ERROR_TABLE:
140143
base = df.index[df[reference].argmin()]
141144
elif self.base_method == BaseMethod.SMALLEST:
142-
base = min(
145+
base = min(
143146
[k for k in summary["base_graphs"].keys() if summary["base_graphs"][k] not in self.exclude_graphs],
144147
key=lambda k: summary["base_graphs"][k]["order"]
145148
)
146149
elif self.base_method == BaseMethod.BIGGEST:
147150
base = max(
148-
[k for k in summary["base_graphs"].keys() if summary["base_graphs"][k]["order"] <= self.num_tasks and
149-
summary["base_graphs"][k] not in self.exclude_graphs],
151+
[k for k in summary["base_graphs"].keys() if summary["base_graphs"][k]["order"] <= self.num_tasks and
152+
summary["base_graphs"][k] not in self.exclude_graphs],
150153
key=lambda k: summary["base_graphs"][k]["order"]
151154
)
152155
else:
153156
base = random.choice(
154-
[k for k in summary["base_graphs"].keys() if summary["base_graphs"][k]["order"] <= self.num_tasks and
155-
summary["base_graphs"][k] not in self.exclude_graphs]
157+
[k for k in summary["base_graphs"].keys() if summary["base_graphs"][k]["order"] <= self.num_tasks and
158+
summary["base_graphs"][k] not in self.exclude_graphs]
156159
)
157160

158161
graph = duplicate(self.this_dir.joinpath("microstructures"), base, self.num_tasks)
@@ -167,7 +170,8 @@ def build_workflow(self, workflow_name: Optional[str] = None) -> Workflow:
167170
:return: A synthetic workflow instance object.
168171
:rtype: Workflow
169172
"""
170-
workflow = Workflow(name=self.name + "-synthetic-instance" if not workflow_name else workflow_name, makespan=None)
173+
workflow = Workflow(name=self.name + "-synthetic-instance" if not workflow_name else workflow_name,
174+
makespan=0)
171175
graph = self.generate_nx_graph()
172176

173177
task_names = {}
@@ -181,11 +185,30 @@ def build_workflow(self, workflow_name: Optional[str] = None) -> Workflow:
181185

182186
task_names[node] = task_name
183187

188+
# tasks dependencies
184189
for (src, dst) in graph.edges:
185190
if src in ["SRC", "DST"] or dst in ["SRC", "DST"]:
186191
continue
187192
workflow.add_edge(task_names[src], task_names[dst])
188193

194+
if task_names[src] not in self.tasks_children:
195+
self.tasks_children[task_names[src]] = []
196+
if task_names[dst] not in self.tasks_parents:
197+
self.tasks_parents[task_names[dst]] = []
198+
199+
self.tasks_children[task_names[src]].append(task_names[dst])
200+
self.tasks_parents[task_names[dst]].append(task_names[src])
201+
202+
# find leaf tasks
203+
leaf_tasks = []
204+
for node_name in workflow.nodes:
205+
task: Task = workflow.nodes[node_name]['task']
206+
if task.name not in self.tasks_children:
207+
leaf_tasks.append(task)
208+
209+
for task in leaf_tasks:
210+
self._generate_task_files(task)
211+
189212
workflow.nxgraph = graph
190213
self.workflows.append(workflow)
191214
return workflow
@@ -195,5 +218,3 @@ def _load_base_graph(self) -> nx.DiGraph:
195218

196219
def _load_microstructures(self) -> Dict:
197220
return json.loads(self.this_dir.joinpath("microstructures.json").read_text())
198-
199-

wfcommons/wfgen/abstract_recipe.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,9 @@ def __init__(self, name: str,
6767
self.tasks_files: Dict[str, List[File]] = {}
6868
self.tasks_files_names: Dict[str, List[str]] = {}
6969
self.task_id_counter = 1
70+
self.tasks_map = {}
71+
self.tasks_children = {}
72+
self.tasks_parents = {}
7073

7174
@abstractmethod
7275
def _workflow_recipe(self) -> Dict[str, Any]:

0 commit comments

Comments
 (0)