Skip to content

Commit ed7184c

Browse files
committed
(#11) allowing increase/reduce task runtime or file sizes by a factor of X
1 parent 25e32d1 commit ed7184c

10 files changed

Lines changed: 423 additions & 58 deletions

docs/source/quickstart_installation.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@ Installation
22
============
33

44
WorkflowHub is available on `PyPI <https://pypi.org/project/workflowhub>`_.
5-
WorkflowHub requires Python3.5+ and has been tested on Linux and macOS.
5+
WorkflowHub requires Python 3.6+ and has been tested on Linux and macOS.
66

77
Requirements
88
------------

workflowhub/generator/generator.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
#!/usr/bin/env python
22
# -*- coding: utf-8 -*-
33
#
4-
# Copyright (c) 2020 The WorkflowHub Team.
4+
# Copyright (c) 2020-2021 The WorkflowHub Team.
55
#
66
# This program is free software: you can redistribute it and/or modify
77
# it under the terms of the GNU General Public License as published by

workflowhub/generator/workflow/abstract_recipe.py

Lines changed: 45 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
#!/usr/bin/env python
22
# -*- coding: utf-8 -*-
33
#
4-
# Copyright (c) 2020 The WorkflowHub Team.
4+
# Copyright (c) 2020-2021 The WorkflowHub Team.
55
#
66
# This program is free software: you can redistribute it and/or modify
77
# it under the terms of the GNU General Public License as published by
@@ -31,17 +31,39 @@ class WorkflowRecipe(ABC):
3131
:type data_footprint: int
3232
:param num_tasks: The upper bound for the total number of tasks in the workflow.
3333
:type num_tasks: int
34+
:param runtime_factor: The factor by which task runtimes will be increased/decreased.
35+
:type runtime_factor: float
36+
:param input_file_size_factor: The factor by which the sizes of task input files will be increased/decreased.
37+
:type input_file_size_factor: float
38+
:param output_file_size_factor: The factor by which the sizes of task output files will be increased/decreased.
39+
:type output_file_size_factor: float
3440
:param logger: The logger used to record information, warnings, or errors (optional).
3541
:type logger: Logger
3642
"""
3743

38-
def __init__(self, name: str, data_footprint: Optional[int], num_tasks: Optional[int],
44+
def __init__(self, name: str,
45+
data_footprint: Optional[int],
46+
num_tasks: Optional[int],
47+
runtime_factor: Optional[float] = 1.0,
48+
input_file_size_factor: Optional[float] = 1.0,
49+
output_file_size_factor: Optional[float] = 1.0,
3950
logger: Optional[Logger] = None) -> None:
4051
"""Create an object of the workflow recipe."""
52+
# sanity checks
53+
if runtime_factor <= 0.0:
54+
raise ValueError("The runtime factor should be a number higher than 0.0.")
55+
if input_file_size_factor <= 0.0:
56+
raise ValueError("The input file size factor should be a number higher than 0.0.")
57+
if output_file_size_factor <= 0.0:
58+
raise ValueError("The output file size factor should be a number higher than 0.0.")
59+
4160
self.logger = logging.getLogger(__name__) if logger is None else logger
4261
self.name = name
4362
self.data_footprint = data_footprint
4463
self.num_tasks = num_tasks
64+
self.runtime_factor = runtime_factor
65+
self.input_file_size_factor = input_file_size_factor
66+
self.output_file_size_factor = output_file_size_factor
4567
self.workflows: List[Workflow] = []
4668
self.tasks_files: Dict[str, List[File]] = {}
4769
self.task_id_counter = 1
@@ -57,13 +79,24 @@ def _workflow_recipe(self) -> Dict[str, Any]:
5779

5880
@classmethod
5981
@abstractmethod
60-
def from_num_tasks(cls, num_tasks: int) -> 'WorkflowRecipe':
82+
def from_num_tasks(cls,
83+
num_tasks: int,
84+
runtime_factor: Optional[float] = 1.0,
85+
input_file_size_factor: Optional[float] = 1.0,
86+
output_file_size_factor: Optional[float] = 1.0
87+
) -> 'WorkflowRecipe':
6188
"""
6289
Instantiate a workflow recipe that will generate synthetic workflows up to the
6390
total number of tasks provided.
6491
6592
:param num_tasks: The upper bound for the total number of tasks in the workflow.
6693
:type num_tasks: int
94+
:param runtime_factor: The factor by which task runtimes will be increased/decreased.
95+
:type runtime_factor: float
96+
:param input_file_size_factor: The factor by which the sizes of task input files will be increased/decreased.
97+
:type input_file_size_factor: float
98+
:param output_file_size_factor: The factor by which the sizes of task output files will be increased/decreased.
99+
:type output_file_size_factor: float
67100
68101
:return: A workflow recipe object that will generate synthetic workflows up to
69102
the total number of tasks provided.
@@ -100,9 +133,10 @@ def _generate_task(self, task_name: str, task_id: str, input_files: Optional[Lis
100133
task_recipe = self._workflow_recipe()[task_name]
101134

102135
# runtime
103-
runtime: float = float(format(generate_rvs(task_recipe['runtime']['distribution'],
104-
task_recipe['runtime']['min'],
105-
task_recipe['runtime']['max']), '.3f'))
136+
runtime: float = float(format(
137+
self.runtime_factor * generate_rvs(task_recipe['runtime']['distribution'],
138+
task_recipe['runtime']['min'],
139+
task_recipe['runtime']['max']), '.3f'))
106140

107141
# linking previous generated output files as input files
108142
self.tasks_files[task_id] = []
@@ -184,11 +218,13 @@ def _generate_file(self, extension: str, recipe: Dict[str, Any], link: FileLink)
184218
:return: The generated file.
185219
:rtype: File
186220
"""
221+
size = int((self.input_file_size_factor if link == FileLink.INPUT
222+
else self.output_file_size_factor) * generate_rvs(recipe[extension]['distribution'],
223+
recipe[extension]['min'],
224+
recipe[extension]['max']))
187225
return File(name=str(uuid.uuid4()) + extension,
188226
link=link,
189-
size=int(generate_rvs(recipe[extension]['distribution'],
190-
recipe[extension]['min'],
191-
recipe[extension]['max'])))
227+
size=size)
192228

193229
def _get_files_by_task_and_link(self, task_id: str, link: FileLink) -> List[File]:
194230
"""Get the list of files for a task ID and link type.

workflowhub/generator/workflow/cycles_recipe.py

Lines changed: 53 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -29,30 +29,55 @@ class CyclesRecipe(WorkflowRecipe):
2929
:type data_footprint: int
3030
:param num_tasks: The upper bound for the total number of tasks in the workflow.
3131
:type num_tasks: int
32+
:param runtime_factor: The factor by which task runtimes will be increased/decreased.
33+
:type runtime_factor: float
34+
:param input_file_size_factor: The factor by which the sizes of task input files will be increased/decreased.
35+
:type input_file_size_factor: float
36+
:param output_file_size_factor: The factor by which the sizes of task output files will be increased/decreased.
37+
:type output_file_size_factor: float
3238
"""
3339

3440
def __init__(self,
3541
num_points: Optional[int] = 1,
3642
num_crops: Optional[int] = 1,
3743
num_params: Optional[int] = 4,
3844
data_footprint: Optional[int] = 0,
39-
num_tasks: Optional[int] = 7
45+
num_tasks: Optional[int] = 7,
46+
runtime_factor: Optional[float] = 1.0,
47+
input_file_size_factor: Optional[float] = 1.0,
48+
output_file_size_factor: Optional[float] = 1.0
4049
) -> None:
4150
"""Create an object of the Cycles workflow recipe."""
42-
super().__init__("Cycles", data_footprint, num_tasks)
51+
super().__init__("Cycles",
52+
data_footprint,
53+
num_tasks,
54+
runtime_factor,
55+
input_file_size_factor,
56+
output_file_size_factor)
4357

4458
self.num_points: int = num_points
4559
self.num_crops: int = num_crops
4660
self.num_params: int = num_params
4761

4862
@classmethod
49-
def from_num_tasks(cls, num_tasks: int) -> 'CyclesRecipe':
63+
def from_num_tasks(cls,
64+
num_tasks: int,
65+
runtime_factor: Optional[float] = 1.0,
66+
input_file_size_factor: Optional[float] = 1.0,
67+
output_file_size_factor: Optional[float] = 1.0
68+
) -> 'CyclesRecipe':
5069
"""
5170
Instantiate a Cycles workflow recipe that will generate synthetic workflows up to
5271
the total number of tasks provided.
5372
5473
:param num_tasks: The upper bound for the total number of tasks in the workflow (at least 7).
5574
:type num_tasks: int
75+
:param runtime_factor: The factor by which task runtimes will be increased/decreased.
76+
:type runtime_factor: float
77+
:param input_file_size_factor: The factor by which the sizes of task input files will be increased/decreased.
78+
:type input_file_size_factor: float
79+
:param output_file_size_factor: The factor by which the sizes of task output files will be increased/decreased.
80+
:type output_file_size_factor: float
5681
5782
:return: A Cycles workflow recipe object that will generate synthetic workflows up
5883
to the total number of tasks provided.
@@ -89,14 +114,23 @@ def from_num_tasks(cls, num_tasks: int) -> 'CyclesRecipe':
89114
if not added_task:
90115
break
91116

92-
return cls(num_points=num_points, num_crops=num_crops, num_params=num_params, data_footprint=None,
93-
num_tasks=num_tasks)
117+
return cls(num_points=num_points,
118+
num_crops=num_crops,
119+
num_params=num_params,
120+
data_footprint=None,
121+
num_tasks=num_tasks,
122+
runtime_factor=runtime_factor,
123+
input_file_size_factor=input_file_size_factor,
124+
output_file_size_factor=output_file_size_factor)
94125

95126
@classmethod
96127
def from_points_and_crops(cls,
97128
num_points: int,
98129
num_crops: int,
99130
num_params: int,
131+
runtime_factor: Optional[float] = 1.0,
132+
input_file_size_factor: Optional[float] = 1.0,
133+
output_file_size_factor: Optional[float] = 1.0
100134
) -> 'CyclesRecipe':
101135
"""
102136
Instantiate a Cycles workflow recipe that will generate synthetic workflows using
@@ -108,6 +142,12 @@ def from_points_and_crops(cls,
108142
:type num_crops: int
109143
:param num_params: The number of parameter values from the simulation matrix.
110144
:type num_params: int
145+
:param runtime_factor: The factor by which task runtimes will be increased/decreased.
146+
:type runtime_factor: float
147+
:param input_file_size_factor: The factor by which the sizes of task input files will be increased/decreased.
148+
:type input_file_size_factor: float
149+
:param output_file_size_factor: The factor by which the sizes of task output files will be increased/decreased.
150+
:type output_file_size_factor: float
111151
112152
:return: A Cycles workflow recipe object that will generate synthetic workflows
113153
using the defined number of points, crops, and params.
@@ -120,8 +160,14 @@ def from_points_and_crops(cls,
120160
if num_params < 4:
121161
raise ValueError("The number of params should be 4 or higher.")
122162

123-
return cls(num_points=num_points, num_crops=num_crops, num_params=num_params, data_footprint=None,
124-
num_tasks=None)
163+
return cls(num_points=num_points,
164+
num_crops=num_crops,
165+
num_params=num_params,
166+
data_footprint=None,
167+
num_tasks=None,
168+
runtime_factor=runtime_factor,
169+
input_file_size_factor=input_file_size_factor,
170+
output_file_size_factor=output_file_size_factor)
125171

126172
def build_workflow(self, workflow_name: Optional[str] = None) -> Workflow:
127173
"""Generate a synthetic workflow trace of a Cycles workflow.

workflowhub/generator/workflow/epigenomics_recipe.py

Lines changed: 53 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -31,30 +31,55 @@ class EpigenomicsRecipe(WorkflowRecipe):
3131
:type data_footprint: int
3232
:param num_tasks: The upper bound for the total number of tasks in the workflow.
3333
:type num_tasks: int
34+
:param runtime_factor: The factor by which task runtimes will be increased/decreased.
35+
:type runtime_factor: float
36+
:param input_file_size_factor: The factor by which the sizes of task input files will be increased/decreased.
37+
:type input_file_size_factor: float
38+
:param output_file_size_factor: The factor by which the sizes of task output files will be increased/decreased.
39+
:type output_file_size_factor: float
3440
"""
3541

3642
def __init__(self,
3743
num_sequence_files: Optional[int] = 1,
3844
num_lines: Optional[int] = 10,
3945
bin_size: Optional[int] = 10,
4046
data_footprint: Optional[int] = 0,
41-
num_tasks: Optional[int] = 9
47+
num_tasks: Optional[int] = 9,
48+
runtime_factor: Optional[float] = 1.0,
49+
input_file_size_factor: Optional[float] = 1.0,
50+
output_file_size_factor: Optional[float] = 1.0
4251
) -> None:
4352
"""Create an object of the Epigenomics workflow recipe."""
44-
super().__init__("Epigenomics", data_footprint, num_tasks)
53+
super().__init__("Epigenomics",
54+
data_footprint,
55+
num_tasks,
56+
runtime_factor,
57+
input_file_size_factor,
58+
output_file_size_factor)
4559

4660
self.num_sequence_files: int = num_sequence_files
4761
self.num_lines: int = num_lines
4862
self.bin_size: int = bin_size
4963

5064
@classmethod
51-
def from_num_tasks(cls, num_tasks: int) -> 'EpigenomicsRecipe':
65+
def from_num_tasks(cls,
66+
num_tasks: int,
67+
runtime_factor: Optional[float] = 1.0,
68+
input_file_size_factor: Optional[float] = 1.0,
69+
output_file_size_factor: Optional[float] = 1.0
70+
) -> 'EpigenomicsRecipe':
5271
"""
5372
Instantiate an Epigenomics workflow recipe that will generate synthetic workflows
5473
up to the total number of tasks provided.
5574
5675
:param num_tasks: The upper bound for the total number of tasks in the workflow (at least 9).
5776
:type num_tasks: int
77+
:param runtime_factor: The factor by which task runtimes will be increased/decreased.
78+
:type runtime_factor: float
79+
:param input_file_size_factor: The factor by which the sizes of task input files will be increased/decreased.
80+
:type input_file_size_factor: float
81+
:param output_file_size_factor: The factor by which the sizes of task output files will be increased/decreased.
82+
:type output_file_size_factor: float
5883
5984
:return: An Epigenomics workflow recipe object that will generate synthetic workflows up
6085
to the total number of tasks provided.
@@ -74,14 +99,23 @@ def from_num_tasks(cls, num_tasks: int) -> 'EpigenomicsRecipe':
7499
else:
75100
break
76101

77-
return cls(num_sequence_files=num_sequence_files, num_lines=num_lines * 10, bin_size=10,
78-
data_footprint=None, num_tasks=num_tasks)
102+
return cls(num_sequence_files=num_sequence_files,
103+
num_lines=num_lines * 10,
104+
bin_size=10,
105+
data_footprint=None,
106+
num_tasks=num_tasks,
107+
runtime_factor=runtime_factor,
108+
input_file_size_factor=input_file_size_factor,
109+
output_file_size_factor=output_file_size_factor)
79110

80111
@classmethod
81112
def from_sequences(cls,
82113
num_sequence_files: int,
83114
num_lines: int,
84115
bin_size: int,
116+
runtime_factor: Optional[float] = 1.0,
117+
input_file_size_factor: Optional[float] = 1.0,
118+
output_file_size_factor: Optional[float] = 1.0
85119
) -> 'EpigenomicsRecipe':
86120
"""
87121
Instantiate an Epigenomics workflow recipe that will generate synthetic workflows
@@ -93,6 +127,12 @@ def from_sequences(cls,
93127
:type num_lines: int
94128
:param bin_size: Number of DNA and protein sequence information to be processed by each computational task.
95129
:type bin_size: int
130+
:param runtime_factor: The factor by which task runtimes will be increased/decreased.
131+
:type runtime_factor: float
132+
:param input_file_size_factor: The factor by which the sizes of task input files will be increased/decreased.
133+
:type input_file_size_factor: float
134+
:param output_file_size_factor: The factor by which the sizes of task output files will be increased/decreased.
135+
:type output_file_size_factor: float
96136
97137
:return: An Epigenomics workflow recipe object that will generate synthetic workflows
98138
using the defined number of sequence files, lines, and bin size.
@@ -105,8 +145,14 @@ def from_sequences(cls,
105145
if bin_size < 10:
106146
raise ValueError("The bin size should be at least 10.")
107147

108-
return cls(num_sequence_files=num_sequence_files, num_lines=num_lines, bin_size=bin_size, data_footprint=None,
109-
num_tasks=None)
148+
return cls(num_sequence_files=num_sequence_files,
149+
num_lines=num_lines,
150+
bin_size=bin_size,
151+
data_footprint=None,
152+
num_tasks=None,
153+
runtime_factor=runtime_factor,
154+
input_file_size_factor=input_file_size_factor,
155+
output_file_size_factor=output_file_size_factor)
110156

111157
def build_workflow(self, workflow_name: str = None) -> Workflow:
112158
"""Generate a synthetic workflow trace of an Epigenomics workflow.

0 commit comments

Comments
 (0)