Skip to content

Commit 022fcec

Browse files
Fixes input/output keys
2 parents 42f3ec0 + 3e761e7 commit 022fcec

5 files changed

Lines changed: 39 additions & 20 deletions

File tree

wfcommons/common/workflow.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -177,9 +177,18 @@ def write_json(self, json_file_path: Optional[pathlib.Path] = None) -> None:
177177
machines_list.append(machine.name)
178178
workflow_machines.append(machine.as_dict())
179179

180+
# add files to the workflow json object (input and output)
181+
for file in task.input_files:
182+
files.append(file.as_dict())
183+
for file in task.output_files:
184+
files.append(file.as_dict())
185+
180186
if workflow_machines:
181187
workflow_json["workflow"]["execution"]["machines"] = workflow_machines
182188

189+
if files and len(files) > 0:
190+
workflow_json["workflow"]["specification"]["files"] = files
191+
183192
# write to file
184193
if not json_file_path:
185194
json_file_path = pathlib.Path(f"{self.name.lower()}.json")

wfcommons/utils.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,6 @@
77
# it under the terms of the GNU General Public License as published by
88
# the Free Software Foundation, either version 3 of the License, or
99
# (at your option) any later version.
10-
1110
import warnings
1211
warnings.filterwarnings('ignore')
1312

@@ -25,6 +24,7 @@
2524
from typing import Any, Dict, Optional, List, Tuple
2625

2726

27+
2828
class NoValue(Enum):
2929
def __repr__(self):
3030
return '<%s.%s>' % (self.__class__.__name__, self.name)

wfcommons/wfchef/chef.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -199,7 +199,7 @@ def uninstall_recipe(module_name:str,
199199
Uninstalls a recipe installed in the system.
200200
"""
201201

202-
dst = pathlib.Path(this_dir.joinpath(f"recipes/{savedir.stem}")).resolve()
202+
dst = f"wfcommons.wfchef.recipe.{savedir.stem}"
203203
try:
204204
subprocess.run(["pip", "uninstall", "-y", dst])
205205
traceback.print_exc()

wfcommons/wfgen/abstract_recipe.py

Lines changed: 13 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -203,10 +203,11 @@ def _generate_task_files(self, task: Task) -> List[File]:
203203

204204
# generate output files
205205
output_files_list = self._generate_files(task.task_id, task_recipe['output'], FileLink.OUTPUT)
206-
task.files = self.tasks_files[task.task_id]
206+
task.output_files = self.tasks_files[task.task_id]
207207

208208
# obtain input files from parents
209209
input_files = []
210+
210211
if task.name in self.tasks_parents.keys():
211212
for parent_task_name in self.tasks_parents[task.name]:
212213
output_files = self._generate_task_files(self.tasks_map[parent_task_name])
@@ -215,14 +216,16 @@ def _generate_task_files(self, task: Task) -> List[File]:
215216
input_files.extend(output_files)
216217

217218
for input_file in input_files:
218-
if input_file.name not in self.tasks_files_names[task.task_id]:
219-
self.tasks_files[task.task_id].append(File(name=input_file.name,
220-
link=FileLink.INPUT,
221-
size=input_file.size))
222-
self.tasks_files_names[task.task_id].append(input_file.name)
219+
if input_file not in self.tasks_files_names[task.task_id]:
220+
self.tasks_files[task.task_id].append(File(name=input_file,
221+
link=FileLink.INPUT,
222+
size=input_file.size))
223+
self.tasks_files_names[task.task_id].append(input_file)
223224

224225
# generate additional input files
225226
self._generate_files(task.task_id, task_recipe['input'], FileLink.INPUT)
227+
task.input_files = [ifile for ifile in self.tasks_files[task.task_id] if ifile.link == FileLink.INPUT]
228+
226229

227230
return output_files_list
228231

@@ -244,9 +247,9 @@ def _generate_files(self, task_id: str, recipe: Dict[str, Any], link: FileLink)
244247
extension_list: List[str] = []
245248
for f in self.tasks_files[task_id]:
246249
if f.link == link:
250+
extension_list.append(path.splitext(f.file_id)[1] if '.' in f.file_id else f.file_id)
247251
files_list.append(f)
248-
extension_list.append(path.splitext(f.name)[1] if '.' in f.name else f.name)
249-
252+
250253
for extension in recipe:
251254
if extension not in extension_list:
252255
file = self._generate_file(extension, recipe, link)
@@ -274,10 +277,12 @@ def _generate_file(self, extension: str, recipe: Dict[str, Any], link: FileLink)
274277
else self.output_file_size_factor) * generate_rvs(recipe[extension]['distribution'],
275278
recipe[extension]['min'],
276279
recipe[extension]['max']))
280+
277281
return File(file_id=str(uuid.uuid4()) + extension,
278282
link=link,
279283
size=size)
280284

285+
281286
def _get_files_by_task_and_link(self, task_id: str, link: FileLink) -> List[File]:
282287
"""
283288
Get the list of files for a task ID and link type.

wfcommons/wfinstances/instance_analyzer.py

Lines changed: 15 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
import math
1313
import numpy
1414
import scipy.stats
15+
import warnings
1516

1617
from logging import Logger
1718
from matplotlib import pyplot
@@ -105,26 +106,29 @@ def build_summary(self,
105106
for task in self.tasks_summary[task_name]:
106107
runtime_list.append(task.runtime)
107108

108-
109109
# For each input_file and output_file, append the file size to the dictionary
110110
for infile in task.input_files:
111111
extension: str = path.splitext(infile.file_id)[1] if '.' in infile.file_id else infile.file_id
112+
112113
if extension[1:].isnumeric():
113114
extension = path.splitext(infile.file_id.replace(extension, ''))[1]
114115

115116
# Check if the file is definitely an input
116117
assert infile.link == FileLink.INPUT, f"{infile.file_id} is not set as input"
117118
_append_file_to_dict(extension, inputs_dict, infile.size)
119+
118120

119121
for outfile in task.output_files:
120122
extension: str = path.splitext(outfile.file_id)[1] if '.' in outfile.file_id else outfile.file_id
123+
# print(f"file {outfile.file_id} extension: {extension}")
121124
if extension[1:].isnumeric():
122125
extension = path.splitext(outfile.file_id.replace(extension, ''))[1]
123126

124127
# Check if the file is definitely an output
125128
assert outfile.link == FileLink.OUTPUT, f"{outfile.file_id} is not set as output"
126129
_append_file_to_dict(extension, outputs_dict, outfile.size)
127130

131+
128132
# Find the best fit distribution for each file type
129133
_best_fit_distribution_for_file(inputs_dict, include_raw_data)
130134
_best_fit_distribution_for_file(outputs_dict, include_raw_data)
@@ -197,7 +201,6 @@ def _append_file_to_dict(extension: str, dict_obj: Dict[str, Any], file_size: in
197201
dict_obj[extension] = {'data': [], 'distribution': None}
198202
dict_obj[extension]['data'].append(file_size)
199203

200-
201204
def _best_fit_distribution_for_file(dict_obj, include_raw_data) -> None:
202205
"""
203206
Find the best fit distribution for a file.
@@ -207,14 +210,16 @@ def _best_fit_distribution_for_file(dict_obj, include_raw_data) -> None:
207210
:param include_raw_data:
208211
:type include_raw_data: bool
209212
"""
210-
for ext in dict_obj:
211-
dict_obj[ext]['min'] = min(dict_obj[ext]['data'])
212-
dict_obj[ext]['max'] = max(dict_obj[ext]['data'])
213-
if dict_obj[ext]['min'] != dict_obj[ext]['max']:
214-
dict_obj[ext]['distribution'] = _json_format_distribution_fit(
215-
best_fit_distribution(dict_obj[ext]['data']))
216-
if not include_raw_data:
217-
del dict_obj[ext]['data']
213+
with warnings.catch_warnings():
214+
warnings.simplefilter("ignore")
215+
for ext in dict_obj:
216+
dict_obj[ext]['min'] = min(dict_obj[ext]['data'])
217+
dict_obj[ext]['max'] = max(dict_obj[ext]['data'])
218+
if dict_obj[ext]['min'] != dict_obj[ext]['max']:
219+
dict_obj[ext]['distribution'] = _json_format_distribution_fit(
220+
best_fit_distribution(dict_obj[ext]['data']))
221+
if not include_raw_data:
222+
del dict_obj[ext]['data']
218223

219224

220225
def _json_format_distribution_fit(dist_tuple: Tuple) -> Dict[str, Any]:

0 commit comments

Comments
 (0)