Skip to content

Commit 41c61c7

Browse files
Fixing input/output files
1 parent 32ce757 commit 41c61c7

2 files changed

Lines changed: 14 additions & 8 deletions

File tree

wfcommons/wfgen/abstract_recipe.py

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -203,10 +203,13 @@ def _generate_task_files(self, task: Task) -> List[File]:
203203

204204
# generate output files
205205
output_files_list = self._generate_files(task.task_id, task_recipe['output'], FileLink.OUTPUT)
206-
task.files = self.tasks_files[task.task_id]
206+
task.output_files = self.tasks_files[task.task_id]
207+
208+
207209

208210
# obtain input files from parents
209211
input_files = []
212+
210213
if task.name in self.tasks_parents.keys():
211214
for parent_task_name in self.tasks_parents[task.name]:
212215
output_files = self._generate_task_files(self.tasks_map[parent_task_name])
@@ -215,14 +218,15 @@ def _generate_task_files(self, task: Task) -> List[File]:
215218
input_files.extend(output_files)
216219

217220
for input_file in input_files:
218-
if input_file.name not in self.tasks_files_names[task.task_id]:
219-
self.tasks_files[task.task_id].append(File(name=input_file.name,
220-
link=FileLink.INPUT,
221-
size=input_file.size))
222-
self.tasks_files_names[task.task_id].append(input_file.name)
221+
if input_file not in self.tasks_files_names[task.task_id]:
222+
self.tasks_files[task.task_id].append(File(name=input_file,
223+
link=FileLink.INPUT,
224+
size=input_file.size))
225+
self.tasks_files_names[task.task_id].append(input_file)
223226

224227
# generate additional input files
225228
self._generate_files(task.task_id, task_recipe['input'], FileLink.INPUT)
229+
task.input_files = [ifile for ifile in self.tasks_files[task.task_id] if ifile.link == FileLink.INPUT]
226230

227231
return output_files_list
228232

wfcommons/wfinstances/instance_analyzer.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -106,26 +106,29 @@ def build_summary(self,
106106
for task in self.tasks_summary[task_name]:
107107
runtime_list.append(task.runtime)
108108

109-
110109
# For each input_file and output_file, append the file size to the dictionary
111110
for infile in task.input_files:
112111
extension: str = path.splitext(infile.file_id)[1] if '.' in infile.file_id else infile.file_id
112+
# print(f"file {infile.file_id} extension: {extension}")
113113
if extension[1:].isnumeric():
114114
extension = path.splitext(infile.file_id.replace(extension, ''))[1]
115115

116116
# Check if the file is definitely an input
117117
assert infile.link == FileLink.INPUT, f"{infile.file_id} is not set as input"
118118
_append_file_to_dict(extension, inputs_dict, infile.size)
119+
119120

120121
for outfile in task.output_files:
121122
extension: str = path.splitext(outfile.file_id)[1] if '.' in outfile.file_id else outfile.file_id
123+
# print(f"file {outfile.file_id} extension: {extension}")
122124
if extension[1:].isnumeric():
123125
extension = path.splitext(outfile.file_id.replace(extension, ''))[1]
124126

125127
# Check if the file is definitely an output
126128
assert outfile.link == FileLink.OUTPUT, f"{outfile.file_id} is not set as output"
127129
_append_file_to_dict(extension, outputs_dict, outfile.size)
128130

131+
129132
# Find the best fit distribution for each file type
130133
_best_fit_distribution_for_file(inputs_dict, include_raw_data)
131134
_best_fit_distribution_for_file(outputs_dict, include_raw_data)
@@ -198,7 +201,6 @@ def _append_file_to_dict(extension: str, dict_obj: Dict[str, Any], file_size: in
198201
dict_obj[extension] = {'data': [], 'distribution': None}
199202
dict_obj[extension]['data'].append(file_size)
200203

201-
202204
def _best_fit_distribution_for_file(dict_obj, include_raw_data) -> None:
203205
"""
204206
Find the best fit distribution for a file.

0 commit comments

Comments
 (0)