1212import math
1313import numpy
1414import scipy .stats
15+ import warnings
1516
1617from logging import Logger
1718from matplotlib import pyplot
@@ -105,26 +106,29 @@ def build_summary(self,
105106 for task in self .tasks_summary [task_name ]:
106107 runtime_list .append (task .runtime )
107108
108-
109109 # For each input_file and output_file, append the file size to the dictionary
110110 for infile in task .input_files :
111111 extension : str = path .splitext (infile .file_id )[1 ] if '.' in infile .file_id else infile .file_id
112+
112113 if extension [1 :].isnumeric ():
113114 extension = path .splitext (infile .file_id .replace (extension , '' ))[1 ]
114115
115116 # Check if the file is definitely an input
116117 assert infile .link == FileLink .INPUT , f"{ infile .file_id } is not set as input"
117118 _append_file_to_dict (extension , inputs_dict , infile .size )
119+
118120
119121 for outfile in task .output_files :
120122 extension : str = path .splitext (outfile .file_id )[1 ] if '.' in outfile .file_id else outfile .file_id
123+ # print(f"file {outfile.file_id} extension: {extension}")
121124 if extension [1 :].isnumeric ():
122125 extension = path .splitext (outfile .file_id .replace (extension , '' ))[1 ]
123126
124127 # Check if the file is definitely an output
125128 assert outfile .link == FileLink .OUTPUT , f"{ outfile .file_id } is not set as output"
126129 _append_file_to_dict (extension , outputs_dict , outfile .size )
127130
131+
128132 # Find the best fit distribution for each file type
129133 _best_fit_distribution_for_file (inputs_dict , include_raw_data )
130134 _best_fit_distribution_for_file (outputs_dict , include_raw_data )
@@ -197,7 +201,6 @@ def _append_file_to_dict(extension: str, dict_obj: Dict[str, Any], file_size: in
197201 dict_obj [extension ] = {'data' : [], 'distribution' : None }
198202 dict_obj [extension ]['data' ].append (file_size )
199203
200-
201204def _best_fit_distribution_for_file (dict_obj , include_raw_data ) -> None :
202205 """
203206 Find the best fit distribution for a file.
@@ -207,14 +210,16 @@ def _best_fit_distribution_for_file(dict_obj, include_raw_data) -> None:
207210 :param include_raw_data:
208211 :type include_raw_data: bool
209212 """
210- for ext in dict_obj :
211- dict_obj [ext ]['min' ] = min (dict_obj [ext ]['data' ])
212- dict_obj [ext ]['max' ] = max (dict_obj [ext ]['data' ])
213- if dict_obj [ext ]['min' ] != dict_obj [ext ]['max' ]:
214- dict_obj [ext ]['distribution' ] = _json_format_distribution_fit (
215- best_fit_distribution (dict_obj [ext ]['data' ]))
216- if not include_raw_data :
217- del dict_obj [ext ]['data' ]
213+ with warnings .catch_warnings ():
214+ warnings .simplefilter ("ignore" )
215+ for ext in dict_obj :
216+ dict_obj [ext ]['min' ] = min (dict_obj [ext ]['data' ])
217+ dict_obj [ext ]['max' ] = max (dict_obj [ext ]['data' ])
218+ if dict_obj [ext ]['min' ] != dict_obj [ext ]['max' ]:
219+ dict_obj [ext ]['distribution' ] = _json_format_distribution_fit (
220+ best_fit_distribution (dict_obj [ext ]['data' ]))
221+ if not include_raw_data :
222+ del dict_obj [ext ]['data' ]
218223
219224
220225def _json_format_distribution_fit (dist_tuple : Tuple ) -> Dict [str , Any ]:
0 commit comments