@@ -28,9 +28,9 @@ class MakeflowLogsParser(LogsParser):
2828 """
2929 Parse Makeflow submit directory to generate workflow instance.
3030
31- :param execution_dir: Makeflow workflow execution directory (contains .mf and .makeflowlog files).
31+ :param execution_dir: Makeflow workflow execution directory (contains .mf/.makeflow and .makeflowlog files).
3232 :type execution_dir: pathlib.Path
33- :param resource_monitor_logs_dir: Resource Monitor log files directory.
33+ :param resource_monitor_logs_dir: Resource Monitor log files directory (created with `makeflow --monitor=... ...`)
3434 :type resource_monitor_logs_dir: pathlib.Path
3535 :param description: Workflow instance description.
3636 :type description: Optional[str]
@@ -46,28 +46,38 @@ def __init__(self,
4646 """Create an object of the makeflow log parser."""
4747 super ().__init__ ('Makeflow' , 'http://ccl.cse.nd.edu/software/makeflow/' , description , logger )
4848
49- # Sanity check
49+ # Sanity checks
5050 if not execution_dir .is_dir ():
5151 raise OSError (f'The provided path does not exist or is not a folder: { execution_dir } ' )
52+ if not resource_monitor_logs_dir .is_dir ():
53+ raise OSError (f'The provided path does not exist or is not a folder: { resource_monitor_logs_dir } ' )
5254
55+ # Makeflow file
5356 files : List [pathlib .Path ] = list (execution_dir .glob ('*.mf' ))
57+ if len (files ) > 1 :
58+ raise OSError (f'Multiple .mf files in: { execution_dir } ' )
5459 if len (files ) == 0 :
55- raise OSError (f'Unable to find .mf file in: { execution_dir } ' )
60+ files : List [pathlib .Path ] = list (execution_dir .glob ('*.makeflow' ))
61+ if len (files ) > 1 :
62+ raise OSError (f'Multiple .makeflow files in: { execution_dir } ' )
63+ if len (files ) == 0 :
64+ raise OSError (f'Unable to find a .mf or .makeflow file in: { execution_dir } ' )
5665 self .mf_file : pathlib .Path = files [0 ]
5766
67+ # Log file
5868 files = list (execution_dir .glob ('*.makeflowlog' ))
5969 if len (files ) == 0 :
6070 raise OSError (f'Unable to find .makeflowlog file in: { execution_dir } ' )
71+ if len (files ) > 1 :
72+ raise OSError (f'Multiple .makeflowlog files in: { execution_dir } ' )
6173 self .mf_log_file : pathlib .Path = files [0 ]
74+ if self .mf_log_file .read_text ().count ("# NODE" ) == 0 :
75+ raise OSError (f'Not sufficiently verbose log file { self .mf_log_file } . Re-run the workflow with `makeflow --log-verbose ...`' )
6276
63- if not resource_monitor_logs_dir .is_dir ():
64- raise OSError (f'The provided path does not exist or is not a folder: { resource_monitor_logs_dir } ' )
65-
66- self .execution_dir : pathlib .Path = execution_dir
67-
68- self .resource_monitor_logs_dir : pathlib .Path = resource_monitor_logs_dir
69- self .files_map = {}
70- self .args_map = {}
77+ self ._execution_dir : pathlib .Path = execution_dir
78+ self ._resource_monitor_logs_dir : pathlib .Path = resource_monitor_logs_dir
79+ self ._files_map = {}
80+ self ._args_map = {}
7181
7282 def build_workflow (self , workflow_name : Optional [str ] = None ) -> Workflow :
7383 """
@@ -106,46 +116,48 @@ def _parse_workflow_file(self) -> None:
106116 outputs = []
107117 inputs = []
108118 for line in f :
109- if ':' in line :
119+ # print(f"Processing line: {line}")
120+ if line .lstrip ().startswith ('#' ):
121+ continue
122+ if ':' in line and '\t ' not in line :
110123 outputs = line .split (':' )[0 ].split ()
111124 inputs = line .split (':' )[1 ].split ()
112125
113126 for file in itertools .chain (outputs , inputs ):
114- if file not in self .files_map :
115- self .files_map [file ] = {'task_name' : None , 'children' : [], 'file' : []}
127+ if file not in self ._files_map :
128+ self ._files_map [file ] = {'task_name' : None , 'children' : [], 'file' : []}
116129
117- elif len ( line . strip ()) > 0 :
118- # task execution command
119- prefix = line .replace ('./' , '' ).replace ( 'perl' , '' ). strip ().split ()[1 if 'LOCAL' in line else 0 ]
120- task_name = "{}_ID{ :07d}" .format (prefix , task_id_counter )
130+ elif ' \t ' in line :
131+ # task execution command (likely old here)
132+ prefix = line .replace ('./' , '' ).strip ().split ()[1 if 'LOCAL' in line else 0 ]
133+ task_name = "ID{ :07d}" .format (task_id_counter )
121134
122- # create list of task files
123- list_files = []
135+ # create list of input and output files
124136 output_files = self ._create_files (outputs , "output" , task_name )
125137 input_files = self ._create_files (inputs , "input" , task_name )
126138
127139 # create task
128- args = ' ' .join (line .replace ( 'LOCAL' , '' ). replace ( 'perl' , '' ). strip (). split ())
140+ args = ' ' .join (line .split ())
129141 task = Task (name = task_name ,
130- task_id = "ID{:07d}" . format ( task_id_counter ) ,
142+ task_id = task_name ,
131143 category = prefix ,
132- task_type = TaskType .COMPUTE ,
133144 runtime = 0 ,
134145 program = prefix ,
135146 args = args .split (),
136147 cores = 1 ,
137148 input_files = input_files ,
138149 output_files = output_files ,
139150 logger = self .logger )
140- self .workflow .add_node (task_name , task = task )
141- self .args_map [args ] = task
151+ self .workflow .add_task (task )
152+ args = args .replace ('\\ \\ ' , '\\ ' )
153+ self ._args_map [args ] = task
142154 task_id_counter += 1
143155
144156 # adding edges
145- for file in self .files_map :
146- for child in self .files_map [file ]['children' ]:
147- if self .files_map [file ]['task_name' ]:
148- self .workflow .add_edge (self .files_map [file ]['task_name' ], child )
157+ for file in self ._files_map :
158+ for child in self ._files_map [file ]['children' ]:
159+ if self ._files_map [file ]['task_name' ]:
160+ self .workflow .add_edge (self ._files_map [file ]['task_name' ], child )
149161
150162 def _create_files (self , files_list : List [str ], input_or_output : str , task_name : str ) -> List [File ]:
151163 """
@@ -163,16 +175,16 @@ def _create_files(self, files_list: List[str], input_or_output: str, task_name:
163175 """
164176 list_files = []
165177 for file in files_list :
166- if self .files_map [file ]['file' ]:
178+ if self ._files_map [file ]['file' ]:
167179 list_files .append (
168- self .files_map [file ]['file' ][0 ] if input_or_output == "input" else self .files_map [file ]['file' ][1 ])
180+ self ._files_map [file ]['file' ][0 ] if input_or_output == "input" else self ._files_map [file ]['file' ][1 ])
169181 else :
170182 size = 0
171- file_path = self .execution_dir .joinpath (file )
183+ file_path = self ._execution_dir .joinpath (file )
172184 if file_path .is_dir ():
173- size = sum (math . ceil ( f .stat ().st_size / 1000 ) for f in file_path .glob ("*" ) if f .is_file ())
185+ size = sum (f .stat ().st_size for f in file_path .glob ("*" ) if f .is_file ())
174186 elif file_path .is_file ():
175- size = int (math . ceil ( file_path .stat ().st_size / 1000 )) # B to KB
187+ size = int (file_path .stat ().st_size )
176188
177189 file_obj_in = File (file_id = file ,
178190 size = size ,
@@ -181,13 +193,13 @@ def _create_files(self, files_list: List[str], input_or_output: str, task_name:
181193 size = size ,
182194 logger = self .logger )
183195 list_files .append (file_obj_in if input_or_output == "input" else file_obj_out )
184- self .files_map [file ]['file' ].extend ([file_obj_in , file_obj_out ])
196+ self ._files_map [file ]['file' ].extend ([file_obj_in , file_obj_out ])
185197
186198 # files dependencies
187199 if input_or_output == "input" :
188- self .files_map [file ]['children' ].append (task_name )
200+ self ._files_map [file ]['children' ].append (task_name )
189201 else :
190- self .files_map [file ]['task_name' ] = task_name
202+ self ._files_map [file ]['task_name' ] = task_name
191203
192204 return list_files
193205
@@ -208,24 +220,24 @@ def _parse_makeflow_log_file(self):
208220
209221 elif line .startswith ('# FILE' ) and 'condorlog' not in line :
210222 file_name = line .split ()[3 ]
211- if file_name in self .files_map :
212- size = int (math . ceil ( int ( line .split ()[5 ]) / 1000 )) # B to KB
213- for file_obj in self .files_map [file_name ]['file' ]:
223+ if file_name in self ._files_map :
224+ size = int (line .split ()[5 ])
225+ for file_obj in self ._files_map [file_name ]['file' ]:
214226 file_obj .size = size
215227
216228 def _parse_resource_monitor_logs (self ):
217229 """Parse the log files produced by resource monitor"""
218- for file in pathlib . Path .glob (f' { self . resource_monitor_logs_dir } / *.summary' ):
230+ for file in self . _resource_monitor_logs_dir .glob (" *.summary" ):
219231 with open (file ) as f :
220232 data = json .load (f )
221233
222234 # task
223- task = self .args_map [data ['command' ].replace ('perl' , '' ).strip ()]
235+ task = self ._args_map [data ['command' ].replace ('perl' , '' ).strip ()]
224236 task .runtime = float (data ['wall_time' ][0 ])
225237 task .cores = float (data ['cores' ][0 ])
226- task .memory = int (data ['memory' ][0 ]) * 1000 # MB to KB
227- task .bytes_read = int (data ['bytes_read' ][0 ] * 1000 ) # MB to KB
228- task .bytes_written = int (data ['bytes_written' ][0 ] * 1000 ) # MB to KB
238+ task .memory = int (data ['memory' ][0 ])
239+ task .bytes_read = int (data ['bytes_read' ][0 ])
240+ task .bytes_written = int (data ['bytes_written' ][0 ])
229241 task .avg_cpu = float ('%.4f' % (float (data ['cpu_time' ][0 ]) / float (data ['wall_time' ][0 ]) * 100 ))
230242 task .machine = Machine (name = data ['host' ],
231243 cpu = {'coreCount' : int (data ['machine_cpus' ][0 ]), 'speedInMHz' : 0 , 'vendor' : '' },
0 commit comments