Skip to content

Commit 2814868

Browse files
author
danameyer
committed
Extract parsing logic from initialisation so that a try/except can be wrapped around it and files that fail to be processed are added to the appropriate lists. Refs #6.
1 parent 4c4ac47 commit 2814868

1 file changed

Lines changed: 35 additions & 5 deletions

File tree

flow_preprocessor/preprocessing_logic/parse_textlines.py

Lines changed: 35 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -227,12 +227,42 @@ def __init__(self, xml_file) -> None:
227227
:param self.xmlns: namespace declaration.
228228
:param self.failed_processing: list of images that could not be processed.
229229
"""
230-
self.tree = et.parse(xml_file)
231-
self.root = self.tree.getroot()
232-
self.namespace_uri = self.root.tag.split('}')[0][1:]
233-
self.namespace = {'prefix': self.namespace_uri}
234-
self.xmlns = {'ns': self.namespace_uri}
235230
self.failed_processing = []
231+
self.namespace_uri = None
232+
self.namespace = None
233+
self.xmlns = None
234+
self.tree = None
235+
self.root = None
236+
self.parse_xml_file(xml_file)
237+
238+
def parse_xml_file(self, xml_file) -> None:
    """
    Parse ``xml_file`` and store the tree, root and namespace info on the instance.

    On success ``self.tree``, ``self.root``, ``self.namespace_uri``,
    ``self.namespace`` and ``self.xmlns`` are set. On any failure the file is
    appended to ``self.failed_processing``, the error is logged with its
    traceback, and a ``ParseTextLinesException`` chained to the original
    error is raised.

    :param xml_file: the XML source handed to ``et.parse``.
    :raises ParseTextLinesException: if the file is missing, is not
        well-formed XML, or any other error occurs while parsing.
    """
    try:
        self.tree = et.parse(xml_file)
        self.root = self.tree.getroot()
        root_tag = self.root.tag
        # A namespaced tag looks like '{uri}local'. A root without a namespace
        # has no braces; slicing it blindly would yield a truncated tag name,
        # so fall back to an empty URI instead.
        if root_tag.startswith('{'):
            self.namespace_uri = root_tag[1:].partition('}')[0]
        else:
            self.namespace_uri = ''
        self.namespace = {'prefix': self.namespace_uri}
        self.xmlns = {'ns': self.namespace_uri}
    # ParseError exists in both xml.etree.ElementTree and lxml.etree, and
    # lxml's XMLSyntaxError is a subclass of it. Naming XMLSyntaxError here
    # would raise AttributeError under the stdlib parser.
    except et.ParseError as e:
        self.failed_processing.append(xml_file)
        logger.error(
            f'{self.__class__.__name__} - Error parsing file {xml_file}',
            exc_info=True,
        )
        raise ParseTextLinesException(f'Error parsing file {xml_file}: {e}') from e
    except FileNotFoundError as e:
        self.failed_processing.append(xml_file)
        logger.error(
            f'{self.__class__.__name__} - XML file not found: {xml_file}',
            exc_info=True,
        )
        # The original error travels as __cause__ rather than as a second
        # positional argument, matching the other branches.
        raise ParseTextLinesException(f'XML file not found: {xml_file}') from e
    except Exception as e:
        # Boundary handler: record, log, and re-raise as the domain exception.
        self.failed_processing.append(xml_file)
        logger.error(
            f'{self.__class__.__name__} - An unexpected error occurred for file {xml_file}',
            exc_info=True,
        )
        raise ParseTextLinesException(
            f'An unexpected error occurred for file {xml_file}: {e}'
        ) from e
236266

237267
def process_lines_from_xml_file(self) -> List[Line]:
238268
"""

0 commit comments

Comments
 (0)