Skip to content

Commit 1139373

Browse files
author
Jonas Widmer
committed
some fixes and added delete_repo.py
1 parent 749a5ac commit 1139373

5 files changed

Lines changed: 34 additions & 10 deletions

File tree

flow_preprocessor/preprocessing_logic/parse_textlines.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -303,7 +303,7 @@ def process_lines_from_xml_file(self) -> List[Line]:
303303
"""
304304
line_document = None
305305
try:
306-
line_document = self.xml_filename
306+
line_document = self.get_image_file_name()
307307
line_list: List[Line] = []
308308
for text_line in self.root.findall(".//ns:TextLine", namespaces=self.xmlns):
309309
line_number = self.get_line_id(text_line)
@@ -313,7 +313,7 @@ def process_lines_from_xml_file(self) -> List[Line]:
313313
custom_attributes = self.get_custom_attribute(text_line)
314314
if line_text == '' or line_coordinates == [] or line_baseline_points == []:
315315
logger.warning(
316-
'%s - Skipping line %d in file %s as it is '
316+
'%s - Skipping line %s in file %s as it is '
317317
'empty or has no coordinates or baseline points.',
318318
self.__class__.__name__,
319319
line_number,

flow_preprocessor/preprocessing_logic/preprocess.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -233,6 +233,7 @@ def preprocess_single_xml_file(self, xml_file: str) -> None:
233233
else:
234234
gt_dict[line_name] = line.expand_abbreviations()
235235

236+
logger.info("Added to gt_dict: key = %s, value = %s", line_name, gt_dict[line_name])
236237
image_path = os.path.join(self.in_path, page.image_file_name)
237238

238239
if not self.crop:
@@ -274,12 +275,11 @@ def _save_gt_dict(gt_dict: Dict[str, str], out_path: str) -> None:
274275
:param gt_dict: Dictionary containing line names and texts.
275276
:param out_path: The output path where the file will be saved.
276277
"""
277-
file_path = os.path.join(out_path, 'gt.txt')
278+
file_path = os.path.join(out_path, "gt.txt")
278279

279-
with open(file_path, "a", encoding='utf-8') as txt_file:
280+
with open(file_path, "a", encoding="utf-8") as txt_file:
280281
for line_name, line_text in gt_dict.items():
281-
escaped_text = json.dumps(line_text)
282-
txt_file.write(f"{line_name}\t{escaped_text}\n")
282+
txt_file.write(f"{line_name}\t{line_text}\n")
283283

284284
@staticmethod
285285
def _get_gt_dict(out_path: str) -> Dict[str, str]:

flow_preprocessor/preprocessing_logic/process_images.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -122,7 +122,7 @@ def extract_line_from_image(self,
122122
raise ImageProcessException(f'The image cannot be opened and identified for file {in_path}, {e}') from e
123123
except ValueError as e:
124124
logger.error(
125-
'%s - Wrong value provided for file %s on line %d, %s',
125+
'%s - Wrong value provided for file %s on line %s, %s',
126126
self.__class__.__name__,
127127
in_path,
128128
line_number,
@@ -174,7 +174,7 @@ def crop_line_from_image(self,
174174
cropped_image.paste(cutout, (0, 0), mask=cutout)
175175
cropped_image = cropped_image.convert('RGB')
176176
logger.info(
177-
'%s - Successfully extracted line %d for image %s',
177+
'%s - Successfully extracted line %s for image %s',
178178
self.__class__.__name__,
179179
line_number,
180180
image_path,

flow_preprocessor/preprocessing_logic/status.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
# IMPORT STATEMENTS
77
# ===============================================================================
88
from datetime import datetime
9-
from typing import List, Any
9+
from typing import List, Union
1010
from flow_preprocessor.preprocessing_logic.models import PreprocessState, StateEnum
1111
from flow_preprocessor.exceptions.exceptions import ImageFetchException
1212

@@ -92,7 +92,7 @@ async def update_progress(self,
9292

9393
return PreprocessState(**self.state.model_dump(by_alias=True))
9494

95-
def update_image_list(self, new_line_images: Any[str, List]) -> PreprocessState:
95+
def update_image_list(self, new_line_images: Union[str, List]) -> PreprocessState:
9696
"""
9797
Update the image list.
9898
Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
"""
2+
Simple Util to delete a repository.
3+
"""
4+
5+
import shutil
6+
import os
7+
from typing import Tuple
8+
9+
10+
async def deleteRepo(repo_name: str) -> Tuple[bool, str]:
    """
    Delete a repository locally.

    The repository is looked up below the relative ``data`` directory, in a
    sub-directory named after ``repo_name`` with every ``/`` replaced by
    ``___``.

    :param repo_name: Name of the repository, e.g. ``owner/repo``.
    :return: Tuple ``(success, message)``; ``success`` is ``True`` only when
        the directory existed and was removed.
    """
    dir_name = repo_name.replace('/', '___')

    # The mapped name must be exactly one path component. An empty name would
    # resolve to 'data' itself and '..' to its parent, so rmtree would wipe
    # far more than a single repository.
    if dir_name in ('', '.', '..') or os.path.basename(dir_name) != dir_name:
        return False, f"Invalid repository name {repo_name!r}"

    dir_to_delete = os.path.join('data', dir_name)
    if not os.path.exists(dir_to_delete):
        return False, f"Directory for {repo_name} doesn't exist"

    try:
        shutil.rmtree(dir_to_delete)
    except OSError:
        # rmtree raises on permission problems, on paths that are not
        # directories, etc.; report that through the documented return value.
        return False, f"Failed to delete {repo_name}"

    return True, f"Successfully deleted {repo_name}"

0 commit comments

Comments
 (0)