Skip to content

Commit 6ce219b

Browse files
MaoSong2022
authored and committed
refactor(layout_annotation.py): perform axis transformation before generating bounding box
1 parent cb418cc commit 6ce219b

1 file changed

Lines changed: 14 additions & 11 deletions

File tree

vrdu/layout_annotation.py

Lines changed: 14 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -154,18 +154,24 @@ def generate_figure_bb(self, pdf_layouts: List[LTPage]) -> Dict[int, List[Block]
154154
"""
155155
layout_info = defaultdict(list)
156156
for page_index, page_layout in enumerate(pdf_layouts):
157-
layout_info[page_index].extend(
158-
[
157+
height = page_layout.height
158+
for element in page_layout:
159+
if not isinstance(element, LTFigure):
160+
continue
161+
# the coordinate system of pdfminer differs from the coordinate system of the image
162+
# by a flip of the y axis
163+
y0 = height - element.bbox[3]
164+
y1 = height - element.bbox[1]
165+
x0 = element.bbox[0]
166+
x1 = element.bbox[2]
167+
layout_info[page_index].append(
159168
Block(
160-
bounding_box=BoundingBox(*element.bbox),
169+
bounding_box=BoundingBox(x0, y0, x1, y1),
161170
page_index=page_index,
162171
category=config.name2category["Figure"],
163-
source_code="", # currently, figure block will have no source code match
172+
source_code="",
164173
)
165-
for element in page_layout
166-
if isinstance(element, LTFigure)
167-
]
168-
)
174+
)
169175

170176
# convert bounding boxes from PDF coordinate system to image coordinate system
171177
self.transform(layout_info)
@@ -184,12 +190,9 @@ def transform(self, layout_info: Dict[int, List[Block]]) -> None:
184190
None
185191
"""
186192
for page_index in layout_info.keys():
187-
pdf_height = self.layout_metadata[page_index]["pdf_height"]
188193
px2img = self.layout_metadata[page_index]["px2img"]
189194
for index, element in enumerate(layout_info[page_index]):
190195
x0, y0, x1, y1 = element.bbox
191-
# flip the y-axis
192-
y0, y1 = pdf_height - y1, pdf_height - y0
193196
# scale
194197
width, height = element.width, element.height
195198
x0, y0 = x0 * px2img, y0 * px2img

0 commit comments

Comments
 (0)