@@ -154,18 +154,24 @@ def generate_figure_bb(self, pdf_layouts: List[LTPage]) -> Dict[int, List[Block]
154154 """
155155 layout_info = defaultdict (list )
156156 for page_index , page_layout in enumerate (pdf_layouts ):
157- layout_info [page_index ].extend (
158- [
157+ height = page_layout .height
158+ for element in page_layout :
159+ if not isinstance (element , LTFigure ):
160+ continue
161+ # pdfminer's coordinate system differs from the image coordinate system in that
162+ # the y axis is flipped, so convert by subtracting the y values from the page height
163+ y0 = height - element .bbox [3 ]
164+ y1 = height - element .bbox [1 ]
165+ x0 = element .bbox [0 ]
166+ x1 = element .bbox [2 ]
167+ layout_info [page_index ].append (
159168 Block (
160- bounding_box = BoundingBox (* element . bbox ),
169+ bounding_box = BoundingBox (x0 , y0 , x1 , y1 ),
161170 page_index = page_index ,
162171 category = config .name2category ["Figure" ],
163- source_code = "" , # currently, figure block will have no source code match
172+ source_code = "" ,
164173 )
165- for element in page_layout
166- if isinstance (element , LTFigure )
167- ]
168- )
174+ )
169175
170176 # convert bounding boxes from PDF coordinate system to image coordinate system
171177 self .transform (layout_info )
@@ -184,12 +190,9 @@ def transform(self, layout_info: Dict[int, List[Block]]) -> None:
184190 None
185191 """
186192 for page_index in layout_info .keys ():
187- pdf_height = self .layout_metadata [page_index ]["pdf_height" ]
188193 px2img = self .layout_metadata [page_index ]["px2img" ]
189194 for index , element in enumerate (layout_info [page_index ]):
190195 x0 , y0 , x1 , y1 = element .bbox
191- # flip the y-axis
192- y0 , y1 = pdf_height - y1 , pdf_height - y0
193196 # scale
194197 width , height = element .width , element .height
195198 x0 , y0 = x0 * px2img , y0 * px2img
0 commit comments