Skip to content

Commit 35b500e

Browse files
committed
feat(generate_reading_annotation.py): change the format of reading annotation
The previous version relied on block images; this version uses an independent approach.
1 parent 931098c commit 35b500e

1 file changed

Lines changed: 70 additions & 0 deletions

File tree

Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,70 @@
1+
import argparse
2+
import glob
3+
import multiprocessing
4+
import os
5+
6+
from vrdu import utils
7+
from vrdu import logger
8+
9+
# Module-level logger shared by every function in this script.
# NOTE(review): presumably writes to the file named by `file_name` — confirm
# against vrdu.logger.setup_app_level_logger.
log = logger.setup_app_level_logger(file_name="generate_reading_annotation.log")
10+
11+
12+
def generate_annotation(paper_path) -> None:
    """Derive ``reading_annotation.json`` for one paper from its order annotation.

    Loads ``order_annotation.json`` from ``paper_path``, copies a fixed set of
    fields from every entry under its ``"annotations"`` key, and exports the
    resulting list to ``reading_annotation.json`` in the same directory.

    Nothing is written, and an error is logged instead, when the order file
    is missing, when it has no ``"annotations"`` key, or when the output file
    already exists.

    Args:
        paper_path: Directory of a single paper.
    """
    log.debug(f"processing paper {paper_path}")

    source_file = os.path.join(paper_path, "order_annotation.json")
    if not os.path.exists(source_file):
        log.error(f"{source_file} does not exist.")
        return

    order_data = utils.load_json(source_file)
    if "annotations" not in order_data:
        log.error(f"{source_file} does not contain annotations.")
        return

    # Only these fields are carried over; a missing one fails on the
    # subscript lookup below.
    kept_fields = ("block_id", "bbox", "category", "page_index", "source_code")
    records = [
        {field: entry[field] for field in kept_fields}
        for entry in order_data["annotations"]
    ]

    # The existence check happens after the records are built, so an existing
    # output file is never overwritten.
    target_file = os.path.join(paper_path, "reading_annotation.json")
    if not os.path.exists(target_file):
        utils.export_to_json(records, target_file)
        return
    log.error(f"{target_file} already exists.")
45+
46+
47+
def generate_reading_annotation(input_path, num_processes: int = 34) -> None:
    """Generate reading annotations for every paper under ``input_path``.

    Directories are discovered two levels deep
    (``input_path/<discipline>/<paper>/``) and every paper directory is
    passed to ``generate_annotation`` in a worker process.

    Args:
        input_path: Root directory of the dataset.
        num_processes: Number of worker processes in the pool. Defaults to
            34, the value that used to be hard-coded.
    """
    discipline_paths = glob.glob(os.path.join(input_path, "*/"))
    if not discipline_paths:
        # Empty dataset: nothing to process, so do not spawn any workers.
        return

    # A single pool serves all disciplines; creating and tearing down a new
    # set of workers for each discipline directory is wasted start-up cost.
    with multiprocessing.Pool(num_processes) as pool:
        for discipline_path in discipline_paths:
            log.debug(f"processing {discipline_path}")
            paper_paths = glob.glob(os.path.join(discipline_path, "*/"))
            # map() blocks until every paper of this discipline is done, so
            # disciplines are still processed one after another.
            pool.map(generate_annotation, paper_paths)
56+
57+
58+
def main():
    """Parse the command line and generate annotations for the given dataset.

    Requires ``-i`` / ``--input_path``, the root path of the dataset, which is
    forwarded to ``generate_reading_annotation``.
    """
    cli = argparse.ArgumentParser()
    cli.add_argument(
        "-i",
        "--input_path",
        type=str,
        required=True,
        help="Path of dataset source",
    )
    generate_reading_annotation(cli.parse_args().input_path)
67+
68+
69+
# Run the command-line entry point only when this file is executed as a
# script, not when it is imported.
if __name__ == "__main__":
    main()

0 commit comments

Comments
 (0)