Skip to content

Commit 45c7385

Browse files
committed
Merge branch 'dev' into main
2 parents d0b2cbd + 4cf4b8e commit 45c7385

2 files changed

Lines changed: 61 additions & 172 deletions

File tree

batch_process.py

Lines changed: 1 addition & 120 deletions
Original file line number | Diff line number | Diff line change
@@ -1,137 +1,18 @@
11
import os
2-
import glob
32
import shutil
43
import argparse
54
import multiprocessing
65
from typing import List
76
from uuid import uuid4
87

9-
from tqdm import tqdm
10-
118
from vrdu import logger
129
from vrdu import utils
13-
from vrdu import renderer
14-
from vrdu import preprocess
15-
from vrdu.annotation import LayoutAnnotation
16-
from vrdu.config import config
17-
10+
from main import process_one_file
1811

1912
log_file = str(uuid4()) + ".log"
2013
log = logger.setup_app_level_logger(file_name=log_file, level="INFO", mode="a")
2114

2215

23-
def transform_tex_to_images(main_directory: str) -> None:
24-
"""
25-
Transforms TeX files with pattern paper_*.tex in the specified directory into jpg images.
26-
27-
Args:
28-
main_directory (str): The main directory where the TeX files are located.
29-
30-
Returns:
31-
None
32-
"""
33-
tex_files = glob.glob(f"{main_directory}/paper_*.tex")
34-
output_directory = os.path.join(main_directory, "output")
35-
for tex_file in tqdm(tex_files):
36-
log.debug(f"[VRDU] file: {tex_file}, start transforming into images.")
37-
utils.compile_latex(tex_file)
38-
39-
# get the pdf file name
40-
filename_without_extension = os.path.splitext(os.path.basename(tex_file))[0]
41-
pdf_file = os.path.join(main_directory, f"{filename_without_extension}.pdf")
42-
43-
# convert into images
44-
image_directory = os.path.join(output_directory, filename_without_extension)
45-
os.makedirs(image_directory)
46-
utils.pdf2jpg(pdf_file, image_directory)
47-
48-
49-
def remove_redundant_stuff(main_directory: str) -> None:
50-
"""
51-
Remove redundant files and folders from the main directory.
52-
53-
Args:
54-
main_directory (str): The path of the main directory.
55-
56-
Returns:
57-
None
58-
"""
59-
# remove generated tex related files
60-
redundant_files = glob.glob(f"{main_directory}/paper_*")
61-
for file in redundant_files:
62-
os.remove(file)
63-
64-
# remove useless pdf and image files
65-
# TODO: move this name pattern into config
66-
redundant_folders = glob.glob(
67-
f"{main_directory}/output/paper_{config.folder_prefix}*"
68-
)
69-
redundant_folders += [
70-
f"{main_directory}/output/paper_white",
71-
f"{main_directory}/output/paper_original",
72-
]
73-
for folder in redundant_folders:
74-
if os.path.exists(folder):
75-
shutil.rmtree(folder)
76-
77-
78-
def process_one_file(file_name) -> None:
79-
main_directory = os.path.dirname(file_name)
80-
log.info(f"[VRDU] file: {file_name}, start processing.")
81-
82-
# check if this paper has been processed
83-
quality_report_file = os.path.join(
84-
main_directory, "output/result/quality_report.json"
85-
)
86-
if os.path.exists(quality_report_file):
87-
log.info(f"[VRDU] file: {file_name}, paper has been processed")
88-
return
89-
90-
# make a copy of the original tex file
91-
original_tex = os.path.join(main_directory, "paper_original.tex")
92-
shutil.copyfile(file_name, original_tex)
93-
94-
cwd = os.getcwd()
95-
96-
try:
97-
# change the working directory to the main directory
98-
os.chdir(main_directory)
99-
preprocess.run(original_tex)
100-
101-
# run rendering
102-
vrdu_renderer = renderer.Renderer()
103-
vrdu_renderer.render(original_tex)
104-
105-
# compile into PDFs, and then convert into images
106-
log.info(
107-
f"[VRDU] file: {original_tex}, start transforming into images, this may take a while..."
108-
)
109-
transform_tex_to_images(main_directory)
110-
111-
# generate annotations
112-
log.info(
113-
f"[VRDU] file: {original_tex}, start generating annotations, this may take a while..."
114-
)
115-
vrdu_annotation = LayoutAnnotation(main_directory)
116-
vrdu_annotation.annotate()
117-
118-
log.info(f"[VRDU] file: {original_tex}, successfully processed.")
119-
120-
except Exception as e:
121-
error_type = e.__class__.__name__
122-
error_info = str(e)
123-
log.error(
124-
f"[VRDU] file: {file_name}, type: {error_type}, message: {error_info}"
125-
)
126-
127-
finally:
128-
# remove redundant files
129-
remove_redundant_stuff(main_directory)
130-
131-
# Change back to original dir
132-
os.chdir(cwd)
133-
134-
13516
def filter_tex_files(tex_files: List[str], main_path: str = None) -> List[str]:
13617
"""extract all MAIN.tex files for processing, if main_path is not None, then
13718
only extract MAIN.tex files in the main_path (not recursive)

main.py

Lines changed: 60 additions & 52 deletions
Original file line number | Diff line number | Diff line change
@@ -67,7 +67,65 @@ def remove_redundant_stuff(main_directory: str) -> None:
6767
f"{main_directory}/output/paper_original",
6868
]
6969
for folder in redundant_folders:
70-
shutil.rmtree(folder)
70+
if os.path.exists(folder):
71+
shutil.rmtree(folder)
72+
73+
74+
def process_one_file(file_name) -> None:
75+
main_directory = os.path.dirname(file_name)
76+
log.info(f"[VRDU] file: {file_name}, start processing.")
77+
78+
# check if this paper has been processed
79+
quality_report_file = os.path.join(
80+
main_directory, "output/result/quality_report.json"
81+
)
82+
if os.path.exists(quality_report_file):
83+
log.info(f"[VRDU] file: {file_name}, paper has been processed")
84+
return
85+
86+
# make a copy of the original tex file
87+
original_tex = os.path.join(main_directory, "paper_original.tex")
88+
shutil.copyfile(file_name, original_tex)
89+
90+
cwd = os.getcwd()
91+
92+
try:
93+
# change the working directory to the main directory
94+
os.chdir(main_directory)
95+
preprocess.run(original_tex)
96+
97+
# run rendering
98+
vrdu_renderer = renderer.Renderer()
99+
vrdu_renderer.render(original_tex)
100+
101+
# compile into PDFs, and then convert into images
102+
log.info(
103+
f"[VRDU] file: {original_tex}, start transforming into images, this may take a while..."
104+
)
105+
transform_tex_to_images(main_directory)
106+
107+
# generate annotations
108+
log.info(
109+
f"[VRDU] file: {original_tex}, start generating annotations, this may take a while..."
110+
)
111+
vrdu_annotation = annotation.LayoutAnnotation(main_directory)
112+
vrdu_annotation.annotate()
113+
114+
log.info(f"[VRDU] file: {original_tex}, successfully processed.")
115+
116+
except Exception as e:
117+
error_type = e.__class__.__name__
118+
error_info = str(e)
119+
log.error(
120+
f"[VRDU] file: {file_name}, type: {error_type}, message: {error_info}"
121+
)
122+
123+
finally:
124+
# remove redundant files
125+
remove_redundant_stuff(main_directory)
126+
127+
# Change back to original dir
128+
os.chdir(cwd)
71129

72130

73131
def parse_arguments() -> str:
@@ -120,57 +178,7 @@ def main() -> None:
120178
None
121179
"""
122180
file_name = parse_arguments()
123-
main_directory = os.path.dirname(file_name)
124-
log.info(f"[VRDU] file: {file_name}, start processing.")
125-
126-
# remove output folder if it exists
127-
output_directory = os.path.join(main_directory, "output")
128-
if os.path.exists(output_directory):
129-
shutil.rmtree(output_directory)
130-
131-
# make a copy of the original tex file
132-
original_tex = os.path.join(main_directory, "paper_original.tex")
133-
shutil.copyfile(file_name, original_tex)
134-
135-
cwd = os.getcwd()
136-
137-
try:
138-
# change the working directory to the main directory
139-
os.chdir(main_directory)
140-
log.info(f"[VRDU] file: {original_tex}, start pre processing.")
141-
preprocess.run(original_tex)
142-
143-
# run rendering
144-
log.info(f"[VRDU] file: {original_tex}, start rendering.")
145-
vrdu_renderer = renderer.Renderer()
146-
vrdu_renderer.render(original_tex)
147-
148-
# compile into PDFs, and then convert into images
149-
log.info(
150-
f"[VRDU] file: {original_tex}, start transforming into images, this may take a while..."
151-
)
152-
transform_tex_to_images(main_directory)
153-
154-
# generate annotations
155-
log.info(
156-
f"[VRDU] file: {original_tex}, start generating annotations, this may take a while..."
157-
)
158-
vrdu_annotation = annotation.LayoutAnnotation(main_directory)
159-
vrdu_annotation.annotate()
160-
161-
log.info(
162-
f"[VRDU] file: {original_tex}, successfully processed. Directory: {main_directory}"
163-
)
164-
165-
except Exception:
166-
log.exception(f"[VRDU] file: {original_tex}, failed.")
167-
168-
finally:
169-
# remove redundant files
170-
remove_redundant_stuff(main_directory)
171-
172-
# Change back to original dir
173-
os.chdir(cwd)
181+
process_one_file(file_name)
174182

175183

176184
if __name__ == "__main__":

0 commit comments

Comments
 (0)