|
1 | 1 | import os |
2 | | -import glob |
3 | 2 | import shutil |
4 | 3 | import argparse |
5 | 4 | import multiprocessing |
6 | 5 | from typing import List |
7 | 6 | from uuid import uuid4 |
8 | 7 |
|
9 | | -from tqdm import tqdm |
10 | | - |
11 | 8 | from vrdu import logger |
12 | 9 | from vrdu import utils |
13 | | -from vrdu import renderer |
14 | | -from vrdu import preprocess |
15 | | -from vrdu.annotation import LayoutAnnotation |
16 | | -from vrdu.config import config |
17 | | - |
| 10 | +from main import process_one_file |
18 | 11 |
|
# Application-level logger: the log file name is a freshly generated UUID with
# a ".log" suffix, and the file is opened in append mode.
log_file = f"{uuid4()}.log"
log = logger.setup_app_level_logger(
    file_name=log_file,
    level="INFO",
    mode="a",
)
21 | 14 |
|
22 | 15 |
|
def transform_tex_to_images(main_directory: str) -> None:
    """
    Compile every ``paper_*.tex`` file of a directory and convert the PDFs to images.

    Each matching TeX file is compiled with ``utils.compile_latex``. The PDF
    carrying the same base name inside ``main_directory`` is then handed to
    ``utils.pdf2jpg`` together with a newly created folder
    ``<main_directory>/output/<base name>``.

    Args:
        main_directory (str): The directory that holds the TeX files.

    Returns:
        None

    Raises:
        FileExistsError: If the image folder of one of the files already exists
            (``os.makedirs`` is called without ``exist_ok``).
    """
    output_root = os.path.join(main_directory, "output")
    for source_path in tqdm(glob.glob(f"{main_directory}/paper_*.tex")):
        log.debug(f"[VRDU] file: {source_path}, start transforming into images.")
        utils.compile_latex(source_path)

        # the PDF is looked up in main_directory under the TeX file's base name
        stem, _ = os.path.splitext(os.path.basename(source_path))
        compiled_pdf = os.path.join(main_directory, f"{stem}.pdf")

        # every document gets its own image folder below output/
        target_directory = os.path.join(output_root, stem)
        os.makedirs(target_directory)
        utils.pdf2jpg(compiled_pdf, target_directory)
47 | | - |
48 | | - |
def remove_redundant_stuff(main_directory: str) -> None:
    """
    Remove redundant files and folders from the main directory.

    Two kinds of entries are deleted:

    * every file matching ``paper_*`` directly inside ``main_directory``;
    * the folders ``output/paper_<config.folder_prefix>*``,
      ``output/paper_white`` and ``output/paper_original``.

    Entries of an unexpected type are skipped instead of aborting the cleanup:
    a directory matching ``paper_*`` is left in place (``os.remove`` cannot
    delete directories), and a non-directory sitting at one of the folder
    paths is left in place (``shutil.rmtree`` only accepts directories).

    Args:
        main_directory (str): The path of the main directory.

    Returns:
        None
    """
    # remove generated tex related files
    for path in glob.glob(f"{main_directory}/paper_*"):
        # os.remove() raises on a real directory; symlinks are still removed
        if os.path.isdir(path) and not os.path.islink(path):
            continue
        os.remove(path)

    # remove useless pdf and image files
    # TODO: move this name pattern into config
    redundant_folders = glob.glob(
        f"{main_directory}/output/paper_{config.folder_prefix}*"
    )
    redundant_folders += [
        f"{main_directory}/output/paper_white",
        f"{main_directory}/output/paper_original",
    ]
    for folder in redundant_folders:
        # isdir() is False for missing paths as well as for regular files,
        # both of which shutil.rmtree() would reject
        if os.path.isdir(folder):
            shutil.rmtree(folder)
76 | | - |
77 | | - |
def process_one_file(file_name) -> None:
    """
    Run the whole processing pipeline on a single TeX file.

    The steps are: skip the file if ``output/result/quality_report.json``
    already exists next to it, copy it to ``paper_original.tex``, change into
    its directory, preprocess, render, compile the generated TeX files into
    images and generate the layout annotations. Errors raised by those steps
    are logged, not re-raised. Generated intermediate files are removed and
    the previous working directory is restored in every case.

    Args:
        file_name: Path of the TeX file to process. A relative path is
            resolved against the working directory at call time.

    Returns:
        None
    """
    # Resolve the directory to an absolute path up front. The working
    # directory is changed below, so a relative directory would afterwards
    # point to the wrong location, and a bare file name would yield "" and
    # make the cleanup glob "/paper_*" at the filesystem root.
    main_directory = os.path.dirname(os.path.abspath(file_name))
    log.info(f"[VRDU] file: {file_name}, start processing.")

    # check if this paper has been processed
    quality_report_file = os.path.join(
        main_directory, "output/result/quality_report.json"
    )
    if os.path.exists(quality_report_file):
        log.info(f"[VRDU] file: {file_name}, paper has been processed")
        return

    # make a copy of the original tex file
    original_tex = os.path.join(main_directory, "paper_original.tex")
    shutil.copyfile(file_name, original_tex)

    cwd = os.getcwd()

    try:
        # change the working directory to the main directory
        os.chdir(main_directory)
        preprocess.run(original_tex)

        # run rendering
        vrdu_renderer = renderer.Renderer()
        vrdu_renderer.render(original_tex)

        # compile into PDFs, and then convert into images
        log.info(
            f"[VRDU] file: {original_tex}, start transforming into images, this may take a while..."
        )
        transform_tex_to_images(main_directory)

        # generate annotations
        log.info(
            f"[VRDU] file: {original_tex}, start generating annotations, this may take a while..."
        )
        vrdu_annotation = LayoutAnnotation(main_directory)
        vrdu_annotation.annotate()

        log.info(f"[VRDU] file: {original_tex}, successfully processed.")

    except Exception as e:
        error_type = e.__class__.__name__
        error_info = str(e)
        log.error(
            f"[VRDU] file: {file_name}, type: {error_type}, message: {error_info}"
        )

    finally:
        try:
            # remove redundant files
            remove_redundant_stuff(main_directory)
        finally:
            # Change back to original dir, even when the cleanup itself fails
            os.chdir(cwd)
133 | | - |
134 | | - |
135 | 16 | def filter_tex_files(tex_files: List[str], main_path: str = None) -> List[str]: |
136 | 17 | """extract all MAIN.tex files for processing, if main_path is not None, then |
137 | 18 | only extract MAIN.tex files in the main_path (not recursive) |
|
0 commit comments