Skip to content

Commit 4fee658

Browse files
committed
fix(batch_process.py): typo
1 parent 69f3b6f commit 4fee658

1 file changed

Lines changed: 22 additions & 20 deletions

File tree

batch_process.py

Lines changed: 22 additions & 20 deletions
Original file line number | Diff line number | Diff line change
@@ -1,8 +1,8 @@
11
import os
2-
import shutil
32
import argparse
43
import multiprocessing
5-
from typing import List
4+
import shutil
5+
from typing import List, Optional
66
from uuid import uuid4
77
import pandas as pd
88

@@ -16,7 +16,9 @@
1616
database = "data/processed_paper_database.csv"
1717

1818

19-
def filter_tex_files(tex_files: List[str], main_path: str = None) -> List[str]:
19+
def filter_tex_files(
20+
tex_files: List[str], main_path: Optional[str] = None
21+
) -> List[str]:
2022
"""extract all MAIN.tex files for processing, if main_path is not None, then
2123
only extract MAIN.tex files in the main_path (not recursive)
2224
@@ -25,7 +27,7 @@ def filter_tex_files(tex_files: List[str], main_path: str = None) -> List[str]:
2527
main_path (str, optional): path to main directory. Defaults to None.
2628
2729
Returns:
28-
List[str]: list of tex files that are compiable.
30+
List[str]: list of tex files that are compilable.
2931
"""
3032

3133
# TODO: move this to config
@@ -48,7 +50,7 @@ def filter_tex_files(tex_files: List[str], main_path: str = None) -> List[str]:
4850
if main_path and os.path.dirname(os.path.dirname(tex_file)) != main_path:
4951
continue
5052

51-
# make sure the tex file is compiable (main document)
53+
# make sure the tex file is compilable (main document)
5254
try:
5355
with open(tex_file) as f:
5456
content = f.read()
@@ -70,35 +72,35 @@ def filter_tex_files(tex_files: List[str], main_path: str = None) -> List[str]:
7072
return result
7173

7274

73-
def process_one_discpline(path: str, cpu_count: int, discpline: str) -> None:
74-
"""Process the data in a specific discpline.
75+
def process_one_discipline(path: str, cpu_count: int, discipline: str) -> None:
76+
"""Process the data in a specific discipline.
7577
7678
Args:
7779
path (str): The path to the raw data.
7880
cpu_count (int): The number of CPUs to use for multiprocessing.
79-
discpline (str): The discpline to process.
81+
discipline (str): The discipline to process.
8082
8183
Raises:
8284
Exception: If the processing fails.
8385
8486
Returns:
8587
None
8688
"""
87-
discpline_path = os.path.join(path, discpline)
88-
log.info(f"[VRDU] Path to raw data: {discpline_path}")
89+
discipline_path = os.path.join(path, discipline)
90+
log.info(f"[VRDU] Path to raw data: {discipline_path}")
8991
log.info(f"[VRDU] Using cpu counts: {cpu_count}")
90-
tex_files = utils.extract_all_tex_files(discpline_path)
91-
tex_files = filter_tex_files(tex_files, discpline_path)
92+
tex_files = utils.extract_all_tex_files(discipline_path)
93+
tex_files = filter_tex_files(tex_files, discipline_path)
9294

9395
try:
9496
with multiprocessing.Pool(cpu_count) as pool:
9597
pool.map(process_one_file, tex_files)
9698
except Exception:
97-
log.exception(f"[VRDU] discpline: {discpline}, failed to process.")
99+
log.exception(f"[VRDU] discipline: {discipline}, failed to process.")
98100
finally:
99101
# save the process log
100-
log.info(f"[VRDU] discpline: {discpline}, finished processing.")
101-
shutil.move(log_file, f"data/batch_process_{discpline}.log")
102+
log.info(f"[VRDU] discipline: {discipline}, finished processing.")
103+
shutil.move(log_file, f"data/batch_process_{discipline}.log")
102104

103105

104106
def main():
@@ -107,7 +109,7 @@ def main():
107109
Args:
108110
path (str): The path to the raw data.
109111
cpu_count (int): The number of CPUs to use for multiprocessing.
110-
discpline (str): The discpline to process.
112+
discipline (str): The discipline to process.
111113
112114
Raises:
113115
Exception: If the processing fails.
@@ -131,13 +133,13 @@ def main():
131133
help="cpu count for multiprocessing",
132134
)
133135
parser.add_argument(
134-
"-t", "--discpline", type=str, required=True, help="discpline to process"
136+
"-t", "--discipline", type=str, required=True, help="discipline to process"
135137
)
136138
args = parser.parse_args()
137-
path, cpu_count, discpline = args.path, args.cpu_count, args.discpline
139+
path, cpu_count, discipline = args.path, args.cpu_count, args.discipline
138140

139-
log.info(f"[VRDU] discpline: {discpline}, start to process.")
140-
process_one_discpline(path, cpu_count, discpline)
141+
log.info(f"[VRDU] discipline: {discipline}, start to process.")
142+
process_one_discipline(path, cpu_count, discipline)
141143

142144

143145
if __name__ == "__main__":

0 commit comments

Comments
 (0)