Skip to content

Commit 0da6604

Browse files
committed
refactor(run_statistics.py): simplify code logic
1 parent 0502413 commit 0da6604

1 file changed

Lines changed: 8 additions & 11 deletions

File tree

scripts/run_statistics.py

Lines changed: 8 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -1,7 +1,6 @@
11
import glob
22
import os
33
import pandas as pd
4-
from datetime import datetime
54
import argparse
65
from datetime import datetime
76

@@ -111,7 +110,8 @@ def update_processed_database(input_path: str):
111110
or line.find("paper has been processed") != -1
112111
):
113112
if uuid in df["uuid"].values:
114-
if df[df["uuid"] == uuid]["status"] == "success":
113+
index = df[df["uuid"] == uuid].index[0]
114+
if df.loc[index, "status"] == "success":
115115
continue
116116
df.loc[df["uuid"] == uuid, "status"] = "success"
117117
df.loc[df["uuid"] == uuid, "end_time"] = current_time
@@ -120,7 +120,8 @@ def update_processed_database(input_path: str):
120120
# failed to process file, update status and error information
121121
if line.find("message: ") != -1:
122122
if uuid in df["uuid"].values:
123-
if df[df["uuid"] == uuid]["status"] == "failure":
123+
index = df[df["uuid"] == uuid].index[0]
124+
if df.loc[index, "status"] == "failure":
124125
continue
125126
error_type = line.split("type: ")[1].split(", ")[0]
126127
error_info = line.split("message: ")[1].strip()
@@ -154,6 +155,9 @@ def update_processed_database(input_path: str):
154155
]
155156
df.loc[index, "overlap"] = quality_report["page_quality"][-1]["ratio"]
156157

158+
for category_item in quality_report["category_quality"]:
159+
df.loc[index, category_item["category"]] = category_item["geometry_count"]
160+
157161
# remove processing files
158162
df = df[~(df["status"] == "processing")]
159163
df.to_csv(database_file, index=False)
@@ -169,13 +173,6 @@ def update_discpline_info():
169173
with open(log_file) as f:
170174
lines = f.readlines()
171175
for line in lines:
172-
if line.find("[VRDU] Before filtering") != -1:
173-
processable_files = int(line.split("found ")[1].split(" ")[0])
174-
log.debug(
175-
f"discpline: {discpline}, processable files: {processable_files}"
176-
)
177-
df.loc[df["discpline"] == discpline, "num_papers"] = processable_files
178-
179176
if line.find("finished processing.") != -1:
180177
df.loc[df["discpline"] == discpline, "status"] = "complete"
181178
else:
@@ -238,7 +235,7 @@ def update_daily_overview() -> None:
238235

239236
def main():
240237
parser = argparse.ArgumentParser()
241-
parser.add_argument("--input_path", type=str, default="data/")
238+
parser.add_argument("--input_path", type=str, default="output/")
242239
args = parser.parse_args()
243240

244241
update_processed_database(args.input_path)

0 commit comments

Comments
 (0)