Skip to content

Commit 5790c5c

Browse files
committed
refactor(visualize_dataset_distribution.py, utils.py): move get_all_categories out of vrdu/utils.py and into the script that uses it
1 parent 5c28737 commit 5790c5c

2 files changed

Lines changed: 19 additions & 20 deletions

File tree

scripts/visualize_dataset_distribution.py

Lines changed: 19 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,24 @@
44
import numpy as np
55
import csv
66

7-
from vrdu import utils
7+
8+
def get_all_categories() -> list:
    """
    Retrieves all categories from the "category_count.csv" file.

    The file is looked up at the relative path "scripts/category_count.csv",
    so the result depends on the current working directory of the process.

    Returns:
        categories (list): The value of the "categories" column for every
            data row, in file order.

    Raises:
        FileNotFoundError: If "scripts/category_count.csv" does not exist
            relative to the current working directory.
        KeyError: If a row has no "categories" column.

    Reference:
        https://arxiv.org/category_taxonomy
    """
    # newline="" lets the csv module handle line endings (and newlines
    # embedded in quoted fields) itself; the explicit encoding keeps the
    # result independent of the platform's default locale.
    with open(
        "scripts/category_count.csv", "r", newline="", encoding="utf-8"
    ) as f:
        return [row["categories"] for row in csv.DictReader(f)]
825

926

1027
def visualize_distribution(dict1, dict2):
@@ -50,7 +67,7 @@ def visualize_distribution(dict1, dict2):
5067

5168

5269
def analyze_raw_data(path):
53-
all_categories = utils.get_all_categories()
70+
all_categories = get_all_categories()
5471

5572
data = defaultdict(int)
5673
for category in all_categories:

vrdu/utils.py

Lines changed: 0 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -221,24 +221,6 @@ def convert_eps_image_to_pdf_image(eps_image_path: str, pdf_image_path: str):
221221
subprocess.run(["epspdf", eps_image_path, pdf_image_path])
222222

223223

224-
def get_all_categories():
225-
"""
226-
Retrieves all categories from the "category_count.csv" file.
227-
228-
Returns:
229-
categories (list): A list of all categories.
230-
231-
Reference:
232-
https://arxiv.org/category_taxonomy
233-
"""
234-
categories = []
235-
with open("scripts/category_count.csv", "r") as f:
236-
reader = csv.DictReader(f)
237-
for row in reader:
238-
categories.append(row["categories"])
239-
240-
return categories
241-
242224

243225
def extract_macro_definitions(tex_file) -> List[str]:
244226
"""

0 commit comments

Comments
 (0)