|
| 1 | +import os |
| 2 | +import arxiv |
| 3 | +import shutil |
| 4 | + |
| 5 | + |
def retrieve_arxiv_metadata(path: str):
    """
    Looks up an arXiv document and returns its default file name and category.

    Args:
        path (str): Name of the arXiv document folder (pattern "****.****",
            where * is a digit). It is passed to arXiv as the document id.

    Returns:
        Tuple[str, str]: The default file name of the first search result
            (with its final character removed) and the result's primary
            category.

    Raises:
        FileNotFoundError: If the search yields no result, or the file name
            or category of the first result is missing.
    """
    query = arxiv.Search(id_list=[path])

    # Only the first result is of interest; None signals an empty search.
    first_hit = next(iter(query.results()), None)

    if first_hit is not None:
        default_name = first_hit._get_default_filename(extension="")
        primary = first_hit.primary_category
    else:
        default_name = primary = None

    if default_name is None or primary is None:
        raise FileNotFoundError(f"metadata of {path} cannot be found in arXiv.")

    # The last character is dropped -- presumably the separator left in
    # front of the empty extension (TODO confirm against the arxiv package).
    return default_name[:-1], primary
| 30 | + |
| 31 | + |
def run(path: str) -> None:
    """
    Moves arXiv subfolders of the given path into per-category folders.

    Only direct subfolders named like "****.****" (four digits, a dot, four
    digits) are considered. Each one is looked up with
    retrieve_arxiv_metadata and moved to "<path>/<category>/<new name>";
    one line is printed for every folder that is moved.

    Args:
        path (str): The path to the directory containing the subfolders.

    Returns:
        None

    Raises:
        FileNotFoundError: Propagated from retrieve_arxiv_metadata when the
            metadata of a folder cannot be found. Folders moved before the
            failure are not moved back.
    """
    # Snapshot the matching folders first so that category folders created
    # below are never picked up by the scan.
    arxiv_dirs = [
        name
        for name in os.listdir(path)
        if len(name) == 9
        and name[4] == "."
        and name[:4].isdigit()
        and name[5:].isdigit()
        and os.path.isdir(os.path.join(path, name))
    ]

    for dir_name in arxiv_dirs:
        new_dir_name, category = retrieve_arxiv_metadata(dir_name)
        category_dir = os.path.join(path, category)
        # exist_ok replaces the separate exists() check and its race window.
        os.makedirs(category_dir, exist_ok=True)
        shutil.move(
            os.path.join(path, dir_name),
            os.path.join(category_dir, new_dir_name),
        )
        print(f"Moved {dir_name} to {category}/{new_dir_name}")
| 61 | + |
| 62 | + |
if __name__ == "__main__":
    import argparse

    # Command-line entry point: sort the arXiv subfolders of --path.
    parser = argparse.ArgumentParser(
        description="Move arXiv document subfolders into per-category folders."
    )
    # Required: without it args.path would be None and run() would fail
    # with a TypeError instead of a usage message.
    parser.add_argument(
        "-p",
        "--path",
        required=True,
        help="path to directory containing subfolders",
    )
    args = parser.parse_args()

    run(args.path)
0 commit comments