forked from semmyk-research/AutomatedSearchHelper
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathrun_articles_download.py
More file actions
55 lines (40 loc) · 2.37 KB
/
run_articles_download.py
File metadata and controls
55 lines (40 loc) · 2.37 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
#!/usr/bin/env python
"""
Downloads articles from input, to given folder in .json format
"""
import argparse
import sys
import logging
from AutomatedSearchHelperUtilities.extract_doi_from_csv import extract_doi_from_csv
from AutomatedSearchHelperUtilities.utilities import createDirectoryIfNotExists
import AutomatedSearchHelperUtilities.configuration as configuration
from ArticlesDataDownloader.ArticlesDataDownloader import ArticlesDataDownloader
from ArticlesDataDownloader.read_input_file import read_input_file
def run_articles_download(outputArticles, article_datas, proxyFile):
    """Run the downloader over every entry of ``article_datas``.

    ``outputArticles`` is first handed to ``createDirectoryIfNotExists``;
    one ``ArticlesDataDownloader`` is then constructed from
    ``outputArticles`` and ``proxyFile`` and reused for all entries.

    Returns:
        A list with one item per input entry, in input order: the first
        element of whatever ``read_article`` returned for that entry.
    """
    createDirectoryIfNotExists(outputArticles)
    downloader = ArticlesDataDownloader(outputArticles, proxyFile)

    first_results = []
    for entry in article_datas:
        outcome = downloader.read_article(entry)
        # Only the first element of the result is kept.
        first_results.append(outcome[0])
    return first_results
def getArgumentsParser():
    """Build the command-line parser for this script.

    The module docstring is used as the parser description. Every option
    is optional and has a default.

    Returns:
        An ``argparse.ArgumentParser`` exposing ``--output_articles``,
        ``--proxy_file``, ``--articles_list`` and ``--file_type``.
    """
    parser = argparse.ArgumentParser(description=__doc__, add_help=True)
    parser.add_argument(
        '--output_articles', default='outputArticles', type=str,
        help='Location for articles .json files')
    parser.add_argument(
        '--proxy_file', default='proxy_auth_plugin.zip', type=str,
        help='Proxy configuration file')
    parser.add_argument(
        '--articles_list', default='scopus.csv', type=str,
        help='file containing articles data in supported format')

    # Join the entries explicitly: relying on adjacent string literals made
    # it easy to drop a separator, which previously rendered the tail of the
    # help text as "4- SPRINGER_CSV5- WILLEY_RIS6- ACM_BIB".
    supported_formats = (
        '1- SCOPUS_CSV (default)',
        '2- IEEE_CSV',
        '3- SCIENCE_DIRECT_RIS',
        '4- SPRINGER_CSV',
        '5- WILLEY_RIS',
        '6- ACM_BIB',
    )
    parser.add_argument(
        '--file_type', default=1, type=int,
        help='File format: ' + ', '.join(supported_formats))
    return parser
def main(args=None):
    """Parse the command line, read the article list and start the download.

    Args:
        args: Argument list forwarded to ``parse_args``. With ``None``,
            argparse reads the process command line instead.

    Returns:
        None (there is no explicit return statement).
    """
    configuration.configureLogger()
    log = logging.getLogger('run_articles_download')

    options = getArgumentsParser().parse_args(args=args)

    log.info("Starting run_articles_download with following arguments")
    log.info("output_articles = " + options.output_articles)

    # The input file is read according to the numeric --file_type selector.
    articles = read_input_file(options.articles_list, options.file_type)
    log.info("doi_list = " + str(articles))

    run_articles_download(options.output_articles, articles, options.proxy_file)
# Only run when executed as a script; main()'s return value is passed to
# sys.exit as the exit status.
if __name__ == '__main__':
    sys.exit(main())