|
1 | 1 | """scholarly.py""" |
2 | 2 | import requests |
| 3 | +import re |
3 | 4 | import os |
4 | 5 | import copy |
5 | 6 | import csv |
|
15 | 16 | _AUTHSEARCH = '/citations?hl=en&view_op=search_authors&mauthors={0}' |
16 | 17 | _KEYWORDSEARCH = '/citations?hl=en&view_op=search_authors&mauthors=label:{0}' |
17 | 18 | _KEYWORDSEARCHBASE = '/citations?hl=en&view_op=search_authors&mauthors={}' |
| 19 | +_KEYWORDSEARCH_PATTERN = "[-: #(),;]+" # Runs of characters that are not allowed in keywords; each run is replaced with "_" before searching. |
18 | 20 | _PUBSEARCH = '/scholar?hl=en&q={0}' |
19 | 21 | _CITEDBYSEARCH = '/scholar?hl=en&cites={0}' |
20 | 22 | _ORGSEARCH = "/citations?view_op=view_org&hl=en&org={0}" |
@@ -321,7 +323,9 @@ def search_keyword(self, keyword: str): |
321 | 323 | 'source': 'SEARCH_AUTHOR_SNIPPETS', |
322 | 324 | 'url_picture': 'https://scholar.google.com/citations?view_op=medium_photo&user=lHrs3Y4AAAAJ'} |
323 | 325 | """ |
324 | | - url = _KEYWORDSEARCH.format(requests.utils.quote(keyword)) |
| 326 | + |
| 327 | + reg_keyword = re.sub(_KEYWORDSEARCH_PATTERN, "_", keyword) |
| 328 | + url = _KEYWORDSEARCH.format(requests.utils.quote(reg_keyword)) |
325 | 329 | return self.__nav.search_authors(url) |
326 | 330 |
|
327 | 331 | def search_keywords(self, keywords: List[str]): |
@@ -355,8 +359,8 @@ def search_keywords(self, keywords: List[str]): |
355 | 359 | 'url_picture': 'https://scholar.google.com/citations?view_op=medium_photo&user=_cMw1IUAAAAJ'} |
356 | 360 |
|
357 | 361 | """ |
358 | | - |
359 | | - formated_keywords = ['label:'+requests.utils.quote(keyword) for keyword in keywords] |
| 362 | + reg_keywords = (re.sub(_KEYWORDSEARCH_PATTERN, "_", keyword) for keyword in keywords) |
| 363 | + formated_keywords = ['label:'+requests.utils.quote(keyword) for keyword in reg_keywords] |
360 | 364 | formated_keywords = '+'.join(formated_keywords) |
361 | 365 | url = _KEYWORDSEARCHBASE.format(formated_keywords) |
362 | 366 | return self.__nav.search_authors(url) |
|
0 commit comments