Skip to content

Commit 682b4a8

Browse files
committed
Escape unacceptable characters in keyword searches
Fixes #437
1 parent 22d2cba commit 682b4a8

1 file changed

Lines changed: 7 additions & 3 deletions

File tree

scholarly/_scholarly.py

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
"""scholarly.py"""
22
import requests
3+
import re
34
import os
45
import copy
56
import csv
@@ -15,6 +16,7 @@
1516
_AUTHSEARCH = '/citations?hl=en&view_op=search_authors&mauthors={0}'
1617
_KEYWORDSEARCH = '/citations?hl=en&view_op=search_authors&mauthors=label:{0}'
1718
_KEYWORDSEARCHBASE = '/citations?hl=en&view_op=search_authors&mauthors={}'
19+
_KEYWORDSEARCH_PATTERN = "[-: #(),;]+" # Characters not allowed in keywords; each run of them is replaced with "_".
1820
_PUBSEARCH = '/scholar?hl=en&q={0}'
1921
_CITEDBYSEARCH = '/scholar?hl=en&cites={0}'
2022
_ORGSEARCH = "/citations?view_op=view_org&hl=en&org={0}"
@@ -321,7 +323,9 @@ def search_keyword(self, keyword: str):
321323
'source': 'SEARCH_AUTHOR_SNIPPETS',
322324
'url_picture': 'https://scholar.google.com/citations?view_op=medium_photo&user=lHrs3Y4AAAAJ'}
323325
"""
324-
url = _KEYWORDSEARCH.format(requests.utils.quote(keyword))
326+
327+
reg_keyword = re.sub(_KEYWORDSEARCH_PATTERN, "_", keyword)
328+
url = _KEYWORDSEARCH.format(requests.utils.quote(reg_keyword))
325329
return self.__nav.search_authors(url)
326330

327331
def search_keywords(self, keywords: List[str]):
@@ -355,8 +359,8 @@ def search_keywords(self, keywords: List[str]):
355359
'url_picture': 'https://scholar.google.com/citations?view_op=medium_photo&user=_cMw1IUAAAAJ'}
356360
357361
"""
358-
359-
formated_keywords = ['label:'+requests.utils.quote(keyword) for keyword in keywords]
362+
reg_keywords = (re.sub(_KEYWORDSEARCH_PATTERN, "_", keyword) for keyword in keywords)
363+
formated_keywords = ['label:'+requests.utils.quote(keyword) for keyword in reg_keywords]
360364
formated_keywords = '+'.join(formated_keywords)
361365
url = _KEYWORDSEARCHBASE.format(formated_keywords)
362366
return self.__nav.search_authors(url)

0 commit comments

Comments
 (0)