-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathreviews.py
More file actions
63 lines (56 loc) · 1.86 KB
/
reviews.py
File metadata and controls
63 lines (56 loc) · 1.86 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
from beautifulSoupUtils import runBeautifulSoup
# ------- Create URL of the Reviews Page (The Chosen Restaurant Page) ------- #
def createThePlaceURL(link):
    """Return the absolute URL of a restaurant's review page.

    Args:
        link: Path of the business page, e.g.
            ``"/biz/baltimore-built-bistro-b3-baltimore-3"``. A path without
            a leading slash is accepted, and a link that is already an
            absolute ``http://`` / ``https://`` URL is returned unchanged.

    Returns:
        The link prefixed with the Yelp host (or the link itself when it is
        already absolute).
    """
    yelpHost = "https://www.yelp.com"
    # An absolute URL (for instance a locally served test page) must not get
    # the Yelp host glued in front of it.
    if link.startswith(("http://", "https://")):
        return link
    # Guard against "biz/..." producing "https://www.yelp.combiz/...".
    if link and not link.startswith("/"):
        link = "/" + link
    return yelpHost + link
# find total (reviews) pages to scrape all reviews
def findTotalReviewPages(url, maxPages=3):
    """Return how many review pages should be scraped for ``url``.

    The page count is read from a pagination label whose text has the form
    ``"<current> of <total>"`` and is then capped at ``maxPages``.

    Args:
        url: Address of the review page to inspect.
        maxPages: Upper bound on the number of pages returned (default 3).

    Returns:
        1 for a locally served page (any URL containing ``127.0.0.1``) or
        when no parsable pagination label is found; otherwise the total read
        from the page, limited to ``maxPages``.
    """
    # Local test pages are always treated as a single page, so there is no
    # need to fetch and parse them first.
    if "127.0.0.1" in url:
        return 1

    soup = runBeautifulSoup(url)
    # NOTE(review): these CSS class names look auto-generated and may change
    # when the site is redeployed -- confirm they still match.
    pagination = soup.find('div', class_='css-1aq64zd')
    label = None
    if pagination is not None:
        label = pagination.find('span', class_='css-chan6m')
    if label is None:
        # No pagination label on the page: fall back to a single page.
        return 1

    pieces = label.text.split(' of ')
    if len(pieces) < 2:
        return 1
    try:
        totalPages = int(pieces[1])
    except ValueError:
        return 1
    return min(totalPages, maxPages)
# Scraping all reviews
def scrapeReviews(link, totalPages = 1):
    """Collect the text of every English-tagged review over several pages.

    Args:
        link: URL of the review page. For a URL containing ``127.0.0.1`` the
            link is fetched as-is on every page; otherwise the ``start``
            query parameter is set to the review offset of each page.
        totalPages: Number of pages to fetch, starting from page 1.

    Returns:
        A list with the text of every ``<span lang="en">`` element found, in
        page order. Empty when ``totalPages`` is less than 1.
    """
    allReviews = []
    print(f"link!!! : {link}")
    isLocalPage = "127.0.0.1" in link

    page = 1
    while page <= totalPages:
        # Offsets used for the url: 1, 20, 40, 60, 80, ...
        # NOTE(review): page 1 uses offset 1 rather than 0 and the step is
        # fixed at 20 -- confirm both against the site's real pagination.
        startAt = 1 if page == 1 else (page - 1) * 20
        url = link if isLocalPage else _withStartOffset(link, startAt)

        pageOfPages = f"(Page {page} of {totalPages})"
        print(url, pageOfPages)

        soup = runBeautifulSoup(url)
        allReviews.extend(
            span.text for span in soup.find_all('span', {"lang": "en"})
        )
        page += 1
    return allReviews


def _withStartOffset(link, startAt):
    """Return ``link`` with its ``start`` query parameter set to ``startAt``."""
    from urllib.parse import parse_qsl, urlencode, urlsplit, urlunsplit

    parts = urlsplit(link)
    # Drop any existing "start" value (including an empty "start=") so the
    # offset is never appended onto the path or onto another parameter.
    query = [
        (key, value)
        for key, value in parse_qsl(parts.query, keep_blank_values=True)
        if key != "start"
    ]
    query.append(("start", str(startAt)))
    return urlunsplit(parts._replace(query=urlencode(query)))
def getReviews(urlPath):
    """Scrape and return the reviews for the business at ``urlPath``.

    Builds the review-page URL from ``urlPath``, works out how many pages to
    fetch for it, and returns whatever ``scrapeReviews`` collects from them.
    """
    # For local testing, point placeURL at a locally served page instead,
    # e.g. 'http://127.0.0.1:5500/reviewPage.html'.
    placeURL = createThePlaceURL(urlPath)
    return scrapeReviews(placeURL, findTotalReviewPages(placeURL))