Skip to content

Commit 2d7a389

Browse files
authored
Add files via upload
1 parent 843bc71 commit 2d7a389

4 files changed

Lines changed: 191 additions & 0 deletions

File tree

Web_scraping/README.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
# web_scraping_SJ
2+
Repo for Web Scraping
62.3 KB
Binary file not shown.

Web_scraping/WebScrapingDemo.py

Lines changed: 79 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,79 @@
# -*- coding: utf-8 -*-
"""Scrape restaurant names for a 'chinese' search on Swiggy.

Demo script: sets the delivery city, runs a cuisine search, opens each
result in a new tab, records the restaurant name, and prints the first
five names as a pandas DataFrame.

@author: nnair
"""
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
import pandas as pd

main_url = "https://www.swiggy.com/search?q=chinese"

driver = webdriver.Chrome()
driver.maximize_window()
driver.get(main_url)

# Set the delivery location so search results get populated.
city = 'Mumbai'
search_item = driver.find_element(By.ID, "location")
search_item.send_keys(city)

# Wait for the autocomplete suggestion and pick the first match.
# NOTE(review): class names like '_2W-T9' are build-generated and brittle;
# they break whenever Swiggy redeploys its frontend.
wait = WebDriverWait(driver, 500)
wait.until(EC.presence_of_element_located((By.XPATH, "//*[@class='_2W-T9']")))
cityname = driver.find_element(By.XPATH, "//*[@class='_2W-T9']")
cityname.click()

wait.until(EC.presence_of_element_located((By.XPATH, "//*[@class='_3XX_A']")))

# Re-run the cuisine search now that the location is set.
search_url = "https://www.swiggy.com/search?q=Chinese"
driver.get(search_url)
wait.until(EC.presence_of_element_located((By.XPATH, "//*[@class='nA6kb']")))

# Each search-result card wraps its link in an <a> under class '_3XX_A'.
listings = driver.find_elements(By.XPATH, "//*[@class='_3XX_A']/a")
current_window = driver.current_window_handle

# Accumulate rows in a plain list: DataFrame.append was removed in
# pandas 2.0, and appending row-by-row is quadratic anyway.
rows = []
for listing in listings:
    url = listing.get_attribute('href')

    # Open the restaurant page in a new tab and switch to it.
    # window_handles[-1] is the most recently opened handle (the
    # original hard-coded index 1, which breaks with >2 windows).
    driver.execute_script('window.open(arguments[0]);', url)
    driver.switch_to.window(driver.window_handles[-1])

    rest_name = driver.find_element(By.XPATH, "//*[@class='_3aqeL']").text
    print(rest_name)
    rows.append({'restaurant name': rest_name})

    # Close the tab and return to the results page.
    driver.close()
    driver.switch_to.window(current_window)

    # Demo-sized sample: stop after 5 restaurants.
    if len(rows) % 5 == 0:
        break

driver.close()

df = pd.DataFrame(rows, columns=['restaurant name'])
print(df)

Web_scraping/web_scrapping.py

Lines changed: 110 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,110 @@
# -*- coding: utf-8 -*-
"""Scrape Swiggy restaurant listings for several cuisines into a CSV.

For each cuisine the script runs a search, opens the first five result
pages in new tabs, and records restaurant name, cuisines, rating, price
for two, location and URL. Results are written to ``Swiggy_data.csv``.

Created on Sun May 3 10:01:48 2020

@author: NNAIR
"""
from urllib.parse import quote_plus

import pandas as pd
from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait

# Demo-sized sample per cuisine.
RESULTS_PER_CUISINE = 5

main_url = "https://www.swiggy.com"

# Opening the web driver
driver = webdriver.Chrome()
driver.implicitly_wait(5)
driver.maximize_window()
driver.get(main_url)

# Setting the delivery location for swiggy
city = 'Mumbai'
search_item = driver.find_element(By.ID, "location")
search_item.send_keys(city)

# NOTE(review): class names like '_2W-T9' are build-generated and brittle;
# they break whenever Swiggy redeploys its frontend.
WebDriverWait(driver, 500).until(
    EC.presence_of_element_located((By.XPATH, "//*[@class='_2W-T9']")))
cityname = driver.find_element(By.XPATH, "//*[@class='_2W-T9']")
cityname.click()

# Wait for the restaurant listing to confirm the location was accepted.
WebDriverWait(driver, 100).until(
    EC.presence_of_element_located((By.XPATH, "//*[@class='_3XX_A']")))

cuisine_options = ["chinese", "north indian thalis", "italian"]

# Accumulate rows in a list: DataFrame.append was removed in pandas 2.0,
# and appending row-by-row is quadratic anyway.
rows = []

for cuisine in cuisine_options:
    # Opening the cuisine-specific search URL (quote_plus turns the
    # spaces in e.g. "north indian thalis" into '+').
    driver.get("https://www.swiggy.com/search?q=" + quote_plus(cuisine))

    # Each search-result card wraps its link in an <a> under '_3XX_A'.
    listings = driver.find_elements(By.XPATH, "//*[@class='_3XX_A']/a")
    current_window = driver.current_window_handle

    # Per-cuisine counter: the original keyed the batch limit on the
    # global len(df) % 5, which desynchronizes if any cuisine returns
    # fewer than 5 results.
    scraped = 0
    for listing in listings:
        # Getting restaurant URL
        url = listing.get_attribute('href')

        # Open the restaurant page in a new tab and switch to it;
        # window_handles[-1] is the most recently opened handle.
        driver.execute_script('window.open(arguments[0]);', url)
        driver.switch_to.window(driver.window_handles[-1])

        # Getting restaurant name and cuisine list
        rest_name = driver.find_element(By.XPATH, "//*[@class='_3aqeL']").text
        cuisine_list = driver.find_element(By.XPATH, "//*[@class='_3Plw0 JMACF']").text
        print(cuisine_list)

        # '_2l3H5' spans hold the header stats; per the original code,
        # index 0 is the rating and index 2 the price for two.
        info = driver.find_elements(By.XPATH, "//*[@class='_2l3H5']")
        rest_rating = info[0].text
        print(rest_rating)
        rest_price = info[2].text
        print(rest_price)

        # The location element carries one of two class combinations;
        # catch only the lookup failure rather than a bare except.
        try:
            rest_location = driver.find_element(
                By.XPATH, "//*[@class='Gf2NS _2Y6HW']").text
        except NoSuchElementException:
            rest_location = driver.find_element(
                By.XPATH, "//*[@class='Gf2NS _2Y6HW _2x0-U']").text

        rows.append({'restaurant name': rest_name,
                     'cuisine': cuisine_list,
                     'rating': rest_rating,
                     'price for two': rest_price,
                     'location': rest_location,
                     'url': url})

        # Closing the restaurant tab and returning to the results page.
        driver.close()
        driver.switch_to.window(current_window)

        scraped += 1
        if scraped == RESULTS_PER_CUISINE:
            break

# Closing the main driver
driver.close()

# Saving the data into a csv file
df = pd.DataFrame(rows, columns=['restaurant name', 'cuisine', 'rating',
                                 'price for two', 'location', 'url'])
df.to_csv("Swiggy_data.csv", index=False)

0 commit comments

Comments
 (0)