Skip to content

Commit 2d7a389

Browse files
authored
Add files via upload
1 parent 843bc71 commit 2d7a389

4 files changed

Lines changed: 191 additions & 0 deletions

File tree

Web_scraping/README.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
# web_scraping_SJ
2+
Repo for Web Scraping
62.3 KB
Binary file not shown.

Web_scraping/WebScrapingDemo.py

Lines changed: 79 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,79 @@
# -*- coding: utf-8 -*-
"""Scrape restaurant names for a 'chinese' search on Swiggy.

Demo script: sets the delivery city, runs a cuisine search, opens each
result in a new tab, records the restaurant name, and prints the first
five names as a pandas DataFrame.

@author: nnair
"""
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
import pandas as pd

main_url = "https://www.swiggy.com/search?q=chinese"

driver = webdriver.Chrome()
driver.maximize_window()
driver.get(main_url)

# Set the delivery location so search results get populated.
city = 'Mumbai'
search_item = driver.find_element(By.ID, "location")
search_item.send_keys(city)

# Wait for the autocomplete suggestion and pick the first match.
# NOTE(review): class names like '_2W-T9' are build-generated and brittle;
# they break whenever Swiggy redeploys its frontend.
wait = WebDriverWait(driver, 500)
wait.until(EC.presence_of_element_located((By.XPATH, "//*[@class='_2W-T9']")))
cityname = driver.find_element(By.XPATH, "//*[@class='_2W-T9']")
cityname.click()

wait.until(EC.presence_of_element_located((By.XPATH, "//*[@class='_3XX_A']")))

# Re-run the cuisine search now that the location is set.
search_url = "https://www.swiggy.com/search?q=Chinese"
driver.get(search_url)
wait.until(EC.presence_of_element_located((By.XPATH, "//*[@class='nA6kb']")))

# Each search-result card wraps its link in an <a> under class '_3XX_A'.
listings = driver.find_elements(By.XPATH, "//*[@class='_3XX_A']/a")
current_window = driver.current_window_handle

# Accumulate rows in a plain list: DataFrame.append was removed in
# pandas 2.0, and appending row-by-row is quadratic anyway.
rows = []
for listing in listings:
    url = listing.get_attribute('href')

    # Open the restaurant page in a new tab and switch to it.
    # window_handles[-1] is the most recently opened handle (the
    # original hard-coded index 1, which breaks with >2 windows).
    driver.execute_script('window.open(arguments[0]);', url)
    driver.switch_to.window(driver.window_handles[-1])

    rest_name = driver.find_element(By.XPATH, "//*[@class='_3aqeL']").text
    print(rest_name)
    rows.append({'restaurant name': rest_name})

    # Close the tab and return to the results page.
    driver.close()
    driver.switch_to.window(current_window)

    # Demo-sized sample: stop after 5 restaurants.
    if len(rows) % 5 == 0:
        break

driver.close()

df = pd.DataFrame(rows, columns=['restaurant name'])
print(df)

Web_scraping/web_scrapping.py

Lines changed: 110 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,110 @@
# -*- coding: utf-8 -*-
"""Scrape Swiggy restaurant listings for several cuisines into a CSV.

For each cuisine the script runs a search, opens the first five result
pages in new tabs, and records restaurant name, cuisines, rating, price
for two, location and URL. Results are written to ``Swiggy_data.csv``.

Created on Sun May 3 10:01:48 2020

@author: NNAIR
"""
from urllib.parse import quote_plus

import pandas as pd
from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait

# Demo-sized sample per cuisine.
RESULTS_PER_CUISINE = 5

main_url = "https://www.swiggy.com"

# Opening the web driver
driver = webdriver.Chrome()
driver.implicitly_wait(5)
driver.maximize_window()
driver.get(main_url)

# Setting the delivery location for swiggy
city = 'Mumbai'
search_item = driver.find_element(By.ID, "location")
search_item.send_keys(city)

# NOTE(review): class names like '_2W-T9' are build-generated and brittle;
# they break whenever Swiggy redeploys its frontend.
WebDriverWait(driver, 500).until(
    EC.presence_of_element_located((By.XPATH, "//*[@class='_2W-T9']")))
cityname = driver.find_element(By.XPATH, "//*[@class='_2W-T9']")
cityname.click()

# Wait for the restaurant listing to confirm the location was accepted.
WebDriverWait(driver, 100).until(
    EC.presence_of_element_located((By.XPATH, "//*[@class='_3XX_A']")))

cuisine_options = ["chinese", "north indian thalis", "italian"]

# Accumulate rows in a list: DataFrame.append was removed in pandas 2.0,
# and appending row-by-row is quadratic anyway.
rows = []

for cuisine in cuisine_options:
    # Opening the cuisine-specific search URL (quote_plus turns the
    # spaces in e.g. "north indian thalis" into '+').
    driver.get("https://www.swiggy.com/search?q=" + quote_plus(cuisine))

    # Each search-result card wraps its link in an <a> under '_3XX_A'.
    listings = driver.find_elements(By.XPATH, "//*[@class='_3XX_A']/a")
    current_window = driver.current_window_handle

    # Per-cuisine counter: the original keyed the batch limit on the
    # global len(df) % 5, which desynchronizes if any cuisine returns
    # fewer than 5 results.
    scraped = 0
    for listing in listings:
        # Getting restaurant URL
        url = listing.get_attribute('href')

        # Open the restaurant page in a new tab and switch to it;
        # window_handles[-1] is the most recently opened handle.
        driver.execute_script('window.open(arguments[0]);', url)
        driver.switch_to.window(driver.window_handles[-1])

        # Getting restaurant name and cuisine list
        rest_name = driver.find_element(By.XPATH, "//*[@class='_3aqeL']").text
        cuisine_list = driver.find_element(By.XPATH, "//*[@class='_3Plw0 JMACF']").text
        print(cuisine_list)

        # '_2l3H5' spans hold the header stats; per the original code,
        # index 0 is the rating and index 2 the price for two.
        info = driver.find_elements(By.XPATH, "//*[@class='_2l3H5']")
        rest_rating = info[0].text
        print(rest_rating)
        rest_price = info[2].text
        print(rest_price)

        # The location element carries one of two class combinations;
        # catch only the lookup failure rather than a bare except.
        try:
            rest_location = driver.find_element(
                By.XPATH, "//*[@class='Gf2NS _2Y6HW']").text
        except NoSuchElementException:
            rest_location = driver.find_element(
                By.XPATH, "//*[@class='Gf2NS _2Y6HW _2x0-U']").text

        rows.append({'restaurant name': rest_name,
                     'cuisine': cuisine_list,
                     'rating': rest_rating,
                     'price for two': rest_price,
                     'location': rest_location,
                     'url': url})

        # Closing the restaurant tab and returning to the results page.
        driver.close()
        driver.switch_to.window(current_window)

        scraped += 1
        if scraped == RESULTS_PER_CUISINE:
            break

# Closing the main driver
driver.close()

# Saving the data into a csv file
df = pd.DataFrame(rows, columns=['restaurant name', 'cuisine', 'rating',
                                 'price for two', 'location', 'url'])
df.to_csv("Swiggy_data.csv", index=False)

0 commit comments

Comments
 (0)