diff --git a/Convert_py_exe.txt b/Convert_py_exe.txt new file mode 100644 index 0000000..2fcf01c --- /dev/null +++ b/Convert_py_exe.txt @@ -0,0 +1,32 @@ +For changing into .exe file first use: +pip install pyinstaller + +Use the command: +pyinstaller -w --onefile f_scrape.py + +Then wait for the command execute. A spec file will be formed. In this file add the following locations in the datas section. For me the locations were these: +("C:\\Users\\kulka\\anaconda3\\Lib\\site-packages\\branca\\*.json","branca"), +("C:\\Users\\kulka\\anaconda3\\Lib\\site-packages\\branca\\templates","templates"), +("C:\\Users\\kulka\\anaconda3\\Lib\\site-packages\\folium\\templates","templates"), + +Also locate the following three files and edit them as follows: +1.\folium\folium.py +2.\folium\raster_layers.py +3.\branca\element.py + +Replace the line: ENV = Environment(loader=PackageLoader('folium', 'templates')) +With the lines: +import os, sys +from jinja2 import FileSystemLoader +if getattr(sys, 'frozen', False): + # we are running in a bundle + templatedir = sys._MEIPASS +else: + # we are running in a normal Python environment + templatedir = os.path.dirname(os.path.abspath(__file__)) +ENV = Environment(loader=FileSystemLoader(templatedir + '\\templates')) + +Then finally run the command: pyinstaller f_scrape.spec + +Make sure all the external files required are the correct folders. +And there you have it a python application with a folium module converted to a windows application!! \ No newline at end of file diff --git a/f_scrape.py b/f_scrape.py new file mode 100644 index 0000000..7f36f16 --- /dev/null +++ b/f_scrape.py @@ -0,0 +1,353 @@ +#!/usr/bin/env python +# coding: utf-8 + +# In[1]: + + +from selenium import webdriver +from selenium.webdriver.common.by import By +from selenium.webdriver.support.ui import WebDriverWait +from selenium.webdriver.support import expected_conditions as EC +from selenium.webdriver.common.keys import Keys +from selenium.common.exceptions import NoSuchElementException, TimeoutException, ElementNotInteractableException +#from tqdm import tqdm_notebook as tqdmn +from selenium.webdriver.common.action_chains import ActionChains +import numpy as np +import pandas as pd +import folium +import time, re +import csv +import os + + +# In[2]: + + +#enter path of csv file that contains the list of keywords +user_input = input("Enter path: ") +assert os.path.exists(user_input), "file not at, "+str(user_input) + + +# In[3]: + +#adding such useless comments which aren't even helpful, just plain annoying +fields=[] +rows=[] +with open(user_input) as cf: + csv_read=csv.reader(cf, delimiter=',') + fields=next(csv_read) + for row in csv_read: + rows.append(row) + + +# In[4]: + + +def scrape_info(a,i,done): + try: + loc_name.append(driver.find_element_by_css_selector('#pane > div > div.widget-pane-content.scrollable-y > div > div > div.section-hero-header-title > div.section-hero-header-title-top-container > div.section-hero-header-title-description > div:nth-child(1) > h1 > span:nth-child(1)').text) + except NoSuchElementException: + loc_name.append(np.nan) + + try: + category.append(driver.find_element_by_css_selector('#pane > div > div.widget-pane-content.scrollable-y > div > div > div.section-hero-header-title > div.section-hero-header-title-top-container > div.section-hero-header-title-description > div.section-hero-header-title-description-container > div > div:nth-child(2) > span.section-rating-term > span:nth-child(1) > button').text) + except NoSuchElementException: + category.append(np.nan) + + time.sleep(2) + try: + address.append(driver.find_element_by_css_selector('button[data-tooltip="Copy address"]').text) + except NoSuchElementException: + address.append(np.nan) + + time.sleep(2) + try: + element=driver.find_element_by_css_selector('button[data-tooltip="Open website"]') + driver.execute_script("arguments[0].scrollIntoView();", element) + ActionChains(driver).move_to_element(element).perform() + web_link.append(driver.find_element_by_css_selector('button[data-tooltip="Open website"]').text) + except NoSuchElementException: + web_link.append(np.nan) + print("No web address found..") + + time.sleep(2) + try: + phone_no.append(driver.find_element_by_css_selector('button[data-tooltip="Copy phone number"]').text) + except NoSuchElementException: + phone_no.append(np.nan) + + try: + rev_no.append(driver.find_element_by_css_selector('#pane > div > div.widget-pane-content.scrollable-y > div > div > div.jqnFjrOWMVU__root > div > div.jqnFjrOWMVU__right > div.gm2-display-2').text) + except NoSuchElementException: + rev_no.append(np.nan) + + try: + tot_rate.append(driver.find_element_by_css_selector('#pane > div > div.widget-pane-content.scrollable-y > div > div > div.jqnFjrOWMVU__root > div > div.jqnFjrOWMVU__right > button').text) + except NoSuchElementException: + tot_rate.append(np.nan) + input_place.append(start_sear) + done=True + return loc_name, web_link, category, phone_no, address, rev_no, tot_rate, done + + +# In[5]: + + +def button_click(): + next_click=True + cli_text="" + try: + cli_text=driver.find_element_by_xpath('//*[@id="pane"]/div/div[1]/div/div/div[4]/div[1]/div[1]').text + + except (ElementClickInterceptedException, NoSuchElementException): + next_click= False + if cli_text=='No results found': + print("next place") + next_click=False + return next_click + + +# In[ ]: + + +#declaring empty lists to store scraped values +input_place=[] +loc_name=[] +web_link=[] +category=[] +phone_no=[] +address=[] +rev_no=[] +tot_rate=[] + +#install chromedriver before this step +driver=webdriver.Chrome("C:\\chromedriver.exe") +d=0 + + +fields=[] +rows=[] +with open(user_input) as cf: + csv_read=csv.reader(cf, delimiter=',') + fields=next(csv_read) + for row in csv_read: + rows.append(row) + +for esc_room in rows: + #takes the value from csv file iputted till the last element + start_sear = str(esc_room) + #if you want individual results you can declare the "start_search" string such as start_search="required keyword" + url="https://www.google.com/maps/search/" + start_sear + driver.get(url) + + time.sleep(5) + #check_i=False + #while(check_i!=True): + try: + #reads value of number of search results on the first page + i=driver.find_element_by_css_selector('#pane > div > div.widget-pane-content.scrollable-y > div > div > div.section-layout.section-scrollbox.scrollable-y.scrollable-show.section-layout-flex-vertical > div.n7lv7yjyC35__root > div > div:nth-child(1) > span > span:nth-child(2)').text + i=int(i) + #check_i=True + except: + # check_i=False + print("checking i..") + continue + print("after conv",i) + #to subtract later when we click the next triangle button + r=i + + for a in range (0,(i+40)): + cli_text='' + url_ch='' + ad_find='' + if (a>i): + time.sleep(3) + try: + print("clicking next button") + #clicking the next button to check if they have more results + next_click=driver.find_element_by_css_selector('#n7lv7yjyC35__section-pagination-button-next > span') + next_click.click() + time.sleep(10) + except (ElementClickInterceptedException, NoSuchElementException, TimeoutException): + #if the button is not clickable, it could mean there are no more results + break + time.sleep(5) + + if (button_click()): #function is written above + print("more searches to scrape") + time.sleep(5) + #if the function returns a true value, there are more results on the next page. So we store the last value, of the number of results on next page in d + time.sleep(3) + try: + d=driver.find_element_by_xpath('//*[@id="pane"]/div/div[1]/div/div/div[4]/div[2]/div/div[1]/span/span[2]').text + d=int(d) + except: + continue + #we get the url of this page as we have to scrape all the info from here + + else: + break + #to set the value of i for the loop + i=d-r + #resetting a. but i don't think it is working + a=1 + r=d + for a in range (1,i+1): + if(a>i): + time.sleep(3) + try: + print("clicking next button") + #clicking the next button to check if they have more results + driver.find_element_by_css_selector('#n7lv7yjyC35__section-pagination-button-next > span').click() + time.sleep(5) + except (ElementClickInterceptedException,NoSuchElementException,TimeoutException): + #if the button is not clickable, it could mean there are no more results + break + time.sleep(5) + + if (button_click()): #function is written + print("more searches to scrape") + time.sleep(5) + #if the function returns a true value, there are more results on the next page. So we store the last value, of the number of results on next page in d + try: + d=driver.find_element_by_xpath('//*[@id="pane"]/div/div[1]/div/div/div[4]/div[2]/div/div[1]/span/span[2]').text + d=int(d) + except: + continue + #we get the url of this page as we have to scrape all the info from here + + else: + break + #to set the value of i for the loop + i=d-r + #resetting a + a=1 + r=d + + a=(a*2)+1 + try: #waiting for the title element of places after entering our url + WebDriverWait(driver,25).until(EC.visibility_of_element_located((By.CLASS_NAME, "section-result-title"))) + except: + pass + try: #to scrape ads + ad_find=driver.find_element_by_xpath('//*[@id="pane"]/div/div[1]/div/div/div[4]/div[1]/div['+ str(a) +']/div[1]/div[1]/div[1]/div[1]/div[2]/span[1]').text + except (NoSuchElementException,TimeoutException): + pass + if ad_find=='Ad': + print("hi!") + i+=1 + + try: + print(a) + print("clicking on the first search results on new page") + time.sleep(5) + driver.find_element_by_css_selector('#pane > div > div.widget-pane-content.scrollable-y > div > div > div.section-layout.section-scrollbox.scrollable-y.scrollable-show.section-layout-flex-vertical > div.section-layout.section-scrollbox.scrollable-y.scrollable-show.section-layout-flex-vertical > div:nth-child(' + str(a) + ')').click() + except: + print("couldn't click on result") + continue + try: #waiting for the title of the clicked place to appear + print("waiting for description block title of clicked search to appear") + WebDriverWait(driver,30).until(EC.visibility_of_element_located((By.CLASS_NAME, "section-hero-header-title-description"))) + except (TimeoutException, NoSuchElementException): + print("description block chilled") + pass + done=False + loc_name, web_link, category, phone_no, address, rev_no, tot_rate, done=scrape_info(a,i,done) + #scrape_info(a,i,done) + while (done!=True): + print("waiting...") + try: + driver.find_element_by_xpath('//*[@id="pane"]/div/div[1]/div/div/button').click() + print("after clicking next a",a) + print("after clicking next i", i) + + except: + pass + + continue + #a calculated according to the division element that we have to click to scrape + a=(a*2)+1 + a=str(a) + try: #waiting for the title element of places after entering our url + WebDriverWait(driver,25).until(EC.visibility_of_element_located((By.CLASS_NAME, "section-result-title"))) + except: + pass + + try: #to ignore elements if they are ads + ad_find=driver.find_element_by_xpath('//*[@id="pane"]/div/div[1]/div/div/div[4]/div[1]/div['+ a +']/div[1]/div[1]/div[1]/div[1]/div[2]/span[1]').text + if ad_find=='Ad': + print("hi!") + #the different search results are in odd numbered divs + i+=1 + except (NoSuchElementException,TimeoutException): + pass + + + #clicking on the different places that appeared after searching + try: + driver.find_element_by_css_selector('#pane > div > div.widget-pane-content.scrollable-y > div > div > div.section-layout.section-scrollbox.scrollable-y.scrollable-show.section-layout-flex-vertical > div.section-layout.section-scrollbox.scrollable-y.scrollable-show.section-layout-flex-vertical > div:nth-child(' + a + ')').click() + except: + continue + try: #waiting for the title of the clicked place to appear + WebDriverWait(driver,30).until(EC.visibility_of_element_located((By.CLASS_NAME, "section-hero-header-title-description"))) + except TimeoutException: + pass + #appending the lists based on the data we found after scraping + try: + loc_name.append(driver.find_element_by_css_selector('#pane > div > div.widget-pane-content.scrollable-y > div > div > div.section-hero-header-title > div.section-hero-header-title-top-container > div.section-hero-header-title-description > div:nth-child(1) > h1 > span:nth-child(1)').text) + except NoSuchElementException: + loc_name.append(np.nan) + + try: + category.append(driver.find_element_by_css_selector('#pane > div > div.widget-pane-content.scrollable-y > div > div > div.section-hero-header-title > div.section-hero-header-title-top-container > div.section-hero-header-title-description > div.section-hero-header-title-description-container > div > div:nth-child(2) > span.section-rating-term > span:nth-child(1) > button').text) + except NoSuchElementException: + category.append(np.nan) + + try: + address.append(driver.find_element_by_css_selector('button[data-tooltip="Copy address"]').text) + except NoSuchElementException: + address.append(np.nan) + + time.sleep(2) + try: + element=driver.find_element_by_css_selector('button[data-tooltip="Open website"]') + driver.execute_script("arguments[0].scrollIntoView();", element) + ActionChains(driver).move_to_element(element).perform() + web_link.append(driver.find_element_by_css_selector('button[data-tooltip="Open website"]').text) + except NoSuchElementException: + web_link.append(np.nan) + print("No web address found..") + + time.sleep(2) + try: + phone_no.append(driver.find_element_by_css_selector('button[data-tooltip="Copy phone number"]').text) + except NoSuchElementException: + phone_no.append(np.nan) + + try: + rev_no.append(driver.find_element_by_css_selector('#pane > div > div.widget-pane-content.scrollable-y > div > div > div.jqnFjrOWMVU__root > div > div.jqnFjrOWMVU__right > div.gm2-display-2').text) + except NoSuchElementException: + rev_no.append(np.nan) + + try: + tot_rate.append(driver.find_element_by_css_selector('#pane > div > div.widget-pane-content.scrollable-y > div > div > div.jqnFjrOWMVU__root > div > div.jqnFjrOWMVU__right > button').text) + except NoSuchElementException: + tot_rate.append(np.nan) + + #This appends the keyword that you want to search, so declare it in a start_search variable + input_place.append(start_sear) + a=int(a) + print("final a",a) + print("final i",i) + time.sleep(2) + try: + driver.find_element_by_xpath('//*[@id="pane"]/div/div[1]/div/div/button').click() + except (NoSuchElementException,TimeoutException): + pass + dict = {'keyword searched for':input_place, 'location_name': loc_name, 'website_link': web_link, 'category': category, 'contact': phone_no, 'address':address, 'no. of reviews': rev_no, 'total_ratings': tot_rate} + df_rev = pd.DataFrame(dict) + df_rev.to_csv('escape_rooms_final_result.csv') + +driver.close() + + diff --git a/f_scrape.spec b/f_scrape.spec new file mode 100644 index 0000000..332a2d8 --- /dev/null +++ b/f_scrape.spec @@ -0,0 +1,35 @@ +# -*- mode: python ; coding: utf-8 -*- + +block_cipher = None + + +a = Analysis(['f_scrape.py'], + pathex=['C:\\Users\\kulka'], + binaries=[], + datas=[("C:\\Users\\kulka\\anaconda3\\Lib\\site-packages\\branca\\*.json","branca"), + ("C:\\Users\\kulka\\anaconda3\\Lib\\site-packages\\branca\\templates","templates"), + ("C:\\Users\\kulka\\anaconda3\\Lib\\site-packages\\folium\\templates","templates"),], + hiddenimports=[], + hookspath=[], + runtime_hooks=[], + excludes=[], + win_no_prefer_redirects=False, + win_private_assemblies=False, + cipher=block_cipher, + noarchive=False) +pyz = PYZ(a.pure, a.zipped_data, + cipher=block_cipher) +exe = EXE(pyz, + a.scripts, + a.binaries, + a.zipfiles, + a.datas, + [], + name='f_scrape', + debug=False, + bootloader_ignore_signals=False, + strip=False, + upx=True, + upx_exclude=[], + runtime_tmpdir=None, + console=True )