
Commit 388c12a

Author: pgaref

More work on (#25)

* Making sure the request response is a valid page before parsing
* Using the boolean response.ok attribute to make sure the request was successful

1 parent 8d375ec commit 388c12a

4 files changed

Lines changed: 28 additions & 4 deletions
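
All four parsers now follow the same guard pattern: fetch the provider page, check response.ok (true for any status code below 400), and return an empty list instead of parsing an error page. A minimal standalone sketch of that pattern (the fetch_proxy_page name, plain-URL argument, and logger.warning call are illustrative, not code from this commit):

import logging

import requests
from bs4 import BeautifulSoup

logger = logging.getLogger(__name__)

def fetch_proxy_page(url, timeout=None):
    """Return the parsed provider page, or None when the request fails."""
    response = requests.get(url, timeout=timeout)
    # response.ok is True for any status code below 400, so this skips
    # 4xx/5xx error pages that would otherwise be fed to BeautifulSoup.
    if not response.ok:
        logger.warning("Proxy Provider url failed: {}".format(url))
        return None
    return BeautifulSoup(response.content, "html.parser")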

http_request_randomizer/requests/parsers/FreeProxyParser.py

Lines changed: 7 additions & 1 deletion
@@ -15,7 +15,13 @@ def __init__(self, web_url, timeout=None):

     def parse_proxyList(self):
         curr_proxy_list = []
-        content = requests.get(self.get_URl(), timeout=self.timeout).content
+        response = requests.get(self.get_URl(), timeout=self.timeout)
+
+        if not response.ok:
+            logger.warn("Proxy Provider url failed: {}".format(self.get_URl()))
+            return []
+
+        content = response.content
         soup = BeautifulSoup(content, "html.parser")
         table = soup.find("table", attrs={"class": "display fpltable"})

http_request_randomizer/requests/parsers/ProxyForEuParser.py

Lines changed: 7 additions & 1 deletion
@@ -15,7 +15,13 @@ def __init__(self, web_url, bandwithdh=None, timeout=None):

     def parse_proxyList(self):
         curr_proxy_list = []
-        content = requests.get(self.get_URl(), timeout=self.timeout).content
+        response = requests.get(self.get_URl(), timeout=self.timeout)
+
+        if not response.ok:
+            logger.warn("Proxy Provider url failed: {}".format(self.get_URl()))
+            return []
+
+        content = response.content
         soup = BeautifulSoup(content, "html.parser")
         table = soup.find("table", attrs={"class": "proxy_list"})

http_request_randomizer/requests/parsers/RebroWeeblyParser.py

Lines changed: 7 additions & 1 deletion
@@ -17,7 +17,13 @@ def __init__(self, web_url, timeout=None):

     def parse_proxyList(self, use_top15k=False):
         curr_proxy_list = []
-        content = requests.get(self.get_URl()+"/"+self.top_proxy_path, timeout=self.timeout).content
+        response = requests.get(self.get_URl()+"/"+self.top_proxy_path, timeout=self.timeout)
+
+        if not response.ok:
+            logger.warn("Proxy Provider url failed: {}".format(self.get_URl()))
+            return []
+
+        content = response.content
         soup = BeautifulSoup(content, "html.parser")
         table = soup.find("div", attrs={"class": "paragraph", 'style': "text-align:left;"}).find('font', attrs={
             'color': '#33a27f'})

http_request_randomizer/requests/parsers/SamairProxyParser.py

Lines changed: 7 additions & 1 deletion
@@ -15,7 +15,13 @@ def __init__(self, web_url, timeout=None):

     def parse_proxyList(self):
         curr_proxy_list = []
-        content = requests.get(self.get_URl(), timeout=self.timeout).content
+        response = requests.get(self.get_URl(), timeout=self.timeout)
+
+        if not response.ok:
+            logger.warn("Proxy Provider url failed: {}".format(self.get_URl()))
+            return []
+
+        content = response.content
         soup = BeautifulSoup(content, "html.parser")
         # css provides the port number so we reverse it
         # for href in soup.findAll('link'):
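
With this guard in place, a provider that is unreachable or returning an error page contributes an empty list rather than raising while parsing. A hypothetical caller (the parsers collection below is assumed, not part of this commit) can simply aggregate whatever each provider returns:

all_proxies = []
for parser in parsers:  # assumed: the configured parser instances from this package
    # A failed provider yields [] and is skipped implicitly.
    all_proxies.extend(parser.parse_proxyList())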
