Skip to content

Commit 83cebd5

Browse files
committed
added samair parsing
1 parent 70f1602 commit 83cebd5

1 file changed

Lines changed: 28 additions & 0 deletions

File tree

project/http/requests/proxy/requestProxy.py

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,8 @@ def __init__(self, web_proxy_list=[]):
2222
self.proxy_list += self.proxyForEU_url_parser('http://proxyfor.eu/geo.php', 100.0)
2323
self.proxy_list += self.freeProxy_url_parser('http://free-proxy-list.net')
2424
self.proxy_list += self.weebly_url_parser('http://rebro.weebly.com/proxy-list.html')
25+
self.proxy_list += self.samair_url_parser('http://www.samair.ru/proxy/time-01.htm')
26+
2527

2628
def get_proxy_list(self):
2729
return self.proxy_list
@@ -127,6 +129,32 @@ def weebly_url_parser(self, web_url):
127129
curr_proxy_list.append(proxy.__str__())
128130
return curr_proxy_list
129131

132+
def samair_url_parser(self, web_url, speed_in_KBs=100.0):
    """Scrape proxy addresses from a samair.ru proxy-list page.

    The page does not print the port next to the host. Each proxy is a
    ``<span>`` whose first CSS class selects a rule in a linked stylesheet,
    and that rule carries the port text. This method downloads the page
    and the stylesheet, builds a class-name -> port mapping, and joins
    each span's text with its port.

    Args:
        web_url: URL of the samair.ru proxy-list page to scrape.
        speed_in_KBs: Currently unused by this parser; kept so existing
            callers that pass it keep working.

    Returns:
        A list of strings of the form ``'http://' + <span text> + <port>``.
        The list is empty when the page has no table with
        ``id="proxylist"``. Spans whose class has no port rule are skipped.
    """
    curr_proxy_list = []
    content = requests.get(web_url).content
    soup = BeautifulSoup(content, "html.parser")

    # The stylesheet provides the port number, so we reverse-map it.
    # NOTE(review): the stylesheet path is pulled out of the *string form*
    # of the matching <link> tags (second line, first quoted value). This
    # is kept as-is because the page layout cannot be confirmed from here,
    # but it is fragile -- confirm against the live markup.
    link_tags = soup.find_all('link', attrs={'type': 'text/css'})
    style = "http://www.samair.ru" + str(link_tags).split('\n')[1].split("\"")[1]

    # Use .text rather than .content so the lines are str on Python 3 too
    # (bytes.split('\n') raises TypeError there).
    css_lines = requests.get(style).text.split('\n')

    ports = {}
    for line in css_lines:
        parts = line.split(' ')
        try:
            # First token: drop the leading character and anything from
            # the first ':' on -> class name. Second token: the text
            # between the first pair of double quotes -> port.
            key = parts[0].split(':')[0][1:]
            value = parts[1].split('"')[1]
        except IndexError:
            # Blank (e.g. trailing) line or a rule with a different shape.
            # Skipping here replaces the unconditional pop() of the last
            # line, which lost a real rule when the file had no trailing
            # newline.
            continue
        ports[key] = value

    table = soup.find("table", attrs={"id": "proxylist"})
    if table is None:
        # Page layout changed or the request returned an error page.
        return curr_proxy_list

    # The first span in the table is skipped; presumably it is not a proxy
    # entry -- TODO confirm against the page.
    for row in table.find_all("span")[1:]:
        css_classes = row.get('class') or []
        port = ports.get(css_classes[0]) if css_classes else None
        if port is None:
            # A span with no port rule should not abort the whole scrape.
            continue
        curr_proxy_list.append('http://' + row.text + port)

    # Same text the Python 2 print statement produced, in a form that is
    # also valid on Python 3.
    print("ALL:  %s" % (curr_proxy_list,))
    return curr_proxy_list
157+
130158
def generate_proxied_request(self, url, params={}, req_timeout=30):
131159
#if len(self.proxy_list) < 2:
132160
# self.proxy_list += self.proxyForEU_url_parser('http://proxyfor.eu/geo.php')

0 commit comments

Comments
 (0)