@@ -22,6 +22,8 @@ def __init__(self, web_proxy_list=[]):
2222 self .proxy_list += self .proxyForEU_url_parser ('http://proxyfor.eu/geo.php' , 100.0 )
2323 self .proxy_list += self .freeProxy_url_parser ('http://free-proxy-list.net' )
2424 self .proxy_list += self .weebly_url_parser ('http://rebro.weebly.com/proxy-list.html' )
25+ self .proxy_list += self .samair_url_parser ('http://www.samair.ru/proxy/time-01.htm' )
26+
2527
def get_proxy_list(self):
    """Return the accumulated list of proxy URL strings."""
    return self.proxy_list
@@ -127,6 +129,34 @@ def weebly_url_parser(self, web_url):
127129 curr_proxy_list .append (proxy .__str__ ())
128130 return curr_proxy_list
129131
def samair_url_parser(self, web_url, speed_in_KBs=100.0):
    """Scrape proxy addresses from a samair.ru proxy-list page.

    samair.ru obfuscates port numbers: each proxy row carries a CSS class,
    and the actual port digits are injected by a ``:after {content:"..."}``
    rule in an external stylesheet.  This parser downloads that stylesheet,
    builds a class-name -> port-text map, then joins each row's IP with its
    decoded port.

    :param web_url: samair.ru proxy-list page URL to scrape.
    :param speed_in_KBs: accepted for signature consistency with the other
        parsers (e.g. proxyForEU_url_parser); not used by this source.
    :return: list of proxy URL strings ('http://<ip><port-text>'); empty
        list when the page or stylesheet cannot be decoded.
    """
    curr_proxy_list = []
    content = requests.get(web_url).content
    soup = BeautifulSoup(content, "html.parser")

    # Locate the stylesheet that carries the port numbers.  Guard against
    # <link> tags without an href and against no matching stylesheet at
    # all (the original code left `style` unbound in that case).
    style = None
    for link_tag in soup.findAll('link'):
        link_target = link_tag.get('href')
        if link_target and '/styles/' in link_target:
            style = "http://www.samair.ru" + link_target
            break
    if style is None:
        # Without the stylesheet the ports cannot be decoded.
        return curr_proxy_list

    # Build class-name -> port-text map from rules shaped like:
    #   .c123:after {content:"8080"}
    ports = {}
    for css_line in requests.get(style).content.split('\n'):
        parts = css_line.split(' ')
        if len(parts) < 2:
            continue  # blank/trailing line (original did a single .pop())
        try:
            key = parts[0].split(':')[0][1:]       # '.c123:after' -> 'c123'
            value = parts[1].split('"')[1]         # '{content:"8080"}' -> port text
        except IndexError:
            continue  # malformed rule; skip rather than crash
        ports[key] = value

    table = soup.find("table", attrs={"id": "proxylist"})
    if table is None:
        # Page layout changed or request was blocked.
        return curr_proxy_list

    # First span is a header/decoy; each following span holds an IP and
    # carries the CSS class that maps to its port.
    for row in table.find_all("span")[1:]:
        css_class = row.get('class')
        if css_class and css_class[0] in ports:
            curr_proxy_list.append('http://' + row.text + ports[css_class[0]])

    return curr_proxy_list
159+
130160 def generate_proxied_request (self , url , params = {}, req_timeout = 30 ):
131161 #if len(self.proxy_list) < 2:
132162 # self.proxy_list += self.proxyForEU_url_parser('http://proxyfor.eu/geo.php')
@@ -137,6 +167,7 @@ def generate_proxied_request(self, url, params={}, req_timeout=30):
137167 request = None
138168 try :
139169 rand_proxy = random .choice (self .proxy_list )
170+ print "Next proxy: " + str (rand_proxy )
140171 request = requests .get (test_url , proxies = {"http" : rand_proxy },
141172 headers = req_headers , timeout = req_timeout )
142173 except ConnectionError :
0 commit comments