@@ -22,6 +22,8 @@ def __init__(self, web_proxy_list=[]):
         self.proxy_list += self.proxyForEU_url_parser('http://proxyfor.eu/geo.php', 100.0)
         self.proxy_list += self.freeProxy_url_parser('http://free-proxy-list.net')
         self.proxy_list += self.weebly_url_parser('http://rebro.weebly.com/proxy-list.html')
+        self.proxy_list += self.samair_url_parser('http://www.samair.ru/proxy/time-01.htm')
+
 
     def get_proxy_list(self):
         return self.proxy_list
@@ -127,6 +129,35 @@ def weebly_url_parser(self, web_url):
             curr_proxy_list.append(proxy.__str__())
         return curr_proxy_list
 
+    def samair_url_parser(self, web_url, speed_in_KBs=100.0):
+        curr_proxy_list = []
+        content = requests.get(web_url).content
+        soup = BeautifulSoup(content, "html.parser")
+        # The CSS provides the port number, so we reverse the mapping
+        style = "http://www.samair.ru" + str(soup.find_all('link', attrs={'type': 'text/css'})).split('\n')[1].split('"')[1]
+        css = requests.get(style).content.split('\n')
+        css.pop()
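+        # css.pop() discards the empty entry left by the trailing newline.
+        # Each remaining rule is assumed to look like '.r8f2a:after {content:":3128"}'.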
+        ports = {}
+        for l in css:
+            p = l.split(' ')
+            key = p[0].split(':')[0][1:]
+            value = p[1].split('"')[1]
+            ports[key] = value
+
+        table = soup.find("table", attrs={"id": "proxylist"})
+
+        # The first tr contains the field names.
+        headings = [th.get_text() for th in table.find("tr").find_all("th")]
+
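+        # Each span holds a bare IP; its CSS class keys the hidden port suffix.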
+        for row in table.find_all("span")[1:]:
+            curr_proxy_list.append('http://' + row.text + ports[row['class'][0]])
+
+        print "ALL: ", curr_proxy_list
+        return curr_proxy_list
+
     def generate_proxied_request(self, url, params={}, req_timeout=30):
         #if len(self.proxy_list) < 2:
         #    self.proxy_list += self.proxyForEU_url_parser('http://proxyfor.eu/geo.php')
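
For context, the port-hiding trick this parser reverses can be reproduced in isolation. A minimal sketch (Python 2, to match the file); the CSS rules below are illustrative stand-ins, not actual samair.ru output:

    # samair.ru renders each proxy IP in a <span> whose CSS class selects an
    # :after rule; that rule's content is the port suffix shown to browsers.
    css_lines = ['.r8f2a:after {content:":3128"}',
                 '.x91bc:after {content:":8080"}']
    ports = {}
    for l in css_lines:
        p = l.split(' ')
        ports[p[0].split(':')[0][1:]] = p[1].split('"')[1]
    print ports  # -> {'r8f2a': ':3128', 'x91bc': ':8080'} (order may vary)
    # 'http://' + span.text + ports[span_class] then yields e.g. 'http://1.2.3.4:3128'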