2727
2828
2929class RequestProxy :
30- def __init__ (self , web_proxy_list = [], sustain = False ):
30+ def __init__ (self , web_proxy_list = [], sustain = False , timeout = 5 ):
3131 self .userAgent = UserAgentManager ()
3232 self .logger = logging .getLogger ()
3333 self .logger .addHandler (handler )
@@ -37,10 +37,10 @@ def __init__(self, web_proxy_list=[], sustain=False):
3737 # Each of the classes below implements a specific URL Parser
3838 #####
3939 parsers = list ([])
40- parsers .append (FreeProxyParser ('http://free-proxy-list.net' ))
41- parsers .append (ProxyForEuParser ('http://proxyfor.eu/geo.php' , 1.0 ))
42- parsers .append (RebroWeeblyParser ('http://rebro.weebly.com' ))
43- # parsers.append(SamairProxyParser('http://samair.ru/proxy/time-01.htm'))
40+ parsers .append (FreeProxyParser ('http://free-proxy-list.net' , timeout = timeout ))
41+ parsers .append (ProxyForEuParser ('http://proxyfor.eu/geo.php' , 1.0 , timeout = timeout ))
42+ parsers .append (RebroWeeblyParser ('http://rebro.weebly.com' , timeout = timeout ))
43+ parsers .append (SamairProxyParser ('http://samair.ru/proxy/time-01.htm' , timeout = timeout ))
4444
4545 self .logger .debug ("=== Initialized Proxy Parsers ===" )
4646 for i in range (len (parsers )):
@@ -51,7 +51,10 @@ def __init__(self, web_proxy_list=[], sustain=False):
5151 self .parsers = parsers
5252 self .proxy_list = web_proxy_list
5353 for i in range (len (parsers )):
54- self .proxy_list += parsers [i ].parse_proxyList ()
54+ try :
55+ self .proxy_list += parsers [i ].parse_proxyList ()
56+ except ReadTimeout :
57+ self .logger .warn ("Proxy Parser: '{}' TimedOut!" .format (parsers [i ].url ))
5558 self .current_proxy = self .randomize_proxy ()
5659
5760 def set_logger_level (self , level ):
@@ -83,7 +86,11 @@ def randomize_proxy(self):
8386 def generate_proxied_request (self , url , method = "GET" , params = {}, data = {}, headers = {}, req_timeout = 30 ):
8487 try :
8588 random .shuffle (self .proxy_list )
86- req_headers = dict (params .items () + self .generate_random_request_headers ().items ())
89+ # req_headers = dict(params.items() + self.generate_random_request_headers().items())
90+
91+ req_headers = dict (params .items ())
92+ req_headers_random = dict (self .generate_random_request_headers ().items ())
93+ req_headers .update (req_headers_random )
8794
8895 if not self .sustain :
8996 self .randomize_proxy ()
@@ -100,7 +107,7 @@ def generate_proxied_request(self, url, method="GET", params={}, data={}, header
100107 raise ConnectionError ("HTTP Response [403] - Permission denied error" )
101108 elif request .status_code == 503 :
102109 raise ConnectionError ("HTTP Response [503] - Service unavailable error" )
103- print 'RR Status {}' .format (request .status_code )
110+ print ( 'RR Status {}' .format (request .status_code ) )
104111 return request
105112 except ConnectionError :
106113 try :
@@ -132,19 +139,19 @@ def generate_proxied_request(self, url, method="GET", params={}, data={}, header
132139
133140 start = time .time ()
134141 req_proxy = RequestProxy ()
135- print "Initialization took: {0} sec" .format ((time .time () - start ))
136- print "Size : " , len (req_proxy .get_proxy_list ())
137- print " ALL = " , req_proxy .get_proxy_list ()
142+ print ( "Initialization took: {0} sec" .format ((time .time () - start ) ))
143+ print ( "Size: {0}" . format ( len (req_proxy .get_proxy_list ()) ))
144+ print ( " ALL = {0} " . format ( req_proxy .get_proxy_list ()) )
138145
139146 test_url = 'http://ipv4.icanhazip.com'
140147
141148 while True :
142149 start = time .time ()
143150 request = req_proxy .generate_proxied_request (test_url )
144- print "Proxied Request Took: {0} sec => Status: {1}" .format ((time .time () - start ), request .__str__ ())
151+ print ( "Proxied Request Took: {0} sec => Status: {1}" .format ((time .time () - start ), request .__str__ () ))
145152 if request is not None :
146- print "\t Response: ip={0}" .format (u'' .join (request .text ).encode ('utf-8' ))
147- print "Proxy List Size: " , len (req_proxy .get_proxy_list ())
153+ print ( "\t Response: ip={0}" .format (u'' .join (request .text ).encode ('utf-8' ) ))
154+ print ( "Proxy List Size: {0}" . format ( len (req_proxy .get_proxy_list ()) ))
148155
149- print "-> Going to sleep.."
156+ print ( "-> Going to sleep.." )
150157 time .sleep (10 )
0 commit comments