@@ -19,7 +19,7 @@ def __init__(self, id, web_url, timeout=None):
1919 def parse_proxyList (self ):
2020 curr_proxy_list = []
2121 # Parse all proxy pages -> format: /list/{num}.htm
22- # TODO: get the pageRange from the 'pagination' table
22+ # TODO @pgaref : get the pageRange from the 'pagination' table
2323 for page in range (1 , 21 ):
2424 response = requests .get ("{0}{num:02d}.htm" .format (self .get_URl (), num = page ), timeout = self .timeout )
2525 if not response .ok :
@@ -52,24 +52,24 @@ def parse_proxyList(self):
5252 # curr_proxy_list.append('http://' + row.text + ports[row['class'][0]])
5353 # Make sure it is a Valid Proxy Address
5454 if UrlParser .valid_ip_port (td_row .text ):
55- proxy_obj = self .createProxyObject (td_row )
55+ proxy_obj = self .createProxyObject (row )
5656 proxy_obj .print_everything ()
5757 curr_proxy_list .append (proxy_obj )
5858 else :
5959 logger .debug ("Address with Invalid format: {}" .format (td_row .text ))
6060 return curr_proxy_list
6161
def createProxyObject(self, row):
    """Build a ProxyObject from one table row of the proxy listing.

    Every ``<td>`` cell of *row* is inspected and the cells of interest are
    selected by their ``data-label`` attribute: ``'IP:port '``,
    ``'Anonymity Type: '`` and ``'Country: '`` (the trailing spaces are part
    of the labels being matched).

    :param row: parsed table-row element exposing ``findAll("td")``; each
        cell must expose ``attrs`` (a dict) and ``text``.
    :return: a ProxyObject whose source is ``self.id`` and whose ip, port,
        anonymity level and country come from the labelled cells.
    :raises ValueError: if the row lacks any of the three labelled cells.
    :raises IndexError: if the ``'IP:port '`` cell text contains no ``":"``.
    """
    fields = {}
    for td_row in row.findAll("td"):
        # Use .get(): a cell without a data-label attribute is simply not one
        # of ours and must be skipped rather than raising KeyError.
        label = td_row.attrs.get('data-label')
        if label == 'IP:port ':
            address_parts = td_row.text.split(":")
            fields['ip'] = address_parts[0]
            fields['port'] = address_parts[1]
        elif label == 'Anonymity Type: ':
            fields['anonymity_level'] = AnonymityLevel(td_row.text)
        elif label == 'Country: ':
            fields['country'] = td_row.text

    # Fail with a clear message instead of an UnboundLocalError when the row
    # does not carry every expected cell.
    required = ('ip', 'port', 'anonymity_level', 'country')
    missing = [name for name in required if name not in fields]
    if missing:
        raise ValueError(
            "Proxy row is missing expected cell(s): {}".format(", ".join(missing)))

    return ProxyObject(source=self.id, **fields)
7172
72-
def __str__(self):
    """Return a one-line description with this parser's URL and minimum bandwidth."""
    template = "SemairProxy Parser of '{0}' with required bandwidth: '{1}' KBs"
    return template.format(self.url, self.minimum_bandwidth_in_KBs)
0 commit comments