Skip to content

Commit 37a844a

Browse files
author
pgaref
committed
Extra string spaces in proxy attributes made parsers fail. Fixing using strip() - more work to be done in code deduplication
1 parent 4edf042 commit 37a844a

6 files changed

Lines changed: 25 additions & 21 deletions

File tree

http_request_randomizer/requests/parsers/FreeProxyParser.py

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -57,17 +57,17 @@ def createProxyObject(self, dataset):
5757
for field in dataset:
5858
if field[0] == 'IP Address':
5959
# Make sure it is a Valid IP
60-
if not UrlParser.valid_ip(field[1]):
61-
logger.debug("IP with Invalid format: {}".format(field[1]))
62-
break
63-
else:
64-
ip = field[1]
60+
ip = field[1].strip() # String strip()
61+
# TODO @pgaref: Duplicate code!!!
62+
if not UrlParser.valid_ip(ip):
63+
logger.debug("IP with Invalid format: {}".format(ip))
64+
return None
6565
elif field[0] == 'Port':
66-
port = field[1]
66+
port = field[1].strip() # String strip()
6767
elif field[0] == 'Anonymity':
68-
anonymity = AnonymityLevel(field[1])
68+
anonymity = AnonymityLevel(field[1].strip()) # String strip()
6969
elif field[0] == 'Country':
70-
country = field[1]
70+
country = field[1].strip() # String strip()
7171
return ProxyObject(source=self.id, ip=ip, port=port, anonymity_level=anonymity, country=country)
7272

7373
def __str__(self):

http_request_randomizer/requests/parsers/ProxyForEuParser.py

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -57,18 +57,18 @@ def createProxyObject(self, dataset):
5757
if float(field[1]) < self.get_min_bandwidth():
5858
return None
5959
if field[0] == 'IP':
60+
ip = field[1].strip() # String strip()
61+
# TODO @pgaref : Duplicate code?
6062
# Make sure it is a Valid IP
61-
if not UrlParser.valid_ip(field[1]):
62-
logger.debug("IP with Invalid format: {}".format(field[1]))
63-
break
64-
else:
65-
ip = field[1]
63+
if not UrlParser.valid_ip(ip):
64+
logger.debug("IP with Invalid format: {}".format(ip))
65+
return None
6666
elif field[0] == 'Port':
67-
port = field[1]
67+
port = field[1].strip() # String strip()
6868
elif field[0] == 'Anon':
69-
anonymity = AnonymityLevel(field[1])
69+
anonymity = AnonymityLevel(field[1].strip()) # String strip()
7070
elif field[0] == 'Country':
71-
country = field[1]
71+
country = field[1].strip() # String strip()
7272
return ProxyObject(source=self.id, ip=ip, port=port, anonymity_level=anonymity, country=country)
7373

7474
def __str__(self):

http_request_randomizer/requests/parsers/RebroWeeblyParser.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,7 @@ def parse_proxyList(self, use_top15k=False):
5656

5757
def createProxyObject(self, dataset):
5858
# Provider specific code
59+
dataset = dataset.strip() # String strip()
5960
ip = dataset.split(":")[0]
6061
port = dataset.split(":")[1]
6162
# TODO: Parse extra tables and combine data - Provider seems to be out-of-date

http_request_randomizer/requests/parsers/SamairProxyParser.py

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -62,12 +62,13 @@ def parse_proxyList(self):
6262
def createProxyObject(self, row):
6363
for td_row in row.findAll("td"):
6464
if td_row.attrs['data-label'] == 'IP:port ':
65-
ip = td_row.text.split(":")[0]
66-
port = td_row.text.split(":")[1]
65+
text = td_row.text.strip()
66+
ip = text.split(":")[0]
67+
port = text.split(":")[1]
6768
elif td_row.attrs['data-label'] == 'Anonymity Type: ':
68-
anonymity = AnonymityLevel(td_row.text)
69+
anonymity = AnonymityLevel(td_row.text.strip())
6970
elif td_row.attrs['data-label'] == 'Country: ':
70-
country = td_row.text
71+
country = td_row.text.strip()
7172
return ProxyObject(source=self.id, ip=ip, port=port, anonymity_level=anonymity, country=country)
7273

7374
def __str__(self):

http_request_randomizer/requests/proxy/ProxyObject.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,3 +48,5 @@ class AnonymityLevel(MultiValueEnum):
4848
ANONYMOUS = 'anonymous', 'anonymous proxy', 'high-anonymous'
4949
ELITE = 'elite', 'elite proxy', 'HIGH'
5050
UNKNOWN = 'unknown', 'none'
51+
# TODO @pgaref: use a default case instead!!!
52+
BAD = "bad", "bad1"

tests/mocks.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -163,7 +163,7 @@ def samair_mock(url, request):
163163
\n
164164
<tr class="anon">
165165
<td data-label="IP:port ">152.251.141.93:8080</td>
166-
<td data-label="Anonymity Type: ">elite</td>
166+
<td data-label="Anonymity Type: ">elite </td>
167167
<td data-label="Checked: ">Jul-16, 04:39</td>
168168
<td data-label="Country: ">Brazil</td>
169169
<td data-label="City: ">&nbsp;</td>

0 commit comments

Comments
 (0)