Skip to content

Commit a946e7c

Browse files
committed
Hotfix for SamairProxyParser -- TODO: 1) parse all site pages 2) add checks for when the proxy page's HTML structure changes
1 parent 5b62ef8 commit a946e7c

1 file changed

Lines changed: 16 additions & 14 deletions

File tree

project/http/requests/parsers/samairproxyParser.py

Lines changed: 16 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -15,24 +15,26 @@ def parse_proxyList(self):
1515
content = requests.get(self.get_URl()).content
1616
soup = BeautifulSoup(content, "html.parser")
1717
# css provides the port number so we reverse it
18-
for href in soup.findAll('link'):
19-
if '/styles/' in href.get('href'):
20-
style = "http://www.samair.ru" + href.get('href')
21-
break
22-
css = requests.get(style).content.split('\n')
23-
css.pop()
24-
ports = {}
25-
for l in css:
26-
p = l.split(' ')
27-
key = p[0].split(':')[0][1:]
28-
value = p[1].split('\"')[1]
29-
ports[key] = value
18+
# for href in soup.findAll('link'):
19+
# if '/styles/' in href.get('href'):
20+
# style = "http://www.samair.ru" + href.get('href')
21+
# break
22+
# css = requests.get(style).content.split('\n')
23+
# css.pop()
24+
# ports = {}
25+
# for l in css:
26+
# p = l.split(' ')
27+
# key = p[0].split(':')[0][1:]
28+
# value = p[1].split('\"')[1]
29+
# ports[key] = value
3030

3131
table = soup.find("table", attrs={"id": "proxylist"})
3232
# The first tr contains the field names.
3333
headings = [th.get_text() for th in table.find("tr").find_all("th")]
34-
for row in table.find_all("span")[1:]:
35-
curr_proxy_list.append('http://' + row.text + ports[row['class'][0]])
34+
for row in table.find_all("tr")[1:]:
35+
td_row = row.find("td")
36+
# curr_proxy_list.append('http://' + row.text + ports[row['class'][0]])
37+
curr_proxy_list.append('http://' +td_row.text)
3638

3739
return curr_proxy_list
3840

0 commit comments

Comments (0)