Skip to content

Commit a11a6a8

Browse files
committed
checks.network: only flag HTTP 301 redirects
Also flag SRC_URI URL redirects. Fixes #149
1 parent 601cacf commit a11a6a8

1 file changed

Lines changed: 51 additions & 34 deletions

File tree

src/pkgcheck/checks/network.py

Lines changed: 51 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -1,19 +1,19 @@
11
import concurrent.futures
22
import urllib.request
3+
import socket
34
import ssl
45
import threading
56
from functools import partial
67
from itertools import chain
78

89
from pkgcore.fetch import fetchable
9-
from snakeoil.compatibility import IGNORED_EXCEPTIONS
1010

1111
from .. import addons, base
1212
from . import NetworkCheck
1313

1414

15-
class DeadHomepage(base.FilteredVersionResult, base.Warning):
16-
"""Package with a dead HOMEPAGE."""
15+
class _DeadUrlResult(base.FilteredVersionResult, base.Warning):
16+
"""Generic result for a dead URL."""
1717

1818
def __init__(self, url, message, **kwargs):
1919
super().__init__(**kwargs)
@@ -22,37 +22,40 @@ def __init__(self, url, message, **kwargs):
2222

2323
@property
2424
def desc(self):
25-
return f'dead homepage, {self.message}: {self.url!r}'
25+
return f'{self.message}: {self.url!r}'
2626

2727

28-
class SSLCertificateError(base.FilteredVersionResult, base.Warning):
29-
"""Package with https:// HOMEPAGE with an invalid SSL cert."""
28+
class DeadHomepage(_DeadUrlResult):
29+
"""Package with a dead HOMEPAGE."""
3030

31-
def __init__(self, url, message, **kwargs):
32-
super().__init__(**kwargs)
33-
self.url = url
34-
self.message = message
3531

36-
@property
37-
def desc(self):
38-
return f'SSL cert error, {self.message}: {self.url!r}'
32+
class DeadSrcUrl(_DeadUrlResult):
33+
"""Package with a dead SRC_URI target."""
3934

4035

41-
class RedirectedHomepage(base.FilteredVersionResult, base.Warning):
42-
"""Package with a HOMEPAGE that redirects to a different site."""
36+
class _RedirectedUrlResult(base.FilteredVersionResult, base.Warning):
    """Shared base for results reporting a permanently redirected URL.

    Keeps both the URL that was checked and the location it now points to
    so that subclasses only have to supply their own docstring.
    """

    def __init__(self, url, new_url, **kwargs):
        # remaining keyword arguments are handed to the parent result classes
        super().__init__(**kwargs)
        self.url, self.new_url = url, new_url

    @property
    def desc(self):
        """Description in the form ``<old url> -> <new url>``."""
        return f'permanently redirected url, {self.url!r} -> {self.new_url!r}'
5247

5348

54-
class DeadSrcUrl(base.FilteredVersionResult, base.Warning):
55-
"""Package with a dead SRC_URI target."""
49+
class RedirectedHomepage(_RedirectedUrlResult):
50+
"""Package with a HOMEPAGE that permanently redirects to a different site."""
51+
52+
53+
class RedirectedSrcUrl(_RedirectedUrlResult):
54+
"""Package with a SRC_URI target that permanently redirects to a different site."""
55+
56+
57+
class SSLCertificateError(base.FilteredVersionResult, base.Warning):
58+
"""Package with https:// HOMEPAGE with an invalid SSL cert."""
5659

5760
def __init__(self, url, message, **kwargs):
5861
super().__init__(**kwargs)
@@ -61,7 +64,23 @@ def __init__(self, url, message, **kwargs):
6164

6265
@property
6366
def desc(self):
64-
return f'dead SRC_URI target, {self.message}: {self.url!r}'
67+
return f'SSL cert error, {self.message}: {self.url!r}'
68+
69+
70+
class _HttpRedirected301(Exception):
    """Exception used for flagging HTTP 301 redirects.

    Args:
        url: location the permanently redirected URL points to
    """

    def __init__(self, url):
        # Forward to Exception so str(e) and e.args carry the target URL
        # instead of being empty.
        super().__init__(url)
        self.url = url


class _FlagHttp301RedirectHandler(urllib.request.HTTPRedirectHandler):
    """Flag HTTP 301 redirects when using urllib."""

    def http_error_301(self, req, fp, code, msg, headers):
        """Follow a 301 redirect, then report it via _HttpRedirected301.

        The redirect is still followed through the parent implementation, so
        any error raised while opening the target propagates to the caller
        unchanged.

        Args:
            req: request that received the 301 response
            fp: file-like object holding the 301 response body
            code: HTTP status code
            msg: HTTP status message
            headers: response headers (mapping with ``.get``)

        Returns:
            Whatever the parent handler returns when the response carries no
            redirect target (nothing can be flagged in that case).

        Raises:
            _HttpRedirected301: with the absolute redirect target when the
                redirect was followed successfully.
        """
        from urllib.parse import urljoin

        # Same header lookup as the stdlib handler: Location first, then URI.
        new_url = headers.get('Location') or headers.get('URI')
        response = super().http_error_301(req, fp, code, msg, headers)
        if not new_url:
            return response
        # The followed response is only needed to prove the target opens;
        # close it so the connection isn't leaked when we raise.
        if response is not None:
            response.close()
        # Location may be relative; report it resolved against the request.
        raise _HttpRedirected301(urljoin(req.full_url, new_url))
6584

6685

6786
class _UrlCheck(NetworkCheck):
@@ -75,30 +94,27 @@ def __init__(self, *args, **kwargs):
7594
self.executor = concurrent.futures.ThreadPoolExecutor()
7695
self.timeout = self.options.timeout
7796
self.reporter_lock = threading.Lock()
78-
self.redirected_result = None
7997
self.dead_result = None
98+
self.redirected_result = None
99+
100+
self.url_opener = urllib.request.build_opener(_FlagHttp301RedirectHandler())
80101
# spoof user agent similar to what would be used when fetching files
81-
self.headers = {'User-Agent': 'Wget/1.20.3 (linux-gnu)'}
102+
self.url_opener.addheaders = [('User-Agent', 'Wget/1.20.3 (linux-gnu)')]
82103

83104
def _url_to_result(self, url):
    """Open a URL and map the outcome to a result factory.

    Args:
        url: URL to check

    Returns:
        False when the URL opens without error (or fails with an HTTP error
        code below 400), otherwise a ``functools.partial`` wrapping the
        result class for the failure with ``url`` and a message or redirect
        target already bound.
    """
    result = False
    try:
        # Only reachability matters; the context manager closes the response
        # so the connection isn't left open.
        with self.url_opener.open(url, timeout=self.timeout):
            pass
    except _HttpRedirected301 as e:
        result = partial(self.redirected_result, url, e.url)
    except urllib.error.HTTPError as e:
        if e.code >= 400:
            result = partial(self.dead_result, url, str(e))
    except urllib.error.URLError as e:
        # urllib wraps OSError raised while sending the request in URLError,
        # and on Python 3.7+ certificate failures are OSError subclasses, so
        # look at the wrapped reason to keep them out of the dead-URL bucket.
        if isinstance(e.reason, ssl.CertificateError):
            result = partial(SSLCertificateError, url, str(e.reason))
        else:
            result = partial(self.dead_result, url, str(e))
    except ssl.CertificateError as e:
        result = partial(SSLCertificateError, url, str(e))
    except socket.timeout as e:
        result = partial(self.dead_result, url, str(e))
    return result
104120

@@ -133,8 +149,8 @@ class HomepageUrlCheck(_UrlCheck):
133149

134150
def __init__(self, *args, **kwargs):
135151
super().__init__(*args, **kwargs)
136-
self.redirected_result = RedirectedHomepage
137152
self.dead_result = DeadHomepage
153+
self.redirected_result = RedirectedHomepage
138154

139155
def _get_urls(self, pkg):
140156
return pkg.homepage
@@ -143,13 +159,14 @@ def _get_urls(self, pkg):
143159
class FetchablesUrlCheck(_UrlCheck):
144160
"""Various SRC_URI related checks that require internet access."""
145161

146-
known_results = (DeadSrcUrl, SSLCertificateError)
162+
known_results = (DeadSrcUrl, RedirectedSrcUrl, SSLCertificateError)
147163
required_addons = (addons.UseAddon,)
148164

149165
def __init__(self, options, iuse_handler):
150166
super().__init__(options)
151167
self.fetch_filter = iuse_handler.get_filter('fetchables')
152168
self.dead_result = DeadSrcUrl
169+
self.redirected_result = RedirectedSrcUrl
153170

154171
def _get_urls(self, pkg):
155172
# ignore conditionals

0 commit comments

Comments
 (0)