11import concurrent .futures
22import urllib .request
3+ import socket
34import ssl
45import threading
56from functools import partial
67from itertools import chain
78
89from pkgcore .fetch import fetchable
9- from snakeoil .compatibility import IGNORED_EXCEPTIONS
1010
1111from .. import addons , base
1212from . import NetworkCheck
1313
1414
15- class DeadHomepage (base .FilteredVersionResult , base .Warning ):
16- """Package with a dead HOMEPAGE ."""
15+ class _DeadUrlResult (base .FilteredVersionResult , base .Warning ):
16+ """Generic result for a dead URL ."""
1717
1818 def __init__ (self , url , message , ** kwargs ):
1919 super ().__init__ (** kwargs )
@@ -22,37 +22,40 @@ def __init__(self, url, message, **kwargs):
2222
2323 @property
2424 def desc (self ):
25- return f'dead homepage, { self .message } : { self .url !r} '
25+ return f'{ self .message } : { self .url !r} '
2626
2727
28- class SSLCertificateError ( base . FilteredVersionResult , base . Warning ):
29- """Package with https:// HOMEPAGE with an invalid SSL cert ."""
28+ class DeadHomepage ( _DeadUrlResult ):
29+ """Package with a dead HOMEPAGE ."""
3030
31- def __init__ (self , url , message , ** kwargs ):
32- super ().__init__ (** kwargs )
33- self .url = url
34- self .message = message
3531
36- @property
37- def desc (self ):
38- return f'SSL cert error, { self .message } : { self .url !r} '
32+ class DeadSrcUrl (_DeadUrlResult ):
33+ """Package with a dead SRC_URI target."""
3934
4035
41- class RedirectedHomepage (base .FilteredVersionResult , base .Warning ):
42- """Package with a HOMEPAGE that redirects to a different site."""
36+ class _RedirectedUrlResult (base .FilteredVersionResult , base .Warning ):
37+ """Generic result for a URL that permanently redirects to a different site."""
4338
44- def __init__ (self , url , redirected , ** kwargs ):
39+ def __init__ (self , url , new_url , ** kwargs ):
4540 super ().__init__ (** kwargs )
4641 self .url = url
47- self .redirected = redirected
42+ self .new_url = new_url
4843
4944 @property
5045 def desc (self ):
51- return f'redirected homepage , { self .url !r} -> { self .redirected !r} '
46+ return f'permanently redirected url , { self .url !r} -> { self .new_url !r} '
5247
5348
54- class DeadSrcUrl (base .FilteredVersionResult , base .Warning ):
55- """Package with a dead SRC_URI target."""
49+ class RedirectedHomepage (_RedirectedUrlResult ):
50+ """Package with a HOMEPAGE that permanently redirects to a different site."""
51+
52+
53+ class RedirectedSrcUrl (_RedirectedUrlResult ):
54+ """Package with a SRC_URI target that permanently redirects to a different site."""
55+
56+
57+ class SSLCertificateError (base .FilteredVersionResult , base .Warning ):
58+ """Package with https:// HOMEPAGE with an invalid SSL cert."""
5659
5760 def __init__ (self , url , message , ** kwargs ):
5861 super ().__init__ (** kwargs )
@@ -61,7 +64,23 @@ def __init__(self, url, message, **kwargs):
6164
6265 @property
6366 def desc (self ):
64- return f'dead SRC_URI target, { self .message } : { self .url !r} '
67+ return f'SSL cert error, { self .message } : { self .url !r} '
68+
69+
70+ class _HttpRedirected301 (Exception ):
71+ """Exception used for flagging HTTP 301 redirects."""
72+
73+ def __init__ (self , url ):
74+ self .url = url
75+
76+
77+ class _FlagHttp301RedirectHandler (urllib .request .HTTPRedirectHandler ):
78+ """Flag HTTP 301 redirects when using urllib."""
79+
80+ def http_error_301 (self , req , fp , code , msg , headers ):
81+ new_url = headers ['Location' ]
82+ super ().http_error_301 (req , fp , code , msg , headers )
83+ raise _HttpRedirected301 (new_url )
6584
6685
6786class _UrlCheck (NetworkCheck ):
@@ -75,30 +94,27 @@ def __init__(self, *args, **kwargs):
7594 self .executor = concurrent .futures .ThreadPoolExecutor ()
7695 self .timeout = self .options .timeout
7796 self .reporter_lock = threading .Lock ()
78- self .redirected_result = None
7997 self .dead_result = None
98+ self .redirected_result = None
99+
100+ self .url_opener = urllib .request .build_opener (_FlagHttp301RedirectHandler ())
80101 # spoof user agent similar to what would be used when fetching files
81- self .headers = { 'User-Agent' : 'Wget/1.20.3 (linux-gnu)' }
102+ self .url_opener . addheaders = [( 'User-Agent' , 'Wget/1.20.3 (linux-gnu)' )]
82103
83104 def _url_to_result (self , url ):
84105 result = False
85- req = urllib .request .Request (url , headers = self .headers )
86106 try :
87- response = urllib .request .urlopen (req , timeout = self .timeout )
88- if self .redirected_result is not None :
89- response_url = response .geturl ()
90- if response_url != url :
91- result = partial (self .redirected_result , url , response_url )
107+ response = self .url_opener .open (url , timeout = self .timeout )
108+ except _HttpRedirected301 as e :
109+ result = partial (self .redirected_result , url , e .url )
92110 except urllib .error .HTTPError as e :
93111 if e .code >= 400 :
94112 result = partial (self .dead_result , url , str (e ))
95113 except urllib .error .URLError as e :
96114 result = partial (self .dead_result , url , str (e ))
97115 except ssl .CertificateError as e :
98116 result = partial (SSLCertificateError , url , str (e ))
99- except IGNORED_EXCEPTIONS :
100- raise
101- except Exception as e :
117+ except socket .timeout as e :
102118 result = partial (self .dead_result , url , str (e ))
103119 return result
104120
@@ -133,8 +149,8 @@ class HomepageUrlCheck(_UrlCheck):
133149
134150 def __init__ (self , * args , ** kwargs ):
135151 super ().__init__ (* args , ** kwargs )
136- self .redirected_result = RedirectedHomepage
137152 self .dead_result = DeadHomepage
153+ self .redirected_result = RedirectedHomepage
138154
139155 def _get_urls (self , pkg ):
140156 return pkg .homepage
@@ -143,13 +159,14 @@ def _get_urls(self, pkg):
143159class FetchablesUrlCheck (_UrlCheck ):
144160 """Various SRC_URI related checks that require internet access."""
145161
146- known_results = (DeadSrcUrl , SSLCertificateError )
162+ known_results = (DeadSrcUrl , RedirectedSrcUrl , SSLCertificateError )
147163 required_addons = (addons .UseAddon ,)
148164
149165 def __init__ (self , options , iuse_handler ):
150166 super ().__init__ (options )
151167 self .fetch_filter = iuse_handler .get_filter ('fetchables' )
152168 self .dead_result = DeadSrcUrl
169+ self .redirected_result = RedirectedSrcUrl
153170
154171 def _get_urls (self , pkg ):
155172 # ignore conditionals
0 commit comments