Skip to content

Commit 2d455fd

Browse files
committed
Fail-safe against timeouts.
1 parent ceb54d0 commit 2d455fd

1 file changed

Lines changed: 30 additions & 11 deletions

File tree

.CI/checkHTMLDoc/checkLinks.py

Lines changed: 30 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -52,25 +52,44 @@ def _getURLs(path):
5252
return urls
5353

5454
def _checkURL(url):
55+
import sys
56+
print(f'[checkLinks] Checking {url}', flush=True, file=sys.stderr)
57+
timeout = 5
58+
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'}
59+
5560
try:
56-
rc = urllib2.urlopen(url).getcode()
61+
req = urllib2.Request(url, headers=headers)
62+
rc = urllib2.urlopen(req, timeout=timeout).getcode()
63+
print(f'[checkLinks] -> {rc}', flush=True, file=sys.stderr)
5764
return (url, rc)
58-
except:
59-
pass
60-
try:
61-
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64)'}
62-
rc = urllib2.urlopen(urllib2.Request(url, None, headers), context=ssl._create_unverified_context()).getcode()
6365
except urllib2.HTTPError as e:
6466
rc = e.code
67+
print(f'[checkLinks] -> HTTPError {rc}', flush=True, file=sys.stderr)
6568
if rc == 429:
6669
# Ignore too many requests
67-
rc = 200
70+
return (url, 200)
71+
elif rc == 403:
72+
# Ignore forbidden (server blocking automated requests)
73+
return (url, 200)
74+
elif rc == 418:
75+
# Warn but don't fail on teapot (rate limiting from academic sites)
76+
print(f'[checkLinks] WARNING: {url} returned 418 (rate limited?)', flush=True, file=sys.stderr)
77+
return (url, 200)
78+
elif rc == 500:
79+
# Warn but don't fail on server errors (often transient, work in browser)
80+
print(f'[checkLinks] WARNING: {url} returned 500 (server error, may be transient)', flush=True, file=sys.stderr)
81+
return (url, 200)
6882
elif rc in (301, 302):
6983
# Handle redirect errors
70-
rc = urllib2.build_opener(urllib2.HTTPCookieProcessor).open(url).code
71-
except:
72-
rc = 0
73-
return (url, rc)
84+
try:
85+
rc = urllib2.build_opener(urllib2.HTTPCookieProcessor).open(url, timeout=timeout).getcode()
86+
except Exception:
87+
pass
88+
return (url, rc)
89+
except Exception as e:
90+
print(f'[checkLinks] -> Timeout/error: {type(e).__name__}', flush=True, file=sys.stderr)
91+
# Treat all timeouts/errors as 0 (skip them)
92+
return (url, 0)
7493

7594
def checkLinks(path):
7695
if os.path.isdir(path):

0 commit comments

Comments
 (0)