Skip to content

Commit 8cc4686

Browse files
authored
Be 5270 CVE 2023 27043 python 2 7 (#73)
* CVE-2023-27043 First attempt at porting the patch From here: https://github.com/python/cpython/pull/123770/files#diff-e3fbfb8d74a5297a5432876e8cc63b8a91836b416989c117cecab3722285ce21 * CVE-2023-27043 First pass at fixing errors * CVE-2023-27043 Fixup some more tests * CVE-2023-27043 Test both strict and non-strict getaddresses As per Python3.9
1 parent a22a1d8 commit 8cc4686

8 files changed

Lines changed: 3796 additions & 17 deletions

File tree

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -87,3 +87,4 @@ coverage/
8787
externals/
8888
htmlcov/
8989
gmon.out
90+
.aider*

Doc/library/email.utils.rst

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -21,13 +21,18 @@ There are several useful utilities provided in the :mod:`email.utils` module:
2121
begins with angle brackets, they are stripped off.
2222

2323

24-
.. function:: parseaddr(address)
24+
.. function:: parseaddr(address, strict=True)
2525

2626
Parse address -- which should be the value of some address-containing field such
2727
as :mailheader:`To` or :mailheader:`Cc` -- into its constituent *realname* and
2828
*email address* parts. Returns a tuple of that information, unless the parse
2929
fails, in which case a 2-tuple of ``('', '')`` is returned.
3030

31+
If *strict* is true, use a strict parser which rejects malformed inputs.
32+
33+
.. versionchanged:: 2.7.18.12
34+
Add *strict* optional parameter and reject malformed inputs by default.
35+
3136

3237
.. function:: formataddr(pair)
3338

@@ -37,7 +42,7 @@ There are several useful utilities provided in the :mod:`email.utils` module:
3742
second element is returned unmodified.
3843

3944

40-
.. function:: getaddresses(fieldvalues)
45+
.. function:: getaddresses(fieldvalues, strict=True)
4146

4247
This method returns a list of 2-tuples of the form returned by ``parseaddr()``.
4348
*fieldvalues* is a sequence of header field values as might be returned by
@@ -52,6 +57,9 @@ There are several useful utilities provided in the :mod:`email.utils` module:
5257
resent_ccs = msg.get_all('resent-cc', [])
5358
all_recipients = getaddresses(tos + ccs + resent_tos + resent_ccs)
5459

60+
.. versionchanged:: 2.7.18.12
61+
Add *strict* optional parameter and reject malformed inputs by default.
62+
5563

5664
.. function:: parsedate(date)
5765

Doc/whatsnew/2.7.rst

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2793,3 +2793,20 @@ The author would like to thank the following people for offering
27932793
suggestions, corrections and assistance with various drafts of this
27942794
article: Nick Coghlan, Philip Jenvey, Ryan Lovett, R. David Murray,
27952795
Hugh Secker-Walker.
2796+
2797+
2798+
Notable changes in 2.7.18.12
2799+
============================
2800+
2801+
email
2802+
-----
2803+
2804+
* :func:`email.utils.getaddresses` and :func:`email.utils.parseaddr` now return
2805+
``('', '')`` 2-tuples in more situations where invalid email addresses are
2806+
encountered, instead of potentially inaccurate values.
2807+
An optional *strict* parameter was added to these two functions:
2808+
use ``strict=False`` to get the old behavior, accepting malformed inputs.
2809+
``getattr(email.utils, 'supports_strict_parsing', False)`` can be used to
2810+
check if the *strict* parameter is available.
2811+
(Contributed by Thomas Dwyer and Victor Stinner for :gh:`102988` to improve
2812+
the CVE-2023-27043 fix.)

Lib/email/test/test_email.py

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2417,9 +2417,11 @@ def test_getaddresses(self):
24172417
def test_getaddresses_nasty(self):
24182418
eq = self.assertEqual
24192419
eq(Utils.getaddresses(['foo: ;']), [('', '')])
2420-
eq(Utils.getaddresses(
2421-
['[]*-- =~$']),
2422-
[('', ''), ('', ''), ('', '*--')])
2420+
addresses = ['[]*-- =~$']
2421+
eq(Utils.getaddresses(addresses),
2422+
[('', '')])
2423+
eq(Utils.getaddresses(addresses, strict=False),
2424+
[('', ''), ('', ''), ('', '*--')])
24232425
eq(Utils.getaddresses(
24242426
['foo: ;', '"Jason R. Mastaler" <jason@dom.ain>']),
24252427
[('', ''), ('Jason R. Mastaler', 'jason@dom.ain')])

Lib/email/test/test_email_renamed.py

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2278,9 +2278,11 @@ def test_getaddresses(self):
22782278
def test_getaddresses_nasty(self):
22792279
eq = self.assertEqual
22802280
eq(utils.getaddresses(['foo: ;']), [('', '')])
2281-
eq(utils.getaddresses(
2282-
['[]*-- =~$']),
2283-
[('', ''), ('', ''), ('', '*--')])
2281+
addresses = ['[]*-- =~$']
2282+
eq(utils.getaddresses(addresses),
2283+
[('', '')])
2284+
eq(utils.getaddresses(addresses, strict=False),
2285+
[('', ''), ('', ''), ('', '*--')])
22842286
eq(utils.getaddresses(
22852287
['foo: ;', '"Jason R. Mastaler" <jason@dom.ain>']),
22862288
[('', ''), ('Jason R. Mastaler', 'jason@dom.ain')])

Lib/email/utils.py

Lines changed: 138 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -99,14 +99,125 @@ def formataddr(pair):
9999
return '%s%s%s <%s>' % (quotes, name, quotes, address)
100100
return address
101101

102+
def _iter_escaped_chars(addr):
103+
pos = 0
104+
escape = False
105+
for pos, ch in enumerate(addr):
106+
if escape:
107+
yield (pos, '\\' + ch)
108+
escape = False
109+
elif ch == '\\':
110+
escape = True
111+
else:
112+
yield (pos, ch)
113+
if escape:
114+
yield (pos, '\\')
115+
116+
117+
def _strip_quoted_realnames(addr):
    """Return addr with every double-quoted section removed.

    Quotes preceded by a backslash do not open or close a section.  Text
    after an opening quote that is never closed is kept unchanged.
    """
    if '"' not in addr:
        # Nothing is quoted, so there is nothing to strip.
        return addr

    kept = []
    resume_at = 0          # first index not yet copied or dropped
    quote_start = None     # index of the currently open quote, if any
    for index, token in _iter_escaped_chars(addr):
        if token != '"':
            continue
        if quote_start is None:
            quote_start = index
            continue
        # Closing quote: keep what preceded the quoted section, drop the
        # section itself (quotes included).
        if resume_at != quote_start:
            kept.append(addr[resume_at:quote_start])
        resume_at = index + 1
        quote_start = None

    if resume_at < len(addr):
        kept.append(addr[resume_at:])

    return ''.join(kept)
103140

104-
def getaddresses(fieldvalues):
105-
"""Return a list of (REALNAME, EMAIL) for each fieldvalue."""
106-
all = COMMASPACE.join(fieldvalues)
107-
a = _AddressList(all)
108-
return a.addresslist
109141

142+
# Feature flag: callers can test for the *strict* parameter of getaddresses()
# and parseaddr() with getattr(email.utils, 'supports_strict_parsing', False).
supports_strict_parsing = True
143+
144+
def getaddresses(fieldvalues, strict=True):
    """Return a list of (REALNAME, EMAIL) or ('','') for each fieldvalue.

    fieldvalues is an iterable of header field values; values that are not
    already text are converted with str().

    When parsing fails for a fieldvalue, a 2-tuple of ('', '') is returned in
    its place.

    If strict is true, use a strict parser which rejects malformed inputs.
    """
    # Only coerce values that are not already text.  On Python 2, calling
    # str() on a unicode value containing non-ASCII characters raises
    # UnicodeEncodeError, whereas joining unicode values works.
    # type(u'') is ``unicode`` on Python 2 and ``str`` on Python 3.
    text_types = (str, type(u''))
    fieldvalues = [v if isinstance(v, text_types) else str(v)
                   for v in fieldvalues]

    if not strict:
        return _AddressList(COMMASPACE.join(fieldvalues)).addresslist

    # In strict mode, if the number of parsed addresses differs from the
    # number of addresses expected from the input, a parsing error has
    # occurred and a list containing a single empty 2-tuple [('', '')] is
    # returned instead.  This is done to avoid invalid output.
    #
    # Malformed input: getaddresses(['alice@example.com <bob@example.com>'])
    # Invalid output: [('', 'alice@example.com'), ('', 'bob@example.com')]
    # Safe output: [('', '')]
    fieldvalues = _pre_parse_validation(fieldvalues)
    addr = COMMASPACE.join(fieldvalues)
    result = _post_parse_validation(_AddressList(addr).addresslist)

    expected = 0
    for v in fieldvalues:
        # A comma inside a quoted real name is not a delimiter, so strip the
        # quoted parts before counting the commas.
        v = _strip_quoted_realnames(v)
        # Expected number of addresses: 1 + number of commas
        expected += 1 + v.count(',')
    if len(result) != expected:
        return [('', '')]

    return result
184+
185+
186+
def _check_parenthesis(addr):
    """Return True if the parentheses in addr are balanced.

    Parentheses inside quoted real names are ignored, and so are
    backslash-escaped parentheses.  A ')' that appears before its matching
    '(' makes the check fail immediately.
    """
    depth = 0
    for _, token in _iter_escaped_chars(_strip_quoted_realnames(addr)):
        if token == ')':
            depth -= 1
            if depth < 0:
                return False
        elif token == '(':
            depth += 1
    return depth == 0
199+
200+
201+
def _pre_parse_validation(email_header_fields):
    """Return a copy of the header field list in which every value with
    unbalanced parentheses is replaced by the placeholder string "('', '')".
    """
    return [
        field if _check_parenthesis(field) else "('', '')"
        for field in email_header_fields
    ]
209+
210+
211+
def _post_parse_validation(parsed_email_header_tuples):
212+
accepted_values = []
213+
# The parser would have parsed a correctly formatted domain-literal
214+
# The existence of an [ after parsing indicates a parsing failure
215+
for v in parsed_email_header_tuples:
216+
if '[' in v[1]:
217+
v = ('', '')
218+
accepted_values.append(v)
219+
220+
return accepted_values
110221

111222

112223
ecre = re.compile(r'''
@@ -210,19 +321,37 @@ def parsedate_tz(data):
210321
return _parsedate_tz(data)
211322

212323

213-
def parseaddr(addr):
324+
def parseaddr(addr, strict=True):
    """
    Parse addr into its constituent realname and email address parts.

    Return a tuple of realname and email address, unless the parse fails, in
    which case return a 2-tuple of ('', '').

    If strict is true, use a strict parser which rejects malformed inputs:
    non-text input, unbalanced parentheses, a leftover '[' in the parsed
    address, or input that parses to more than one address all yield
    ('', '').
    """
    if not strict:
        addrs = _AddressList(addr).addresslist
        if not addrs:
            return ('', '')
        return addrs[0]

    if isinstance(addr, list):
        addr = addr[0]

    # Accept both text types: on Python 2 a unicode address must not be
    # rejected just because it is not a byte str.  type(u'') is ``unicode``
    # on Python 2 and ``str`` on Python 3.
    if not isinstance(addr, (str, type(u''))):
        return ('', '')

    addr = _pre_parse_validation([addr])[0]
    addrs = _post_parse_validation(_AddressList(addr).addresslist)

    # Exactly one address is expected; anything else is a parse failure.
    if len(addrs) != 1:
        return ('', '')

    return addrs[0]
224352

225353

354+
226355
# rfc822.unquote() doesn't properly de-backslash-ify in Python pre-2.3.
227356
def unquote(str):
228357
"""Remove quotes from a string."""

0 commit comments

Comments
 (0)