@@ -99,14 +99,125 @@ def formataddr(pair):
9999 return '%s%s%s <%s>' % (quotes , name , quotes , address )
100100 return address
101101
102+ def _iter_escaped_chars (addr ):
103+ pos = 0
104+ escape = False
105+ for pos , ch in enumerate (addr ):
106+ if escape :
107+ yield (pos , '\\ ' + ch )
108+ escape = False
109+ elif ch == '\\ ' :
110+ escape = True
111+ else :
112+ yield (pos , ch )
113+ if escape :
114+ yield (pos , '\\ ' )
115+
116+
117+ def _strip_quoted_realnames (addr ):
118+ """Strip real names between quotes."""
119+ if '"' not in addr :
120+ # Fast path
121+ return addr
122+
123+ start = 0
124+ open_pos = None
125+ result = []
126+ for pos , ch in _iter_escaped_chars (addr ):
127+ if ch == '"' :
128+ if open_pos is None :
129+ open_pos = pos
130+ else :
131+ if start != open_pos :
132+ result .append (addr [start :open_pos ])
133+ start = pos + 1
134+ open_pos = None
102135
136+ if start < len (addr ):
137+ result .append (addr [start :])
138+
139+ return '' .join (result )
103140
# Module-level flag set alongside the strict parser; presumably lets callers
# feature-detect the ``strict`` parameter -- confirm against the module docs.
supports_strict_parsing = True


def getaddresses(fieldvalues, strict=True):
    """Return a list of (REALNAME, EMAIL) or ('','') for each fieldvalue.

    When parsing fails for a fieldvalue, a 2-tuple of ('', '') is returned in
    its place.

    If strict is true, use a strict parser which rejects malformed inputs.
    """

    # In strict mode the number of parsed addresses must match the number
    # expected from the input.  On a mismatch a parsing error has occurred
    # and a list holding a single empty 2-tuple, [('', '')], is returned
    # instead, to avoid producing invalid output.
    #
    # Malformed input: getaddresses(['alice@example.com <bob@example.com>'])
    # Invalid output: [('', 'alice@example.com'), ('', 'bob@example.com')]
    # Safe output: [('', '')]

    if not strict:
        # Lenient mode: join everything and hand it straight to the parser.
        joined = COMMASPACE.join(map(str, fieldvalues))
        return _AddressList(joined).addresslist

    values = _pre_parse_validation([str(item) for item in fieldvalues])
    parsed = _AddressList(COMMASPACE.join(values)).addresslist
    result = _post_parse_validation(parsed)

    # Expected number of addresses per field value: 1 + number of commas.
    # A comma inside a quoted real name is not a delimiter, so quoted
    # sections are stripped before the commas are counted.
    expected = sum(
        1 + _strip_quoted_realnames(value).count(',') for value in values
    )

    # Treat the output as invalid when the counts disagree.
    if len(result) != expected:
        return [('', '')]
    return result
184+
185+
def _check_parenthesis(addr):
    """Return True if the parentheses in addr are balanced.

    Parentheses inside double-quoted sections and backslash-escaped
    parentheses are ignored.  A ')' with no matching earlier '(' makes the
    result False immediately.
    """
    # Quoted real names may legitimately contain parentheses; drop them.
    unquoted = _strip_quoted_realnames(addr)

    depth = 0
    for _, token in _iter_escaped_chars(unquoted):
        if token == '(':
            depth += 1
        elif token == ')':
            if depth == 0:
                # Closing parenthesis without a matching opener.
                return False
            depth -= 1
    return depth == 0
199+
200+
201+ def _pre_parse_validation (email_header_fields ):
202+ accepted_values = []
203+ for v in email_header_fields :
204+ if not _check_parenthesis (v ):
205+ v = "('', '')"
206+ accepted_values .append (v )
207+
208+ return accepted_values
209+
210+
211+ def _post_parse_validation (parsed_email_header_tuples ):
212+ accepted_values = []
213+ # The parser would have parsed a correctly formatted domain-literal
214+ # The existence of an [ after parsing indicates a parsing failure
215+ for v in parsed_email_header_tuples :
216+ if '[' in v [1 ]:
217+ v = ('' , '' )
218+ accepted_values .append (v )
219+
220+ return accepted_values
110221
111222
112223ecre = re .compile (r'''
@@ -210,19 +321,37 @@ def parsedate_tz(data):
210321 return _parsedate_tz (data )
211322
212323
def parseaddr(addr, strict=True):
    """
    Parse addr into its constituent realname and email address parts.

    Return a tuple of realname and email address, unless the parse fails, in
    which case return a 2-tuple of ('', '').

    If strict is True, use a strict parser which rejects malformed inputs.
    In strict mode a list argument is reduced to its first element (an empty
    list yields ('', '')), and any non-string value yields ('', '').
    """
    if not strict:
        addrs = _AddressList(addr).addresslist
        if not addrs:
            return ('', '')
        return addrs[0]

    if isinstance(addr, list):
        # An empty list has no first element; report it as a failed parse
        # rather than letting addr[0] raise IndexError.
        if not addr:
            return ('', '')
        addr = addr[0]

    if not isinstance(addr, str):
        return ('', '')

    addr = _pre_parse_validation([addr])[0]
    addrs = _post_parse_validation(_AddressList(addr).addresslist)

    # Exactly one address is expected; zero or several means a failed parse.
    if len(addrs) != 1:
        return ('', '')

    return addrs[0]
224352
225353
354+
226355# rfc822.unquote() doesn't properly de-backslash-ify in Python pre-2.3.
227356def unquote (str ):
228357 """Remove quotes from a string."""
0 commit comments