43
43
# Matches any single one of: ] [ \ ( ) < > @ , : ; " .
# NOTE(review): presumably the characters that force a display name to be
# quoted when formatting an address -- the use site is not visible here.
specialsre = re .compile (r'[][\\()<>@,:;".]' )
44
44
# Matches a single backslash or double-quote character.
# NOTE(review): presumably the characters that need backslash-escaping inside
# a quoted string -- the use site is not visible here.
escapesre = re .compile (r'[\\"]' )
45
45
46
+
46
47
def _has_surrogates (s ):
47
48
"""Return True if s may contain surrogate-escaped binary data."""
48
49
# This check is based on the fact that unless there are surrogates, utf8
@@ -103,12 +104,127 @@ def formataddr(pair, charset='utf-8'):
103
104
return address
104
105
105
106
107
def _iter_escaped_chars(addr):
    """Yield (position, token) pairs for addr, honouring backslash escapes.

    A backslash followed by another character is emitted as one two-character
    token (the backslash plus that character), reported at the position of
    the escaped character.  Every other character is emitted on its own at
    its own position.  A trailing, unpaired backslash is emitted alone at the
    position of the last character.
    """
    pos = 0  # keeps ``pos`` bound for the trailing yield even on empty input
    escape = False
    for pos, ch in enumerate(addr):
        if escape:
            # Previous character was a backslash: emit the escaped pair.
            yield (pos, '\\' + ch)
            escape = False
        elif ch == '\\':
            # Hold the backslash back until the character it escapes is seen.
            escape = True
        else:
            yield (pos, ch)
    if escape:
        # The input ended with a dangling backslash.
        yield (pos, '\\')
120
+
121
+
122
def _strip_quoted_realnames(addr):
    """Return addr with every double-quoted section (quotes included) removed.

    Quote characters are located via _iter_escaped_chars(), so only tokens
    that are exactly '"' open or close a quoted section.  An opening quote
    that is never closed leaves the remainder of the string untouched.
    """
    if '"' not in addr:
        # Nothing to strip; hand the input back unchanged.
        return addr

    kept = []
    resume = 0          # index just past the most recent closing quote
    quote_start = None  # index of the currently open quote, if any
    for idx, token in _iter_escaped_chars(addr):
        if token != '"':
            continue
        if quote_start is None:
            quote_start = idx
            continue
        # Closing quote: keep the text that preceded the quoted section.
        if quote_start != resume:
            kept.append(addr[resume:quote_start])
        resume = idx + 1
        quote_start = None

    if resume < len(addr):
        kept.append(addr[resume:])

    return ''.join(kept)
106
145
107
def getaddresses(fieldvalues):
    """Return a list of (REALNAME, EMAIL) for each fieldvalue."""
    # All values are stringified and joined with COMMASPACE so that they are
    # parsed together as a single address list.
    joined = COMMASPACE.join(map(str, fieldvalues))
    return _AddressList(joined).addresslist
146
+
147
# Module-level flag, always True here.
# NOTE(review): presumably lets callers detect that getaddresses() and
# parseaddr() accept the keyword-only ``strict`` argument -- confirm intent.
+ supports_strict_parsing = True
148
+
149
def getaddresses(fieldvalues, *, strict=True):
    """Return a list of (REALNAME, EMAIL) or ('','') for each fieldvalue.

    When parsing fails for a fieldvalue, a 2-tuple of ('', '') is returned in
    its place.

    If strict is true, use a strict parser which rejects malformed inputs.
    In that mode, if the number of parsed addresses differs from the number
    expected from the input, the whole result is replaced by [('', '')].
    """

    # If strict is true and the number of parsed addresses does not match the
    # number expected from the input, a parsing error has occurred, so a list
    # containing a single empty 2-tuple [('', '')] is returned instead.  This
    # is done to avoid invalid output.
    #
    # Malformed input: getaddresses(['alice@example.com <bob@example.com>'])
    # Invalid output: [('', 'alice@example.com'), ('', 'bob@example.com')]
    # Safe output: [('', '')]

    if not strict:
        joined = COMMASPACE.join(str(v) for v in fieldvalues)
        return _AddressList(joined).addresslist

    fieldvalues = [str(v) for v in fieldvalues]
    fieldvalues = _pre_parse_validation(fieldvalues)
    addr = COMMASPACE.join(fieldvalues)
    result = _post_parse_validation(_AddressList(addr).addresslist)

    # Treat output as invalid if the number of addresses is not equal to the
    # expected number of addresses.  Each field value is expected to hold
    # 1 + (number of commas) addresses; a comma inside a quoted real name is
    # not a delimiter, so quoted sections are stripped before counting.
    expected = sum(
        1 + _strip_quoted_realnames(v).count(',') for v in fieldvalues
    )
    if len(result) != expected:
        return [('', '')]

    return result
191
+
192
+
193
def _check_parenthesis(addr):
    """Return True if the parentheses in addr are balanced.

    Quoted sections are removed first, so parentheses inside them are
    ignored.  Only tokens that are exactly '(' or ')' are counted.
    """
    depth = 0
    for _, token in _iter_escaped_chars(_strip_quoted_realnames(addr)):
        if token == '(':
            depth += 1
        elif token == ')':
            depth -= 1
            if depth < 0:
                # A ')' appeared before its matching '('.
                return False
    return depth == 0
206
+
207
+
208
def _pre_parse_validation(email_header_fields):
    """Return a copy of the field list with invalid entries replaced.

    Any value for which _check_parenthesis() returns false is replaced by
    the literal string "('', '')"; every other value is kept unchanged.
    The result has the same length and order as the input.
    """
    return [
        v if _check_parenthesis(v) else "('', '')"
        for v in email_header_fields
    ]
216
+
217
+
218
def _post_parse_validation(parsed_email_header_tuples):
    """Return the parsed tuples with failed parses replaced by ('', '').

    The parser would have parsed a correctly formatted domain-literal, so a
    '[' still present in the second element of a tuple after parsing
    indicates a parsing failure.  Such tuples are replaced by ('', '');
    all others are kept unchanged and in order.
    """
    return [
        ('', '') if '[' in pair[1] else pair
        for pair in parsed_email_header_tuples
    ]
112
228
113
229
114
230
def _format_timetuple_and_zone (timetuple , zone ):
@@ -207,16 +323,33 @@ def parsedate_to_datetime(data):
207
323
tzinfo = datetime .timezone (datetime .timedelta (seconds = tz )))
208
324
209
325
210
def parseaddr(addr, *, strict=True):
    """
    Parse addr into its constituent realname and email address parts.

    Return a tuple of realname and email address, unless the parse fails, in
    which case return a 2-tuple of ('', '').

    If strict is True, use a strict parser which rejects malformed inputs.
    In strict mode a list argument is reduced to its first element, and an
    empty list or any non-string value yields ('', '').
    """
    if not strict:
        addrs = _AddressList(addr).addresslist
        if not addrs:
            return ('', '')
        return addrs[0]

    if isinstance(addr, list):
        if not addr:
            # An empty list has no first element; report a failed parse
            # instead of raising IndexError.
            return ('', '')
        addr = addr[0]

    if not isinstance(addr, str):
        return ('', '')

    addr = _pre_parse_validation([addr])[0]
    addrs = _post_parse_validation(_AddressList(addr).addresslist)

    # Exactly one address must come out of the parse; zero or several means
    # the input was not a single well-formed address.
    if not addrs or len(addrs) > 1:
        return ('', '')

    return addrs[0]
221
354
222
355
0 commit comments