48
48
# Matches any single one of the characters ] [ \ ( ) < > @ , : ; " .
# NOTE(review): presumably used to decide whether a display name must be
# quoted; the call site is not visible in this chunk.
specialsre = re .compile (r'[][\\()<>@,:;".]' )
49
49
# Matches a single backslash or double-quote character.
# NOTE(review): presumably the characters that need backslash-escaping inside
# a quoted string; the call site is not visible in this chunk.
escapesre = re .compile (r'[\\"]' )
50
50
51
+
51
52
def _has_surrogates (s ):
52
53
"""Return True if s contains surrogate-escaped binary data."""
53
54
# This check is based on the fact that unless there are surrogates, utf8
@@ -106,12 +107,127 @@ def formataddr(pair, charset='utf-8'):
106
107
return address
107
108
108
109
110
+ def _iter_escaped_chars (addr ):
111
+ pos = 0
112
+ escape = False
113
+ for pos , ch in enumerate (addr ):
114
+ if escape :
115
+ yield (pos , '\\ ' + ch )
116
+ escape = False
117
+ elif ch == '\\ ' :
118
+ escape = True
119
+ else :
120
+ yield (pos , ch )
121
+ if escape :
122
+ yield (pos , '\\ ' )
123
+
124
+
125
def _strip_quoted_realnames(addr):
    """Return addr with every double-quoted section removed.

    Each span running from an opening '"' to its closing '"' (both as
    reported by _iter_escaped_chars) is dropped, quotes included.  A quote
    that is never closed is left in place together with the text after it.
    """
    if '"' not in addr:
        # Nothing is quoted, so there is nothing to strip.
        return addr

    kept = []
    resume = 0          # index where the next retained slice begins
    quote_start = None  # index of the currently open quote, if any
    for index, token in _iter_escaped_chars(addr):
        if token != '"':
            continue
        if quote_start is None:
            quote_start = index
            continue
        # Closing quote: keep the text that preceded the quoted span.
        if resume != quote_start:
            kept.append(addr[resume:quote_start])
        resume = index + 1
        quote_start = None

    if resume < len(addr):
        kept.append(addr[resume:])

    return ''.join(kept)
109
148
110
def getaddresses(fieldvalues):
    """Return a list of (REALNAME, EMAIL) for each fieldvalue."""
    # Every field value is stringified and joined with COMMASPACE so that
    # the whole input is parsed as one address list.
    joined = COMMASPACE.join(str(value) for value in fieldvalues)
    return _AddressList(joined).addresslist
149
+
150
+ supports_strict_parsing = True
151
+
152
def getaddresses(fieldvalues, *, strict=True):
    """Return a list of (REALNAME, EMAIL) or ('','') for each fieldvalue.

    When parsing fails for a fieldvalue, a 2-tuple of ('', '') is returned in
    its place.

    If strict is true, use a strict parser which rejects malformed inputs.
    """
    # Strict mode compares the number of parsed addresses with the number
    # the input is expected to contain.  On a mismatch the whole result is
    # replaced by [('', '')] so that no invalid output is returned.
    #
    # Malformed input: getaddresses(['alice@example.com <bob@example.com>'])
    # Invalid output: [('', 'alice@example.com'), ('', 'bob@example.com')]
    # Safe output: [('', '')]

    if not strict:
        joined = COMMASPACE.join(str(value) for value in fieldvalues)
        return _AddressList(joined).addresslist

    checked = _pre_parse_validation([str(value) for value in fieldvalues])
    parsed = _AddressList(COMMASPACE.join(checked)).addresslist
    result = _post_parse_validation(parsed)

    # Expected number of addresses per field value: 1 + number of commas.
    # A comma inside a quoted real name is not a delimiter, so quoted
    # sections are stripped before the commas are counted.
    expected = sum(
        1 + _strip_quoted_realnames(value).count(',') for value in checked
    )
    if len(result) != expected:
        return [('', '')]

    return result
194
+
195
+
196
def _check_parenthesis(addr):
    """Return True if the parentheses in addr are balanced.

    Quoted sections are removed first, so parentheses inside them are
    ignored.  The result is False as soon as a ')' appears without a
    preceding unmatched '(', or if any '(' is still open at the end.
    """
    unquoted = _strip_quoted_realnames(addr)

    depth = 0
    for _, token in _iter_escaped_chars(unquoted):
        if token == '(':
            depth += 1
        elif token == ')':
            depth -= 1
            if depth < 0:
                # Closing parenthesis with nothing left to close.
                return False
    return depth == 0
209
+
210
+
211
def _pre_parse_validation(email_header_fields):
    """Return a copy of the fields with unbalanced ones replaced.

    A field that fails _check_parenthesis is substituted by the literal
    text "('', '')"; every other field is passed through unchanged.
    """
    return [
        field if _check_parenthesis(field) else "('', '')"
        for field in email_header_fields
    ]
219
+
220
+
221
+ def _post_parse_validation (parsed_email_header_tuples ):
222
+ accepted_values = []
223
+ # The parser would have parsed a correctly formatted domain-literal
224
+ # The existence of an [ after parsing indicates a parsing failure
225
+ for v in parsed_email_header_tuples :
226
+ if '[' in v [1 ]:
227
+ v = ('' , '' )
228
+ accepted_values .append (v )
229
+
230
+ return accepted_values
115
231
116
232
117
233
def _format_timetuple_and_zone (timetuple , zone ):
@@ -202,16 +318,33 @@ def parsedate_to_datetime(data):
202
318
tzinfo = datetime .timezone (datetime .timedelta (seconds = tz )))
203
319
204
320
205
def parseaddr(addr, *, strict=True):
    """
    Parse addr into its constituent realname and email address parts.

    Return a tuple of realname and email address, unless the parse fails, in
    which case return a 2-tuple of ('', '').

    If strict is True, use a strict parser which rejects malformed inputs.
    In strict mode a list argument is reduced to its first element (an empty
    list yields ('', '')), any other non-string argument yields ('', ''),
    and a value that parses to anything other than exactly one address
    yields ('', '').
    """
    if not strict:
        addrs = _AddressList(addr).addresslist
        if not addrs:
            return ('', '')
        return addrs[0]

    if isinstance(addr, list):
        if not addr:
            # Nothing to parse: report a failed parse rather than letting
            # addr[0] raise IndexError.
            return ('', '')
        addr = addr[0]

    if not isinstance(addr, str):
        return ('', '')

    addr = _pre_parse_validation([addr])[0]
    addrs = _post_parse_validation(_AddressList(addr).addresslist)

    # A single input must yield exactly one address; zero or several both
    # count as a failed parse.
    if len(addrs) != 1:
        return ('', '')

    return addrs[0]
216
349
217
350
0 commit comments