Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Appearance settings

Commit 5dfb5e6

Browse files
miss-islington, medmunds, bitdancer, encukou
authored
[3.12] gh-121284: Fix email address header folding with parsed encoded-word (GH-122754) (#131404)
gh-121284: Fix email address header folding with parsed encoded-word (GH-122754) Email generators using email.policy.default may convert an RFC 2047 encoded-word to unencoded form during header refolding. In a structured header, this could allow 'specials' chars outside a quoted-string, leading to invalid address headers and enabling spoofing. This change ensures a parsed encoded-word that contains specials is kept as an encoded-word while the header is refolded. [Better fix from @bitdancer.] --------- (cherry picked from commit 295b53d) Co-authored-by: Mike Edmunds <medmunds@gmail.com> Co-authored-by: R David Murray <rdmurray@bitdance.com> Co-authored-by: Petr Viktorin <encukou@gmail.com>
1 parent 865bd6d commit 5dfb5e6
Copy full SHA for 5dfb5e6

File tree

3 files changed

+37
-5
lines changed
Filter options

3 files changed

+37
-5
lines changed

‎Lib/email/_header_value_parser.py

Copy file name to clipboardExpand all lines: Lib/email/_header_value_parser.py
+5-5Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1053,7 +1053,7 @@ def get_fws(value):
10531053
fws = WhiteSpaceTerminal(value[:len(value)-len(newvalue)], 'fws')
10541054
return fws, newvalue
10551055

1056-
def get_encoded_word(value):
1056+
def get_encoded_word(value, terminal_type='vtext'):
10571057
""" encoded-word = "=?" charset "?" encoding "?" encoded-text "?="
10581058
10591059
"""
@@ -1092,7 +1092,7 @@ def get_encoded_word(value):
10921092
ew.append(token)
10931093
continue
10941094
chars, *remainder = _wsp_splitter(text, 1)
1095-
vtext = ValueTerminal(chars, 'vtext')
1095+
vtext = ValueTerminal(chars, terminal_type)
10961096
_validate_xtext(vtext)
10971097
ew.append(vtext)
10981098
text = ''.join(remainder)
@@ -1134,7 +1134,7 @@ def get_unstructured(value):
11341134
valid_ew = True
11351135
if value.startswith('=?'):
11361136
try:
1137-
token, value = get_encoded_word(value)
1137+
token, value = get_encoded_word(value, 'utext')
11381138
except _InvalidEwError:
11391139
valid_ew = False
11401140
except errors.HeaderParseError:
@@ -1163,7 +1163,7 @@ def get_unstructured(value):
11631163
# the parser to go in an infinite loop.
11641164
if valid_ew and rfc2047_matcher.search(tok):
11651165
tok, *remainder = value.partition('=?')
1166-
vtext = ValueTerminal(tok, 'vtext')
1166+
vtext = ValueTerminal(tok, 'utext')
11671167
_validate_xtext(vtext)
11681168
unstructured.append(vtext)
11691169
value = ''.join(remainder)
@@ -2813,7 +2813,7 @@ def _refold_parse_tree(parse_tree, *, policy):
28132813
continue
28142814
tstr = str(part)
28152815
if not want_encoding:
2816-
if part.token_type == 'ptext':
2816+
if part.token_type in ('ptext', 'vtext'):
28172817
# Encode if tstr contains special characters.
28182818
want_encoding = not SPECIALSNL.isdisjoint(tstr)
28192819
else:

‎Lib/test/test_email/test__header_value_parser.py

Copy file name to clipboardExpand all lines: Lib/test/test_email/test__header_value_parser.py
+25Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3076,6 +3076,31 @@ def test_address_list_with_unicode_names_in_quotes(self):
30763076
'=?utf-8?q?H=C3=BCbsch?= Kaktus <beautiful@example.com>,\n'
30773077
' =?utf-8?q?bei=C3=9Ft_bei=C3=9Ft?= <biter@example.com>\n')
30783078

3079+
def test_address_list_with_specials_in_encoded_word(self):
3080+
# An encoded-word parsed from a structured header must remain
3081+
# encoded when it contains specials. Regression for gh-121284.
3082+
policy = self.policy.clone(max_line_length=40)
3083+
cases = [
3084+
# (to, folded)
3085+
('=?utf-8?q?A_v=C3=A9ry_long_name_with=2C_comma?= <to@example.com>',
3086+
'A =?utf-8?q?v=C3=A9ry_long_name_with?=\n'
3087+
' =?utf-8?q?=2C?= comma <to@example.com>\n'),
3088+
('=?utf-8?q?This_long_name_does_not_need_encoded=2Dword?= <to@example.com>',
3089+
'This long name does not need\n'
3090+
' encoded-word <to@example.com>\n'),
3091+
('"A véry long name with, comma" <to@example.com>',
3092+
# (This isn't the best fold point, but it's not invalid.)
3093+
'A =?utf-8?q?v=C3=A9ry_long_name_with?=\n'
3094+
' =?utf-8?q?=2C?= comma <to@example.com>\n'),
3095+
('"A véry long name containing a, comma" <to@example.com>',
3096+
'A =?utf-8?q?v=C3=A9ry?= long name\n'
3097+
' containing =?utf-8?q?a=2C?= comma\n'
3098+
' <to@example.com>\n'),
3099+
]
3100+
for (to, folded) in cases:
3101+
with self.subTest(to=to):
3102+
self._test(parser.get_address_list(to)[0], folded, policy=policy)
3103+
30793104
def test_address_list_with_list_separator_after_fold(self):
30803105
a = 'x' * 66 + '@example.com'
30813106
to = f'{a}, "Hübsch Kaktus" <beautiful@example.com>'
+7Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
Fix bug in the folding of RFC 2047 encoded-words when flattening an email message
2+
using a modern email policy. Previously, when an encoded-word was too long
3+
for a line, it would be decoded, split across lines, and re-encoded. But commas
4+
and other special characters in the original text could be left unencoded and
5+
unquoted. This could theoretically be used to spoof header lines using
6+
a carefully constructed encoded-word if the resulting rendered email was
7+
transmitted or re-parsed.

0 commit comments

Comments
0 (0)
Morty Proxy This is a proxified and sanitized view of the page, visit original site.