Skip to content

Navigation Menu

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Appearance settings

Commit 9a31386

Browse files
encukou, medmunds, bitdancer
authored
[3.9] gh-121284: Fix email address header folding with parsed encoded-word (GH-122754) (GH-131412)
Email generators using email.policy.default may convert an RFC 2047 encoded-word to unencoded form during header refolding. In a structured header, this could allow 'specials' chars outside a quoted-string, leading to invalid address headers and enabling spoofing. This change ensures a parsed encoded-word that contains specials is kept as an encoded-word while the header is refolded. [Better fix from @bitdancer.]

(cherry picked from commit 295b53d)

Co-authored-by: Mike Edmunds <medmunds@gmail.com>
Co-authored-by: R David Murray <rdmurray@bitdance.com>
1 parent ff4e5c2 commit 9a31386
Copy full SHA for 9a31386

File tree

3 files changed

+37
-5
lines changed
Filter options

3 files changed

+37
-5
lines changed

‎Lib/email/_header_value_parser.py

Copy file name to clipboardExpand all lines: Lib/email/_header_value_parser.py
+5-5Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1037,7 +1037,7 @@ def get_fws(value):
10371037
fws = WhiteSpaceTerminal(value[:len(value)-len(newvalue)], 'fws')
10381038
return fws, newvalue
10391039

1040-
def get_encoded_word(value):
1040+
def get_encoded_word(value, terminal_type='vtext'):
10411041
""" encoded-word = "=?" charset "?" encoding "?" encoded-text "?="
10421042
10431043
"""
@@ -1076,7 +1076,7 @@ def get_encoded_word(value):
10761076
ew.append(token)
10771077
continue
10781078
chars, *remainder = _wsp_splitter(text, 1)
1079-
vtext = ValueTerminal(chars, 'vtext')
1079+
vtext = ValueTerminal(chars, terminal_type)
10801080
_validate_xtext(vtext)
10811081
ew.append(vtext)
10821082
text = ''.join(remainder)
@@ -1118,7 +1118,7 @@ def get_unstructured(value):
11181118
valid_ew = True
11191119
if value.startswith('=?'):
11201120
try:
1121-
token, value = get_encoded_word(value)
1121+
token, value = get_encoded_word(value, 'utext')
11221122
except _InvalidEwError:
11231123
valid_ew = False
11241124
except errors.HeaderParseError:
@@ -1147,7 +1147,7 @@ def get_unstructured(value):
11471147
# the parser to go in an infinite loop.
11481148
if valid_ew and rfc2047_matcher.search(tok):
11491149
tok, *remainder = value.partition('=?')
1150-
vtext = ValueTerminal(tok, 'vtext')
1150+
vtext = ValueTerminal(tok, 'utext')
11511151
_validate_xtext(vtext)
11521152
unstructured.append(vtext)
11531153
value = ''.join(remainder)
@@ -2781,7 +2781,7 @@ def _refold_parse_tree(parse_tree, *, policy):
27812781
continue
27822782
tstr = str(part)
27832783
if not want_encoding:
2784-
if part.token_type == 'ptext':
2784+
if part.token_type in ('ptext', 'vtext'):
27852785
# Encode if tstr contains special characters.
27862786
want_encoding = not SPECIALSNL.isdisjoint(tstr)
27872787
else:

‎Lib/test/test_email/test__header_value_parser.py

Copy file name to clipboardExpand all lines: Lib/test/test_email/test__header_value_parser.py
+25Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2946,6 +2946,31 @@ def test_address_list_with_unicode_names_in_quotes(self):
29462946
'=?utf-8?q?H=C3=BCbsch?= Kaktus <beautiful@example.com>,\n'
29472947
' =?utf-8?q?bei=C3=9Ft_bei=C3=9Ft?= <biter@example.com>\n')
29482948

2949+
def test_address_list_with_specials_in_encoded_word(self):
2950+
# An encoded-word parsed from a structured header must remain
2951+
# encoded when it contains specials. Regression for gh-121284.
2952+
policy = self.policy.clone(max_line_length=40)
2953+
cases = [
2954+
# (to, folded)
2955+
('=?utf-8?q?A_v=C3=A9ry_long_name_with=2C_comma?= <to@example.com>',
2956+
'A =?utf-8?q?v=C3=A9ry_long_name_with?=\n'
2957+
' =?utf-8?q?=2C?= comma <to@example.com>\n'),
2958+
('=?utf-8?q?This_long_name_does_not_need_encoded=2Dword?= <to@example.com>',
2959+
'This long name does not need\n'
2960+
' encoded-word <to@example.com>\n'),
2961+
('"A véry long name with, comma" <to@example.com>',
2962+
# (This isn't the best fold point, but it's not invalid.)
2963+
'A =?utf-8?q?v=C3=A9ry_long_name_with?=\n'
2964+
' =?utf-8?q?=2C?= comma <to@example.com>\n'),
2965+
('"A véry long name containing a, comma" <to@example.com>',
2966+
'A =?utf-8?q?v=C3=A9ry?= long name\n'
2967+
' containing =?utf-8?q?a=2C?= comma\n'
2968+
' <to@example.com>\n'),
2969+
]
2970+
for (to, folded) in cases:
2971+
with self.subTest(to=to):
2972+
self._test(parser.get_address_list(to)[0], folded, policy=policy)
2973+
29492974
# XXX Need tests with comments on various sides of a unicode token,
29502975
# and with unicode tokens in the comments. Spaces inside the quotes
29512976
# currently don't do the right thing.
+7Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
Fix bug in the folding of rfc2047 encoded-words when flattening an email message
2+
using a modern email policy. Previously when an encoded-word was too long
3+
for a line, it would be decoded, split across lines, and re-encoded. But commas
4+
and other special characters in the original text could be left unencoded and
5+
unquoted. This could theoretically be used to spoof header lines using
6+
a carefully constructed encoded-word if the resulting rendered email was
7+
transmitted or re-parsed.

0 commit comments

Comments
0 (0)
Morty Proxy This is a proxified and sanitized view of the page, visit original site.