From c7f24031d477d0fe43ac4353715820a7446f8045 Mon Sep 17 00:00:00 2001 From: B Siemerink <52461103+bsiem@users.noreply.github.com> Date: Tue, 2 Jul 2019 20:42:13 +0200 Subject: [PATCH] bpo-37482: Fix email address name with encoded words and special chars Special characters in email address header display names are normally put within double quotes. However, encoded words (=?charset?x?...?=) are not allowed within double quotes. When the header contains a word with special characters and another word that must be encoded, the first one must also be encoded. In the next example, the display name in the From header is quoted and therefore the comma is allowed; in the To header, the comma is not within quotes and not encoded, which is not allowed and therefore rejected by some mail servers. From: "Foo Bar, France" To: Foo Bar, =?utf-8?q?Espa=C3=B1a?= --- Lib/email/_header_value_parser.py | 3 +++ Lib/test/test_email/test_headerregistry.py | 24 +++++++++++++++++++ .../2019-07-09-11-20-21.bpo-37482.auzvev.rst | 1 + 3 files changed, 28 insertions(+) create mode 100644 Misc/NEWS.d/next/Library/2019-07-09-11-20-21.bpo-37482.auzvev.rst diff --git a/Lib/email/_header_value_parser.py b/Lib/email/_header_value_parser.py index aefc457f678165..e1bc5bf68d71a7 100644 --- a/Lib/email/_header_value_parser.py +++ b/Lib/email/_header_value_parser.py @@ -2736,6 +2736,9 @@ def _refold_parse_tree(parse_tree, *, policy): wrap_as_ew_blocked -= 1 continue tstr = str(part) + if part.token_type == 'ptext' and set(tstr) & SPECIALS: + # Encode if tstr contains special characters. 
+ want_encoding = True try: tstr.encode(encoding) charset = encoding diff --git a/Lib/test/test_email/test_headerregistry.py b/Lib/test/test_email/test_headerregistry.py index 5d9b3576d30657..8d89c5dd58322e 100644 --- a/Lib/test/test_email/test_headerregistry.py +++ b/Lib/test/test_email/test_headerregistry.py @@ -1528,6 +1528,30 @@ def test_set_message_header_from_group(self): class TestFolding(TestHeaderBase): + def test_address_display_names(self): + """Test the folding and encoding of address headers.""" + for name, result in ( + ('Foo Bar, France', '"Foo Bar, France"'), + ('Foo Bar (France)', '"Foo Bar (France)"'), + ('Foo Bar, España', 'Foo =?utf-8?q?Bar=2C_Espa=C3=B1a?='), + ('Foo Bar (España)', 'Foo Bar =?utf-8?b?KEVzcGHDsWEp?='), + ('Foo, Bar España', '=?utf-8?q?Foo=2C_Bar_Espa=C3=B1a?='), + ('Foo, Bar [España]', '=?utf-8?q?Foo=2C_Bar_=5BEspa=C3=B1a=5D?='), + ('Foo Bär, France', 'Foo =?utf-8?q?B=C3=A4r=2C?= France'), + ('Foo Bär <France>', 'Foo =?utf-8?q?B=C3=A4r_=3CFrance=3E?='), + ( + 'Lôrem ipsum dôlôr sit amet, cônsectetuer adipiscing. ' + 'Suspendisse pôtenti. Aliquam nibh. 
Suspendisse pôtenti.', + '=?utf-8?q?L=C3=B4rem_ipsum_d=C3=B4l=C3=B4r_sit_amet=2C_c' + '=C3=B4nsectetuer?=\n =?utf-8?q?adipiscing=2E_Suspendisse' + '_p=C3=B4tenti=2E_Aliquam_nibh=2E?=\n Suspendisse =?utf-8' + '?q?p=C3=B4tenti=2E?=', + ), + ): + h = self.make_header('To', Address(name, addr_spec='a@b.com')) + self.assertEqual(h.fold(policy=policy.default), + 'To: %s <a@b.com>\n' % result) + def test_short_unstructured(self): h = self.make_header('subject', 'this is a test') self.assertEqual(h.fold(policy=policy.default), diff --git a/Misc/NEWS.d/next/Library/2019-07-09-11-20-21.bpo-37482.auzvev.rst b/Misc/NEWS.d/next/Library/2019-07-09-11-20-21.bpo-37482.auzvev.rst new file mode 100644 index 00000000000000..e09ff63eedc43b --- /dev/null +++ b/Misc/NEWS.d/next/Library/2019-07-09-11-20-21.bpo-37482.auzvev.rst @@ -0,0 +1 @@ +Fix serialization of display name in originator or destination address fields with both encoded words and special chars.