From c670b112683e89e9a1d80842f51cdfbb6e43ae4f Mon Sep 17 00:00:00 2001 From: Marius-Juston Date: Thu, 3 Apr 2025 04:15:59 -0500 Subject: [PATCH 01/11] Removed re hex --- Lib/email/quoprimime.py | 31 ++++++++++++++++++++----------- 1 file changed, 20 insertions(+), 11 deletions(-) diff --git a/Lib/email/quoprimime.py b/Lib/email/quoprimime.py index 27c7ea55c7871f..9b4b44d34e97f1 100644 --- a/Lib/email/quoprimime.py +++ b/Lib/email/quoprimime.py @@ -39,8 +39,6 @@ 'unquote', ] -import re - from string import ascii_letters, digits, hexdigits CRLF = '\r\n' @@ -280,14 +278,6 @@ def decode(encoded, eol=NL): body_decode = decode decodestring = decode - - -def _unquote_match(match): - """Turn a match in the form =AB to the ASCII character with value 0xab""" - s = match.group(0) - return unquote(s) - - # Header decoding is done a bit differently def header_decode(s): """Decode a string encoded with RFC 2045 MIME header 'Q' encoding. @@ -297,4 +287,23 @@ def header_decode(s): the high level email.header class for that functionality. """ s = s.replace('_', ' ') - return re.sub(r'=[a-fA-F0-9]{2}', _unquote_match, s, flags=re.ASCII) + + valid_hex = '0123456789ABCDEFabcdef' + + i = 0 + # Check for regex =[a-fA-F0-9]{2} + + result = [] + + while i < len(s): + if s[i] == '=' and i + 2 < len(s): + hex_part = s[i: i + 3] + + if (hex_part[1] in valid_hex) and (hex_part[2] in valid_hex): + result.append(unquote(hex_part)) + i += 3 + continue + result.append(s[i]) + i += 1 + + return ''.join(result) From 08cdc0b97a4354a5b0b4afc2380c9fcc3d70d76b Mon Sep 17 00:00:00 2001 From: Marius-Juston Date: Thu, 3 Apr 2025 04:24:51 -0500 Subject: [PATCH 02/11] implace replace, removed valid_hex parameter --- Lib/email/quoprimime.py | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) diff --git a/Lib/email/quoprimime.py b/Lib/email/quoprimime.py index 9b4b44d34e97f1..9aad48fa985568 100644 --- a/Lib/email/quoprimime.py +++ b/Lib/email/quoprimime.py @@ -286,24 +286,19 @@ def header_decode(s): quoted-printable (like =?iso-8859-1?q?Hello_World?=) -- please use the high level email.header class for that functionality. """ - s = s.replace('_', ' ') - - valid_hex = '0123456789ABCDEFabcdef' - - i = 0 # Check for regex =[a-fA-F0-9]{2} - result = [] - while i < len(s): - if s[i] == '=' and i + 2 < len(s): + max_s_check_len = s - 2 + for i, c in enumerate(s): + if c == '=' and i < max_s_check_len: hex_part = s[i: i + 3] - if (hex_part[1] in valid_hex) and (hex_part[2] in valid_hex): + if (hex_part[1] in hexdigits) and (hex_part[2] in hexdigits): result.append(unquote(hex_part)) i += 3 continue - result.append(s[i]) + result.append(' ' if c == '_' else c) i += 1 return ''.join(result) From a3ef5506062fb54012901899f0875b5f24e67dfd Mon Sep 17 00:00:00 2001 From: Marius-Juston Date: Thu, 3 Apr 2025 04:35:45 -0500 Subject: [PATCH 03/11] joined to big if statement --- Lib/email/quoprimime.py | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/Lib/email/quoprimime.py b/Lib/email/quoprimime.py index 9aad48fa985568..853e253c2765d5 100644 --- a/Lib/email/quoprimime.py +++ b/Lib/email/quoprimime.py @@ -288,17 +288,17 @@ def header_decode(s): """ # Check for regex =[a-fA-F0-9]{2} result = [] - - max_s_check_len = s - 2 - for i, c in enumerate(s): - if c == '=' and i < max_s_check_len: - hex_part = s[i: i + 3] - - if (hex_part[1] in hexdigits) and (hex_part[2] in hexdigits): - result.append(unquote(hex_part)) - i += 3 - continue + + s_len = len(s) + i =0 + while i < s_len: + c = s[i] + + if c == '=' and i + 2 < s_len and s[i + 1] in hexdigits and s[i + 2] in hexdigits: + result.append(unquote(s[i: i + 3])) + i += 3 + continue result.append(' ' if c == '_' else c) i += 1 - + return ''.join(result) From 22e6d9e16cc349976096672a5ac6b9e3c40519bf Mon Sep 17 00:00:00 2001 From: Marius-Juston Date: Thu, 3 Apr 2025 05:09:26 -0500 Subject: [PATCH 04/11] added news --- .../next/Library/2025-04-03-04-40-15.gh-issue-130167.Tc5zLB.rst | 2 ++ 1 file changed, 2 insertions(+) create mode 100644 Misc/NEWS.d/next/Library/2025-04-03-04-40-15.gh-issue-130167.Tc5zLB.rst diff --git a/Misc/NEWS.d/next/Library/2025-04-03-04-40-15.gh-issue-130167.Tc5zLB.rst b/Misc/NEWS.d/next/Library/2025-04-03-04-40-15.gh-issue-130167.Tc5zLB.rst new file mode 100644 index 00000000000000..72f5f67ca3cf29 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-04-03-04-40-15.gh-issue-130167.Tc5zLB.rst @@ -0,0 +1,2 @@ +Improved import time of :mod:`email.quoprimime` module by 60%. Patch by +Marius-Juston From 232bb559d737b7be48c75c78aad828b36b8614e6 Mon Sep 17 00:00:00 2001 From: Marius-Juston Date: Thu, 3 Apr 2025 05:09:41 -0500 Subject: [PATCH 05/11] inline character assigment --- Lib/email/quoprimime.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/Lib/email/quoprimime.py b/Lib/email/quoprimime.py index 853e253c2765d5..039afa14955682 100644 --- a/Lib/email/quoprimime.py +++ b/Lib/email/quoprimime.py @@ -292,9 +292,7 @@ def header_decode(s): s_len = len(s) i =0 while i < s_len: - c = s[i] - - if c == '=' and i + 2 < s_len and s[i + 1] in hexdigits and s[i + 2] in hexdigits: + if (c := s[i]) == '=' and i + 2 < s_len and s[i + 1] in hexdigits and s[i + 2] in hexdigits: result.append(unquote(s[i: i + 3])) i += 3 continue From 3ada67a5a2c3c11af0c64f9434ffc9cfa0b0e905 Mon Sep 17 00:00:00 2001 From: Marius-Juston Date: Thu, 3 Apr 2025 06:00:40 -0500 Subject: [PATCH 06/11] use cache for hex to char + instead of single character append use slices --- Lib/email/quoprimime.py | 33 ++++++++++++++++++++++++++------- 1 file changed, 26 insertions(+), 7 deletions(-) diff --git a/Lib/email/quoprimime.py b/Lib/email/quoprimime.py index 039afa14955682..ef7cc1bd0f90c5 100644 --- a/Lib/email/quoprimime.py +++ b/Lib/email/quoprimime.py @@ -278,6 +278,13 @@ def decode(encoded, eol=NL): body_decode = decode decodestring = decode + +_HEX_TO_CHAR = {} +for i in range(256): + key_lower = f"{i:02x}" + char_val = chr(i) + _HEX_TO_CHAR[key_lower] = char_val + # Header decoding is done a bit differently def header_decode(s): """Decode a string encoded with RFC 2045 MIME header 'Q' encoding. @@ -288,15 +295,27 @@ def header_decode(s): """ # Check for regex =[a-fA-F0-9]{2} result = [] - s_len = len(s) - i =0 + i = 0 + last_append = 0 + s = s.replace("_", " ") + while i < s_len: - if (c := s[i]) == '=' and i + 2 < s_len and s[i + 1] in hexdigits and s[i + 2] in hexdigits: - result.append(unquote(s[i: i + 3])) - i += 3 - continue - result.append(' ' if c == '_' else c) + if s[i] == '=' and i + 2 < s_len: + hex_str = s[i + 1:i + 3].lower() + if hex_str in _HEX_TO_CHAR: + if last_append < i: + result.append(s[last_append:i]) + result.append(_HEX_TO_CHAR[hex_str]) + i += 3 + last_append = i + continue i += 1 + if last_append == 0: + return s + + if last_append < s_len: + result.append(s[last_append:]) + return ''.join(result) From 9e3cc1f227a435f24bfc184e24f777d55061e65a Mon Sep 17 00:00:00 2001 From: Marius-Juston Date: Thu, 3 Apr 2025 06:07:47 -0500 Subject: [PATCH 07/11] inplace assignment with walrus --- Lib/email/quoprimime.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/Lib/email/quoprimime.py b/Lib/email/quoprimime.py index ef7cc1bd0f90c5..fa61788afad04b 100644 --- a/Lib/email/quoprimime.py +++ b/Lib/email/quoprimime.py @@ -302,8 +302,7 @@ def header_decode(s): while i < s_len: if s[i] == '=' and i + 2 < s_len: - hex_str = s[i + 1:i + 3].lower() - if hex_str in _HEX_TO_CHAR: + if (hex_str := s[i + 1:i + 3].lower()) in _HEX_TO_CHAR: if last_append < i: result.append(s[last_append:i]) result.append(_HEX_TO_CHAR[hex_str]) From 328756485b8979f1df24a4924b06544d5efcd6ca Mon Sep 17 00:00:00 2001 From: Marius-Juston Date: Thu, 3 Apr 2025 12:27:29 -0500 Subject: [PATCH 08/11] removed news since should probably be "skip news" tagged --- .../next/Library/2025-04-03-04-40-15.gh-issue-130167.Tc5zLB.rst | 2 -- 1 file changed, 2 deletions(-) delete mode 100644 Misc/NEWS.d/next/Library/2025-04-03-04-40-15.gh-issue-130167.Tc5zLB.rst diff --git a/Misc/NEWS.d/next/Library/2025-04-03-04-40-15.gh-issue-130167.Tc5zLB.rst b/Misc/NEWS.d/next/Library/2025-04-03-04-40-15.gh-issue-130167.Tc5zLB.rst deleted file mode 100644 index 72f5f67ca3cf29..00000000000000 --- a/Misc/NEWS.d/next/Library/2025-04-03-04-40-15.gh-issue-130167.Tc5zLB.rst +++ /dev/null @@ -1,2 +0,0 @@ -Improved import time of :mod:`email.quoprimime` module by 60%. Patch by -Marius-Juston From 8362a2e6e76a271efa96393b06799861c99f7158 Mon Sep 17 00:00:00 2001 From: Marius-Juston Date: Thu, 3 Apr 2025 12:42:29 -0500 Subject: [PATCH 09/11] fast pass for no '=' --- Lib/email/quoprimime.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/Lib/email/quoprimime.py b/Lib/email/quoprimime.py index fa61788afad04b..4f22e20e39d36b 100644 --- a/Lib/email/quoprimime.py +++ b/Lib/email/quoprimime.py @@ -294,11 +294,15 @@ def header_decode(s): the high level email.header class for that functionality. """ # Check for regex =[a-fA-F0-9]{2} + s = s.replace("_", " ") + + if '=' not in s: + return s + result = [] s_len = len(s) i = 0 last_append = 0 - s = s.replace("_", " ") while i < s_len: if s[i] == '=' and i + 2 < s_len: @@ -311,9 +315,6 @@ def header_decode(s): continue i += 1 - if last_append == 0: - return s - if last_append < s_len: result.append(s[last_append:]) From 81ae23a47ef8a5c1c527e75244dfcfbc7a42f531 Mon Sep 17 00:00:00 2001 From: Marius Juston Date: Thu, 3 Apr 2025 22:18:45 -0500 Subject: [PATCH 10/11] Update Lib/email/quoprimime.py Co-authored-by: Adam Turner <9087854+AA-Turner@users.noreply.github.com> --- Lib/email/quoprimime.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/Lib/email/quoprimime.py b/Lib/email/quoprimime.py index 4f22e20e39d36b..994d7882df7862 100644 --- a/Lib/email/quoprimime.py +++ b/Lib/email/quoprimime.py @@ -279,11 +279,7 @@ def decode(encoded, eol=NL): decodestring = decode -_HEX_TO_CHAR = {} -for i in range(256): - key_lower = f"{i:02x}" - char_val = chr(i) - _HEX_TO_CHAR[key_lower] = char_val +_HEX_TO_CHAR = {f'{i:02x}': chr(i) for i in range(256)} # Header decoding is done a bit differently def header_decode(s): From 865948664eb15f178d714b599a1a291a594b9360 Mon Sep 17 00:00:00 2001 From: Marius-Juston Date: Thu, 3 Apr 2025 23:18:43 -0500 Subject: [PATCH 11/11] faster string concatenation --- Lib/email/quoprimime.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/Lib/email/quoprimime.py b/Lib/email/quoprimime.py index 994d7882df7862..3cdc88cc955ae4 100644 --- a/Lib/email/quoprimime.py +++ b/Lib/email/quoprimime.py @@ -295,7 +295,7 @@ def header_decode(s): if '=' not in s: return s - result = [] + result = '' s_len = len(s) i = 0 last_append = 0 @@ -304,14 +304,14 @@ def header_decode(s): if s[i] == '=' and i + 2 < s_len: if (hex_str := s[i + 1:i + 3].lower()) in _HEX_TO_CHAR: if last_append < i: - result.append(s[last_append:i]) - result.append(_HEX_TO_CHAR[hex_str]) + result += s[last_append:i] + result += _HEX_TO_CHAR[hex_str] i += 3 last_append = i continue i += 1 if last_append < s_len: - result.append(s[last_append:]) + result += s[last_append:] - return ''.join(result) + return result