62
62
63
63
# regex based decoder.
64
64
# Substitutes every "=XX" escape (two hex digits) in a bytes object with the
# single byte it encodes.  bytes.fromhex() takes a str, so the matched hex
# digits are decoded before conversion.
_q_byte_subber = functools.partial(re.compile(br'=([a-fA-F0-9]{2})').sub,
        lambda m: bytes.fromhex(m.group(1).decode()))
66
66
67
67
def decode_q (encoded ):
68
68
encoded = encoded .replace (b'_' , b' ' )
@@ -98,30 +98,42 @@ def len_q(bstring):
98
98
#
99
99
100
100
def decode_b(encoded):
    """Decode base64 *encoded* bytes, tolerating malformed input.

    Returns a ``(decoded_bytes, defects)`` tuple.  ``defects`` is a list of
    defect instances from ``errors`` describing what had to be repaired:

    * ``InvalidBase64PaddingDefect`` when padding had to be added,
    * ``InvalidBase64CharactersDefect`` when the strict decode failed and
      the input was decoded non-strictly instead,
    * ``InvalidBase64LengthDefect`` when the input cannot be decoded at
      all; in that case *encoded* itself is returned unchanged.
    """
    # First try decoding with validate=True, fixing the padding if needed.
    # This will succeed only if encoded includes no invalid characters.
    pad_err = len(encoded) % 4
    missing_padding = b'==='[:4 - pad_err] if pad_err else b''
    try:
        return (
            base64.b64decode(encoded + missing_padding, validate=True),
            [errors.InvalidBase64PaddingDefect()] if pad_err else [],
        )
    except binascii.Error:
        # Since we had correct padding, this is likely an invalid char error.
        #
        # The non-alphabet characters are ignored as far as padding
        # goes, but we don't know how many there are.  So try without adding
        # padding to see if it works.
        try:
            return (
                base64.b64decode(encoded, validate=False),
                [errors.InvalidBase64CharactersDefect()],
            )
        except binascii.Error:
            # Add as much padding as could possibly be necessary (extra padding
            # is ignored).
            try:
                return (
                    base64.b64decode(encoded + b'==', validate=False),
                    [errors.InvalidBase64CharactersDefect(),
                     errors.InvalidBase64PaddingDefect()],
                )
            except binascii.Error:
                # This only happens when the encoded string's length is 1 more
                # than a multiple of 4, which is invalid.
                #
                # bpo-27397: Just return the encoded string since there's no
                # way to decode.
                return encoded, [errors.InvalidBase64LengthDefect()]
125
137
126
138
def encode_b(bstring):
    """Return the base64 encoding of the bytes *bstring* as an ASCII str."""
    return base64.b64encode(bstring).decode('ascii')
@@ -167,15 +179,15 @@ def decode(ew):
167
179
# Turn the CTE decoded bytes into unicode.
168
180
try :
169
181
string = bstring .decode (charset )
170
- except UnicodeError :
182
+ except UnicodeDecodeError :
171
183
defects .append (errors .UndecodableBytesDefect ("Encoded word "
172
- "contains bytes not decodable using {} charset" . format ( charset ) ))
184
+ f "contains bytes not decodable using { charset !r } charset" ))
173
185
string = bstring .decode (charset , 'surrogateescape' )
174
- except LookupError :
186
+ except ( LookupError , UnicodeEncodeError ) :
175
187
string = bstring .decode ('ascii' , 'surrogateescape' )
176
188
if charset .lower () != 'unknown-8bit' :
177
- defects .append (errors .CharsetError ("Unknown charset {} "
178
- "in encoded word; decoded as unknown bytes" . format ( charset ) ))
189
+ defects .append (errors .CharsetError (f "Unknown charset { charset !r } "
190
+ f "in encoded word; decoded as unknown bytes" ))
179
191
return string , charset , lang , defects
180
192
181
193
0 commit comments