Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Appearance settings

Commit 55faaa5

Browse files
[3.12] gh-74668: Fix support of bytes in urllib.parse.parse_qsl() (GH-115771) (GH-116366)
urllib.parse functions parse_qs() and parse_qsl() now support bytes arguments containing raw and percent-encoded non-ASCII data. (cherry picked from commit bdba8ef) Co-authored-by: Serhiy Storchaka <storchaka@gmail.com>
1 parent 85c32ad commit 55faaa5
Copy full SHA for 55faaa5

File tree

Expand file tree / Collapse file tree

3 files changed

+64
-26
lines changed
Filter options
Expand file tree / Collapse file tree

3 files changed

+64
-26
lines changed

‎Lib/test/test_urlparse.py

Copy file name to clipboard | Expand all lines: Lib/test/test_urlparse.py
+35 -2 | Lines changed: 35 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -19,6 +19,10 @@
1919
("=a", [('', 'a')]),
2020
("a", [('a', '')]),
2121
("a=", [('a', '')]),
22+
("a=b=c", [('a', 'b=c')]),
23+
("a%3Db=c", [('a=b', 'c')]),
24+
("a=b&c=d", [('a', 'b'), ('c', 'd')]),
25+
("a=b%26c=d", [('a', 'b&c=d')]),
2226
("&a=b", [('a', 'b')]),
2327
("a=a+b&b=b+c", [('a', 'a b'), ('b', 'b c')]),
2428
("a=1&a=2", [('a', '1'), ('a', '2')]),
@@ -29,13 +33,25 @@
2933
(b"=a", [(b'', b'a')]),
3034
(b"a", [(b'a', b'')]),
3135
(b"a=", [(b'a', b'')]),
36+
(b"a=b=c", [(b'a', b'b=c')]),
37+
(b"a%3Db=c", [(b'a=b', b'c')]),
38+
(b"a=b&c=d", [(b'a', b'b'), (b'c', b'd')]),
39+
(b"a=b%26c=d", [(b'a', b'b&c=d')]),
3240
(b"&a=b", [(b'a', b'b')]),
3341
(b"a=a+b&b=b+c", [(b'a', b'a b'), (b'b', b'b c')]),
3442
(b"a=1&a=2", [(b'a', b'1'), (b'a', b'2')]),
3543
(";a=b", [(';a', 'b')]),
3644
("a=a+b;b=b+c", [('a', 'a b;b=b c')]),
3745
(b";a=b", [(b';a', b'b')]),
3846
(b"a=a+b;b=b+c", [(b'a', b'a b;b=b c')]),
47+
48+
("\u0141=\xE9", [('\u0141', '\xE9')]),
49+
("%C5%81=%C3%A9", [('\u0141', '\xE9')]),
50+
("%81=%A9", [('\ufffd', '\ufffd')]),
51+
(b"\xc5\x81=\xc3\xa9", [(b'\xc5\x81', b'\xc3\xa9')]),
52+
(b"%C5%81=%C3%A9", [(b'\xc5\x81', b'\xc3\xa9')]),
53+
(b"\x81=\xA9", [(b'\x81', b'\xa9')]),
54+
(b"%81=%A9", [(b'\x81', b'\xa9')]),
3955
]
4056

4157
# Each parse_qs testcase is a two-tuple that contains
@@ -49,6 +65,10 @@
4965
("=a", {'': ['a']}),
5066
("a", {'a': ['']}),
5167
("a=", {'a': ['']}),
68+
("a=b=c", {'a': ['b=c']}),
69+
("a%3Db=c", {'a=b': ['c']}),
70+
("a=b&c=d", {'a': ['b'], 'c': ['d']}),
71+
("a=b%26c=d", {'a': ['b&c=d']}),
5272
("&a=b", {'a': ['b']}),
5373
("a=a+b&b=b+c", {'a': ['a b'], 'b': ['b c']}),
5474
("a=1&a=2", {'a': ['1', '2']}),
@@ -59,13 +79,26 @@
5979
(b"=a", {b'': [b'a']}),
6080
(b"a", {b'a': [b'']}),
6181
(b"a=", {b'a': [b'']}),
82+
(b"a=b=c", {b'a': [b'b=c']}),
83+
(b"a%3Db=c", {b'a=b': [b'c']}),
84+
(b"a=b&c=d", {b'a': [b'b'], b'c': [b'd']}),
85+
(b"a=b%26c=d", {b'a': [b'b&c=d']}),
6286
(b"&a=b", {b'a': [b'b']}),
6387
(b"a=a+b&b=b+c", {b'a': [b'a b'], b'b': [b'b c']}),
6488
(b"a=1&a=2", {b'a': [b'1', b'2']}),
6589
(";a=b", {';a': ['b']}),
6690
("a=a+b;b=b+c", {'a': ['a b;b=b c']}),
6791
(b";a=b", {b';a': [b'b']}),
6892
(b"a=a+b;b=b+c", {b'a':[ b'a b;b=b c']}),
93+
(b"a=a%E2%80%99b", {b'a': [b'a\xe2\x80\x99b']}),
94+
95+
("\u0141=\xE9", {'\u0141': ['\xE9']}),
96+
("%C5%81=%C3%A9", {'\u0141': ['\xE9']}),
97+
("%81=%A9", {'\ufffd': ['\ufffd']}),
98+
(b"\xc5\x81=\xc3\xa9", {b'\xc5\x81': [b'\xc3\xa9']}),
99+
(b"%C5%81=%C3%A9", {b'\xc5\x81': [b'\xc3\xa9']}),
100+
(b"\x81=\xA9", {b'\x81': [b'\xa9']}),
101+
(b"%81=%A9", {b'\x81': [b'\xa9']}),
69102
]
70103

71104
class UrlParseTestCase(unittest.TestCase):
@@ -995,8 +1028,8 @@ def test_parse_qsl_encoding(self):
9951028

9961029
def test_parse_qsl_max_num_fields(self):
9971030
with self.assertRaises(ValueError):
998-
urllib.parse.parse_qs('&'.join(['a=a']*11), max_num_fields=10)
999-
urllib.parse.parse_qs('&'.join(['a=a']*10), max_num_fields=10)
1031+
urllib.parse.parse_qsl('&'.join(['a=a']*11), max_num_fields=10)
1032+
urllib.parse.parse_qsl('&'.join(['a=a']*10), max_num_fields=10)
10001033

10011034
def test_parse_qs_separator(self):
10021035
parse_qs_semicolon_cases = [

‎Lib/urllib/parse.py

Copy file name to clipboard | Expand all lines: Lib/urllib/parse.py
+26 -24 | Lines changed: 26 additions & 24 deletions
Original file line number | Diff line number | Diff line change
@@ -763,42 +763,44 @@ def parse_qsl(qs, keep_blank_values=False, strict_parsing=False,
763763
764764
Returns a list, as G-d intended.
765765
"""
766-
qs, _coerce_result = _coerce_args(qs)
767-
separator, _ = _coerce_args(separator)
768766

769-
if not separator or (not isinstance(separator, (str, bytes))):
767+
if not separator or not isinstance(separator, (str, bytes)):
770768
raise ValueError("Separator must be of type string or bytes.")
769+
if isinstance(qs, str):
770+
if not isinstance(separator, str):
771+
separator = str(separator, 'ascii')
772+
eq = '='
773+
def _unquote(s):
774+
return unquote_plus(s, encoding=encoding, errors=errors)
775+
else:
776+
qs = bytes(qs)
777+
if isinstance(separator, str):
778+
separator = bytes(separator, 'ascii')
779+
eq = b'='
780+
def _unquote(s):
781+
return unquote_to_bytes(s.replace(b'+', b' '))
782+
783+
if not qs:
784+
return []
771785

772786
# If max_num_fields is defined then check that the number of fields
773787
# is less than max_num_fields. This prevents a memory exhaustion DOS
774788
# attack via post bodies with many fields.
775789
if max_num_fields is not None:
776-
num_fields = 1 + qs.count(separator) if qs else 0
790+
num_fields = 1 + qs.count(separator)
777791
if max_num_fields < num_fields:
778792
raise ValueError('Max number of fields exceeded')
779793

780794
r = []
781-
query_args = qs.split(separator) if qs else []
782-
for name_value in query_args:
783-
if not name_value and not strict_parsing:
784-
continue
785-
nv = name_value.split('=', 1)
786-
if len(nv) != 2:
787-
if strict_parsing:
795+
for name_value in qs.split(separator):
796+
if name_value or strict_parsing:
797+
name, has_eq, value = name_value.partition(eq)
798+
if not has_eq and strict_parsing:
788799
raise ValueError("bad query field: %r" % (name_value,))
789-
# Handle case of a control-name with no equal sign
790-
if keep_blank_values:
791-
nv.append('')
792-
else:
793-
continue
794-
if len(nv[1]) or keep_blank_values:
795-
name = nv[0].replace('+', ' ')
796-
name = unquote(name, encoding=encoding, errors=errors)
797-
name = _coerce_result(name)
798-
value = nv[1].replace('+', ' ')
799-
value = unquote(value, encoding=encoding, errors=errors)
800-
value = _coerce_result(value)
801-
r.append((name, value))
800+
if value or keep_blank_values:
801+
name = _unquote(name)
802+
value = _unquote(value)
803+
r.append((name, value))
802804
return r
803805

804806
def unquote_plus(string, encoding='utf-8', errors='replace'):
+3 -0 | Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,3 @@
1+
:mod:`urllib.parse` functions :func:`~urllib.parse.parse_qs` and
2+
:func:`~urllib.parse.parse_qsl` now support bytes arguments containing raw
3+
and percent-encoded non-ASCII data.

0 commit comments

Comments
0 (0)
Morty Proxy This is a proxified and sanitized view of the page, visit original site.