Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Appearance settings

Commit e237b25

Browse files
gh-67693: Fix urlunparse() and urlunsplit() for URIs with path starting with multiple slashes and no authority (GH-113563)
1 parent e04cd96 commit e237b25
Copy full SHA for e237b25

File tree

3 files changed

+70
-4
lines changed
Filter options

3 files changed

+70
-4
lines changed

‎Lib/test/test_urlparse.py

Copy file name to clipboard | Expand all lines: Lib/test/test_urlparse.py
+67 -3 Lines changed: 67 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -103,15 +103,17 @@
103103

104104
class UrlParseTestCase(unittest.TestCase):
105105

106-
def checkRoundtrips(self, url, parsed, split):
106+
def checkRoundtrips(self, url, parsed, split, url2=None):
107+
if url2 is None:
108+
url2 = url
107109
result = urllib.parse.urlparse(url)
108110
self.assertSequenceEqual(result, parsed)
109111
t = (result.scheme, result.netloc, result.path,
110112
result.params, result.query, result.fragment)
111113
self.assertSequenceEqual(t, parsed)
112114
# put it back together and it should be the same
113115
result2 = urllib.parse.urlunparse(result)
114-
self.assertSequenceEqual(result2, url)
116+
self.assertSequenceEqual(result2, url2)
115117
self.assertSequenceEqual(result2, result.geturl())
116118

117119
# the result of geturl() is a fixpoint; we can always parse it
@@ -137,7 +139,7 @@ def checkRoundtrips(self, url, parsed, split):
137139
result.query, result.fragment)
138140
self.assertSequenceEqual(t, split)
139141
result2 = urllib.parse.urlunsplit(result)
140-
self.assertSequenceEqual(result2, url)
142+
self.assertSequenceEqual(result2, url2)
141143
self.assertSequenceEqual(result2, result.geturl())
142144

143145
# check the fixpoint property of re-parsing the result of geturl()
@@ -175,9 +177,39 @@ def test_qs(self):
175177

176178
def test_roundtrips(self):
177179
str_cases = [
180+
('path/to/file',
181+
('', '', 'path/to/file', '', '', ''),
182+
('', '', 'path/to/file', '', '')),
183+
('/path/to/file',
184+
('', '', '/path/to/file', '', '', ''),
185+
('', '', '/path/to/file', '', '')),
186+
('//path/to/file',
187+
('', 'path', '/to/file', '', '', ''),
188+
('', 'path', '/to/file', '', '')),
189+
('////path/to/file',
190+
('', '', '//path/to/file', '', '', ''),
191+
('', '', '//path/to/file', '', '')),
192+
('scheme:path/to/file',
193+
('scheme', '', 'path/to/file', '', '', ''),
194+
('scheme', '', 'path/to/file', '', '')),
195+
('scheme:/path/to/file',
196+
('scheme', '', '/path/to/file', '', '', ''),
197+
('scheme', '', '/path/to/file', '', '')),
198+
('scheme://path/to/file',
199+
('scheme', 'path', '/to/file', '', '', ''),
200+
('scheme', 'path', '/to/file', '', '')),
201+
('scheme:////path/to/file',
202+
('scheme', '', '//path/to/file', '', '', ''),
203+
('scheme', '', '//path/to/file', '', '')),
178204
('file:///tmp/junk.txt',
179205
('file', '', '/tmp/junk.txt', '', '', ''),
180206
('file', '', '/tmp/junk.txt', '', '')),
207+
('file:////tmp/junk.txt',
208+
('file', '', '//tmp/junk.txt', '', '', ''),
209+
('file', '', '//tmp/junk.txt', '', '')),
210+
('file://///tmp/junk.txt',
211+
('file', '', '///tmp/junk.txt', '', '', ''),
212+
('file', '', '///tmp/junk.txt', '', '')),
181213
('imap://mail.python.org/mbox1',
182214
('imap', 'mail.python.org', '/mbox1', '', '', ''),
183215
('imap', 'mail.python.org', '/mbox1', '', '')),
@@ -213,6 +245,38 @@ def _encode(t):
213245
for url, parsed, split in str_cases + bytes_cases:
214246
self.checkRoundtrips(url, parsed, split)
215247

248+
def test_roundtrips_normalization(self):
249+
str_cases = [
250+
('///path/to/file',
251+
'/path/to/file',
252+
('', '', '/path/to/file', '', '', ''),
253+
('', '', '/path/to/file', '', '')),
254+
('scheme:///path/to/file',
255+
'scheme:/path/to/file',
256+
('scheme', '', '/path/to/file', '', '', ''),
257+
('scheme', '', '/path/to/file', '', '')),
258+
('file:/tmp/junk.txt',
259+
'file:///tmp/junk.txt',
260+
('file', '', '/tmp/junk.txt', '', '', ''),
261+
('file', '', '/tmp/junk.txt', '', '')),
262+
('http:/tmp/junk.txt',
263+
'http:///tmp/junk.txt',
264+
('http', '', '/tmp/junk.txt', '', '', ''),
265+
('http', '', '/tmp/junk.txt', '', '')),
266+
('https:/tmp/junk.txt',
267+
'https:///tmp/junk.txt',
268+
('https', '', '/tmp/junk.txt', '', '', ''),
269+
('https', '', '/tmp/junk.txt', '', '')),
270+
]
271+
def _encode(t):
272+
return (t[0].encode('ascii'),
273+
t[1].encode('ascii'),
274+
tuple(x.encode('ascii') for x in t[2]),
275+
tuple(x.encode('ascii') for x in t[3]))
276+
bytes_cases = [_encode(x) for x in str_cases]
277+
for url, url2, parsed, split in str_cases + bytes_cases:
278+
self.checkRoundtrips(url, parsed, split, url2)
279+
216280
def test_http_roundtrips(self):
217281
# urllib.parse.urlsplit treats 'http:' as an optimized special case,
218282
# so we test both 'http:' and 'https:' in all the following.

‎Lib/urllib/parse.py

Copy file name to clipboard | Expand all lines: Lib/urllib/parse.py
+1 -1 Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -525,7 +525,7 @@ def urlunsplit(components):
525525
empty query; the RFC states that these are equivalent)."""
526526
scheme, netloc, url, query, fragment, _coerce_result = (
527527
_coerce_args(*components))
528-
if netloc or (scheme and scheme in uses_netloc and url[:2] != '//'):
528+
if netloc or (scheme and scheme in uses_netloc) or url[:2] == '//':
529529
if url and url[:1] != '/': url = '/' + url
530530
url = '//' + (netloc or '') + url
531531
if scheme:
+2 -0 Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
Fix :func:`urllib.parse.urlunparse` and :func:`urllib.parse.urlunsplit` for URIs with path starting with multiple slashes and no authority.
2+
Based on patch by Ashwin Ramaswami.

0 commit comments

Comments
0 (0)
Morty Proxy: This is a proxified and sanitized view of the page; visit the original site.