From ff16c4610197ca5ccca796398f3171378b840a4a Mon Sep 17 00:00:00 2001 From: Ashwin Ramaswami Date: Thu, 15 Aug 2019 03:15:30 +0000 Subject: [PATCH 1/7] fix: always add double slash if scheme is in uses_netloc --- Lib/test/test_urlparse.py | 6 ++++++ Lib/urllib/parse.py | 2 +- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/Lib/test/test_urlparse.py b/Lib/test/test_urlparse.py index 4ae6ed33858ce2..cd9173e4d842c5 100644 --- a/Lib/test/test_urlparse.py +++ b/Lib/test/test_urlparse.py @@ -157,6 +157,12 @@ def test_roundtrips(self): ('file:///tmp/junk.txt', ('file', '', '/tmp/junk.txt', '', '', ''), ('file', '', '/tmp/junk.txt', '', '')), + ('file:////tmp/junk.txt', + ('file', '', '//tmp/junk.txt', '', '', ''), + ('file', '', '//tmp/junk.txt', '', '')), + ('file://///tmp/junk.txt', + ('file', '', '///tmp/junk.txt', '', '', ''), + ('file', '', '///tmp/junk.txt', '', '')), ('imap://mail.python.org/mbox1', ('imap', 'mail.python.org', '/mbox1', '', '', ''), ('imap', 'mail.python.org', '/mbox1', '', '')), diff --git a/Lib/urllib/parse.py b/Lib/urllib/parse.py index b6608783a89471..9de24548612a98 100644 --- a/Lib/urllib/parse.py +++ b/Lib/urllib/parse.py @@ -490,7 +490,7 @@ def urlunsplit(components): empty query; the RFC states that these are equivalent).""" scheme, netloc, url, query, fragment, _coerce_result = ( _coerce_args(*components)) - if netloc or (scheme and scheme in uses_netloc and url[:2] != '//'): + if netloc or (scheme and scheme in uses_netloc): if url and url[:1] != '/': url = '/' + url url = '//' + (netloc or '') + url if scheme: From 151e39c59b9c7255d6db672d74ae20e32cbe5df6 Mon Sep 17 00:00:00 2001 From: "blurb-it[bot]" <43283697+blurb-it[bot]@users.noreply.github.com> Date: Tue, 27 Aug 2019 01:16:54 +0000 Subject: [PATCH 2/7] =?UTF-8?q?=F0=9F=93=9C=F0=9F=A4=96=20Added=20by=20blu?= =?UTF-8?q?rb=5Fit.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../NEWS.d/next/Library/2019-08-27-01-16-50.bpo-34276.4NIAiy.rst | 1 + 1 file changed, 1 insertion(+) create mode 100644 Misc/NEWS.d/next/Library/2019-08-27-01-16-50.bpo-34276.4NIAiy.rst diff --git a/Misc/NEWS.d/next/Library/2019-08-27-01-16-50.bpo-34276.4NIAiy.rst b/Misc/NEWS.d/next/Library/2019-08-27-01-16-50.bpo-34276.4NIAiy.rst new file mode 100644 index 00000000000000..0359babf27d61f --- /dev/null +++ b/Misc/NEWS.d/next/Library/2019-08-27-01-16-50.bpo-34276.4NIAiy.rst @@ -0,0 +1 @@ +Makes sure that file URIs with multiple leading slashes (file:////, etc.) are properly round-tripped. Patch by Ashwin Ramaswami \ No newline at end of file From 0e177fda981adcc83ecda2e84410a5470aee7c81 Mon Sep 17 00:00:00 2001 From: Ashwin Ramaswami Date: Mon, 26 Aug 2019 18:17:25 -0700 Subject: [PATCH 3/7] Update 2019-08-27-01-16-50.bpo-34276.4NIAiy.rst --- .../next/Library/2019-08-27-01-16-50.bpo-34276.4NIAiy.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Misc/NEWS.d/next/Library/2019-08-27-01-16-50.bpo-34276.4NIAiy.rst b/Misc/NEWS.d/next/Library/2019-08-27-01-16-50.bpo-34276.4NIAiy.rst index 0359babf27d61f..d48bf3659e408d 100644 --- a/Misc/NEWS.d/next/Library/2019-08-27-01-16-50.bpo-34276.4NIAiy.rst +++ b/Misc/NEWS.d/next/Library/2019-08-27-01-16-50.bpo-34276.4NIAiy.rst @@ -1 +1 @@ -Makes sure that file URIs with multiple leading slashes (file:////, etc.) are properly round-tripped. Patch by Ashwin Ramaswami \ No newline at end of file +Makes sure that file URIs with multiple leading slashes (file:////, etc.) are properly round-tripped by urllib.parse. Patch by Ashwin Ramaswami From cc9067bf9e24acbc990077b680a7ad6ed8d457df Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Fri, 29 Dec 2023 12:29:49 +0200 Subject: [PATCH 4/7] Fix urlunparse() and urlunsplit() for URIs with path starting with multiple slashes and no authority. --- Lib/test/test_urlparse.py | 24 +++++++++++++++++++ Lib/urllib/parse.py | 2 +- .../2019-08-27-01-16-50.bpo-34276.4NIAiy.rst | 3 ++- 3 files changed, 27 insertions(+), 2 deletions(-) diff --git a/Lib/test/test_urlparse.py b/Lib/test/test_urlparse.py index 9b783e10ca8814..d334cf80144904 100644 --- a/Lib/test/test_urlparse.py +++ b/Lib/test/test_urlparse.py @@ -142,6 +142,30 @@ def test_qs(self): def test_roundtrips(self): str_cases = [ + ('path/to/file', + ('', '', 'path/to/file', '', '', ''), + ('', '', 'path/to/file', '', '')), + ('/path/to/file', + ('', '', '/path/to/file', '', '', ''), + ('', '', '/path/to/file', '', '')), + ('//path/to/file', + ('', 'path', '/to/file', '', '', ''), + ('', 'path', '/to/file', '', '')), + ('////path/to/file', + ('', '', '//path/to/file', '', '', ''), + ('', '', '//path/to/file', '', '')), + ('scheme:path/to/file', + ('scheme', '', 'path/to/file', '', '', ''), + ('scheme', '', 'path/to/file', '', '')), + ('scheme:/path/to/file', + ('scheme', '', '/path/to/file', '', '', ''), + ('scheme', '', '/path/to/file', '', '')), + ('scheme://path/to/file', + ('scheme', 'path', '/to/file', '', '', ''), + ('scheme', 'path', '/to/file', '', '')), + ('scheme:////path/to/file', + ('scheme', '', '//path/to/file', '', '', ''), + ('scheme', '', '//path/to/file', '', '')), ('file:///tmp/junk.txt', ('file', '', '/tmp/junk.txt', '', '', ''), ('file', '', '/tmp/junk.txt', '', '')), diff --git a/Lib/urllib/parse.py b/Lib/urllib/parse.py index f3bedd0562372d..6157eb65dacaa6 100644 --- a/Lib/urllib/parse.py +++ b/Lib/urllib/parse.py @@ -525,7 +525,7 @@ def urlunsplit(components): empty query; the RFC states that these are equivalent).""" scheme, netloc, url, query, fragment, _coerce_result = ( _coerce_args(*components)) - if netloc or (scheme and scheme in uses_netloc): + if netloc or (scheme and scheme in uses_netloc) or url[:2] == '//': if url and url[:1] != '/': url = '/' + url url = '//' + (netloc or '') + url if scheme: diff --git a/Misc/NEWS.d/next/Library/2019-08-27-01-16-50.bpo-34276.4NIAiy.rst b/Misc/NEWS.d/next/Library/2019-08-27-01-16-50.bpo-34276.4NIAiy.rst index d48bf3659e408d..22457df03e65c9 100644 --- a/Misc/NEWS.d/next/Library/2019-08-27-01-16-50.bpo-34276.4NIAiy.rst +++ b/Misc/NEWS.d/next/Library/2019-08-27-01-16-50.bpo-34276.4NIAiy.rst @@ -1 +1,2 @@ -Makes sure that file URIs with multiple leading slashes (file:////, etc.) are properly round-tripped by urllib.parse. Patch by Ashwin Ramaswami +Fix :func:`urllib.parse.urlunparse` and :func:`urllib.parse.urlunsplit` for URIs with path starting with multiple slashes and no authority. +Based on patch by Ashwin Ramaswami. From fb03e5f3bd1342d4e2320d417bb086ad8dbf57ce Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Fri, 29 Dec 2023 16:25:43 +0200 Subject: [PATCH 5/7] Change issue number. --- ...NIAiy.rst => 2019-08-27-01-16-50.gh-issue-67693.4NIAiy.rst} | 0 .../next/Library/2023-12-29-16-31-31.gh-issue-67693.qiBhko.rst | 3 +++ 2 files changed, 3 insertions(+) rename Misc/NEWS.d/next/Library/{2019-08-27-01-16-50.bpo-34276.4NIAiy.rst => 2019-08-27-01-16-50.gh-issue-67693.4NIAiy.rst} (100%) create mode 100644 Misc/NEWS.d/next/Library/2023-12-29-16-31-31.gh-issue-67693.qiBhko.rst diff --git a/Misc/NEWS.d/next/Library/2019-08-27-01-16-50.bpo-34276.4NIAiy.rst b/Misc/NEWS.d/next/Library/2019-08-27-01-16-50.gh-issue-67693.4NIAiy.rst similarity index 100% rename from Misc/NEWS.d/next/Library/2019-08-27-01-16-50.bpo-34276.4NIAiy.rst rename to Misc/NEWS.d/next/Library/2019-08-27-01-16-50.gh-issue-67693.4NIAiy.rst diff --git a/Misc/NEWS.d/next/Library/2023-12-29-16-31-31.gh-issue-67693.qiBhko.rst b/Misc/NEWS.d/next/Library/2023-12-29-16-31-31.gh-issue-67693.qiBhko.rst new file mode 100644 index 00000000000000..de2d06c00b679e --- /dev/null +++ b/Misc/NEWS.d/next/Library/2023-12-29-16-31-31.gh-issue-67693.qiBhko.rst @@ -0,0 +1,3 @@ +Fix :func:`urllib.parse.urlunparse` and :func:`urllib.parse.urlunsplit` for +URIs with path starting with multiple slashes and no authority. Based on +patch by Ashwin Ramaswami. From 7f495b01315eaafe69814fb58b135a76276bb5df Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Mon, 13 May 2024 12:00:57 +0300 Subject: [PATCH 6/7] Delete Misc/NEWS.d/next/Library/2023-12-29-16-31-31.gh-issue-67693.qiBhko.rst --- .../next/Library/2023-12-29-16-31-31.gh-issue-67693.qiBhko.rst | 3 --- 1 file changed, 3 deletions(-) delete mode 100644 Misc/NEWS.d/next/Library/2023-12-29-16-31-31.gh-issue-67693.qiBhko.rst diff --git a/Misc/NEWS.d/next/Library/2023-12-29-16-31-31.gh-issue-67693.qiBhko.rst b/Misc/NEWS.d/next/Library/2023-12-29-16-31-31.gh-issue-67693.qiBhko.rst deleted file mode 100644 index de2d06c00b679e..00000000000000 --- a/Misc/NEWS.d/next/Library/2023-12-29-16-31-31.gh-issue-67693.qiBhko.rst +++ /dev/null @@ -1,3 +0,0 @@ -Fix :func:`urllib.parse.urlunparse` and :func:`urllib.parse.urlunsplit` for -URIs with path starting with multiple slashes and no authority. Based on -patch by Ashwin Ramaswami. From 62957fe05a1b50ad980848d35a601cb2028a0b58 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Mon, 13 May 2024 12:58:22 +0300 Subject: [PATCH 7/7] Add few more tests. --- Lib/test/test_urlparse.py | 40 ++++++++++++++++++++++++++++++++++++--- 1 file changed, 37 insertions(+), 3 deletions(-) diff --git a/Lib/test/test_urlparse.py b/Lib/test/test_urlparse.py index 521e8339958af2..2cf03d046a5b87 100644 --- a/Lib/test/test_urlparse.py +++ b/Lib/test/test_urlparse.py @@ -103,7 +103,9 @@ class UrlParseTestCase(unittest.TestCase): - def checkRoundtrips(self, url, parsed, split): + def checkRoundtrips(self, url, parsed, split, url2=None): + if url2 is None: + url2 = url result = urllib.parse.urlparse(url) self.assertSequenceEqual(result, parsed) t = (result.scheme, result.netloc, result.path, @@ -111,7 +113,7 @@ def checkRoundtrips(self, url, parsed, split): self.assertSequenceEqual(t, parsed) # put it back together and it should be the same result2 = urllib.parse.urlunparse(result) - self.assertSequenceEqual(result2, url) + self.assertSequenceEqual(result2, url2) self.assertSequenceEqual(result2, result.geturl()) # the result of geturl() is a fixpoint; we can always parse it @@ -137,7 +139,7 @@ def checkRoundtrips(self, url, parsed, split): result.query, result.fragment) self.assertSequenceEqual(t, split) result2 = urllib.parse.urlunsplit(result) - self.assertSequenceEqual(result2, url) + self.assertSequenceEqual(result2, url2) self.assertSequenceEqual(result2, result.geturl()) # check the fixpoint property of re-parsing the result of geturl() @@ -243,6 +245,38 @@ def _encode(t): for url, parsed, split in str_cases + bytes_cases: self.checkRoundtrips(url, parsed, split) + def test_roundtrips_normalization(self): + str_cases = [ + ('///path/to/file', + '/path/to/file', + ('', '', '/path/to/file', '', '', ''), + ('', '', '/path/to/file', '', '')), + ('scheme:///path/to/file', + 'scheme:/path/to/file', + ('scheme', '', '/path/to/file', '', '', ''), + ('scheme', '', '/path/to/file', '', '')), + ('file:/tmp/junk.txt', + 'file:///tmp/junk.txt', + ('file', '', '/tmp/junk.txt', '', '', ''), + ('file', '', '/tmp/junk.txt', '', '')), + ('http:/tmp/junk.txt', + 'http:///tmp/junk.txt', + ('http', '', '/tmp/junk.txt', '', '', ''), + ('http', '', '/tmp/junk.txt', '', '')), + ('https:/tmp/junk.txt', + 'https:///tmp/junk.txt', + ('https', '', '/tmp/junk.txt', '', '', ''), + ('https', '', '/tmp/junk.txt', '', '')), + ] + def _encode(t): + return (t[0].encode('ascii'), + t[1].encode('ascii'), + tuple(x.encode('ascii') for x in t[2]), + tuple(x.encode('ascii') for x in t[3])) + bytes_cases = [_encode(x) for x in str_cases] + for url, url2, parsed, split in str_cases + bytes_cases: + self.checkRoundtrips(url, parsed, split, url2) + def test_http_roundtrips(self): # urllib.parse.urlsplit treats 'http:' as an optimized special case, # so we test both 'http:' and 'https:' in all the following.