python · vstinner · Jun 20, 2017
diff --git a/Lib/test/test_urlparse.py b/Lib/test/test_urlparse.py
@@ -981,6 +981,15 @@ def test_splittype(self):
        self.assertEqual(splittype('type:'), ('type', ''))
        self.assertEqual(splittype('type:opaque:string'), ('type', 'opaque:string'))

+        # bpo-30713: The newline character U+000A is invalid in URLs
+        for url in (
+            '\ntype:string',
+            'ty\npe:string',
+            'type:str\ning',
+            'type:string\n',
+        ):
+            self.assertEqual(splittype(url), (None, url))
+
    def test_splithost(self):
        splithost = urllib.parse.splithost
        self.assertEqual(splithost('//www.example.org:80/foo/bar/baz.html'),
@@ -1010,6 +1019,15 @@ def test_splithost(self):
        self.assertEqual(splithost("//example.net/file#"),
                         ('example.net', '/file#'))

+        # bpo-30713: The newline character U+000A is invalid in URLs
+        for url in (
+            '\n//hostname/url',
+            '//host\nname/url',
+            '//hostname/u\nrl',
+            '//hostname/url\n',
+        ):
+            self.assertEqual(splithost(url), (None, url))
+
    def test_splituser(self):
        splituser = urllib.parse.splituser
        self.assertEqual(splituser('User:Pass@www.python.org:080'),
@@ -1052,6 +1070,15 @@ def test_splitport(self):
        self.assertEqual(splitport('[::1]'), ('[::1]', None))
        self.assertEqual(splitport(':88'), ('', '88'))

+        # bpo-30713: The newline character U+000A is invalid in URLs
+        for url in (
+            '\nparrot:88',
+            'par\nrot:88',
+            'parrot:8\n8',
+            'parrot:88\n',
+        ):
+            self.assertEqual(splitport(url), (url, None))
+
    def test_splitnport(self):
        splitnport = urllib.parse.splitnport
        self.assertEqual(splitnport('parrot:88'), ('parrot', 88))

diff --git a/Lib/urllib/parse.py b/Lib/urllib/parse.py
@@ -934,9 +934,9 @@ def splittype(url):
    """splittype('type:opaquestring') --> 'type', 'opaquestring'."""
    global _typeprog
    if _typeprog is None:
-        _typeprog = re.compile('([^/:]+):(.*)', re.DOTALL)
+        _typeprog = re.compile('([^/:\n]+):(.*)')

-    match = _typeprog.match(url)
+    match = _typeprog.fullmatch(url)
    if match:
        scheme, data = match.groups()
        return scheme.lower(), data
@@ -947,9 +947,9 @@ def splithost(url):
    """splithost('//host[:port]/path') --> 'host[:port]', '/path'."""
    global _hostprog
    if _hostprog is None:
-        _hostprog = re.compile('//([^/#?]*)(.*)', re.DOTALL)
+        _hostprog = re.compile('//([^/#?\n]*)(.*)')

-    match = _hostprog.match(url)
+    match = _hostprog.fullmatch(url)
    if match:
        host_port, path = match.groups()
        if path and path[0] != '/':
@@ -973,9 +973,9 @@ def splitport(host):
    """splitport('host:port') --> 'host', 'port'."""
    global _portprog
    if _portprog is None:
-        _portprog = re.compile('(.*):([0-9]*)$', re.DOTALL)
+        _portprog = re.compile('(.*):([0-9]*)')

-    match = _portprog.match(host)
+    match = _portprog.fullmatch(host)
    if match:
        host, port = match.groups()
        if port:

diff --git a/Misc/NEWS b/Misc/NEWS
@@ -368,6 +368,9 @@ Extension Modules
 Library
 -------

+- [Security] bpo-30713: urllib.parse's splittype(), splithost() and splitport()
+  now reject URLs that contain a newline character, returning them unsplit.
+
 - bpo-29755: Fixed the lgettext() family of functions in the gettext module.
  They now always return bytes.