From fe00393834005652a856d195db23cf4134068696 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mat=C4=9Bj=20Cepl?= Date: Wed, 18 Mar 2020 01:31:20 +0100 Subject: [PATCH 1/4] bpo-38576: Disallow control characters in hostnames in http.client (GH-18995) Add host validation for control characters for more CVE-2019-18348 protection. (cherry picked from commit 83fc70159b24) Co-authored-by: Ashwin Ramaswami --- Lib/httplib.py | 13 ++++++++++++ Lib/test/test_httplib.py | 13 +++++++++++- Lib/test/test_urllib2.py | 20 +++++++++++++++++-- .../2020-03-18-01-30-50.bpo-38576.cvI68q.rst | 3 +++ 4 files changed, 46 insertions(+), 3 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2020-03-18-01-30-50.bpo-38576.cvI68q.rst diff --git a/Lib/httplib.py b/Lib/httplib.py index 79532b91149b17e..fcc4152aaf268dc 100644 --- a/Lib/httplib.py +++ b/Lib/httplib.py @@ -745,6 +745,8 @@ def __init__(self, host, port=None, strict=None, (self.host, self.port) = self._get_hostport(host, port) + self._validate_host(self.host) + # This is stored as an instance variable to allow unittests # to replace with a suitable mock self._create_connection = socket.create_connection @@ -1029,6 +1031,17 @@ def _validate_path(self, url): ).format(matched=match.group(), url=url) raise InvalidURL(msg) + def _validate_host(self, host): + """Validate a host so it doesn't contain control characters.""" + # Prevent CVE-2019-18348. + match = _contains_disallowed_url_pchar_re.search(host) + if match: + msg = ( + "URL can't contain control characters. {host!r} " + "(found at least {matched!r})" + ).format(matched=match.group(), host=host) + raise InvalidURL(msg) + def putheader(self, header, *values): """Send a request header line to the server. diff --git a/Lib/test/test_httplib.py b/Lib/test/test_httplib.py index 5462fdd503c8311..d8a57f73530da6a 100644 --- a/Lib/test/test_httplib.py +++ b/Lib/test/test_httplib.py @@ -702,7 +702,7 @@ def test_proxy_tunnel_without_status_line(self): with self.assertRaisesRegexp(socket.error, "Invalid response"): conn._tunnel() - def test_putrequest_override_validation(self): + def test_putrequest_override_domain_validation(self): """ It should be possible to override the default validation behavior in putrequest (bpo-38216). @@ -715,6 +715,17 @@ def _validate_path(self, url): conn.sock = FakeSocket('') conn.putrequest('GET', '/\x00') + def test_putrequest_override_host_validation(self): + class UnsafeHTTPConnection(httplib.HTTPConnection): + def _validate_host(self, url): + pass + + conn = UnsafeHTTPConnection('example.com\r\n') + conn.sock = FakeSocket('') + # set skip_host so a ValueError is not raised upon adding the + # invalid URL as the value of the "Host:" header + conn.putrequest('GET', '/', skip_host=1) + class OfflineTest(TestCase): def test_responses(self): diff --git a/Lib/test/test_urllib2.py b/Lib/test/test_urllib2.py index 9531818e16b25d9..0cb4a2c0d5cc4e4 100644 --- a/Lib/test/test_urllib2.py +++ b/Lib/test/test_urllib2.py @@ -1321,7 +1321,7 @@ def test_unsupported_algorithm(self): ) @unittest.skipUnless(ssl, "ssl module required") - def test_url_with_control_char_rejected(self): + def test_url_path_with_control_char_rejected(self): for char_no in range(0, 0x21) + range(0x7f, 0x100): char = chr(char_no) schemeless_url = "//localhost:7777/test%s/" % char @@ -1345,7 +1345,7 @@ def test_url_with_control_char_rejected(self): self.unfakehttp() @unittest.skipUnless(ssl, "ssl module required") - def test_url_with_newline_header_injection_rejected(self): + def test_url_path_with_newline_header_injection_rejected(self): self.fakehttp(b"HTTP/1.1 200 OK\r\n\r\nHello.") host = "localhost:7777?a=1 HTTP/1.1\r\nX-injected: header\r\nTEST: 123" schemeless_url = "//" + host + ":8080/test/?test=a" @@ -1365,6 +1365,22 @@ def test_url_with_newline_header_injection_rejected(self): finally: self.unfakehttp() + @unittest.skipUnless(ssl, "ssl module required") + def test_url_host_with_control_char_rejected(self): + for char_no in list(range(0, 0x21)) + [0x7f]: + char = chr(char_no) + schemeless_url = "//localhost{char}/test/".format(char=char) + self.fakehttp(b"HTTP/1.1 200 OK\r\n\r\nHello.") + try: + escaped_char_repr = repr(char).replace('\\', r'\\') + InvalidURL = http.client.InvalidURL + with self.assertRaisesRegex( + InvalidURL, "contain control.*{escaped_char_repr}".format(escaped_char_repr=escaped_char_repr)): + urlopen("http:{schemeless_url}".format(schemeless_url=schemeless_url)) + with self.assertRaisesRegex(InvalidURL, "contain control.*{escaped_char_repr}".format(escaped_char_repr=escaped_char_repr)): + urlopen("https:{schemeless_url}".format(schemeless_url=schemeless_url)) + finally: + self.unfakehttp() class RequestTests(unittest.TestCase): diff --git a/Misc/NEWS.d/next/Library/2020-03-18-01-30-50.bpo-38576.cvI68q.rst b/Misc/NEWS.d/next/Library/2020-03-18-01-30-50.bpo-38576.cvI68q.rst new file mode 100644 index 000000000000000..96af32d34d096b7 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2020-03-18-01-30-50.bpo-38576.cvI68q.rst @@ -0,0 +1,3 @@ +Disallow control characters in hostnames in http.client, addressing +CVE-2019-18348. Such potentially malicious header injection URLs now cause a +InvalidURL to be raised. From 3200154de90b414260308d29d178ae09746dd26f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mat=C4=9Bj=20Cepl?= Date: Wed, 18 Mar 2020 02:02:31 +0100 Subject: [PATCH 2/4] Replace http.client with httplib. --- Lib/test/test_urllib2.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Lib/test/test_urllib2.py b/Lib/test/test_urllib2.py index 0cb4a2c0d5cc4e4..6728593febd382e 100644 --- a/Lib/test/test_urllib2.py +++ b/Lib/test/test_urllib2.py @@ -1373,7 +1373,7 @@ def test_url_host_with_control_char_rejected(self): self.fakehttp(b"HTTP/1.1 200 OK\r\n\r\nHello.") try: escaped_char_repr = repr(char).replace('\\', r'\\') - InvalidURL = http.client.InvalidURL + InvalidURL = httplib.InvalidURL with self.assertRaisesRegex( InvalidURL, "contain control.*{escaped_char_repr}".format(escaped_char_repr=escaped_char_repr)): urlopen("http:{schemeless_url}".format(schemeless_url=schemeless_url)) From d0ea0b4f2393f6d2311c6137b966f617aad836c8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mat=C4=9Bj=20Cepl?= Date: Wed, 18 Mar 2020 02:15:01 +0100 Subject: [PATCH 3/4] assertRaisesRegex -> assertRaisesRegexp --- Lib/test/test_urllib2.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Lib/test/test_urllib2.py b/Lib/test/test_urllib2.py index 6728593febd382e..5654f5be6589943 100644 --- a/Lib/test/test_urllib2.py +++ b/Lib/test/test_urllib2.py @@ -1374,10 +1374,10 @@ def test_url_host_with_control_char_rejected(self): try: escaped_char_repr = repr(char).replace('\\', r'\\') InvalidURL = httplib.InvalidURL - with self.assertRaisesRegex( + with self.assertRaisesRegexp( InvalidURL, "contain control.*{escaped_char_repr}".format(escaped_char_repr=escaped_char_repr)): urlopen("http:{schemeless_url}".format(schemeless_url=schemeless_url)) - with self.assertRaisesRegex(InvalidURL, "contain control.*{escaped_char_repr}".format(escaped_char_repr=escaped_char_repr)): + with self.assertRaisesRegexp(InvalidURL, "contain control.*{escaped_char_repr}".format(escaped_char_repr=escaped_char_repr)): urlopen("https:{schemeless_url}".format(schemeless_url=schemeless_url)) finally: self.unfakehttp() From 50f014bc381867bb3e611f0081f687a89e86c2ee Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mat=C4=9Bj=20Cepl?= Date: Wed, 18 Mar 2020 09:13:13 +0100 Subject: [PATCH 4/4] Finally, make really sure this code is py2k-compliant. --- Lib/test/test_urllib2.py | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/Lib/test/test_urllib2.py b/Lib/test/test_urllib2.py index 5654f5be6589943..20a0f581436d640 100644 --- a/Lib/test/test_urllib2.py +++ b/Lib/test/test_urllib2.py @@ -1357,11 +1357,12 @@ def test_url_path_with_newline_header_injection_rejected(self): # calls urllib.parse.quote() on the URL which makes all of the # above attempts at injection within the url _path_ safe. InvalidURL = httplib.InvalidURL - with self.assertRaisesRegexp( - InvalidURL, r"contain control.*\\r.*(found at least . .)"): - urllib2.urlopen("http:" + schemeless_url) - with self.assertRaisesRegexp(InvalidURL, r"contain control.*\\n"): - urllib2.urlopen("https:" + schemeless_url) + with self.assertRaisesRegexp(InvalidURL, + r"contain control.*\\r.*(found at least . .)"): + urllib2.urlopen("http:{}".format(schemeless_url)) + with self.assertRaisesRegexp(InvalidURL, + r"contain control.*\\n"): + urllib2.urlopen("https:{}".format(schemeless_url)) finally: self.unfakehttp() @@ -1369,16 +1370,17 @@ def test_url_path_with_newline_header_injection_rejected(self): def test_url_host_with_control_char_rejected(self): for char_no in list(range(0, 0x21)) + [0x7f]: char = chr(char_no) - schemeless_url = "//localhost{char}/test/".format(char=char) + schemeless_url = "//localhost{}/test/".format(char) self.fakehttp(b"HTTP/1.1 200 OK\r\n\r\nHello.") try: escaped_char_repr = repr(char).replace('\\', r'\\') InvalidURL = httplib.InvalidURL - with self.assertRaisesRegexp( - InvalidURL, "contain control.*{escaped_char_repr}".format(escaped_char_repr=escaped_char_repr)): - urlopen("http:{schemeless_url}".format(schemeless_url=schemeless_url)) - with self.assertRaisesRegexp(InvalidURL, "contain control.*{escaped_char_repr}".format(escaped_char_repr=escaped_char_repr)): - urlopen("https:{schemeless_url}".format(schemeless_url=schemeless_url)) + with self.assertRaisesRegexp(InvalidURL, + "contain control.*{}".format(escaped_char_repr)): + urllib2.urlopen("http:{}".format(schemeless_url)) + with self.assertRaisesRegexp(InvalidURL, + "contain control.*{}".format(escaped_char_repr)): + urllib2.urlopen("https:{}".format(schemeless_url)) finally: self.unfakehttp()