diff --git a/Lib/http/client.py b/Lib/http/client.py index f61267e108a524..eb7a55c9e8d7d5 100644 --- a/Lib/http/client.py +++ b/Lib/http/client.py @@ -1095,8 +1095,9 @@ def putrequest(self, method, url, skip_host=False, f"(found at least {match.group()!r})") request = '%s %s %s' % (method, url, self._http_vsn_str) - # Non-ASCII characters should have been eliminated earlier - self._output(request.encode('ascii')) + # Encode with surrogate escapes, to allow non-ascii bytes without + # making it too easy to write an out-of-spec client + self._output(request.encode('ascii', errors='surrogateescape')) if self._http_vsn == 11: # Issue some standard headers for better HTTP/1.1 compliance diff --git a/Lib/test/test_httplib.py b/Lib/test/test_httplib.py index 9148169cc7c2e5..9b7d2942fd7c51 100644 --- a/Lib/test/test_httplib.py +++ b/Lib/test/test_httplib.py @@ -275,6 +275,37 @@ def test_ipv6host_header(self): conn.request('GET', '/foo') self.assertTrue(sock.data.startswith(expected)) + def test_request_path_handling(self): + happy_cases = ( + ('/', b'/'), + ('', b'/'), + ('/\udce4\udcbd\udca0\udce5\udca5\udcbd', + b'/\xe4\xbd\xa0\xe5\xa5\xbd'), + ) + for caller_path, expected_path in happy_cases: + with self.subTest((caller_path, expected_path)): + conn = client.HTTPConnection('server.fqdn') + sock = FakeSocket('') + conn.sock = sock + conn.request('GET', caller_path) + expected = (b'GET ' + expected_path + b' HTTP/1.1\r\n' + b'Host: server.fqdn\r\n' + b'Accept-Encoding: identity\r\n\r\n') + self.assertEqual(sock.data, expected) + + error_cases = ( + '/\xe4\xbd\xa0\xe5\xa5\xbd', + '/\u4f60\u597d', + ) + for caller_path in error_cases: + with self.subTest(caller_path): + conn = client.HTTPConnection('server.fqdn') + sock = FakeSocket('') + conn.sock = sock + with self.assertRaises(UnicodeEncodeError): + conn.request('GET', caller_path) + self.assertEqual(sock.data, b'') + def test_malformed_headers_coped_with(self): # Issue 19996 body = "HTTP/1.1 200 OK\r\nFirst: val\r\n: 
nval\r\nSecond: val\r\n\r\n" @@ -720,8 +751,7 @@ def test_send_file(self): sock = FakeSocket(body) conn.sock = sock conn.request('GET', '/foo', body) - self.assertTrue(sock.data.startswith(expected), '%r != %r' % - (sock.data[:len(expected)], expected)) + self.assertEqual(sock.data[:len(expected)], expected) def test_send(self): expected = b'this is a test this is only a test' diff --git a/Misc/NEWS.d/next/Library/2019-07-08-09-20-10.bpo-36274.8XicsH.rst b/Misc/NEWS.d/next/Library/2019-07-08-09-20-10.bpo-36274.8XicsH.rst new file mode 100644 index 00000000000000..979a60f6c66220 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2019-07-08-09-20-10.bpo-36274.8XicsH.rst @@ -0,0 +1,3 @@ +``http.client`` can now make requests with non-ASCII request-targets using +surrogate escape sequences. Callers are still encouraged to URL-quote the +request-target instead so as to comply with RFC 7230.