From 3d8161a1248c67db2d896a4b1355eac6f60825d2 Mon Sep 17 00:00:00 2001 From: Tim Burke Date: Wed, 13 Mar 2019 13:13:19 -0700 Subject: [PATCH] bpo-36274: Encode request lines as Latin-1 While this is out of spec according to RFC 7230 (which limits expected octets to some subset of ASCII), it is often useful to be able to mimic an out-of-spec client when testing a server or application. Use Latin-1 in keeping with how we handle headers and bodies. https://bugs.python.org/issue36274 --- Lib/http/client.py | 4 +-- Lib/test/test_httplib.py | 34 +++++++++++++++++-- .../2019-07-08-09-20-10.bpo-36274.8XicsH.rst | 2 ++ 3 files changed, 36 insertions(+), 4 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2019-07-08-09-20-10.bpo-36274.8XicsH.rst diff --git a/Lib/http/client.py b/Lib/http/client.py index f61267e108a5249..f28381b4cda9424 100644 --- a/Lib/http/client.py +++ b/Lib/http/client.py @@ -1095,8 +1095,8 @@ def putrequest(self, method, url, skip_host=False, f"(found at least {match.group()!r})") request = '%s %s %s' % (method, url, self._http_vsn_str) - # Non-ASCII characters should have been eliminated earlier - self._output(request.encode('ascii')) + # Encode as latin-1, like we do headers and data + self._output(request.encode('latin-1')) if self._http_vsn == 11: # Issue some standard headers for better HTTP/1.1 compliance diff --git a/Lib/test/test_httplib.py b/Lib/test/test_httplib.py index 9148169cc7c2e57..f399b46027da3d9 100644 --- a/Lib/test/test_httplib.py +++ b/Lib/test/test_httplib.py @@ -275,6 +275,37 @@ def test_ipv6host_header(self): conn.request('GET', '/foo') self.assertTrue(sock.data.startswith(expected)) + def test_request_path_handling(self): + happy_cases = ( + ('/', b'/'), + ('', b'/'), + ('/\xe4\xbd\xa0\xe5\xa5\xbd', + b'/\xe4\xbd\xa0\xe5\xa5\xbd'), + ) + for caller_path, expected_path in happy_cases: + with self.subTest((caller_path, expected_path)): + conn = client.HTTPConnection('server.fqdn') + sock = FakeSocket('') + conn.sock = sock + conn.request('GET', caller_path) + expected = (b'GET ' + expected_path + b' HTTP/1.1\r\n' + b'Host: server.fqdn\r\n' + b'Accept-Encoding: identity\r\n\r\n') + self.assertEqual(sock.data, expected) + + error_cases = ( + '/\u4f60\u597d', + '/\udce4\udcbd\udca0\udce5\udca5\udcbd', + ) + for caller_path in error_cases: + with self.subTest(caller_path): + conn = client.HTTPConnection('server.fqdn') + sock = FakeSocket('') + conn.sock = sock + with self.assertRaises(UnicodeEncodeError): + conn.request('GET', caller_path) + self.assertEqual(sock.data, b'') + def test_malformed_headers_coped_with(self): # Issue 19996 body = "HTTP/1.1 200 OK\r\nFirst: val\r\n: nval\r\nSecond: val\r\n\r\n" @@ -720,8 +751,7 @@ def test_send_file(self): sock = FakeSocket(body) conn.sock = sock conn.request('GET', '/foo', body) - self.assertTrue(sock.data.startswith(expected), '%r != %r' % - (sock.data[:len(expected)], expected)) + self.assertEqual(sock.data[:len(expected)], expected) def test_send(self): expected = b'this is a test this is only a test' diff --git a/Misc/NEWS.d/next/Library/2019-07-08-09-20-10.bpo-36274.8XicsH.rst b/Misc/NEWS.d/next/Library/2019-07-08-09-20-10.bpo-36274.8XicsH.rst new file mode 100644 index 000000000000000..ce7f218bb0988f2 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2019-07-08-09-20-10.bpo-36274.8XicsH.rst @@ -0,0 +1,2 @@ +``http.client`` now accepts ISO-8859-1 request-targets. Callers are still +encouraged to URL-quote the request-target so as to comply with the RFC.