Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Appearance settings

gh-66897: Upgrade HTTP CONNECT to protocol HTTP/1.1 #8305

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 17 commits into from
Apr 5, 2023
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions 12 Doc/library/http.client.rst
Original file line number Diff line number Diff line change
Expand Up @@ -320,6 +320,13 @@ HTTPConnection Objects
The headers argument should be a mapping of extra HTTP headers to send with
the CONNECT request.

As HTTP/1.1 is used for HTTP CONNECT tunnelling requests, `as per the RFC
<https://tools.ietf.org/html/rfc7231#section-4.3.6>`_, an HTTP ``Host:``
header must be provided, matching the authority-form of the request target
provided as the destination for the CONNECT request. If an HTTP ``Host:``
header is not provided via the headers argument, one is generated and
transmitted automatically.

For example, to tunnel through a HTTPS proxy server running locally on port
8080, we would pass the address of the proxy to the :class:`HTTPSConnection`
constructor, and the address of the host that we eventually want to reach to
Expand All @@ -332,6 +339,11 @@ HTTPConnection Objects

.. versionadded:: 3.2

.. versionchanged:: 3.7
merwok marked this conversation as resolved.
Show resolved Hide resolved
HTTP CONNECT tunnelling requests use protocol HTTP/1.1, upgraded from
protocol HTTP/1.0. ``Host:`` HTTP headers are mandatory for HTTP/1.1, so
one will be automatically generated and transmitted if not provided in
the headers argument.

.. method:: HTTPConnection.connect()

Expand Down
31 changes: 24 additions & 7 deletions 31 Lib/http/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,7 @@
Req-sent-unread-response _CS_REQ_SENT <response_class>
"""

import copy
import email.parser
import email.message
import http
Expand Down Expand Up @@ -847,27 +848,43 @@ def __init__(self, host, port=None, timeout=socket._GLOBAL_DEFAULT_TIMEOUT,
def set_tunnel(self, host, port=None, headers=None):
"""Set up host and port for HTTP CONNECT tunnelling.

In a connection that uses HTTP CONNECT tunneling, the host passed to the
constructor is used as a proxy server that relays all communication to
the endpoint passed to `set_tunnel`. This is done by sending an HTTP
In a connection that uses HTTP CONNECT tunnelling, the host passed to
the constructor is used as a proxy server that relays all communication
to the endpoint passed to `set_tunnel`. This is done by sending an HTTP
CONNECT request to the proxy server when the connection is established.

This method must be called before the HTTP connection has been
established.

The headers argument should be a mapping of extra HTTP headers to send
with the CONNECT request.

As HTTP/1.1 is used for HTTP CONNECT tunnelling requests, as per the RFC
(https://tools.ietf.org/html/rfc7231#section-4.3.6), an HTTP Host:
header must be provided, matching the authority-form of the request
target provided as the destination for the CONNECT request. If an
HTTP Host: header is not provided via the headers argument, one
is generated and transmitted automatically.
"""

if self.sock:
raise RuntimeError("Can't set up tunnel for established connection")

self._tunnel_host, self._tunnel_port = self._get_hostport(host, port)
if headers:
self._tunnel_headers = headers
self._tunnel_headers = copy.copy(headers)
merwok marked this conversation as resolved.
Show resolved Hide resolved
else:
self._tunnel_headers.clear()

saw_host_header = False
for header in self._tunnel_headers.keys():
if header.lower() == "host":
saw_host_header = True
handlerbot marked this conversation as resolved.
Show resolved Hide resolved
if not saw_host_header:
handlerbot marked this conversation as resolved.
Show resolved Hide resolved
encoded_host = self._tunnel_host.encode("idna").decode("ascii")
self._tunnel_headers["Host"] = "%s:%d" % (
encoded_host, self._tunnel_port)

def _get_hostport(self, host, port):
if port is None:
i = host.rfind(':')
Expand All @@ -892,9 +909,9 @@ def set_debuglevel(self, level):
self.debuglevel = level

def _tunnel(self):
connect_str = "CONNECT %s:%d HTTP/1.0\r\n" % (self._tunnel_host,
self._tunnel_port)
connect_bytes = connect_str.encode("ascii")
connect_bytes = b"CONNECT %s:%d %s\r\n" % (
self._tunnel_host.encode("idna"), self._tunnel_port,
self._http_vsn_str.encode("ascii"))
self.send(connect_bytes)
for header, value in self._tunnel_headers.items():
header_str = "%s: %s\r\n" % (header, value)
Expand Down
148 changes: 133 additions & 15 deletions 148 Lib/test/test_httplib.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import copy
import errno
from http import client
import io
Expand Down Expand Up @@ -1864,11 +1865,12 @@ def test_getting_header_defaultint(self):
class TunnelTests(TestCase):
def setUp(self):
response_text = (
'HTTP/1.0 200 OK\r\n\r\n' # Reply to CONNECT
'HTTP/1.1 200 OK\r\n\r\n' # Reply to CONNECT
'HTTP/1.1 200 OK\r\n' # Reply to HEAD
'Content-Length: 42\r\n\r\n'
)
self.host = 'proxy.com'
self.port = client.HTTP_PORT
self.conn = client.HTTPConnection(self.host)
self.conn._create_connection = self._create_connection(response_text)

Expand All @@ -1880,15 +1882,45 @@ def create_connection(address, timeout=None, source_address=None):
return FakeSocket(response_text, host=address[0], port=address[1])
return create_connection

def test_set_tunnel_host_port_headers(self):
def test_set_tunnel_host_port_headers_add_host_missing(self):
tunnel_host = 'destination.com'
tunnel_port = 8888
tunnel_headers = {'User-Agent': 'Mozilla/5.0 (compatible, MSIE 11)'}
tunnel_headers_after = copy.copy(tunnel_headers)
tunnel_headers_after['Host'] = '%s:%d' % (tunnel_host, tunnel_port)
self.conn.set_tunnel(tunnel_host, port=tunnel_port,
headers=tunnel_headers)
self.conn.request('HEAD', '/', '')
self.assertEqual(self.conn.sock.host, self.host)
self.assertEqual(self.conn.sock.port, client.HTTP_PORT)
self.assertEqual(self.conn.sock.port, self.port)
self.assertEqual(self.conn._tunnel_host, tunnel_host)
self.assertEqual(self.conn._tunnel_port, tunnel_port)
self.assertEqual(self.conn._tunnel_headers, tunnel_headers_after)

def test_set_tunnel_host_port_headers_set_host_identical(self):
tunnel_host = 'destination.com'
tunnel_port = 8888
tunnel_headers = {'User-Agent': 'Mozilla/5.0 (compatible, MSIE 11)',
'Host': '%s:%d' % (tunnel_host, tunnel_port)}
self.conn.set_tunnel(tunnel_host, port=tunnel_port,
headers=tunnel_headers)
self.conn.request('HEAD', '/', '')
self.assertEqual(self.conn.sock.host, self.host)
self.assertEqual(self.conn.sock.port, self.port)
self.assertEqual(self.conn._tunnel_host, tunnel_host)
self.assertEqual(self.conn._tunnel_port, tunnel_port)
self.assertEqual(self.conn._tunnel_headers, tunnel_headers)

def test_set_tunnel_host_port_headers_set_host_different(self):
tunnel_host = 'destination.com'
tunnel_port = 8888
tunnel_headers = {'User-Agent': 'Mozilla/5.0 (compatible, MSIE 11)',
'Host': '%s:%d' % ('example.com', 4200)}
self.conn.set_tunnel(tunnel_host, port=tunnel_port,
headers=tunnel_headers)
self.conn.request('HEAD', '/', '')
self.assertEqual(self.conn.sock.host, self.host)
self.assertEqual(self.conn.sock.port, self.port)
self.assertEqual(self.conn._tunnel_host, tunnel_host)
self.assertEqual(self.conn._tunnel_port, tunnel_port)
self.assertEqual(self.conn._tunnel_headers, tunnel_headers)
Expand All @@ -1900,25 +1932,111 @@ def test_disallow_set_tunnel_after_connect(self):
'destination.com')

def test_connect_with_tunnel(self):
self.conn.set_tunnel('destination.com')
d = {
b'host': b'destination.com',
b'port': client.HTTP_PORT,
}
self.conn.set_tunnel(d[b'host'].decode('ascii'))
self.conn.request('HEAD', '/', '')
self.assertEqual(self.conn.sock.host, self.host)
self.assertEqual(self.conn.sock.port, self.port)
self.assertIn(b'CONNECT %(host)s:%(port)d HTTP/1.1\r\n'
b'Host: %(host)s:%(port)d\r\n\r\n' % d,
self.conn.sock.data)
self.assertIn(b'HEAD / HTTP/1.1\r\nHost: %(host)s\r\n' % d,
self.conn.sock.data)

def test_connect_with_tunnel_with_default_port(self):
d = {
b'host': b'destination.com',
b'port': client.HTTP_PORT,
}
self.conn.set_tunnel(d[b'host'].decode('ascii'), port=d[b'port'])
self.conn.request('HEAD', '/', '')
self.assertEqual(self.conn.sock.host, self.host)
self.assertEqual(self.conn.sock.port, self.port)
self.assertIn(b'CONNECT %(host)s:%(port)d HTTP/1.1\r\n'
b'Host: %(host)s:%(port)d\r\n\r\n' % d,
self.conn.sock.data)
self.assertIn(b'HEAD / HTTP/1.1\r\nHost: %(host)s\r\n' % d,
self.conn.sock.data)

def test_connect_with_tunnel_with_nonstandard_port(self):
d = {
b'host': b'destination.com',
b'port': 8888,
}
self.conn.set_tunnel(d[b'host'].decode('ascii'), port=d[b'port'])
self.conn.request('HEAD', '/', '')
self.assertEqual(self.conn.sock.host, self.host)
self.assertEqual(self.conn.sock.port, self.port)
self.assertIn(b'CONNECT %(host)s:%(port)d HTTP/1.1\r\n'
b'Host: %(host)s:%(port)d\r\n\r\n' % d,
self.conn.sock.data)
self.assertIn(b'HEAD / HTTP/1.1\r\nHost: %(host)s:%(port)d\r\n' % d,
self.conn.sock.data)

# This request is not RFC-valid, but it's been possible with the library
# for years, so don't break it unexpectedly... This also tests
# case-insensitivity when injecting Host: headers if they're missing.
def test_connect_with_tunnel_with_different_host_header(self):
d = {
b'host': b'destination.com',
b'tunnel_host_header': b'example.com:9876',
b'port': client.HTTP_PORT,
}
self.conn.set_tunnel(
d[b'host'].decode('ascii'),
headers={'HOST': d[b'tunnel_host_header'].decode('ascii')})
self.conn.request('HEAD', '/', '')
self.assertEqual(self.conn.sock.host, self.host)
self.assertEqual(self.conn.sock.port, self.port)
self.assertIn(b'CONNECT %(host)s:%(port)d HTTP/1.1\r\n'
b'HOST: %(tunnel_host_header)s\r\n\r\n' % d,
self.conn.sock.data)
self.assertIn(b'HEAD / HTTP/1.1\r\nHost: %(host)s\r\n' % d,
self.conn.sock.data)

def test_connect_with_tunnel_different_host(self):
d = {
b'host': b'destination.com',
b'port': client.HTTP_PORT,
}
self.conn.set_tunnel(d[b'host'].decode('ascii'))
self.conn.request('HEAD', '/', '')
self.assertEqual(self.conn.sock.host, self.host)
self.assertEqual(self.conn.sock.port, self.port)
self.assertIn(b'CONNECT %(host)s:%(port)d HTTP/1.1\r\n'
b'Host: %(host)s:%(port)d\r\n\r\n' % d,
self.conn.sock.data)
self.assertIn(b'HEAD / HTTP/1.1\r\nHost: %(host)s\r\n' % d,
self.conn.sock.data)

def test_connect_with_tunnel_idna(self):
dest = '\u03b4\u03c0\u03b8.gr'
dest_port = b'%s:%d' % (dest.encode('idna'), client.HTTP_PORT)
expected = b'CONNECT %s HTTP/1.1\r\nHost: %s\r\n\r\n' % (
dest_port, dest_port)
self.conn.set_tunnel(dest)
self.conn.request('HEAD', '/', '')
self.assertEqual(self.conn.sock.host, self.host)
self.assertEqual(self.conn.sock.port, client.HTTP_PORT)
self.assertIn(b'CONNECT destination.com', self.conn.sock.data)
# issue22095
self.assertNotIn(b'Host: destination.com:None', self.conn.sock.data)
self.assertIn(b'Host: destination.com', self.conn.sock.data)

# This test should be removed when CONNECT gets the HTTP/1.1 blessing
self.assertNotIn(b'Host: proxy.com', self.conn.sock.data)
self.assertIn(expected, self.conn.sock.data)

def test_connect_put_request(self):
self.conn.set_tunnel('destination.com')
d = {
b'host': b'destination.com',
b'port': client.HTTP_PORT,
}
self.conn.set_tunnel(d[b'host'].decode('ascii'))
self.conn.request('PUT', '/', '')
self.assertEqual(self.conn.sock.host, self.host)
self.assertEqual(self.conn.sock.port, client.HTTP_PORT)
self.assertIn(b'CONNECT destination.com', self.conn.sock.data)
self.assertIn(b'Host: destination.com', self.conn.sock.data)
self.assertEqual(self.conn.sock.port, self.port)
self.assertIn(b'CONNECT %(host)s:%(port)d HTTP/1.1\r\n'
b'Host: %(host)s:%(port)d\r\n\r\n' % d,
self.conn.sock.data)
self.assertIn(b'PUT / HTTP/1.1\r\nHost: %(host)s\r\n' % d,
self.conn.sock.data)

def test_tunnel_debuglog(self):
expected_header = 'X-Dummy: 1'
Expand Down
1 change: 1 addition & 0 deletions 1 Misc/ACKS
Original file line number Diff line number Diff line change
Expand Up @@ -613,6 +613,7 @@ Anders Hammarquist
Mark Hammond
Harald Hanche-Olsen
Manus Hand
Michael Handler
Milton L. Hankins
Stephen Hansen
Barry Hantman
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
http.client CONNECT method tunnel improvements: use the HTTP/1.1 protocol;
send a matching Host: header with CONNECT if one is not provided; convert
IDN domain names to Punycode. Patch by Michael Handler.
Morty Proxy This is a proxified and sanitized view of the page, visit original site.