From 25166b1f36daf53b35eaf726913c82ab32d51e7a Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Thu, 23 May 2024 11:29:46 +0300 Subject: [PATCH 1/2] gh-119451: Fix OOM vulnerability in http.client Reading the whole body of the HTTP response could cause OOM if the Content-Length value is too large even if the server does not send a large amount of data. Now the HTTP client reads large data by chunks, therefore the amount of consumed memory is proportional to the amount of sent data. --- Lib/http/client.py | 16 +++++++-- Lib/test/test_httplib.py | 33 +++++++++++++++++++ ...-05-23-11-47-48.gh-issue-119451.qkJe9-.rst | 3 ++ 3 files changed, 49 insertions(+), 3 deletions(-) create mode 100644 Misc/NEWS.d/next/Security/2024-05-23-11-47-48.gh-issue-119451.qkJe9-.rst diff --git a/Lib/http/client.py b/Lib/http/client.py index a353716a8506e6..c6f9e5db86039a 100644 --- a/Lib/http/client.py +++ b/Lib/http/client.py @@ -111,6 +111,11 @@ _MAXLINE = 65536 _MAXHEADERS = 100 +# Data larger than this will be read in chunks, to prevent extreme +# overallocation. +_SAFE_BUF_SIZE = 1 << 20 + + # Header name/value ABNF (http://tools.ietf.org/html/rfc7230#section-3.2) # # VCHAR = %x21-7E @@ -637,9 +642,14 @@ def _safe_read(self, amt): reading. If the bytes are truly not available (due to EOF), then the IncompleteRead exception can be used to detect the problem. """ - data = self.fp.read(amt) - if len(data) < amt: - raise IncompleteRead(data, amt-len(data)) + cursize = min(amt, _SAFE_BUF_SIZE) + data = self.fp.read(cursize) + while len(data) < amt: + if len(data) < cursize: + raise IncompleteRead(data, amt-len(data)) + delta = min(cursize, amt - cursize) + data += self.fp.read(cursize) + cursize += delta return data def _safe_readinto(self, b): diff --git a/Lib/test/test_httplib.py b/Lib/test/test_httplib.py index 9d853d254db7c6..d405f38ab01166 100644 --- a/Lib/test/test_httplib.py +++ b/Lib/test/test_httplib.py @@ -1436,6 +1436,39 @@ def run_server(): thread.join() self.assertEqual(result, b"proxied data\n") + def test_large_content_length(self): + serv = socket.create_server((HOST, 0)) + self.addCleanup(serv.close) + + def run_server(): + while True: + [conn, address] = serv.accept() + with conn: + conn.recv(1024) + if not size: + break + body = b"HTTP/1.1 200 Ok\r\nContent-Length: %d\r\n\r\nText" % size + conn.sendall(body) + + thread = threading.Thread(target=run_server) + thread.start() + self.addCleanup(thread.join, 1.0) + + conn = client.HTTPConnection(*serv.getsockname()) + try: + for w in range(18, 65): + size = 1 << w + conn.request("GET", "/") + with conn.getresponse() as response: + self.assertRaises(client.IncompleteRead, response.read) + conn.close() + finally: + conn.close() + size = 0 + conn.request("GET", "/") + conn.close() + thread.join() + def test_putrequest_override_domain_validation(self): """ It should be possible to override the default validation diff --git a/Misc/NEWS.d/next/Security/2024-05-23-11-47-48.gh-issue-119451.qkJe9-.rst b/Misc/NEWS.d/next/Security/2024-05-23-11-47-48.gh-issue-119451.qkJe9-.rst new file mode 100644 index 00000000000000..3fc6f6f59e86d7 --- /dev/null +++ b/Misc/NEWS.d/next/Security/2024-05-23-11-47-48.gh-issue-119451.qkJe9-.rst @@ -0,0 +1,3 @@ +Fix OOM vulnerability in :mod:`http.client`, when reading the whole body of +a specially prepared small HTTP response could cause consuming an arbitrary +amount of memory. From f097fada1be22d44ca59cf4bb0f74bcb5f331c66 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Thu, 23 May 2024 14:25:18 +0300 Subject: [PATCH 2/2] Add also test for non-truncated large body. --- Lib/test/test_httplib.py | 39 ++++++++++++++++++++++++++++++++++++--- 1 file changed, 36 insertions(+), 3 deletions(-) diff --git a/Lib/test/test_httplib.py b/Lib/test/test_httplib.py index d405f38ab01166..f26831ffaf5f5b 100644 --- a/Lib/test/test_httplib.py +++ b/Lib/test/test_httplib.py @@ -1440,6 +1440,36 @@ def test_large_content_length(self): serv = socket.create_server((HOST, 0)) self.addCleanup(serv.close) + def run_server(): + [conn, address] = serv.accept() + with conn: + while conn.recv(1024): + conn.sendall( + b"HTTP/1.1 200 Ok\r\n" + b"Content-Length: %d\r\n" + b"\r\n" % size) + conn.sendall(b'A' * (size//3)) + conn.sendall(b'B' * (size - size//3)) + + thread = threading.Thread(target=run_server) + thread.start() + self.addCleanup(thread.join, 1.0) + + conn = client.HTTPConnection(*serv.getsockname()) + try: + for w in range(15, 27): + size = 1 << w + conn.request("GET", "/") + with conn.getresponse() as response: + self.assertEqual(len(response.read()), size) + finally: + conn.close() + thread.join(1.0) + + def test_large_content_length_truncated(self): + serv = socket.create_server((HOST, 0)) + self.addCleanup(serv.close) + def run_server(): while True: [conn, address] = serv.accept() @@ -1447,8 +1477,11 @@ def run_server(): conn.recv(1024) if not size: break - body = b"HTTP/1.1 200 Ok\r\nContent-Length: %d\r\n\r\nText" % size - conn.sendall(body) + conn.sendall( + b"HTTP/1.1 200 Ok\r\n" + b"Content-Length: %d\r\n" + b"\r\n" + b"Text" % size) thread = threading.Thread(target=run_server) thread.start() @@ -1467,7 +1500,7 @@ def run_server(): size = 0 conn.request("GET", "/") conn.close() - thread.join() + thread.join(1.0) def test_putrequest_override_domain_validation(self): """