From 25166b1f36daf53b35eaf726913c82ab32d51e7a Mon Sep 17 00:00:00 2001
From: Serhiy Storchaka <storchaka@gmail.com>
Date: Thu, 23 May 2024 11:29:46 +0300
Subject: [PATCH 1/2] gh-119451: Fix OOM vulnerability in http.client

Reading the whole body of an HTTP response could cause OOM if the
Content-Length value is very large, even if the server does not actually
send a large amount of data. The HTTP client now reads large bodies in
chunks, so the amount of memory consumed is proportional to the amount
of data actually sent.
---
 Lib/http/client.py                            | 16 +++++++--
 Lib/test/test_httplib.py                      | 33 +++++++++++++++++++
 ...-05-23-11-47-48.gh-issue-119451.qkJe9-.rst |  3 ++
 3 files changed, 49 insertions(+), 3 deletions(-)
 create mode 100644 Misc/NEWS.d/next/Security/2024-05-23-11-47-48.gh-issue-119451.qkJe9-.rst

diff --git a/Lib/http/client.py b/Lib/http/client.py
index a353716a8506e6..c6f9e5db86039a 100644
--- a/Lib/http/client.py
+++ b/Lib/http/client.py
@@ -111,6 +111,11 @@
 _MAXLINE = 65536
 _MAXHEADERS = 100
 
+# Data larger than this will be read in chunks, to prevent extreme
+# overallocation.
+_SAFE_BUF_SIZE = 1 << 20
+
+
 # Header name/value ABNF (http://tools.ietf.org/html/rfc7230#section-3.2)
 #
 # VCHAR          = %x21-7E
@@ -637,9 +642,14 @@ def _safe_read(self, amt):
         reading. If the bytes are truly not available (due to EOF), then the
         IncompleteRead exception can be used to detect the problem.
         """
-        data = self.fp.read(amt)
-        if len(data) < amt:
-            raise IncompleteRead(data, amt-len(data))
+        cursize = min(amt, _SAFE_BUF_SIZE)
+        data = self.fp.read(cursize)
+        while len(data) < amt:
+            if len(data) < cursize:
+                raise IncompleteRead(data, amt-len(data))
+            delta = min(cursize, amt - cursize)
+            data += self.fp.read(delta)  # never request bytes past amt
+            cursize += delta
         return data
 
     def _safe_readinto(self, b):
diff --git a/Lib/test/test_httplib.py b/Lib/test/test_httplib.py
index 9d853d254db7c6..d405f38ab01166 100644
--- a/Lib/test/test_httplib.py
+++ b/Lib/test/test_httplib.py
@@ -1436,6 +1436,39 @@ def run_server():
         thread.join()
         self.assertEqual(result, b"proxied data\n")
 
+    def test_large_content_length(self):
+        serv = socket.create_server((HOST, 0))
+        self.addCleanup(serv.close)
+
+        def run_server():
+            while True:
+                [conn, address] = serv.accept()
+                with conn:
+                    conn.recv(1024)
+                    if not size:
+                        break
+                    body = b"HTTP/1.1 200 Ok\r\nContent-Length: %d\r\n\r\nText" % size
+                    conn.sendall(body)
+
+        thread = threading.Thread(target=run_server)
+        thread.start()
+        self.addCleanup(thread.join, 1.0)
+
+        conn = client.HTTPConnection(*serv.getsockname())
+        try:
+            for w in range(18, 65):
+                size = 1 << w
+                conn.request("GET", "/")
+                with conn.getresponse() as response:
+                    self.assertRaises(client.IncompleteRead, response.read)
+                conn.close()
+        finally:
+            conn.close()
+            size = 0
+            conn.request("GET", "/")
+            conn.close()
+            thread.join()
+
     def test_putrequest_override_domain_validation(self):
         """
         It should be possible to override the default validation
diff --git a/Misc/NEWS.d/next/Security/2024-05-23-11-47-48.gh-issue-119451.qkJe9-.rst b/Misc/NEWS.d/next/Security/2024-05-23-11-47-48.gh-issue-119451.qkJe9-.rst
new file mode 100644
index 00000000000000..3fc6f6f59e86d7
--- /dev/null
+++ b/Misc/NEWS.d/next/Security/2024-05-23-11-47-48.gh-issue-119451.qkJe9-.rst
@@ -0,0 +1,3 @@
+Fix OOM vulnerability in :mod:`http.client`: reading the whole body of
+a specially crafted small HTTP response could consume an arbitrary
+amount of memory.

From f097fada1be22d44ca59cf4bb0f74bcb5f331c66 Mon Sep 17 00:00:00 2001
From: Serhiy Storchaka <storchaka@gmail.com>
Date: Thu, 23 May 2024 14:25:18 +0300
Subject: [PATCH 2/2] Add also test for non-truncated large body.

---
 Lib/test/test_httplib.py | 39 ++++++++++++++++++++++++++++++++++++---
 1 file changed, 36 insertions(+), 3 deletions(-)

diff --git a/Lib/test/test_httplib.py b/Lib/test/test_httplib.py
index d405f38ab01166..f26831ffaf5f5b 100644
--- a/Lib/test/test_httplib.py
+++ b/Lib/test/test_httplib.py
@@ -1440,6 +1440,36 @@ def test_large_content_length(self):
         serv = socket.create_server((HOST, 0))
         self.addCleanup(serv.close)
 
+        def run_server():
+            [conn, address] = serv.accept()
+            with conn:
+                while conn.recv(1024):
+                    conn.sendall(
+                        b"HTTP/1.1 200 Ok\r\n"
+                        b"Content-Length: %d\r\n"
+                        b"\r\n" % size)
+                    conn.sendall(b'A' * (size//3))
+                    conn.sendall(b'B' * (size - size//3))
+
+        thread = threading.Thread(target=run_server)
+        thread.start()
+        self.addCleanup(thread.join, 1.0)
+
+        conn = client.HTTPConnection(*serv.getsockname())
+        try:
+            for w in range(15, 27):
+                size = 1 << w
+                conn.request("GET", "/")
+                with conn.getresponse() as response:
+                    self.assertEqual(len(response.read()), size)
+        finally:
+            conn.close()
+            thread.join(1.0)
+
+    def test_large_content_length_truncated(self):
+        serv = socket.create_server((HOST, 0))
+        self.addCleanup(serv.close)
+
         def run_server():
             while True:
                 [conn, address] = serv.accept()
@@ -1447,8 +1477,11 @@ def run_server():
                     conn.recv(1024)
                     if not size:
                         break
-                    body = b"HTTP/1.1 200 Ok\r\nContent-Length: %d\r\n\r\nText" % size
-                    conn.sendall(body)
+                    conn.sendall(
+                        b"HTTP/1.1 200 Ok\r\n"
+                        b"Content-Length: %d\r\n"
+                        b"\r\n"
+                        b"Text" % size)
 
         thread = threading.Thread(target=run_server)
         thread.start()
@@ -1467,7 +1500,7 @@ def run_server():
             size = 0
             conn.request("GET", "/")
             conn.close()
-            thread.join()
+            thread.join(1.0)
 
     def test_putrequest_override_domain_validation(self):
         """