diff --git a/lib/matplotlib/tests/test_text.py b/lib/matplotlib/tests/test_text.py
index 19262202e5c1..e13522b9fa45 100644
--- a/lib/matplotlib/tests/test_text.py
+++ b/lib/matplotlib/tests/test_text.py
@@ -1,4 +1,6 @@
 from datetime import datetime
+import gc
+import inspect
 import io
 import warnings
 
@@ -881,7 +883,12 @@ def test_pdf_chars_beyond_bmp():
 
 @needs_usetex
 def test_metrics_cache():
-    mpl.text._get_text_metrics_with_cache_impl.cache_clear()
+    # dig into the signature to get the mutable default used as a cache
+    renderer_cache = inspect.signature(
+        mpl.text._get_text_metrics_function
+    ).parameters['_cache'].default
+
+    renderer_cache.clear()
 
     fig = plt.figure()
     fig.text(.3, .5, "foo\nbar")
@@ -890,6 +897,7 @@ def test_metrics_cache():
     fig.canvas.draw()
     renderer = fig._get_renderer()
     ys = {}  # mapping of strings to where they were drawn in y with draw_tex.
+    assert renderer in renderer_cache
 
     def call(*args, **kwargs):
         renderer, x, y, s, *_ = args
@@ -904,12 +912,40 @@ def call(*args, **kwargs):
     # get incorrectly reused by the first TeX string.
     assert len(ys["foo"]) == len(ys["bar"]) == 1
 
-    info = mpl.text._get_text_metrics_with_cache_impl.cache_info()
+    info = renderer_cache[renderer].cache_info()
     # Every string gets a miss for the first layouting (extents), then a hit
     # when drawing, but "foo\nbar" gets two hits as it's drawn twice.
     assert info.hits > info.misses
 
 
+def test_metrics_cache2():
+    plt.close('all')
+    # dig into the signature to get the mutable default used as a cache
+    renderer_cache = inspect.signature(
+        mpl.text._get_text_metrics_function
+    ).parameters['_cache'].default
+    gc.collect()
+    assert len(renderer_cache) == 0
+
+    def helper():
+        fig, ax = plt.subplots()
+        fig.draw_without_rendering()
+        # show we hit the outer cache
+        assert len(renderer_cache) == 1
+        func = renderer_cache[fig.canvas.get_renderer()]
+        cache_info = func.cache_info()
+        # show we hit the inner cache
+        assert cache_info.currsize > 0
+        assert cache_info.currsize == cache_info.misses
+        assert cache_info.hits > cache_info.misses
+        plt.close(fig)
+
+    helper()
+    gc.collect()
+    # show the outer cache has a lifetime tied to the renderer (via the figure)
+    assert len(renderer_cache) == 0
+
+
 def test_annotate_offset_fontsize():
     # Test that offset_fontsize parameter works and uses accurate values
     fig, ax = plt.subplots()
diff --git a/lib/matplotlib/text.py b/lib/matplotlib/text.py
index 6691f32f8771..1ea51e57b787 100644
--- a/lib/matplotlib/text.py
+++ b/lib/matplotlib/text.py
@@ -64,17 +64,89 @@ def _get_textbox(text, renderer):
 
 def _get_text_metrics_with_cache(renderer, text, fontprop, ismath, dpi):
     """Call ``renderer.get_text_width_height_descent``, caching the results."""
-    # Cached based on a copy of fontprop so that later in-place mutations of
-    # the passed-in argument do not mess up the cache.
-    return _get_text_metrics_with_cache_impl(
-        weakref.ref(renderer), text, fontprop.copy(), ismath, dpi)
 
+    # hit the outer cache layer and get the function to compute the metrics
+    # for this renderer instance
+    get_text_metrics = _get_text_metrics_function(renderer)
+    # call the function to compute the metrics and return
+    #
+    # We pass a copy of the fontprop because FontProperties is both mutable and
+    # has a `__hash__` that depends on that mutable state.  This is not ideal
+    # as it means the hash of an object is not stable over time which leads to
+    # very confusing behavior when used as keys in dictionaries or hashes.
+    return get_text_metrics(text, fontprop.copy(), ismath, dpi)
 
-@functools.lru_cache(4096)
-def _get_text_metrics_with_cache_impl(
-        renderer_ref, text, fontprop, ismath, dpi):
-    # dpi is unused, but participates in cache invalidation (via the renderer).
-    return renderer_ref().get_text_width_height_descent(text, fontprop, ismath)
+
+def _get_text_metrics_function(input_renderer, _cache=weakref.WeakKeyDictionary()):
+    """
+    Helper function to provide a two-layered cache for font metrics
+
+
+    To get the rendered size of a size of string we need to know:
+      - what renderer we are using
+      - the current dpi of the renderer
+      - the string
+      - the font properties
+      - is it math text or not
+
+    We do this as a two-layer cache with the outer layer being tied to a
+    renderer instance and the inner layer handling everything else.
+
+    The outer layer is implemented as `.WeakKeyDictionary` keyed on the
+    renderer.  As long as someone else is holding a hard ref to the renderer
+    we will keep the cache alive, but it will be automatically dropped when
+    the renderer is garbage collected.
+
+    The inner layer is provided by an lru_cache with a large maximum size (such
+    that we do not expect it to be hit in very few actual use cases).  As the
+    dpi is mutable on the renderer, we need to explicitly include it as part of
+    the cache key on the inner layer even though we do not directly use it (it is
+    used in the method call on the renderer).
+
+    This function takes a renderer and returns a function that can be used to
+    get the font metrics.
+
+    Parameters
+    ----------
+    input_renderer : maplotlib.backend_bases.RendererBase
+        The renderer to set the cache up for.
+
+    _cache : dict, optional
+        We are using the mutable default value to attach the cache to the function.
+
+        In principle you could pass a different dict-like to this function to inject
+        a different cache, but please don't.  This is an internal function not meant to
+        be reused outside of the narrow context we need it for.
+
+       There is a possible race condition here between threads, we may need to drop the
+       mutable default and switch to a threadlocal variable in the future.
+
+    """
+    if (_text_metrics := _cache.get(input_renderer, None)) is None:
+        # We are going to include this in the closure we put as values in the
+        # cache.  Closing over a hard-ref would create an unbreakable reference
+        # cycle.
+        renderer_ref = weakref.ref(input_renderer)
+
+        # define the function locally to get a new lru_cache per renderer
+        @functools.lru_cache(4096)
+        # dpi is unused, but participates in cache invalidation (via the renderer).
+        def _text_metrics(text, fontprop, ismath, dpi):
+            # this should never happen under normal use, but this is a better error to
+            # raise than an AttributeError on `None`
+            if (local_renderer := renderer_ref()) is None:
+                raise RuntimeError(
+                    "Trying to get text metrics for a renderer that no longer exists.  "
+                    "This should never happen and is evidence of a bug elsewhere."
+                    )
+            # do the actual method call we need and return the result
+            return local_renderer.get_text_width_height_descent(text, fontprop, ismath)
+
+        # stash the function for later use.
+        _cache[input_renderer] = _text_metrics
+
+    # return the inner function
+    return _text_metrics
 
 
 @_docstring.interpd