From 10a60941ede0a0113165d8358afe2f30889812f9 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?=
 <10796600+picnixz@users.noreply.github.com>
Date: Sat, 6 Jul 2024 20:58:19 +0200
Subject: [PATCH 01/97] add implementation

---
 Modules/_fnmatchmodule.c          | 246 ++++++++++++++++++++++++++++++
 Modules/clinic/_fnmatchmodule.c.h | 192 +++++++++++++++++++++++
 2 files changed, 438 insertions(+)
 create mode 100644 Modules/_fnmatchmodule.c
 create mode 100644 Modules/clinic/_fnmatchmodule.c.h

diff --git a/Modules/_fnmatchmodule.c b/Modules/_fnmatchmodule.c
new file mode 100644
index 00000000000000..a5b77ec4c8d4f2
--- /dev/null
+++ b/Modules/_fnmatchmodule.c
@@ -0,0 +1,246 @@
+/*
+ * C accelerator for the 'fnmatch' module.
+ *
+ * Most functions expect string or bytes instances, and thus the Python
+ * implementation should first pre-process path-like objects, and possibly
+ * applying normalizations depending on the platform if needed.
+ */
+
+#include "Python.h"
+
+#include "clinic/_fnmatchmodule.c.h"
+
+/*[clinic input]
+module _fnmatch
+[clinic start generated code]*/
+/*[clinic end generated code: output=da39a3ee5e6b4b0d input=356e324d57d93f08]*/
+
+#include <fnmatch.h>
+
+static inline int
+validate_encoded_object(PyObject *name)
+{
+    if (!PyBytes_Check(name)) {
+        PyErr_Format(PyExc_TypeError,
+                     "name must be a bytes object, got %.200s",
+                     Py_TYPE(name)->tp_name);
+        return 0;
+    }
+    return 1;
+}
+
+static inline int
+validate_unicode_object(PyObject *name)
+{
+    if (!PyUnicode_Check(name)) {
+        PyErr_Format(PyExc_TypeError,
+                     "name must be a string object, got %.200s",
+                     Py_TYPE(name)->tp_name);
+        return 0;
+    }
+    return 1;
+}
+
+static inline int
+posix_fnmatch_encoded(const char *pattern, PyObject *name)
+{
+    if (!validate_encoded_object(name)) {
+        return -1;
+    }
+    // case-insensitive match
+#ifdef FNM_CASEFOLD
+    return fnmatch(pattern, PyBytes_AS_STRING(name), FNM_CASEFOLD) == 0;
+#else
+    // todo: fallback to Python implementation
+    return -1;
+#endif
+}
+
+static inline int
+posix_fnmatchcase_encoded(const char *pattern, PyObject *name)
+{
+    if (!validate_encoded_object(name)) {
+        return -1;
+    }
+    // case-sensitive match
+    return fnmatch(pattern, PyBytes_AS_STRING(name), 0) == 0;
+}
+
+static inline int
+posix_fnmatch_unicode(const char *pattern, PyObject *name)
+{
+    if (!validate_unicode_object(name)) {
+        return -1;
+    }
+    // case-insensitive match
+#ifdef FNM_CASEFOLD
+    return fnmatch(pattern, PyUnicode_AsUTF8(name), FNM_CASEFOLD) == 0;
+#else
+    // todo: fallback to Python implementation
+    return -1;
+#endif
+}
+
+static inline int
+posix_fnmatchcase_unicode(const char *pattern, PyObject *name)
+{
+    if (!validate_unicode_object(name)) {
+        return -1;
+    }
+    // case-sensitive match
+    return fnmatch(pattern, PyUnicode_AsUTF8(name), 0) == 0;
+}
+
+static PyObject *
+_fnmatch_filter_generic_impl(PyObject *module,
+                             PyObject *names,
+                             const char *pattern,
+                             int (*match)(const char *, PyObject *))
+{
+    PyObject *iter = PyObject_GetIter(names);
+    if (iter == NULL) {
+        return NULL;
+    }
+
+    PyObject *res = PyList_New(0);
+    if (res == NULL) {
+        Py_DECREF(iter);
+        return NULL;
+    }
+
+    PyObject *name = NULL;
+    while ((name = PyIter_Next(iter))) {
+        int rc = match(pattern, name);
+        if (rc < 0) {
+            goto abort;
+        }
+        if (rc == 1) {
+            if (PyList_Append(res, name) < 0) {
+                goto abort;
+            }
+        }
+        Py_DECREF(name);
+        if (PyErr_Occurred()) {
+            Py_DECREF(res);
+            Py_DECREF(iter);
+            return NULL;
+        }
+    }
+    Py_DECREF(iter);
+    return res;
+abort:
+    Py_DECREF(name);
+    Py_DECREF(iter);
+    Py_DECREF(res);
+    return NULL;
+}
+
+/*[clinic input]
+_fnmatch.filter -> object
+
+    names: object
+    pat: object
+
+[clinic start generated code]*/
+
+static PyObject *
+_fnmatch_filter_impl(PyObject *module, PyObject *names, PyObject *pat)
+/*[clinic end generated code: output=7f11aa68436d05fc input=1d233174e1c4157a]*/
+{
+    // todo: handle os.path.normcase(...)
+    if (PyBytes_Check(pat)) {
+        const char *pattern = PyBytes_AS_STRING(pat);
+        return _fnmatch_filter_generic_impl(module, names, pattern,
+                                            &posix_fnmatch_encoded);
+    }
+    if (PyUnicode_Check(pat)) {
+        const char *pattern = PyUnicode_AsUTF8(pat);
+        return _fnmatch_filter_generic_impl(module, names, pattern,
+                                            &posix_fnmatch_unicode);
+    }
+    PyErr_Format(PyExc_TypeError, "pattern must be a string or a bytes object");
+    return NULL;
+}
+
+/*[clinic input]
+_fnmatch.fnmatch -> bool
+
+    name: object
+    pat: object
+
+[clinic start generated code]*/
+
+static int
+_fnmatch_fnmatch_impl(PyObject *module, PyObject *name, PyObject *pat)
+/*[clinic end generated code: output=b4cd0bd911e8bc93 input=c45e0366489540b8]*/
+{
+    // todo: handle os.path.normcase(...)
+    if (PyBytes_Check(pat)) {
+        const char *pattern = PyBytes_AS_STRING(pat);
+        return posix_fnmatch_encoded(pattern, name);
+    }
+    if (PyUnicode_Check(pat)) {
+        const char *pattern = PyUnicode_AsUTF8(pat);
+        return posix_fnmatch_unicode(pattern, name);
+    }
+    PyErr_Format(PyExc_TypeError, "pattern must be a string or a bytes object");
+    return NULL;
+}
+
+/*[clinic input]
+_fnmatch.fnmatchcase -> bool
+
+    name: object
+    pat: object
+
+Test whether `name` matches `pattern`, including case.
+
+This is a version of fnmatch() which doesn't case-normalize
+its arguments.
+
+[clinic start generated code]*/
+
+static int
+_fnmatch_fnmatchcase_impl(PyObject *module, PyObject *name, PyObject *pat)
+/*[clinic end generated code: output=4d1283b1b1fc7cb8 input=b02a6a5c8c5a46e2]*/
+{
+    if (PyBytes_Check(pat)) {
+        const char *pattern = PyBytes_AS_STRING(pat);
+        return posix_fnmatchcase_encoded(pattern, name);
+    }
+    if (PyUnicode_Check(pat)) {
+        const char *pattern = PyUnicode_AsUTF8(pat);
+        return posix_fnmatchcase_unicode(pattern, name);
+    }
+    PyErr_Format(PyExc_TypeError, "pattern must be a string or a bytes object");
+    return NULL;
+}
+
+static PyMethodDef _fnmatch_methods[] = {
+    _FNMATCH_FILTER_METHODDEF
+    _FNMATCH_FNMATCH_METHODDEF
+    _FNMATCH_FNMATCHCASE_METHODDEF
+    {NULL, NULL}
+};
+
+static struct PyModuleDef_Slot _fnmatch_slots[] = {
+    {0, NULL}
+};
+
+static struct PyModuleDef _fnmatchmodule = {
+    PyModuleDef_HEAD_INIT,
+    "_fnmatch",
+    NULL,
+    0,
+    _fnmatch_methods,
+    _fnmatch_slots,
+    NULL,
+    NULL,
+    NULL,
+};
+
+PyMODINIT_FUNC
+PyInit__fnmatch(void)
+{
+    return PyModuleDef_Init(&_fnmatchmodule);
+}
diff --git a/Modules/clinic/_fnmatchmodule.c.h b/Modules/clinic/_fnmatchmodule.c.h
new file mode 100644
index 00000000000000..a693bccee18ff5
--- /dev/null
+++ b/Modules/clinic/_fnmatchmodule.c.h
@@ -0,0 +1,192 @@
+/*[clinic input]
+preserve
+[clinic start generated code]*/
+
+#if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE)
+#  include "pycore_gc.h"          // PyGC_Head
+#  include "pycore_runtime.h"     // _Py_ID()
+#endif
+#include "pycore_modsupport.h"    // _PyArg_UnpackKeywords()
+
+PyDoc_STRVAR(_fnmatch_filter__doc__,
+"filter($module, /, names, pat)\n"
+"--\n"
+"\n");
+
+#define _FNMATCH_FILTER_METHODDEF    \
+    {"filter", _PyCFunction_CAST(_fnmatch_filter), METH_FASTCALL|METH_KEYWORDS, _fnmatch_filter__doc__},
+
+static PyObject *
+_fnmatch_filter_impl(PyObject *module, PyObject *names, PyObject *pat);
+
+static PyObject *
+_fnmatch_filter(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames)
+{
+    PyObject *return_value = NULL;
+    #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE)
+
+    #define NUM_KEYWORDS 2
+    static struct {
+        PyGC_Head _this_is_not_used;
+        PyObject_VAR_HEAD
+        PyObject *ob_item[NUM_KEYWORDS];
+    } _kwtuple = {
+        .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS)
+        .ob_item = { &_Py_ID(names), &_Py_ID(pat), },
+    };
+    #undef NUM_KEYWORDS
+    #define KWTUPLE (&_kwtuple.ob_base.ob_base)
+
+    #else  // !Py_BUILD_CORE
+    #  define KWTUPLE NULL
+    #endif  // !Py_BUILD_CORE
+
+    static const char * const _keywords[] = {"names", "pat", NULL};
+    static _PyArg_Parser _parser = {
+        .keywords = _keywords,
+        .fname = "filter",
+        .kwtuple = KWTUPLE,
+    };
+    #undef KWTUPLE
+    PyObject *argsbuf[2];
+    PyObject *names;
+    PyObject *pat;
+
+    args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, 2, 2, 0, argsbuf);
+    if (!args) {
+        goto exit;
+    }
+    names = args[0];
+    pat = args[1];
+    return_value = _fnmatch_filter_impl(module, names, pat);
+
+exit:
+    return return_value;
+}
+
+PyDoc_STRVAR(_fnmatch_fnmatch__doc__,
+"fnmatch($module, /, name, pat)\n"
+"--\n"
+"\n");
+
+#define _FNMATCH_FNMATCH_METHODDEF    \
+    {"fnmatch", _PyCFunction_CAST(_fnmatch_fnmatch), METH_FASTCALL|METH_KEYWORDS, _fnmatch_fnmatch__doc__},
+
+static int
+_fnmatch_fnmatch_impl(PyObject *module, PyObject *name, PyObject *pat);
+
+static PyObject *
+_fnmatch_fnmatch(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames)
+{
+    PyObject *return_value = NULL;
+    #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE)
+
+    #define NUM_KEYWORDS 2
+    static struct {
+        PyGC_Head _this_is_not_used;
+        PyObject_VAR_HEAD
+        PyObject *ob_item[NUM_KEYWORDS];
+    } _kwtuple = {
+        .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS)
+        .ob_item = { &_Py_ID(name), &_Py_ID(pat), },
+    };
+    #undef NUM_KEYWORDS
+    #define KWTUPLE (&_kwtuple.ob_base.ob_base)
+
+    #else  // !Py_BUILD_CORE
+    #  define KWTUPLE NULL
+    #endif  // !Py_BUILD_CORE
+
+    static const char * const _keywords[] = {"name", "pat", NULL};
+    static _PyArg_Parser _parser = {
+        .keywords = _keywords,
+        .fname = "fnmatch",
+        .kwtuple = KWTUPLE,
+    };
+    #undef KWTUPLE
+    PyObject *argsbuf[2];
+    PyObject *name;
+    PyObject *pat;
+    int _return_value;
+
+    args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, 2, 2, 0, argsbuf);
+    if (!args) {
+        goto exit;
+    }
+    name = args[0];
+    pat = args[1];
+    _return_value = _fnmatch_fnmatch_impl(module, name, pat);
+    if ((_return_value == -1) && PyErr_Occurred()) {
+        goto exit;
+    }
+    return_value = PyBool_FromLong((long)_return_value);
+
+exit:
+    return return_value;
+}
+
+PyDoc_STRVAR(_fnmatch_fnmatchcase__doc__,
+"fnmatchcase($module, /, name, pat)\n"
+"--\n"
+"\n"
+"Test whether `name` matches `pattern`, including case.\n"
+"\n"
+"This is a version of fnmatch() which doesn\'t case-normalize\n"
+"its arguments.");
+
+#define _FNMATCH_FNMATCHCASE_METHODDEF    \
+    {"fnmatchcase", _PyCFunction_CAST(_fnmatch_fnmatchcase), METH_FASTCALL|METH_KEYWORDS, _fnmatch_fnmatchcase__doc__},
+
+static int
+_fnmatch_fnmatchcase_impl(PyObject *module, PyObject *name, PyObject *pat);
+
+static PyObject *
+_fnmatch_fnmatchcase(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames)
+{
+    PyObject *return_value = NULL;
+    #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE)
+
+    #define NUM_KEYWORDS 2
+    static struct {
+        PyGC_Head _this_is_not_used;
+        PyObject_VAR_HEAD
+        PyObject *ob_item[NUM_KEYWORDS];
+    } _kwtuple = {
+        .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS)
+        .ob_item = { &_Py_ID(name), &_Py_ID(pat), },
+    };
+    #undef NUM_KEYWORDS
+    #define KWTUPLE (&_kwtuple.ob_base.ob_base)
+
+    #else  // !Py_BUILD_CORE
+    #  define KWTUPLE NULL
+    #endif  // !Py_BUILD_CORE
+
+    static const char * const _keywords[] = {"name", "pat", NULL};
+    static _PyArg_Parser _parser = {
+        .keywords = _keywords,
+        .fname = "fnmatchcase",
+        .kwtuple = KWTUPLE,
+    };
+    #undef KWTUPLE
+    PyObject *argsbuf[2];
+    PyObject *name;
+    PyObject *pat;
+    int _return_value;
+
+    args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, 2, 2, 0, argsbuf);
+    if (!args) {
+        goto exit;
+    }
+    name = args[0];
+    pat = args[1];
+    _return_value = _fnmatch_fnmatchcase_impl(module, name, pat);
+    if ((_return_value == -1) && PyErr_Occurred()) {
+        goto exit;
+    }
+    return_value = PyBool_FromLong((long)_return_value);
+
+exit:
+    return return_value;
+}
+/*[clinic end generated code: output=fd6cc9541aa95a9a input=a9049054013a1b77]*/

From ca0338811424ed6369ab511d4ea75a362deebc10 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?=
 <10796600+picnixz@users.noreply.github.com>
Date: Sat, 6 Jul 2024 20:57:37 +0200
Subject: [PATCH 02/97] add initial tests

---
 Lib/test/test_fnmatch.py | 15 +++++++++++++--
 1 file changed, 13 insertions(+), 2 deletions(-)

diff --git a/Lib/test/test_fnmatch.py b/Lib/test/test_fnmatch.py
index 10ed496d4e2f37..b086495b78c785 100644
--- a/Lib/test/test_fnmatch.py
+++ b/Lib/test/test_fnmatch.py
@@ -5,10 +5,11 @@
 import string
 import warnings
 
+import _fnmatch as c_fnmatch
+import fnmatch as py_fnmatch
 from fnmatch import fnmatch, fnmatchcase, translate, filter
 
 class FnmatchTestCase(unittest.TestCase):
-
     def check_match(self, filename, pattern, should_match=True, fn=fnmatch):
         if should_match:
             self.assertTrue(fn(filename, pattern),
@@ -250,18 +251,25 @@ def test_translate(self):
         self.assertTrue(re.match(fatre, 'cbabcaxc'))
         self.assertFalse(re.match(fatre, 'dabccbad'))
 
-class FilterTestCase(unittest.TestCase):
+
+class FilterTestCaseMixin:
+    fnmatch = None
 
     def test_filter(self):
+        filter = self.fnmatch.filter
         self.assertEqual(filter(['Python', 'Ruby', 'Perl', 'Tcl'], 'P*'),
                          ['Python', 'Perl'])
         self.assertEqual(filter([b'Python', b'Ruby', b'Perl', b'Tcl'], b'P*'),
                          [b'Python', b'Perl'])
 
     def test_mix_bytes_str(self):
+        filter = self.fnmatch.filter
         self.assertRaises(TypeError, filter, ['test'], b'*')
         self.assertRaises(TypeError, filter, [b'test'], '*')
 
+class PurePythonFilterTestCase(FilterTestCaseMixin, unittest.TestCase):
+    fnmatch = py_fnmatch
+
     def test_case(self):
         ignorecase = os.path.normcase('P') == os.path.normcase('p')
         self.assertEqual(filter(['Test.py', 'Test.rb', 'Test.PL'], '*.p*'),
@@ -276,6 +284,9 @@ def test_sep(self):
         self.assertEqual(filter(['usr/bin', 'usr', 'usr\\lib'], 'usr\\*'),
                          ['usr/bin', 'usr\\lib'] if normsep else ['usr\\lib'])
 
+class CPythonFilterTestCase(FilterTestCaseMixin, unittest.TestCase):
+    fnmatch = c_fnmatch
+
 
 if __name__ == "__main__":
     unittest.main()

From adb6ed040279bc592b3c86133eb6569470758cc1 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?=
 <10796600+picnixz@users.noreply.github.com>
Date: Sat, 6 Jul 2024 20:57:17 +0200
Subject: [PATCH 03/97] add generated objects

---
 Include/internal/pycore_global_objects_fini_generated.h | 2 ++
 Include/internal/pycore_global_strings.h                | 2 ++
 Include/internal/pycore_runtime_init_generated.h        | 2 ++
 Include/internal/pycore_unicodeobject_generated.h       | 8 ++++++++
 4 files changed, 14 insertions(+)

diff --git a/Include/internal/pycore_global_objects_fini_generated.h b/Include/internal/pycore_global_objects_fini_generated.h
index c0840f9eb7eca2..77b2a8e2e7a7dc 100644
--- a/Include/internal/pycore_global_objects_fini_generated.h
+++ b/Include/internal/pycore_global_objects_fini_generated.h
@@ -1087,6 +1087,7 @@ _PyStaticObjects_CheckRefcnt(PyInterpreterState *interp) {
     _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(n_unnamed_fields));
     _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(name));
     _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(name_from));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(names));
     _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(namespace_separator));
     _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(namespaces));
     _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(narg));
@@ -1129,6 +1130,7 @@ _PyStaticObjects_CheckRefcnt(PyInterpreterState *interp) {
     _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(pages));
     _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(parent));
     _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(password));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(pat));
     _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(path));
     _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(pattern));
     _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(peek));
diff --git a/Include/internal/pycore_global_strings.h b/Include/internal/pycore_global_strings.h
index 51735a8a726e11..4896f6343087d3 100644
--- a/Include/internal/pycore_global_strings.h
+++ b/Include/internal/pycore_global_strings.h
@@ -576,6 +576,7 @@ struct _Py_global_strings {
         STRUCT_FOR_ID(n_unnamed_fields)
         STRUCT_FOR_ID(name)
         STRUCT_FOR_ID(name_from)
+        STRUCT_FOR_ID(names)
         STRUCT_FOR_ID(namespace_separator)
         STRUCT_FOR_ID(namespaces)
         STRUCT_FOR_ID(narg)
@@ -618,6 +619,7 @@ struct _Py_global_strings {
         STRUCT_FOR_ID(pages)
         STRUCT_FOR_ID(parent)
         STRUCT_FOR_ID(password)
+        STRUCT_FOR_ID(pat)
         STRUCT_FOR_ID(path)
         STRUCT_FOR_ID(pattern)
         STRUCT_FOR_ID(peek)
diff --git a/Include/internal/pycore_runtime_init_generated.h b/Include/internal/pycore_runtime_init_generated.h
index c5be67c6d80b9d..1249957fb29d1e 100644
--- a/Include/internal/pycore_runtime_init_generated.h
+++ b/Include/internal/pycore_runtime_init_generated.h
@@ -1085,6 +1085,7 @@ extern "C" {
     INIT_ID(n_unnamed_fields), \
     INIT_ID(name), \
     INIT_ID(name_from), \
+    INIT_ID(names), \
     INIT_ID(namespace_separator), \
     INIT_ID(namespaces), \
     INIT_ID(narg), \
@@ -1127,6 +1128,7 @@ extern "C" {
     INIT_ID(pages), \
     INIT_ID(parent), \
     INIT_ID(password), \
+    INIT_ID(pat), \
     INIT_ID(path), \
     INIT_ID(pattern), \
     INIT_ID(peek), \
diff --git a/Include/internal/pycore_unicodeobject_generated.h b/Include/internal/pycore_unicodeobject_generated.h
index 0e0ad6518771e9..0bd57f5db64ea9 100644
--- a/Include/internal/pycore_unicodeobject_generated.h
+++ b/Include/internal/pycore_unicodeobject_generated.h
@@ -2104,6 +2104,10 @@ _PyUnicode_InitStaticStrings(PyInterpreterState *interp) {
     _PyUnicode_InternStatic(interp, &string);
     assert(_PyUnicode_CheckConsistency(string, 1));
     assert(PyUnicode_GET_LENGTH(string) != 1);
+    string = &_Py_ID(names);
+    _PyUnicode_InternStatic(interp, &string);
+    assert(_PyUnicode_CheckConsistency(string, 1));
+    assert(PyUnicode_GET_LENGTH(string) != 1);
     string = &_Py_ID(namespace_separator);
     _PyUnicode_InternStatic(interp, &string);
     assert(_PyUnicode_CheckConsistency(string, 1));
@@ -2272,6 +2276,10 @@ _PyUnicode_InitStaticStrings(PyInterpreterState *interp) {
     _PyUnicode_InternStatic(interp, &string);
     assert(_PyUnicode_CheckConsistency(string, 1));
     assert(PyUnicode_GET_LENGTH(string) != 1);
+    string = &_Py_ID(pat);
+    _PyUnicode_InternStatic(interp, &string);
+    assert(_PyUnicode_CheckConsistency(string, 1));
+    assert(PyUnicode_GET_LENGTH(string) != 1);
     string = &_Py_ID(path);
     _PyUnicode_InternStatic(interp, &string);
     assert(_PyUnicode_CheckConsistency(string, 1));

From e95c255bc92ef15cde2e6ae252700aa32b98a7b3 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?=
 <10796600+picnixz@users.noreply.github.com>
Date: Sat, 6 Jul 2024 20:57:56 +0200
Subject: [PATCH 04/97] update PC/config.c

---
 PC/config.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/PC/config.c b/PC/config.c
index b744f711b0d636..f08a847a3f1206 100644
--- a/PC/config.c
+++ b/PC/config.c
@@ -12,6 +12,7 @@ extern PyObject* PyInit_cmath(void);
 extern PyObject* PyInit_errno(void);
 extern PyObject* PyInit_faulthandler(void);
 extern PyObject* PyInit__tracemalloc(void);
+extern PyObject* PyInit_fnmatch(void);
 extern PyObject* PyInit_gc(void);
 extern PyObject* PyInit_math(void);
 extern PyObject* PyInit__md5(void);
@@ -92,6 +93,7 @@ struct _inittab _PyImport_Inittab[] = {
     {"cmath", PyInit_cmath},
     {"errno", PyInit_errno},
     {"faulthandler", PyInit_faulthandler},
+    {"fnmatch", PyInit_fnmatch},
     {"gc", PyInit_gc},
     {"math", PyInit_math},
     {"nt", PyInit_nt}, /* Use the NT os functions, not posix */

From 9b1c20dcdc8fe562c2a7287b67ee20483e164b4c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?=
 <10796600+picnixz@users.noreply.github.com>
Date: Sat, 6 Jul 2024 20:58:07 +0200
Subject: [PATCH 05/97] update Modules/Setup

---
 Modules/Setup | 1 +
 1 file changed, 1 insertion(+)

diff --git a/Modules/Setup b/Modules/Setup
index e4acf6bc7de8ea..acb542b70946ea 100644
--- a/Modules/Setup
+++ b/Modules/Setup
@@ -137,6 +137,7 @@ PYTHONPATH=$(COREPYTHONPATH)
 #_datetime _datetimemodule.c
 #_decimal _decimal/_decimal.c
 #_heapq _heapqmodule.c
+_fnmatch _fnmatchmodule.c
 #_interpchannels _interpchannelsmodule.c
 #_interpqueues _interpqueuesmodule.c
 #_interpreters _interpretersmodule.c

From 85fa9533e9780c244eac5f22eeee432daeebc647 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?=
 <10796600+picnixz@users.noreply.github.com>
Date: Sat, 6 Jul 2024 20:58:25 +0200
Subject: [PATCH 06/97] update VCX project files

---
 PCbuild/pythoncore.vcxproj         | 1 +
 PCbuild/pythoncore.vcxproj.filters | 3 +++
 2 files changed, 4 insertions(+)

diff --git a/PCbuild/pythoncore.vcxproj b/PCbuild/pythoncore.vcxproj
index f36fcb8caece33..639c497767cfa4 100644
--- a/PCbuild/pythoncore.vcxproj
+++ b/PCbuild/pythoncore.vcxproj
@@ -450,6 +450,7 @@
     <ClCompile Include="..\Modules\_datetimemodule.c" />
     <ClCompile Include="..\Modules\errnomodule.c" />
     <ClCompile Include="..\Modules\faulthandler.c" />
+    <ClCompile Include="..\Modules\fnmatchmodule.c" />
     <ClCompile Include="..\Modules\gcmodule.c" />
     <ClCompile Include="..\Modules\getbuildinfo.c" />
     <ClCompile Include="..\Modules\itertoolsmodule.c" />
diff --git a/PCbuild/pythoncore.vcxproj.filters b/PCbuild/pythoncore.vcxproj.filters
index a1b43addf9e36a..27f4905e6d5263 100644
--- a/PCbuild/pythoncore.vcxproj.filters
+++ b/PCbuild/pythoncore.vcxproj.filters
@@ -920,6 +920,9 @@
     <ClCompile Include="..\Modules\_csv.c">
       <Filter>Modules</Filter>
     </ClCompile>
+    <ClCompile Include="..\Modules\_fnmatchmodule.c">
+      <Filter>Modules</Filter>
+    </ClCompile>
     <ClCompile Include="..\Modules\_functoolsmodule.c">
       <Filter>Modules</Filter>
     </ClCompile>

From 363ec36f747e453595bc5663fa44d2cfe99b9a66 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?=
 <10796600+picnixz@users.noreply.github.com>
Date: Sat, 6 Jul 2024 21:44:06 +0200
Subject: [PATCH 07/97] fix return value

---
 Modules/_fnmatchmodule.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/Modules/_fnmatchmodule.c b/Modules/_fnmatchmodule.c
index a5b77ec4c8d4f2..19118fd2a4a740 100644
--- a/Modules/_fnmatchmodule.c
+++ b/Modules/_fnmatchmodule.c
@@ -184,7 +184,7 @@ _fnmatch_fnmatch_impl(PyObject *module, PyObject *name, PyObject *pat)
         return posix_fnmatch_unicode(pattern, name);
     }
     PyErr_Format(PyExc_TypeError, "pattern must be a string or a bytes object");
-    return NULL;
+    return -1;
 }
 
 /*[clinic input]
@@ -213,7 +213,7 @@ _fnmatch_fnmatchcase_impl(PyObject *module, PyObject *name, PyObject *pat)
         return posix_fnmatchcase_unicode(pattern, name);
     }
     PyErr_Format(PyExc_TypeError, "pattern must be a string or a bytes object");
-    return NULL;
+    return -1;
 }
 
 static PyMethodDef _fnmatch_methods[] = {

From 42b019f9bb208e716cb87ba2c25f9c67e8178c0d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?=
 <10796600+picnixz@users.noreply.github.com>
Date: Sat, 6 Jul 2024 21:45:09 +0200
Subject: [PATCH 08/97] fix typo in pythoncore.vcxproj

---
 PCbuild/pythoncore.vcxproj | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/PCbuild/pythoncore.vcxproj b/PCbuild/pythoncore.vcxproj
index 639c497767cfa4..db9f960c61ce6c 100644
--- a/PCbuild/pythoncore.vcxproj
+++ b/PCbuild/pythoncore.vcxproj
@@ -449,8 +449,8 @@
     <ClCompile Include="..\Modules\cmathmodule.c" />
     <ClCompile Include="..\Modules\_datetimemodule.c" />
     <ClCompile Include="..\Modules\errnomodule.c" />
+    <ClCompile Include="..\Modules\_fnmatchmodule.c" />
     <ClCompile Include="..\Modules\faulthandler.c" />
-    <ClCompile Include="..\Modules\fnmatchmodule.c" />
     <ClCompile Include="..\Modules\gcmodule.c" />
     <ClCompile Include="..\Modules\getbuildinfo.c" />
     <ClCompile Include="..\Modules\itertoolsmodule.c" />

From 4120a95dcf5b163f48f9d8973c36853635103777 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?=
 <10796600+picnixz@users.noreply.github.com>
Date: Sat, 6 Jul 2024 21:45:50 +0200
Subject: [PATCH 09/97] Update pythoncore.vcxproj.filters

---
 PCbuild/pythoncore.vcxproj.filters | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/PCbuild/pythoncore.vcxproj.filters b/PCbuild/pythoncore.vcxproj.filters
index 27f4905e6d5263..24384e355f46ec 100644
--- a/PCbuild/pythoncore.vcxproj.filters
+++ b/PCbuild/pythoncore.vcxproj.filters
@@ -920,9 +920,6 @@
     <ClCompile Include="..\Modules\_csv.c">
       <Filter>Modules</Filter>
     </ClCompile>
-    <ClCompile Include="..\Modules\_fnmatchmodule.c">
-      <Filter>Modules</Filter>
-    </ClCompile>
     <ClCompile Include="..\Modules\_functoolsmodule.c">
       <Filter>Modules</Filter>
     </ClCompile>
@@ -998,6 +995,9 @@
     <ClCompile Include="..\Modules\errnomodule.c">
       <Filter>Modules</Filter>
     </ClCompile>
+    <ClCompile Include="..\Modules\_fnmatchmodule.c">
+      <Filter>Modules</Filter>
+    </ClCompile>
     <ClCompile Include="..\Modules\faulthandler.c">
       <Filter>Modules</Filter>
     </ClCompile>

From 36394bbc19c6a8fe5d4c5559fd35227adea895b2 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?=
 <10796600+picnixz@users.noreply.github.com>
Date: Sun, 7 Jul 2024 00:37:56 +0200
Subject: [PATCH 10/97] Revert unnecessary modifications in `test_fnmatch.py`

---
 Lib/test/test_fnmatch.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/Lib/test/test_fnmatch.py b/Lib/test/test_fnmatch.py
index b086495b78c785..94ec41958b07c0 100644
--- a/Lib/test/test_fnmatch.py
+++ b/Lib/test/test_fnmatch.py
@@ -10,6 +10,7 @@
 from fnmatch import fnmatch, fnmatchcase, translate, filter
 
 class FnmatchTestCase(unittest.TestCase):
+
     def check_match(self, filename, pattern, should_match=True, fn=fnmatch):
         if should_match:
             self.assertTrue(fn(filename, pattern),

From 2c2f9f102d085bb0aef8fa98c5117c48bda0b346 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?=
 <10796600+picnixz@users.noreply.github.com>
Date: Mon, 8 Jul 2024 18:57:08 +0200
Subject: [PATCH 11/97] update configurations

---
 configure.ac  | 24 ++++++++++++++++++++++++
 pyconfig.h.in |  3 +++
 2 files changed, 27 insertions(+)

diff --git a/configure.ac b/configure.ac
index 705f8752597b96..d4fdb81d34890a 100644
--- a/configure.ac
+++ b/configure.ac
@@ -3854,6 +3854,30 @@ if test "$ac_cv_c_complex_supported" = "yes"; then
               [Defined if _Complex C type is available.])
 fi
 
+# check for fnmatch(3) support
+#
+# We test for the plain POSIX implementation (case-sensitive match).
+#
+# To ensure that the implementation of fnmatch(3) is compliant
+# we run some tests to make sure that everything works well.
+#
+# Note that MSVC does not support fnmatch(3).
+AC_CACHE_CHECK([for case-sensititve fnmatch(3)], [ac_cv_fnmatch_supported],
+[AC_RUN_IFELSE(
+  [AC_LANG_PROGRAM([@%:@include <fnmatch.h>], [[
+    exit(!(
+        fnmatch("a*", "abc", 0) != FNM_NOMATCH &&
+        fnmatch("a*", "Abc", 0) == FNM_NOMATCH
+    ));
+  ]])], [ac_cv_fnmatch_supported=yes],
+  [ac_cv_fnmatch_supported=no],
+  [ac_cv_fnmatch_supported=no]
+)])
+if test "$ac_cv_fnmatch_supported" = "yes"; then
+  AC_DEFINE([Py_HAVE_FNMATCH], [1],
+            [Defined if case-sensitive fnmatch(3) is supported.])
+fi
+
 # check for systems that require aligned memory access
 AC_CACHE_CHECK([aligned memory access is required], [ac_cv_aligned_required],
 [AC_RUN_IFELSE([AC_LANG_SOURCE([[
diff --git a/pyconfig.h.in b/pyconfig.h.in
index 8fbba7ed3b949e..0997722334867c 100644
--- a/pyconfig.h.in
+++ b/pyconfig.h.in
@@ -1689,6 +1689,9 @@
 /* Defined if _Complex C type is available. */
 #undef Py_HAVE_C_COMPLEX
 
+/* Defined if case-sensitive fnmatch(3) is supported. */
+#undef Py_HAVE_FNMATCH
+
 /* Define if year with century should be normalized for strftime. */
 #undef Py_NORMALIZE_CENTURY
 

From ecf8146a566c692cae9930f56285f0d722de741c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?=
 <10796600+picnixz@users.noreply.github.com>
Date: Mon, 8 Jul 2024 18:57:28 +0200
Subject: [PATCH 12/97] add C implementation

---
 Modules/_fnmatchmodule.c          | 865 ++++++++++++++++++++++++++----
 Modules/clinic/_fnmatchmodule.c.h |  69 ++-
 2 files changed, 799 insertions(+), 135 deletions(-)

diff --git a/Modules/_fnmatchmodule.c b/Modules/_fnmatchmodule.c
index 19118fd2a4a740..07d10f9112bc0e 100644
--- a/Modules/_fnmatchmodule.c
+++ b/Modules/_fnmatchmodule.c
@@ -1,8 +1,8 @@
 /*
- * C accelerator for the 'fnmatch' module.
+ * C accelerator for the 'fnmatch' module (POSIX only).
  *
  * Most functions expect string or bytes instances, and thus the Python
- * implementation should first pre-process path-like objects, and possibly
+ * implementation should first pre-process path-like objects, possibly
  * applying normalizations depending on the platform if needed.
  */
 
@@ -10,92 +10,225 @@
 
 #include "clinic/_fnmatchmodule.c.h"
 
+#define INVALID_PATTERN_TYPE "pattern must be a string or a bytes object"
+
+// module state functions
+
+typedef struct { // per-module state; the fields are set by fnmatchmodule_exec()
+    PyObject *re_module; // reference to the imported 're' module
+    PyObject *os_module; // reference to the imported 'os' module
+
+    PyObject *lru_cache; // functools.lru_cache (no cache is built from it yet)
+} fnmatchmodule_state;
+
+static inline fnmatchmodule_state * /* the state attached to 'module' */
+get_fnmatchmodulestate_state(PyObject *module)
+{
+    fnmatchmodule_state *st = (fnmatchmodule_state *)PyModule_GetState(module);
+    assert(st != NULL); // callers dereference the result unconditionally
+    return st;
+}
+
+static int /* always 0 */
+fnmatchmodule_clear(PyObject *m)
+{
+    fnmatchmodule_state *state = get_fnmatchmodulestate_state(m);
+    Py_CLEAR(state->os_module); // Py_CLEAR() resets each field to NULL,
+    Py_CLEAR(state->re_module); // so clearing the same state a second
+    Py_CLEAR(state->lru_cache); // time is harmless
+    return 0;
+}
+
+static int /* 0, or the first non-zero value returned by 'visit' */
+fnmatchmodule_traverse(PyObject *m, visitproc visit, void *arg)
+{
+    fnmatchmodule_state *state = get_fnmatchmodulestate_state(m);
+    Py_VISIT(state->os_module); // Py_VISIT() skips the NULL fields and
+    Py_VISIT(state->re_module); // requires the parameters to be named
+    Py_VISIT(state->lru_cache); // exactly 'visit' and 'arg'
+    return 0;
+}
+
+static void /* m_free hook: drops the references held by the state */
+fnmatchmodule_free(void *m)
+{
+    (void)fnmatchmodule_clear((PyObject *)m);
+}
+
+static int /* 0 on success, -1 with an exception set on failure */
+fnmatchmodule_exec(PyObject *m)
+{
+    fnmatchmodule_state *st = get_fnmatchmodulestate_state(m);
+
+    // Each reference is stored in the state as soon as it is obtained;
+    // the first failing step stops the initialization.
+    // imports
+    if ((st->os_module = PyImport_ImportModule("os")) == NULL) {
+        return -1;
+    }
+    if ((st->re_module = PyImport_ImportModule("re")) == NULL) {
+        return -1;
+    }
+
+    // helpers
+    st->lru_cache = _PyImport_GetModuleAttrString("functools", "lru_cache");
+    if (st->lru_cache == NULL) {
+        return -1;
+    }
+    // todo: handle LRU cache
+    return 0;
+}
+
 /*[clinic input]
 module _fnmatch
 [clinic start generated code]*/
 /*[clinic end generated code: output=da39a3ee5e6b4b0d input=356e324d57d93f08]*/
 
+#ifdef Py_HAVE_FNMATCH
 #include <fnmatch.h>
 
+#define VERIFY_NAME_ARG_TYPE(name, check, expecting) \
+    do { /* on mismatch: TypeError, then 'return -1' from the caller */ \
+        if (!(check)) { \
+            PyErr_Format(PyExc_TypeError, \
+                         "name must be a %s object, got %.200s", \
+                         (expecting), Py_TYPE(name)->tp_name); \
+            return -1; \
+        } \
+    } while (0)
+
+#define PROCESS_MATCH_RESULT(r) \
+    do { \
+        int res = (r); /* 0: match, FNM_NOMATCH: no match, else: error */ \
+        if (res != 0 && res != FNM_NOMATCH) { \
+            PyErr_SetString(PyExc_RuntimeError, "fnmatch(3) failed"); \
+            return -1; \
+        } \
+        return res == 0; \
+    } while (0)
+
+/*
+ * Perform a case-sensitive match using fnmatch(3).
+ *
+ * Parameters
+ *
+ *      pattern     A UNIX shell pattern.
+ *      name        The filename to match (bytes object).
+ *
+ * Returns 1 if the 'name' matches the 'pattern' and 0 otherwise.
+ *
+ * Returns -1 if (1) 'name' is not a `bytes` object, and
+ * sets a TypeError exception, or (2) something went wrong.
+ */
 static inline int
-validate_encoded_object(PyObject *name)
+posix_fnmatch_encoded(const char *pattern, PyObject *name)
 {
-    if (!PyBytes_Check(name)) {
-        PyErr_Format(PyExc_TypeError,
-                     "name must be a bytes object, got %.200s",
-                     Py_TYPE(name)->tp_name);
-        return 0;
-    }
-    return 1;
+    VERIFY_NAME_ARG_TYPE(name, PyBytes_Check(name), "bytes");
+    PROCESS_MATCH_RESULT(fnmatch(pattern, PyBytes_AS_STRING(name), 0));
 }
 
+/* Same as `posix_fnmatch_encoded` but for string-like objects. */
 static inline int
-validate_unicode_object(PyObject *name)
+posix_fnmatch_unicode(const char *pattern, PyObject *name)
 {
-    if (!PyUnicode_Check(name)) {
-        PyErr_Format(PyExc_TypeError,
-                     "name must be a string object, got %.200s",
-                     Py_TYPE(name)->tp_name);
-        return 0;
-    }
-    return 1;
+    VERIFY_NAME_ARG_TYPE(name, PyUnicode_Check(name), "string");
+    PROCESS_MATCH_RESULT(fnmatch(pattern, PyUnicode_AsUTF8(name), 0));
 }
 
-static inline int
-posix_fnmatch_encoded(const char *pattern, PyObject *name)
+static PyObject * /* new list of the matching names, or NULL on error */
+posix_fnmatch_filter(const char *pattern, PyObject *names,
+                     int (*match)(const char *, PyObject *))
 {
-    if (!validate_encoded_object(name)) {
-        return -1;
+    PyObject *iter = PyObject_GetIter(names);
+    if (iter == NULL) {
+        return NULL;
     }
-    // case-insensitive match
-#ifdef FNM_CASEFOLD
-    return fnmatch(pattern, PyBytes_AS_STRING(name), FNM_CASEFOLD) == 0;
-#else
-    // todo: fallback to Python implementation
-    return -1;
-#endif
-}
 
-static inline int
-posix_fnmatchcase_encoded(const char *pattern, PyObject *name)
-{
-    if (!validate_encoded_object(name)) {
-        return -1;
+    PyObject *res = PyList_New(0);
+    if (res == NULL) {
+        Py_DECREF(iter);
+        return NULL;
     }
-    // case-sensitive match
-    return fnmatch(pattern, PyBytes_AS_STRING(name), 0) == 0;
+
+    PyObject *name = NULL;
+    while ((name = PyIter_Next(iter))) {
+        int rc = match(pattern, name);
+        if (rc < 0) {
+            goto abort;
+        }
+        if (rc == 1) {
+            if (PyList_Append(res, name) < 0) {
+                goto abort;
+            }
+        }
+        Py_DECREF(name);
+    }
+    // PyIter_Next() returns NULL on exhaustion and on error alike: tell
+    // them apart here ('name' is NULL, which Py_XDECREF() tolerates).
+    if (PyErr_Occurred()) {
+        goto abort;
+    }
+    Py_DECREF(iter);
+    return res;
+abort:
+    Py_XDECREF(name);
+    Py_DECREF(iter);
+    Py_DECREF(res);
+    return NULL;
 }
+#else
 
-static inline int
-posix_fnmatch_unicode(const char *pattern, PyObject *name)
+static PyObject *
+get_match_function(PyObject *module, PyObject *pattern)
 {
-    if (!validate_unicode_object(name)) {
-        return -1;
+    PyObject *expr = _fnmatch_translate_impl(module, pattern);
+    if (expr == NULL) {
+        return NULL;
     }
-    // case-insensitive match
-#ifdef FNM_CASEFOLD
-    return fnmatch(pattern, PyUnicode_AsUTF8(name), FNM_CASEFOLD) == 0;
-#else
-    // todo: fallback to Python implementation
-    return -1;
-#endif
+    fnmatchmodule_state *st = get_fnmatchmodulestate_state(module);
+    PyObject *compiled = PyObject_CallMethod(st->re_module, "compile", "O", expr);
+    Py_DECREF(expr);
+    if (compiled == NULL) {
+        return NULL;
+    }
+    PyObject *matcher = PyObject_GetAttr(compiled, &_Py_ID(match));
+    Py_DECREF(compiled);
+    return matcher;
 }
 
+static PyMethodDef get_match_function_method_def = {
+    "get_match_function",
+    _PyCFunction_CAST(get_match_function),
+    METH_O,
+    NULL
+};
+
+/*
+ * Perform a case-sensitive match using regular expressions.
+ *
+ * Parameters
+ *
+ *      pattern     A translated regular expression.
+ *      name        The filename to match.
+ *
+ * Returns 1 if the 'name' matches the 'pattern' and 0 otherwise.
+ * Returns -1 if something went wrong.
+ */
 static inline int
-posix_fnmatchcase_unicode(const char *pattern, PyObject *name)
+regex_fnmatch_generic(PyObject *matcher, PyObject *name)
 {
-    if (!validate_unicode_object(name)) {
+    // If 'name' is of incorrect type, it will be detected when calling
+    // the matcher function (we emulate 're.compile(...).match(name)').
+    PyObject *match = PyObject_CallFunction(matcher, "O", name);
+    if (match == NULL) {
         return -1;
     }
-    // case-sensitive match
-    return fnmatch(pattern, PyUnicode_AsUTF8(name), 0) == 0;
+    int matching = match != Py_None;
+    Py_DECREF(match);
+    return matching;
 }
 
 static PyObject *
-_fnmatch_filter_generic_impl(PyObject *module,
-                             PyObject *names,
-                             const char *pattern,
-                             int (*match)(const char *, PyObject *))
+regex_fnmatch_filter(PyObject *matcher, PyObject *names)
 {
     PyObject *iter = PyObject_GetIter(names);
     if (iter == NULL) {
@@ -110,7 +243,7 @@ _fnmatch_filter_generic_impl(PyObject *module,
 
     PyObject *name = NULL;
     while ((name = PyIter_Next(iter))) {
-        int rc = match(pattern, name);
+        int rc = regex_fnmatch_generic(matcher, name);
         if (rc < 0) {
             goto abort;
         }
@@ -129,11 +262,12 @@ _fnmatch_filter_generic_impl(PyObject *module,
     Py_DECREF(iter);
     return res;
 abort:
-    Py_DECREF(name);
+    Py_XDECREF(name);
     Py_DECREF(iter);
     Py_DECREF(res);
     return NULL;
 }
+#endif
 
 /*[clinic input]
 _fnmatch.filter -> object
@@ -147,34 +281,59 @@ static PyObject *
 _fnmatch_filter_impl(PyObject *module, PyObject *names, PyObject *pat)
 /*[clinic end generated code: output=7f11aa68436d05fc input=1d233174e1c4157a]*/
 {
-    // todo: handle os.path.normcase(...)
+#ifndef Py_HAVE_FNMATCH
+    PyObject *matcher = get_match_function(module, pat);
+    if (matcher == NULL) {
+        return NULL;
+    }
+    PyObject *result = regex_fnmatch_filter(matcher, names);
+    Py_DECREF(matcher);
+    return result;
+#else
+    // Note that the Python implementation of fnmatch.filter() does not
+    // call os.fspath() on the names being matched, whereas it does on NT.
     if (PyBytes_Check(pat)) {
         const char *pattern = PyBytes_AS_STRING(pat);
-        return _fnmatch_filter_generic_impl(module, names, pattern,
-                                            &posix_fnmatch_encoded);
+        return posix_fnmatch_filter(pattern, names, &posix_fnmatch_encoded);
     }
     if (PyUnicode_Check(pat)) {
         const char *pattern = PyUnicode_AsUTF8(pat);
-        return _fnmatch_filter_generic_impl(module, names, pattern,
-                                            &posix_fnmatch_unicode);
+        return posix_fnmatch_filter(pattern, names, &posix_fnmatch_unicode);
     }
-    PyErr_Format(PyExc_TypeError, "pattern must be a string or a bytes object");
+    PyErr_SetString(PyExc_TypeError, INVALID_PATTERN_TYPE);
     return NULL;
+#endif
 }
 
 /*[clinic input]
-_fnmatch.fnmatch -> bool
+_fnmatch.fnmatchcase -> bool
 
     name: object
     pat: object
 
+Test whether `name` matches `pattern`, including case.
+
+This is a version of fnmatch() which doesn't case-normalize
+its arguments.
+
 [clinic start generated code]*/
 
 static int
-_fnmatch_fnmatch_impl(PyObject *module, PyObject *name, PyObject *pat)
-/*[clinic end generated code: output=b4cd0bd911e8bc93 input=c45e0366489540b8]*/
+_fnmatch_fnmatchcase_impl(PyObject *module, PyObject *name, PyObject *pat)
+/*[clinic end generated code: output=4d1283b1b1fc7cb8 input=b02a6a5c8c5a46e2]*/
 {
-    // todo: handle os.path.normcase(...)
+#ifndef Py_HAVE_FNMATCH
+    PyObject *matcher = get_match_function(module, pat);
+    if (matcher == NULL) {
+        return -1;
+    }
+    int res = regex_fnmatch_generic(matcher, name);
+    Py_DECREF(matcher);
+    return res;
+#else
+    // This function does not transform path-like objects, nor does it
+    // case-normalize 'name' or 'pattern' (whether it is the Python or
+    // the C implementation).
     if (PyBytes_Check(pat)) {
         const char *pattern = PyBytes_AS_STRING(pat);
         return posix_fnmatch_encoded(pattern, name);
@@ -183,60 +342,572 @@ _fnmatch_fnmatch_impl(PyObject *module, PyObject *name, PyObject *pat)
         const char *pattern = PyUnicode_AsUTF8(pat);
         return posix_fnmatch_unicode(pattern, name);
     }
-    PyErr_Format(PyExc_TypeError, "pattern must be a string or a bytes object");
+    PyErr_SetString(PyExc_TypeError, INVALID_PATTERN_TYPE);
     return -1;
+#endif
 }
 
-/*[clinic input]
-_fnmatch.fnmatchcase -> bool
 
-    name: object
-    pat: object
+static inline int /* number of written characters or -1 on error */
+write_normal_character(PyObject *re, _PyUnicodeWriter *writer, PyObject *cp)
+{
+    // write re.escape(cp), i.e. 'cp' as a literal in a regular expression
+    PyObject *ch = PyObject_CallMethodOneArg(re, &_Py_ID(escape), cp);
+    if (ch == NULL) {
+        return -1;
+    }
+    // the length is -1 (with an exception set) if 'ch' is not a string:
+    // in that case nothing must be written
+    Py_ssize_t written = PyUnicode_GetLength(ch);
+    int rc = written < 0 ? -1 : _PyUnicodeWriter_WriteStr(writer, ch);
+    Py_DECREF(ch);
+    assert(rc < 0 || (written > 0 && written <= INT_MAX));
+    return rc < 0 ? -1 : (int)written;
+}
 
-Test whether `name` matches `pattern`, including case.
+static inline int /* number of written characters or -1 on error */
+write_translated_group(_PyUnicodeWriter *writer, PyObject *group)
+{
+#define WRITE_ASCII(str, len) \
+    do { \
+        if (_PyUnicodeWriter_WriteASCIIString(writer, (str), (len)) < 0) { \
+            return -1; \
+        } \
+    } while (0)
 
-This is a version of fnmatch() which doesn't case-normalize
-its arguments.
+#define WRITE_CHAR(c) \
+    do { \
+        if (_PyUnicodeWriter_WriteChar(writer, (c)) < 0) { \
+            return -1; \
+        } \
+    } while (0)
+
+    // 'group' is the already escaped content of a '[...]' expression; it is
+    // measured in code points since its length is also used as an index.
+    Py_ssize_t grouplen = PyUnicode_GetLength(group);
+    if (grouplen < 0) {
+        return -1;
+    }
+    if (grouplen == 0) {
+        /* empty range: never match */
+        WRITE_ASCII("(?!)", 4);
+        return 4;
+    }
+    const Py_UCS4 first = PyUnicode_READ_CHAR(group, 0);
+    if (grouplen == 1 && first == '!') {
+        /* negated empty range: match any character */
+        WRITE_CHAR('.');
+        return 1;
+    }
+    int extra = 0; // 1 if a backslash is inserted before 'group'
+    WRITE_CHAR('[');
+    if (first == '!') {
+        /* negation: '!' is replaced by '^' */
+        WRITE_CHAR('^');
+        if (_PyUnicodeWriter_WriteSubstring(writer, group, 1, grouplen) < 0) {
+            return -1;
+        }
+    }
+    else {
+        if (first == '^' || first == '[') {
+            /* escape the leading character, then write the whole group */
+            WRITE_CHAR('\\');
+            extra = 1;
+        }
+        if (_PyUnicodeWriter_WriteStr(writer, group) < 0) {
+            return -1;
+        }
+    }
+    WRITE_CHAR(']');
+    return (int)(2 + grouplen + extra);
+#undef WRITE_CHAR
+#undef WRITE_ASCII
+}
+
+static PyObject *
+get_translated_group(PyObject *unicode,
+                     Py_ssize_t i /* unicode[i-1] == '[' (incl.) */,
+                     Py_ssize_t j /* unicode[j]   == ']' (excl.) */)
+{
+    /*
+     * Translate the content unicode[i:j] (with i < j) of a bracket
+     * expression into the content of a regex character set:
+     *
+     *  1. split unicode[i:j] around the hyphens that create ranges,
+     *  2. remove the empty ranges such as 'z-a',
+     *  3. escape the backslashes and the hyphens left in the chunks,
+     *  4. join the chunks with '-'.
+     *
+     * Returns a new reference, or NULL with an exception set.
+     */
+    PyObject *chunks = PyList_New(0);
+    if (chunks == NULL) {
+        return NULL;
+    }
+    // a hyphen right after '[' or '[!' never creates a range
+    Py_ssize_t k = PyUnicode_READ_CHAR(unicode, i) == '!' ? i + 2 : i + 1;
+    Py_ssize_t chunkscount = 0;
+    while (k < j) {
+        // t = unicode.find('-', k, j)
+        Py_ssize_t t = PyUnicode_FindChar(unicode, '-', k, j, 1);
+        if (t == -2) { // the search itself failed
+            goto error;
+        }
+        if (t == -1) { // no more hyphen creating a range
+            break;
+        }
+        PyObject *sub = PyUnicode_Substring(unicode, i, t);
+        if (sub == NULL) {
+            goto error;
+        }
+        int rc = PyList_Append(chunks, sub);
+        Py_DECREF(sub);
+        if (rc < 0) {
+            goto error;
+        }
+        chunkscount += 1;
+        i = t + 1;
+        k = t + 3;
+    }
+    if (i >= j) {
+        // unicode[i:j] is empty: the hyphen ending the group is literal
+        // and is given back to the last chunk
+        assert(chunkscount > 0);
+        PyObject *chunk = PyList_GET_ITEM(chunks, chunkscount - 1);
+        PyObject *hyphen = PyUnicode_FromOrdinal('-');
+        if (hyphen == NULL) {
+            goto error;
+        }
+        PyObject *repl = PyUnicode_Concat(chunk, hyphen);
+        Py_DECREF(hyphen);
+        if (repl == NULL) {
+            goto error;
+        }
+        // PyList_SetItem() steals 'repl' (even on failure): no Py_DECREF()
+        if (PyList_SetItem(chunks, chunkscount - 1, repl) < 0) {
+            goto error;
+        }
+    }
+    else {
+        PyObject *sub = PyUnicode_Substring(unicode, i, j);
+        if (sub == NULL) {
+            goto error;
+        }
+        int rc = PyList_Append(chunks, sub);
+        Py_DECREF(sub);
+        if (rc < 0) {
+            goto error;
+        }
+        chunkscount += 1;
+    }
+    // remove empty ranges (they are not valid in RE)
+    for (Py_ssize_t c = chunkscount - 1; c > 0; --c) {
+        PyObject *c1 = PyList_GET_ITEM(chunks, c - 1);
+        assert(c1 != NULL);
+        Py_ssize_t c1len = PyUnicode_GET_LENGTH(c1);
+        assert(c1len > 0);
+
+        PyObject *c2 = PyList_GET_ITEM(chunks, c);
+        assert(c2 != NULL);
+        Py_ssize_t c2len = PyUnicode_GET_LENGTH(c2);
+        assert(c2len > 0);
+
+        // the bounds are compared as code points, not as UTF-8 bytes
+        Py_UCS4 lower = PyUnicode_READ_CHAR(c1, c1len - 1);
+        Py_UCS4 upper = PyUnicode_READ_CHAR(c2, 0);
+        if (lower > upper) {
+            // all but the last character in the chunk
+            PyObject *c1sub = PyUnicode_Substring(c1, 0, c1len - 1);
+            // all but the first character in the chunk
+            PyObject *c2sub = PyUnicode_Substring(c2, 1, c2len);
+            if (c1sub == NULL || c2sub == NULL) {
+                Py_XDECREF(c1sub);
+                Py_XDECREF(c2sub);
+                goto error;
+            }
+            PyObject *merged = PyUnicode_Concat(c1sub, c2sub);
+            Py_DECREF(c1sub);
+            Py_DECREF(c2sub);
+            if (merged == NULL) {
+                goto error;
+            }
+            // 'merged' is stolen as well
+            if (PyList_SetItem(chunks, c - 1, merged) < 0) {
+                goto error;
+            }
+            if (PySequence_DelItem(chunks, c) < 0) {
+                goto error;
+            }
+            chunkscount--;
+        }
+    }
+    // Escape backslashes and hyphens for set difference (--),
+    // but hyphens that create ranges should not be escaped.
+    for (Py_ssize_t c = 0; c < chunkscount; ++c) {
+        PyObject *s0 = PyList_GetItem(chunks, c);
+        if (s0 == NULL) {
+            goto error;
+        }
+        PyObject *s1 = PyObject_CallMethod(s0, "replace", "ss", "\\", "\\\\");
+        if (s1 == NULL) {
+            goto error;
+        }
+        PyObject *s2 = PyObject_CallMethod(s1, "replace", "ss", "-", "\\-");
+        Py_DECREF(s1);
+        if (s2 == NULL) {
+            goto error;
+        }
+        if (PyList_SetItem(chunks, c, s2) < 0) {
+            goto error;
+        }
+    }
+    PyObject *hyphen = PyUnicode_FromString("-");
+    if (hyphen == NULL) {
+        goto error;
+    }
+    PyObject *res = PyUnicode_Join(hyphen, chunks);
+    Py_DECREF(hyphen);
+    Py_DECREF(chunks);
+    return res; // NULL (with an exception set) if the join failed
+error:
+    Py_DECREF(chunks);
+    return NULL;
+}
+
+/*
+ * Expand the wildcards of a translated pattern and wrap the result as
+ * '(?s:...)\Z'.
+ *
+ * Parameters
+ *
+ *      parts       The translated pattern (a string), in which the
+ *                  wildcards are still written as '*'.
+ *      indices     The list of the positions (int objects) of those
+ *                  wildcards in 'parts', in increasing order.
+ *
+ * Returns a new reference, or NULL with an exception set.
+ */
+static PyObject *
+join_translated_parts(PyObject *parts, PyObject *indices)
+{
+#define LOAD_STAR_INDEX(var, k) \
+    do { \
+        ind = PyList_GET_ITEM(indices, (k)); \
+        var = PyLong_AsSsize_t(ind); \
+        if (var < 0) { \
+            goto abort; \
+        } \
+    } while (0)
+
+#define WRITE_SUBSTRING(i, j) \
+    do { \
+        if ((i) < (j)) { \
+            if (_PyUnicodeWriter_WriteSubstring(_writer, parts, (i), (j)) < 0) { \
+                goto abort; \
+            } \
+        } \
+    } while (0)
+
+#define WRITE_WILDCARD() \
+    do { \
+        if (_PyUnicodeWriter_WriteASCIIString(_writer, ".*", 2) < 0) { \
+            goto abort; \
+        } \
+    } while (0)
+
+#define WRITE_ATOMIC_SUBSTRING(i, j) \
+    do { \
+        if ((_PyUnicodeWriter_WriteASCIIString(_writer, "(?>.*?", 6) < 0) || \
+            (_PyUnicodeWriter_WriteSubstring(_writer, parts, (i), (j)) < 0) || \
+            (_PyUnicodeWriter_WriteChar(_writer, ')') < 0)) \
+        { \
+            goto abort; \
+        } \
+    } while (0)
+
+    const Py_ssize_t m = PyList_GET_SIZE(indices);
+    if (m == 0) {
+        // no wildcard: just write f'(?s:{parts})\Z'
+        return PyUnicode_FromFormat("(?s:%S)\\Z", parts);
+    }
+
+    PyUnicodeWriter *writer = PyUnicodeWriter_Create(0);
+    if (writer == NULL) {
+        return NULL;
+    }
+    _PyUnicodeWriter *_writer = (_PyUnicodeWriter *)(writer);
+
+    /*
+     * With m wildcards, 'parts' has the shape
+     *
+     *      HEAD * FIXED_1 * ... * FIXED_(m-1) * TAIL
+     *
+     * where HEAD and TAIL may be empty, and it is rewritten as
+     *
+     *      HEAD (?>.*?FIXED_1) ... (?>.*?FIXED_(m-1)) .* TAIL
+     *
+     * Every wildcard but the last one opens an atomic group, whether or
+     * not the pattern starts with a wildcard: 'a*b*c' gives 'a(?>.*?b).*c'
+     * and '*b*c' gives '(?>.*?b).*c'.
+     */
+    PyObject *ind;
+    Py_ssize_t i, j, n = PyUnicode_GetLength(parts);
+    // handle the HEAD part (nothing is written if it is empty)
+    LOAD_STAR_INDEX(i, 0);
+    WRITE_SUBSTRING(0, i);
+    i++; // first character after the first wildcard
+    // handle the inner groups: parts[i:j] is FIXED_k
+    for (Py_ssize_t k = 1; k < m; ++k) {
+        LOAD_STAR_INDEX(j, k);
+        assert(i < j);
+        WRITE_ATOMIC_SUBSTRING(i, j);
+        i = j + 1;
+    }
+    // handle the last wildcard and the TAIL part (if any)
+    WRITE_WILDCARD();
+    WRITE_SUBSTRING(i, n);
+
+    PyObject *res = PyUnicodeWriter_Finish(writer);
+    if (res == NULL) {
+        return NULL;
+    }
+    // 'res' is only an intermediate string: release it once it is wrapped
+    PyObject *wrapped = PyUnicode_FromFormat("(?s:%S)\\Z", res);
+    Py_DECREF(res);
+    return wrapped;
+abort:
+    PyUnicodeWriter_Discard(writer);
+    return NULL;
+}
+
+static PyObject *
+translate(PyObject *module, PyObject *unicode)
+/* translate a str pattern into a regex (new reference, NULL on error) */
+{
+    fnmatchmodule_state *state = get_fnmatchmodulestate_state(module);
+    PyObject *re = state->re_module;
+
+    // no size estimate: the writer grows as the pattern is translated
+    PyUnicodeWriter *writer = PyUnicodeWriter_Create(0);
+    if (writer == NULL) {
+        return NULL;
+    }
+    _PyUnicodeWriter *_writer = (_PyUnicodeWriter *) (writer);
+
+    // list containing the indices where '*' has a special meaning
+    PyObject *indices = PyList_New(0);
+    if (indices == NULL) {
+        goto abort;
+    }
+
+    Py_ssize_t n = PyUnicode_GetLength(unicode);
+    if (n < 0) {
+        goto abort;
+    }
+    Py_ssize_t h = 0, i = 0;
+    PyObject *peek = NULL;
+    while (i < n) {
+        PyObject *chr = PySequence_GetItem(unicode, i);
+        if (chr == NULL) {
+            goto abort;
+        }
+        if (PyUnicode_CompareWithASCIIString(chr, "*") == 0) {
+            Py_DECREF(chr);
+            if (_PyUnicodeWriter_WriteChar(_writer, '*') < 0) {
+                goto abort;
+            }
+            // drop all other '*' that can be found afterwards
+            while (++i < n) {
+                peek = PySequence_GetItem(unicode, i);
+                if (peek == NULL) {
+                    goto abort;
+                }
+                if (PyUnicode_CompareWithASCIIString(peek, "*") != 0) {
+                    Py_DECREF(peek);
+                    break;
+                }
+                Py_DECREF(peek);
+            }
+            PyObject *index = PyLong_FromSsize_t(h++);
+            if (index == NULL) {
+                goto abort;
+            }
+            int rc = PyList_Append(indices, index);
+            Py_DECREF(index);
+            if (rc < 0) {
+                goto abort;
+            }
+        }
+        else if (PyUnicode_CompareWithASCIIString(chr, "?") == 0)  {
+            Py_DECREF(chr);
+            // translate '?' (any single character in fnmatch) into '.' (regex)
+            if (_PyUnicodeWriter_WriteChar(_writer, '.') < 0) {
+                goto abort;
+            }
+            ++i; // advance for the next iteration
+            ++h; // increase the expected result's length
+        }
+        else if (PyUnicode_CompareWithASCIIString(chr, "[") == 0)  {
+            Py_DECREF(chr);
+            // check the next characters (peek)
+            Py_ssize_t j = ++i;
+            if (j < n) {
+                peek = PySequence_GetItem(unicode, j);
+                if (peek == NULL) {
+                    goto abort;
+                }
+                if (PyUnicode_CompareWithASCIIString(peek, "!") == 0) {// [!
+                    ++j;
+                }
+                Py_DECREF(peek);
+            }
+            if (j < n) {
+                peek = PySequence_GetItem(unicode, j);
+                if (peek == NULL) {
+                    goto abort;
+                }
+                if (PyUnicode_CompareWithASCIIString(peek, "]") == 0) { // [!] or []
+                    ++j;
+                }
+                Py_DECREF(peek);
+            }
+            // locate the closing ']' ('j' becomes 'n' if there is none)
+            if (j < n) {
+                j = PyUnicode_FindChar(unicode, ']', j, n, 1);
+                if (j == -2) {
+                    goto abort;
+                }
+                if (j == -1) {
+                    j = n;
+                }
+            }
+            if (j >= n) {
+                if (_PyUnicodeWriter_WriteASCIIString(_writer, "\\[", 2) < 0) {
+                    goto abort;
+                }
+                h += 2; // we just wrote 2 characters
+            }
+            else {
+                //              v--- pattern[j] (exclusive)
+                // '[' * ... * ']'
+                //     ^----- pattern[i] (inclusive)
+                PyObject *s1 = NULL, *s2 = NULL;
+                Py_ssize_t hyphen = PyUnicode_FindChar(unicode, '-', i, j, 1);
+                if (hyphen == -1) { // no range: only escape the backslashes
+                    PyObject *group = PyUnicode_Substring(unicode, i, j);
+                    if (group == NULL) {
+                        goto abort;
+                    }
+                    s1 = PyObject_CallMethod(group, "replace", "ss", "\\", "\\\\");
+                    Py_DECREF(group);
+                }
+                else if (hyphen >= 0) {
+                    s1 = get_translated_group(unicode, i, j);
+                }
+                if (s1 == NULL) { // also reached if the search for '-' failed
+                    goto abort;
+                }
+                s2 = PyObject_CallMethod(re, "sub", "ssO", "([&~|])", "\\\\\\1", s1);
+                Py_DECREF(s1);
+                if (s2 == NULL) {
+                    goto abort;
+                }
+                int difflen = write_translated_group(_writer, s2);
+                Py_DECREF(s2);
+                if (difflen < 0) {
+                    goto abort;
+                }
+                h += difflen;
+                i = j + 1; // jump to the character after ']'
+            }
+        }
+        else {
+            int difflen = write_normal_character(re, _writer, chr);
+            Py_DECREF(chr);
+            if (difflen < 0) {
+                goto abort;
+            }
+            h += difflen;
+            ++i;
+        }
+    }
+    PyObject *parts = PyUnicodeWriter_Finish(writer);
+    if (parts == NULL) {
+        Py_DECREF(indices);
+        return NULL;
+    }
+    assert(h == PyUnicode_GET_LENGTH(parts));
+    PyObject *res = join_translated_parts(parts, indices);
+    Py_DECREF(parts);
+    Py_DECREF(indices);
+    return res;
+abort:
+    Py_XDECREF(indices);
+    PyUnicodeWriter_Discard(writer);
+    return NULL;
+}
+
+/*[clinic input]
+_fnmatch.translate -> object
+
+    pat as pattern: object
 
 [clinic start generated code]*/
 
-static int
-_fnmatch_fnmatchcase_impl(PyObject *module, PyObject *name, PyObject *pat)
-/*[clinic end generated code: output=4d1283b1b1fc7cb8 input=b02a6a5c8c5a46e2]*/
+static PyObject *
+_fnmatch_translate_impl(PyObject *module, PyObject *pattern)
+/*[clinic end generated code: output=2d9e3bbcbcc6e90e input=56e39f7beea97810]*/
 {
-    if (PyBytes_Check(pat)) {
-        const char *pattern = PyBytes_AS_STRING(pat);
-        return posix_fnmatchcase_encoded(pattern, name);
+    if (PyBytes_Check(pattern)) {
+        PyObject *unicode = PyUnicode_DecodeLatin1(PyBytes_AS_STRING(pattern),
+                                                   PyBytes_GET_SIZE(pattern),
+                                                   "strict");
+        if (unicode == NULL) {
+            return NULL;
+        }
+        // translated regular expression as a str object
+        PyObject *str_expr = translate(module, unicode);
+        Py_DECREF(unicode);
+        if (str_expr == NULL) {
+            return NULL;
+        }
+        PyObject *expr = PyUnicode_AsLatin1String(str_expr);
+        Py_DECREF(str_expr);
+        return expr;
     }
-    if (PyUnicode_Check(pat)) {
-        const char *pattern = PyUnicode_AsUTF8(pat);
-        return posix_fnmatchcase_unicode(pattern, name);
+    else if (PyUnicode_Check(pattern)) {
+        return translate(module, pattern);
+    }
+    else {
+        PyErr_SetString(PyExc_TypeError, INVALID_PATTERN_TYPE);
+        return NULL;
     }
-    PyErr_Format(PyExc_TypeError, "pattern must be a string or a bytes object");
-    return -1;
 }
 
-static PyMethodDef _fnmatch_methods[] = {
+static PyMethodDef fnmatchmodule_methods[] = {
     _FNMATCH_FILTER_METHODDEF
-    _FNMATCH_FNMATCH_METHODDEF
     _FNMATCH_FNMATCHCASE_METHODDEF
+    _FNMATCH_TRANSLATE_METHODDEF
     {NULL, NULL}
 };
 
-static struct PyModuleDef_Slot _fnmatch_slots[] = {
-    {0, NULL}
+static struct PyModuleDef_Slot fnmatchmodule_slots[] = {
+    {Py_mod_exec, fnmatchmodule_exec},
+    {Py_mod_multiple_interpreters, Py_MOD_PER_INTERPRETER_GIL_SUPPORTED},
+    {Py_mod_gil, Py_MOD_GIL_NOT_USED},
+    {0, NULL},
 };
 
 static struct PyModuleDef _fnmatchmodule = {
     PyModuleDef_HEAD_INIT,
     "_fnmatch",
     NULL,
-    0,
-    _fnmatch_methods,
-    _fnmatch_slots,
-    NULL,
-    NULL,
-    NULL,
+    .m_size = sizeof(fnmatchmodule_state),
+    .m_methods = fnmatchmodule_methods,
+    .m_slots = fnmatchmodule_slots,
+    .m_traverse = fnmatchmodule_traverse,
+    .m_clear = fnmatchmodule_clear,
+    .m_free = fnmatchmodule_free,
 };
 
 PyMODINIT_FUNC
diff --git a/Modules/clinic/_fnmatchmodule.c.h b/Modules/clinic/_fnmatchmodule.c.h
index a693bccee18ff5..4b12f33113d3fb 100644
--- a/Modules/clinic/_fnmatchmodule.c.h
+++ b/Modules/clinic/_fnmatchmodule.c.h
@@ -64,19 +64,23 @@ _fnmatch_filter(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObj
     return return_value;
 }
 
-PyDoc_STRVAR(_fnmatch_fnmatch__doc__,
-"fnmatch($module, /, name, pat)\n"
+PyDoc_STRVAR(_fnmatch_fnmatchcase__doc__,
+"fnmatchcase($module, /, name, pat)\n"
 "--\n"
-"\n");
+"\n"
+"Test whether `name` matches `pattern`, including case.\n"
+"\n"
+"This is a version of fnmatch() which doesn\'t case-normalize\n"
+"its arguments.");
 
-#define _FNMATCH_FNMATCH_METHODDEF    \
-    {"fnmatch", _PyCFunction_CAST(_fnmatch_fnmatch), METH_FASTCALL|METH_KEYWORDS, _fnmatch_fnmatch__doc__},
+#define _FNMATCH_FNMATCHCASE_METHODDEF    \
+    {"fnmatchcase", _PyCFunction_CAST(_fnmatch_fnmatchcase), METH_FASTCALL|METH_KEYWORDS, _fnmatch_fnmatchcase__doc__},
 
 static int
-_fnmatch_fnmatch_impl(PyObject *module, PyObject *name, PyObject *pat);
+_fnmatch_fnmatchcase_impl(PyObject *module, PyObject *name, PyObject *pat);
 
 static PyObject *
-_fnmatch_fnmatch(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames)
+_fnmatch_fnmatchcase(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames)
 {
     PyObject *return_value = NULL;
     #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE)
@@ -100,7 +104,7 @@ _fnmatch_fnmatch(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyOb
     static const char * const _keywords[] = {"name", "pat", NULL};
     static _PyArg_Parser _parser = {
         .keywords = _keywords,
-        .fname = "fnmatch",
+        .fname = "fnmatchcase",
         .kwtuple = KWTUPLE,
     };
     #undef KWTUPLE
@@ -115,7 +119,7 @@ _fnmatch_fnmatch(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyOb
     }
     name = args[0];
     pat = args[1];
-    _return_value = _fnmatch_fnmatch_impl(module, name, pat);
+    _return_value = _fnmatch_fnmatchcase_impl(module, name, pat);
     if ((_return_value == -1) && PyErr_Occurred()) {
         goto exit;
     }
@@ -125,35 +129,31 @@ _fnmatch_fnmatch(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyOb
     return return_value;
 }
 
-PyDoc_STRVAR(_fnmatch_fnmatchcase__doc__,
-"fnmatchcase($module, /, name, pat)\n"
+PyDoc_STRVAR(_fnmatch_translate__doc__,
+"translate($module, /, pat)\n"
 "--\n"
-"\n"
-"Test whether `name` matches `pattern`, including case.\n"
-"\n"
-"This is a version of fnmatch() which doesn\'t case-normalize\n"
-"its arguments.");
+"\n");
 
-#define _FNMATCH_FNMATCHCASE_METHODDEF    \
-    {"fnmatchcase", _PyCFunction_CAST(_fnmatch_fnmatchcase), METH_FASTCALL|METH_KEYWORDS, _fnmatch_fnmatchcase__doc__},
+#define _FNMATCH_TRANSLATE_METHODDEF    \
+    {"translate", _PyCFunction_CAST(_fnmatch_translate), METH_FASTCALL|METH_KEYWORDS, _fnmatch_translate__doc__},
 
-static int
-_fnmatch_fnmatchcase_impl(PyObject *module, PyObject *name, PyObject *pat);
+static PyObject *
+_fnmatch_translate_impl(PyObject *module, PyObject *pattern);
 
 static PyObject *
-_fnmatch_fnmatchcase(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames)
+_fnmatch_translate(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames)
 {
     PyObject *return_value = NULL;
     #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE)
 
-    #define NUM_KEYWORDS 2
+    #define NUM_KEYWORDS 1
     static struct {
         PyGC_Head _this_is_not_used;
         PyObject_VAR_HEAD
         PyObject *ob_item[NUM_KEYWORDS];
     } _kwtuple = {
         .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS)
-        .ob_item = { &_Py_ID(name), &_Py_ID(pat), },
+        .ob_item = { &_Py_ID(pat), },
     };
     #undef NUM_KEYWORDS
     #define KWTUPLE (&_kwtuple.ob_base.ob_base)
@@ -162,31 +162,24 @@ _fnmatch_fnmatchcase(PyObject *module, PyObject *const *args, Py_ssize_t nargs,
     #  define KWTUPLE NULL
     #endif  // !Py_BUILD_CORE
 
-    static const char * const _keywords[] = {"name", "pat", NULL};
+    static const char * const _keywords[] = {"pat", NULL};
     static _PyArg_Parser _parser = {
         .keywords = _keywords,
-        .fname = "fnmatchcase",
+        .fname = "translate",
         .kwtuple = KWTUPLE,
     };
     #undef KWTUPLE
-    PyObject *argsbuf[2];
-    PyObject *name;
-    PyObject *pat;
-    int _return_value;
+    PyObject *argsbuf[1];
+    PyObject *pattern;
 
-    args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, 2, 2, 0, argsbuf);
+    args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, 1, 1, 0, argsbuf);
     if (!args) {
         goto exit;
     }
-    name = args[0];
-    pat = args[1];
-    _return_value = _fnmatch_fnmatchcase_impl(module, name, pat);
-    if ((_return_value == -1) && PyErr_Occurred()) {
-        goto exit;
-    }
-    return_value = PyBool_FromLong((long)_return_value);
+    pattern = args[0];
+    return_value = _fnmatch_translate_impl(module, pattern);
 
 exit:
     return return_value;
 }
-/*[clinic end generated code: output=fd6cc9541aa95a9a input=a9049054013a1b77]*/
+/*[clinic end generated code: output=b0366b259b101bdf input=a9049054013a1b77]*/

From cb16b6ac5bad479ea80933ebe8b43bb682408d30 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?=
 <10796600+picnixz@users.noreply.github.com>
Date: Mon, 8 Jul 2024 18:57:37 +0200
Subject: [PATCH 13/97] update Python implementation

---
 Lib/fnmatch.py | 289 +++++++++++++++++++++++++------------------------
 1 file changed, 148 insertions(+), 141 deletions(-)

diff --git a/Lib/fnmatch.py b/Lib/fnmatch.py
index 73acb1fe8d4106..96487bc53fb2de 100644
--- a/Lib/fnmatch.py
+++ b/Lib/fnmatch.py
@@ -45,148 +45,155 @@ def _compile_pattern(pat):
         res = translate(pat)
     return re.compile(res).match
 
-def filter(names, pat):
-    """Construct a list from those elements of the iterable NAMES that match PAT."""
-    result = []
-    pat = os.path.normcase(pat)
-    match = _compile_pattern(pat)
-    if os.path is posixpath:
-        # normcase on posix is NOP. Optimize it away from the loop.
-        for name in names:
-            if match(name):
-                result.append(name)
-    else:
-        for name in names:
-            if match(os.path.normcase(name)):
-                result.append(name)
-    return result
-
-def fnmatchcase(name, pat):
-    """Test whether FILENAME matches PATTERN, including case.
-
-    This is a version of fnmatch() which doesn't case-normalize
-    its arguments.
-    """
-    match = _compile_pattern(pat)
-    return match(name) is not None
-
-
-def translate(pat):
-    """Translate a shell PATTERN to a regular expression.
-
-    There is no way to quote meta-characters.
-    """
-
-    STAR = object()
-    parts = _translate(pat, STAR, '.')
-    return _join_translated_parts(parts, STAR)
-
-
-def _translate(pat, STAR, QUESTION_MARK):
-    res = []
-    add = res.append
-    i, n = 0, len(pat)
-    while i < n:
-        c = pat[i]
-        i = i+1
-        if c == '*':
-            # compress consecutive `*` into one
-            if (not res) or res[-1] is not STAR:
-                add(STAR)
-        elif c == '?':
-            add(QUESTION_MARK)
-        elif c == '[':
-            j = i
-            if j < n and pat[j] == '!':
-                j = j+1
-            if j < n and pat[j] == ']':
-                j = j+1
-            while j < n and pat[j] != ']':
-                j = j+1
-            if j >= n:
-                add('\\[')
-            else:
-                stuff = pat[i:j]
-                if '-' not in stuff:
-                    stuff = stuff.replace('\\', r'\\')
+try:
+    from _fnmatch import filter
+except ImportError:
+    def filter(names, pat):
+        """Construct a list from those elements of the iterable NAMES that match PAT."""
+        result = []
+        pat = os.path.normcase(pat)
+        match = _compile_pattern(pat)
+        if os.path is posixpath:
+            # normcase on posix is NOP. Optimize it away from the loop.
+            for name in names:
+                if match(name):
+                    result.append(name)
+        else:
+            for name in names:
+                if match(os.path.normcase(name)):
+                    result.append(name)
+        return result
+
+try:
+    from _fnmatch import fnmatchcase
+except ImportError:
+    def fnmatchcase(name, pat):
+        """Test whether FILENAME matches PATTERN, including case.
+
+        This is a version of fnmatch() which doesn't case-normalize
+        its arguments.
+        """
+        match = _compile_pattern(pat)
+        return match(name) is not None
+
+try:
+    from _fnmatch import translate
+except ImportError:
+    def translate(pat):
+        """Translate a shell PATTERN to a regular expression.
+
+        There is no way to quote meta-characters.
+        """
+
+        STAR = object()
+        parts = _translate(pat, STAR, '.')
+        return _join_translated_parts(parts, STAR)
+
+    def _translate(pat, STAR, QUESTION_MARK):
+        res = []
+        add = res.append
+        i, n = 0, len(pat)
+        while i < n:
+            c = pat[i]
+            i = i+1
+            if c == '*':
+                # compress consecutive `*` into one
+                if (not res) or res[-1] is not STAR:
+                    add(STAR)
+            elif c == '?':
+                add(QUESTION_MARK)
+            elif c == '[':
+                j = i
+                if j < n and pat[j] == '!':
+                    j = j+1
+                if j < n and pat[j] == ']':
+                    j = j+1
+                while j < n and pat[j] != ']':
+                    j = j+1
+                if j >= n:
+                    add('\\[')
                 else:
-                    chunks = []
-                    k = i+2 if pat[i] == '!' else i+1
-                    while True:
-                        k = pat.find('-', k, j)
-                        if k < 0:
-                            break
-                        chunks.append(pat[i:k])
-                        i = k+1
-                        k = k+3
-                    chunk = pat[i:j]
-                    if chunk:
-                        chunks.append(chunk)
+                    stuff = pat[i:j]
+                    if '-' not in stuff:
+                        stuff = stuff.replace('\\', r'\\')
                     else:
-                        chunks[-1] += '-'
-                    # Remove empty ranges -- invalid in RE.
-                    for k in range(len(chunks)-1, 0, -1):
-                        if chunks[k-1][-1] > chunks[k][0]:
-                            chunks[k-1] = chunks[k-1][:-1] + chunks[k][1:]
-                            del chunks[k]
-                    # Escape backslashes and hyphens for set difference (--).
-                    # Hyphens that create ranges shouldn't be escaped.
-                    stuff = '-'.join(s.replace('\\', r'\\').replace('-', r'\-')
-                                     for s in chunks)
-                # Escape set operations (&&, ~~ and ||).
-                stuff = re.sub(r'([&~|])', r'\\\1', stuff)
-                i = j+1
-                if not stuff:
-                    # Empty range: never match.
-                    add('(?!)')
-                elif stuff == '!':
-                    # Negated empty range: match any character.
-                    add('.')
-                else:
-                    if stuff[0] == '!':
-                        stuff = '^' + stuff[1:]
-                    elif stuff[0] in ('^', '['):
-                        stuff = '\\' + stuff
-                    add(f'[{stuff}]')
-        else:
-            add(re.escape(c))
-    assert i == n
-    return res
-
-
-def _join_translated_parts(inp, STAR):
-    # Deal with STARs.
-    res = []
-    add = res.append
-    i, n = 0, len(inp)
-    # Fixed pieces at the start?
-    while i < n and inp[i] is not STAR:
-        add(inp[i])
-        i += 1
-    # Now deal with STAR fixed STAR fixed ...
-    # For an interior `STAR fixed` pairing, we want to do a minimal
-    # .*? match followed by `fixed`, with no possibility of backtracking.
-    # Atomic groups ("(?>...)") allow us to spell that directly.
-    # Note: people rely on the undocumented ability to join multiple
-    # translate() results together via "|" to build large regexps matching
-    # "one of many" shell patterns.
-    while i < n:
-        assert inp[i] is STAR
-        i += 1
-        if i == n:
-            add(".*")
-            break
-        assert inp[i] is not STAR
-        fixed = []
+                        chunks = []
+                        k = i+2 if pat[i] == '!' else i+1
+                        while True:
+                            k = pat.find('-', k, j)
+                            if k < 0:
+                                break
+                            chunks.append(pat[i:k])
+                            i = k+1
+                            k = k+3
+                        chunk = pat[i:j]
+                        if chunk:
+                            chunks.append(chunk)
+                        else:
+                            chunks[-1] += '-'
+                        # Remove empty ranges -- invalid in RE.
+                        for k in range(len(chunks)-1, 0, -1):
+                            if chunks[k-1][-1] > chunks[k][0]:
+                                chunks[k-1] = chunks[k-1][:-1] + chunks[k][1:]
+                                del chunks[k]
+                        # Escape backslashes and hyphens for set difference (--).
+                        # Hyphens that create ranges shouldn't be escaped.
+                        stuff = '-'.join(s.replace('\\', r'\\').replace('-', r'\-')
+                                         for s in chunks)
+                    # Escape set operations (&&, ~~ and ||).
+                    stuff = re.sub(r'([&~|])', r'\\\1', stuff)
+                    i = j+1
+                    if not stuff:
+                        # Empty range: never match.
+                        add('(?!)')
+                    elif stuff == '!':
+                        # Negated empty range: match any character.
+                        add('.')
+                    else:
+                        if stuff[0] == '!':
+                            stuff = '^' + stuff[1:]
+                        elif stuff[0] in ('^', '['):
+                            stuff = '\\' + stuff
+                        add(f'[{stuff}]')
+            else:
+                add(re.escape(c))
+        assert i == n
+        return res
+
+
+    def _join_translated_parts(inp, STAR):
+        # Deal with STARs.
+        res = []
+        add = res.append
+        i, n = 0, len(inp)
+        # Fixed pieces at the start?
         while i < n and inp[i] is not STAR:
-            fixed.append(inp[i])
+            add(inp[i])
             i += 1
-        fixed = "".join(fixed)
-        if i == n:
-            add(".*")
-            add(fixed)
-        else:
-            add(f"(?>.*?{fixed})")
-    assert i == n
-    res = "".join(res)
-    return fr'(?s:{res})\Z'
+        # Now deal with STAR fixed STAR fixed ...
+        # For an interior `STAR fixed` pairing, we want to do a minimal
+        # .*? match followed by `fixed`, with no possibility of backtracking.
+        # Atomic groups ("(?>...)") allow us to spell that directly.
+        # Note: people rely on the undocumented ability to join multiple
+        # translate() results together via "|" to build large regexps matching
+        # "one of many" shell patterns.
+        while i < n:
+            assert inp[i] is STAR
+            i += 1
+            if i == n:
+                add(".*")
+                break
+            assert inp[i] is not STAR
+            fixed = []
+            while i < n and inp[i] is not STAR:
+                fixed.append(inp[i])
+                i += 1
+            fixed = "".join(fixed)
+            if i == n:
+                add(".*")
+                add(fixed)
+            else:
+                add(f"(?>.*?{fixed})")
+        assert i == n
+        res = "".join(res)
+        return fr'(?s:{res})\Z'

From 751c06906bead6192bc79bfe9f3db67136929502 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?=
 <10796600+picnixz@users.noreply.github.com>
Date: Mon, 8 Jul 2024 18:57:42 +0200
Subject: [PATCH 14/97] update tests

---
 Lib/test/test_fnmatch.py | 15 ++++++++++-----
 1 file changed, 10 insertions(+), 5 deletions(-)

diff --git a/Lib/test/test_fnmatch.py b/Lib/test/test_fnmatch.py
index 94ec41958b07c0..f7e9391722ac38 100644
--- a/Lib/test/test_fnmatch.py
+++ b/Lib/test/test_fnmatch.py
@@ -1,13 +1,18 @@
 """Test cases for the fnmatch module."""
-
-import unittest
 import os
 import string
+import unittest
 import warnings
 
-import _fnmatch as c_fnmatch
-import fnmatch as py_fnmatch
-from fnmatch import fnmatch, fnmatchcase, translate, filter
+import test.support.import_helper
+
+c_fnmatch = test.support.import_helper.import_fresh_module("_fnmatch", blocked=["fnmatch"])
+py_fnmatch = test.support.import_helper.import_fresh_module("fnmatch", blocked=["_fnmatch"])
+
+fnmatch = py_fnmatch.fnmatch
+fnmatchcase = py_fnmatch.fnmatchcase
+translate  = py_fnmatch.translate
+filter = py_fnmatch.filter
 
 class FnmatchTestCase(unittest.TestCase):
 

From 92580688a0999401dd0b11cf5683d43bceb45d36 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?=
 <10796600+picnixz@users.noreply.github.com>
Date: Mon, 8 Jul 2024 18:57:50 +0200
Subject: [PATCH 15/97] add generated objects

---
 Include/internal/pycore_global_objects_fini_generated.h | 1 +
 Include/internal/pycore_global_strings.h                | 1 +
 Include/internal/pycore_runtime_init_generated.h        | 1 +
 Include/internal/pycore_unicodeobject_generated.h       | 4 ++++
 4 files changed, 7 insertions(+)

diff --git a/Include/internal/pycore_global_objects_fini_generated.h b/Include/internal/pycore_global_objects_fini_generated.h
index 77b2a8e2e7a7dc..8e3d405fc7c04b 100644
--- a/Include/internal/pycore_global_objects_fini_generated.h
+++ b/Include/internal/pycore_global_objects_fini_generated.h
@@ -916,6 +916,7 @@ _PyStaticObjects_CheckRefcnt(PyInterpreterState *interp) {
     _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(entrypoint));
     _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(env));
     _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(errors));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(escape));
     _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(event));
     _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(eventmask));
     _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(exc_type));
diff --git a/Include/internal/pycore_global_strings.h b/Include/internal/pycore_global_strings.h
index 4896f6343087d3..7cbc1941ffa0ee 100644
--- a/Include/internal/pycore_global_strings.h
+++ b/Include/internal/pycore_global_strings.h
@@ -405,6 +405,7 @@ struct _Py_global_strings {
         STRUCT_FOR_ID(entrypoint)
         STRUCT_FOR_ID(env)
         STRUCT_FOR_ID(errors)
+        STRUCT_FOR_ID(escape)
         STRUCT_FOR_ID(event)
         STRUCT_FOR_ID(eventmask)
         STRUCT_FOR_ID(exc_type)
diff --git a/Include/internal/pycore_runtime_init_generated.h b/Include/internal/pycore_runtime_init_generated.h
index 1249957fb29d1e..4164c6cae7a8bf 100644
--- a/Include/internal/pycore_runtime_init_generated.h
+++ b/Include/internal/pycore_runtime_init_generated.h
@@ -914,6 +914,7 @@ extern "C" {
     INIT_ID(entrypoint), \
     INIT_ID(env), \
     INIT_ID(errors), \
+    INIT_ID(escape), \
     INIT_ID(event), \
     INIT_ID(eventmask), \
     INIT_ID(exc_type), \
diff --git a/Include/internal/pycore_unicodeobject_generated.h b/Include/internal/pycore_unicodeobject_generated.h
index 0bd57f5db64ea9..b15845cd16e814 100644
--- a/Include/internal/pycore_unicodeobject_generated.h
+++ b/Include/internal/pycore_unicodeobject_generated.h
@@ -1420,6 +1420,10 @@ _PyUnicode_InitStaticStrings(PyInterpreterState *interp) {
     _PyUnicode_InternStatic(interp, &string);
     assert(_PyUnicode_CheckConsistency(string, 1));
     assert(PyUnicode_GET_LENGTH(string) != 1);
+    string = &_Py_ID(escape);
+    _PyUnicode_InternStatic(interp, &string);
+    assert(_PyUnicode_CheckConsistency(string, 1));
+    assert(PyUnicode_GET_LENGTH(string) != 1);
     string = &_Py_ID(event);
     _PyUnicode_InternStatic(interp, &string);
     assert(_PyUnicode_CheckConsistency(string, 1));

From 5a7183c80ed3ebcec0a04a6af8e2f5eaf3270fe9 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?=
 <10796600+picnixz@users.noreply.github.com>
Date: Mon, 8 Jul 2024 19:01:33 +0200
Subject: [PATCH 16/97] re-expose private API

---
 Lib/fnmatch.py | 208 ++++++++++++++++++++++++-------------------------
 1 file changed, 104 insertions(+), 104 deletions(-)

diff --git a/Lib/fnmatch.py b/Lib/fnmatch.py
index 96487bc53fb2de..ffa15825954f5a 100644
--- a/Lib/fnmatch.py
+++ b/Lib/fnmatch.py
@@ -89,111 +89,111 @@ def translate(pat):
         parts = _translate(pat, STAR, '.')
         return _join_translated_parts(parts, STAR)
 
-    def _translate(pat, STAR, QUESTION_MARK):
-        res = []
-        add = res.append
-        i, n = 0, len(pat)
-        while i < n:
-            c = pat[i]
-            i = i+1
-            if c == '*':
-                # compress consecutive `*` into one
-                if (not res) or res[-1] is not STAR:
-                    add(STAR)
-            elif c == '?':
-                add(QUESTION_MARK)
-            elif c == '[':
-                j = i
-                if j < n and pat[j] == '!':
-                    j = j+1
-                if j < n and pat[j] == ']':
-                    j = j+1
-                while j < n and pat[j] != ']':
-                    j = j+1
-                if j >= n:
-                    add('\\[')
+def _translate(pat, STAR, QUESTION_MARK):
+    res = []
+    add = res.append
+    i, n = 0, len(pat)
+    while i < n:
+        c = pat[i]
+        i = i+1
+        if c == '*':
+            # compress consecutive `*` into one
+            if (not res) or res[-1] is not STAR:
+                add(STAR)
+        elif c == '?':
+            add(QUESTION_MARK)
+        elif c == '[':
+            j = i
+            if j < n and pat[j] == '!':
+                j = j+1
+            if j < n and pat[j] == ']':
+                j = j+1
+            while j < n and pat[j] != ']':
+                j = j+1
+            if j >= n:
+                add('\\[')
+            else:
+                stuff = pat[i:j]
+                if '-' not in stuff:
+                    stuff = stuff.replace('\\', r'\\')
                 else:
-                    stuff = pat[i:j]
-                    if '-' not in stuff:
-                        stuff = stuff.replace('\\', r'\\')
-                    else:
-                        chunks = []
-                        k = i+2 if pat[i] == '!' else i+1
-                        while True:
-                            k = pat.find('-', k, j)
-                            if k < 0:
-                                break
-                            chunks.append(pat[i:k])
-                            i = k+1
-                            k = k+3
-                        chunk = pat[i:j]
-                        if chunk:
-                            chunks.append(chunk)
-                        else:
-                            chunks[-1] += '-'
-                        # Remove empty ranges -- invalid in RE.
-                        for k in range(len(chunks)-1, 0, -1):
-                            if chunks[k-1][-1] > chunks[k][0]:
-                                chunks[k-1] = chunks[k-1][:-1] + chunks[k][1:]
-                                del chunks[k]
-                        # Escape backslashes and hyphens for set difference (--).
-                        # Hyphens that create ranges shouldn't be escaped.
-                        stuff = '-'.join(s.replace('\\', r'\\').replace('-', r'\-')
-                                         for s in chunks)
-                    # Escape set operations (&&, ~~ and ||).
-                    stuff = re.sub(r'([&~|])', r'\\\1', stuff)
-                    i = j+1
-                    if not stuff:
-                        # Empty range: never match.
-                        add('(?!)')
-                    elif stuff == '!':
-                        # Negated empty range: match any character.
-                        add('.')
+                    chunks = []
+                    k = i+2 if pat[i] == '!' else i+1
+                    while True:
+                        k = pat.find('-', k, j)
+                        if k < 0:
+                            break
+                        chunks.append(pat[i:k])
+                        i = k+1
+                        k = k+3
+                    chunk = pat[i:j]
+                    if chunk:
+                        chunks.append(chunk)
                     else:
-                        if stuff[0] == '!':
-                            stuff = '^' + stuff[1:]
-                        elif stuff[0] in ('^', '['):
-                            stuff = '\\' + stuff
-                        add(f'[{stuff}]')
-            else:
-                add(re.escape(c))
-        assert i == n
-        return res
-
-
-    def _join_translated_parts(inp, STAR):
-        # Deal with STARs.
-        res = []
-        add = res.append
-        i, n = 0, len(inp)
-        # Fixed pieces at the start?
+                        chunks[-1] += '-'
+                    # Remove empty ranges -- invalid in RE.
+                    for k in range(len(chunks)-1, 0, -1):
+                        if chunks[k-1][-1] > chunks[k][0]:
+                            chunks[k-1] = chunks[k-1][:-1] + chunks[k][1:]
+                            del chunks[k]
+                    # Escape backslashes and hyphens for set difference (--).
+                    # Hyphens that create ranges shouldn't be escaped.
+                    stuff = '-'.join(s.replace('\\', r'\\').replace('-', r'\-')
+                                     for s in chunks)
+                # Escape set operations (&&, ~~ and ||).
+                stuff = re.sub(r'([&~|])', r'\\\1', stuff)
+                i = j+1
+                if not stuff:
+                    # Empty range: never match.
+                    add('(?!)')
+                elif stuff == '!':
+                    # Negated empty range: match any character.
+                    add('.')
+                else:
+                    if stuff[0] == '!':
+                        stuff = '^' + stuff[1:]
+                    elif stuff[0] in ('^', '['):
+                        stuff = '\\' + stuff
+                    add(f'[{stuff}]')
+        else:
+            add(re.escape(c))
+    assert i == n
+    return res
+
+
+def _join_translated_parts(inp, STAR):
+    # Deal with STARs.
+    res = []
+    add = res.append
+    i, n = 0, len(inp)
+    # Fixed pieces at the start?
+    while i < n and inp[i] is not STAR:
+        add(inp[i])
+        i += 1
+    # Now deal with STAR fixed STAR fixed ...
+    # For an interior `STAR fixed` pairing, we want to do a minimal
+    # .*? match followed by `fixed`, with no possibility of backtracking.
+    # Atomic groups ("(?>...)") allow us to spell that directly.
+    # Note: people rely on the undocumented ability to join multiple
+    # translate() results together via "|" to build large regexps matching
+    # "one of many" shell patterns.
+    while i < n:
+        assert inp[i] is STAR
+        i += 1
+        if i == n:
+            add(".*")
+            break
+        assert inp[i] is not STAR
+        fixed = []
         while i < n and inp[i] is not STAR:
-            add(inp[i])
-            i += 1
-        # Now deal with STAR fixed STAR fixed ...
-        # For an interior `STAR fixed` pairing, we want to do a minimal
-        # .*? match followed by `fixed`, with no possibility of backtracking.
-        # Atomic groups ("(?>...)") allow us to spell that directly.
-        # Note: people rely on the undocumented ability to join multiple
-        # translate() results together via "|" to build large regexps matching
-        # "one of many" shell patterns.
-        while i < n:
-            assert inp[i] is STAR
+            fixed.append(inp[i])
             i += 1
-            if i == n:
-                add(".*")
-                break
-            assert inp[i] is not STAR
-            fixed = []
-            while i < n and inp[i] is not STAR:
-                fixed.append(inp[i])
-                i += 1
-            fixed = "".join(fixed)
-            if i == n:
-                add(".*")
-                add(fixed)
-            else:
-                add(f"(?>.*?{fixed})")
-        assert i == n
-        res = "".join(res)
-        return fr'(?s:{res})\Z'
+        fixed = "".join(fixed)
+        if i == n:
+            add(".*")
+            add(fixed)
+        else:
+            add(f"(?>.*?{fixed})")
+    assert i == n
+    res = "".join(res)
+    return fr'(?s:{res})\Z'

From 2a8020046cf62fb2ff6b7d92205f609d7d6856cc Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?=
 <10796600+picnixz@users.noreply.github.com>
Date: Tue, 9 Jul 2024 11:54:59 +0200
Subject: [PATCH 17/97] fix implementation?

---
 Modules/_fnmatchmodule.c | 475 ++++++++++++++++++++-------------------
 1 file changed, 248 insertions(+), 227 deletions(-)

diff --git a/Modules/_fnmatchmodule.c b/Modules/_fnmatchmodule.c
index 07d10f9112bc0e..251b71ea5f5cfd 100644
--- a/Modules/_fnmatchmodule.c
+++ b/Modules/_fnmatchmodule.c
@@ -7,6 +7,7 @@
  */
 
 #include "Python.h"
+#include "pycore_call.h" // for _PyObject_CallMethod
 
 #include "clinic/_fnmatchmodule.c.h"
 
@@ -19,6 +20,10 @@ typedef struct {
     PyObject *os_module; // 'os' module
 
     PyObject *lru_cache; // optional cache for regex patterns, if needed
+
+    PyObject *str_atomic_bgroup;    // (?>.*?
+    PyObject *str_atomic_egroup;    // )
+    PyObject *str_wildcard;         // *
 } fnmatchmodule_state;
 
 static inline fnmatchmodule_state *
@@ -36,6 +41,10 @@ fnmatchmodule_clear(PyObject *m)
     Py_CLEAR(st->os_module);
     Py_CLEAR(st->re_module);
     Py_CLEAR(st->lru_cache);
+
+    Py_CLEAR(st->str_atomic_bgroup);
+    Py_CLEAR(st->str_atomic_egroup);
+    Py_CLEAR(st->str_wildcard);
     return 0;
 }
 
@@ -46,6 +55,10 @@ fnmatchmodule_traverse(PyObject *m, visitproc visit, void *arg)
     Py_VISIT(st->os_module);
     Py_VISIT(st->re_module);
     Py_VISIT(st->lru_cache);
+
+    Py_VISIT(st->str_atomic_bgroup);
+    Py_VISIT(st->str_atomic_egroup);
+    Py_VISIT(st->str_wildcard);
     return 0;
 }
 
@@ -58,17 +71,27 @@ fnmatchmodule_free(void *m)
 static int
 fnmatchmodule_exec(PyObject *m)
 {
+#define IMPORT_MODULE(attr, name) \
+    do { \
+        state->attr = PyImport_ImportModule((name)); \
+        if (state->attr == NULL) { \
+            return -1; \
+        } \
+    } while (0)
+
+#define INTERN_STRING(attr, str) \
+    do { \
+        state->attr = PyUnicode_InternFromString((str)); \
+        if (state->attr == NULL) { \
+            return -1; \
+        } \
+    } while (0)
+
     fnmatchmodule_state *state = get_fnmatchmodulestate_state(m);
 
     // imports
-    state->os_module = PyImport_ImportModule("os");
-    if (state->os_module == NULL) {
-        return -1;
-    }
-    state->re_module = PyImport_ImportModule("re");
-    if (state->re_module == NULL) {
-        return -1;
-    }
+    IMPORT_MODULE(os_module, "os");
+    IMPORT_MODULE(re_module, "re");
 
     // helpers
     state->lru_cache = _PyImport_GetModuleAttrString("functools", "lru_cache");
@@ -76,6 +99,15 @@ fnmatchmodule_exec(PyObject *m)
         return -1;
     }
     // todo: handle LRU cache
+
+    // interned strings
+    INTERN_STRING(str_atomic_bgroup, "(?>.*?");
+    INTERN_STRING(str_atomic_egroup, ")");
+    INTERN_STRING(str_wildcard, "*");
+
+#undef INTERN_STRING
+#undef IMPORT_MODULE
+
     return 0;
 }
 
@@ -347,17 +379,48 @@ _fnmatch_fnmatchcase_impl(PyObject *module, PyObject *name, PyObject *pat)
 #endif
 }
 
+/*
+ * Return a str object of length 1 holding the code point 'ch'.
+ *
+ * Returns a new reference, or NULL if PyUnicode_New() fails.
+ *
+ * Note: this is 'unicode_char' taken from Objects/unicodeobject.c.
+ */
+static PyObject *
+get_unicode_character(Py_UCS4 ch)
+{
+    assert(ch <= MAX_UNICODE);
+    if (ch < 256) { // Latin-1 range: use _Py_LATIN1_CHR()
+        PyObject *o = _Py_LATIN1_CHR(ch);
+        assert(_Py_IsImmortal(o)); // immortal: returned without Py_INCREF
+        return o;
+    }
+    PyObject *unicode = PyUnicode_New(1, ch); // length 1, maxchar = ch
+    if (unicode == NULL) {
+        return NULL;
+    }
+    assert(PyUnicode_KIND(unicode) != PyUnicode_1BYTE_KIND); // ch >= 256
+    if (PyUnicode_KIND(unicode) == PyUnicode_2BYTE_KIND) {
+        PyUnicode_2BYTE_DATA(unicode)[0] = (Py_UCS2) ch;
+    }
+    else {
+        assert(PyUnicode_KIND(unicode) == PyUnicode_4BYTE_KIND);
+        PyUnicode_4BYTE_DATA(unicode)[0] = ch;
+    }
+    assert(_PyUnicode_CheckConsistency(unicode, 1));
+    return unicode;
+}
 
-static inline int /* number of written characters or -1 on error */
-write_normal_character(PyObject *re, _PyUnicodeWriter *writer, PyObject *cp)
+static Py_ssize_t /* number of written characters or -1 on error */
+write_escaped_string(PyObject *re, _PyUnicodeWriter *writer, PyObject *str)
 {
-    PyObject *ch = PyObject_CallMethodOneArg(re, &_Py_ID(escape), cp);
-    if (ch == NULL) {
+    PyObject *escaped = PyObject_CallMethodOneArg(re, &_Py_ID(escape), str);
+    if (escaped == NULL) {
         return -1;
     }
-    int written = PyUnicode_GetLength(ch);
-    int rc = _PyUnicodeWriter_WriteStr(writer, ch);
-    Py_DECREF(ch);
+    Py_ssize_t written = PyUnicode_GET_LENGTH(escaped);
+    int rc = _PyUnicodeWriter_WriteStr(writer, escaped);
+    Py_DECREF(escaped);
     if (rc < 0) {
         return -1;
     }
@@ -365,7 +428,7 @@ write_normal_character(PyObject *re, _PyUnicodeWriter *writer, PyObject *cp)
     return written;
 }
 
-static inline int /* number of written characters or -1 on error */
+static Py_ssize_t /* number of written characters or -1 on error */
 write_translated_group(_PyUnicodeWriter *writer, PyObject *group)
 {
 #define WRITE_ASCII(str, len) \
@@ -395,7 +458,7 @@ write_translated_group(_PyUnicodeWriter *writer, PyObject *group)
         return 1;
     }
     else {
-        int extra = 0;
+        Py_ssize_t extra = 2; // '[' and ']'
         WRITE_CHAR('[');
         switch (buffer[0]) {
             case '!': {
@@ -408,7 +471,7 @@ write_translated_group(_PyUnicodeWriter *writer, PyObject *group)
             case '^':
             case '[': {
                 WRITE_CHAR('\\');
-                extra = 1;
+                extra++;
                 break;
             }
             default:
@@ -418,30 +481,25 @@ write_translated_group(_PyUnicodeWriter *writer, PyObject *group)
                 break;
         }
         WRITE_CHAR(']');
-        return 2 + grouplen + extra;
+        return grouplen + extra;
     }
 #undef WRITE_CHAR
 #undef WRITE_ASCII
 }
 
 static PyObject *
-get_translated_group(PyObject *unicode,
-                     Py_ssize_t i /* unicode[i-1] == '[' (incl.) */,
-                     Py_ssize_t j /* unicode[j]   == ']' (excl.) */)
+get_translated_group(PyObject *pattern,
+                     Py_ssize_t i /* pattern[i-1] == '[' (incl.) */,
+                     Py_ssize_t j /* pattern[j]   == ']' (excl.) */)
 {
     PyObject *chunks = PyList_New(0);
     if (chunks == NULL) {
         return NULL;
     }
-    PyObject *chr = PySequence_GetItem(unicode, i);
-    if (chr == NULL) {
-        goto error;
-    }
-    Py_ssize_t k = PyUnicode_CompareWithASCIIString(chr, "!") == 0 ? i + 2 : i + 1;
-    Py_DECREF(chr);
+    Py_ssize_t k = (PyUnicode_READ_CHAR(pattern, i) == '!') ? i + 2 : i + 1;
     Py_ssize_t chunkscount = 0;
     while (k < j) {
-        PyObject *eobj = PyObject_CallMethod(unicode, "find", "ii", k, j);
+        PyObject *eobj = _PyObject_CallMethod(pattern, &_Py_ID(find), "ii", k, j);
         if (eobj == NULL) {
             goto error;
         }
@@ -450,7 +508,7 @@ get_translated_group(PyObject *unicode,
         if (t < 0) {
             goto error;
         }
-        PyObject *sub = PyUnicode_Substring(unicode, i, t);
+        PyObject *sub = PyUnicode_Substring(pattern, i, t);
         if (sub == NULL) {
             goto error;
         }
@@ -479,7 +537,7 @@ get_translated_group(PyObject *unicode,
         }
     }
     else {
-        PyObject *sub = PyUnicode_Substring(unicode, i, j);
+        PyObject *sub = PyUnicode_Substring(pattern, i, j);
         if (sub == NULL) {
             goto error;
         }
@@ -494,24 +552,16 @@ get_translated_group(PyObject *unicode,
     Py_ssize_t c = chunkscount;
     while (--c) {
         PyObject *c1 = PyList_GET_ITEM(chunks, c - 1);
-        assert(c1 != NULL);
-        Py_ssize_t c1len = 0;
-        const char *c1buf = PyUnicode_AsUTF8AndSize(c1, &c1len);
-        if (c1buf == NULL) {
-            goto error;
-        }
         assert(c1len > 0);
+        Py_ssize_t c1len = PyUnicode_GET_LENGTH(c1);
+        assert(c1 != NULL);
 
         PyObject *c2 = PyList_GET_ITEM(chunks, c);
         assert(c2 != NULL);
-        Py_ssize_t c2len = 0;
-        const char *c2buf = PyUnicode_AsUTF8AndSize(c2, &c2len);
-        if (c2buf == NULL) {
-            goto error;
-        }
+        Py_ssize_t c2len = PyUnicode_GET_LENGTH(c2);
         assert(c2len > 0);
 
-        if (c1buf[c1len - 1] > c2buf[0]) {
+        if (PyUnicode_READ_CHAR(c1, c1len - 1) > PyUnicode_READ_CHAR(c2, 0)) {
             // all but the last character in the chunk
             PyObject *c1sub = PyUnicode_Substring(c1, 0, c1len - 1);
             // all but the first character in the chunk
@@ -558,7 +608,7 @@ get_translated_group(PyObject *unicode,
             goto error;
         }
     }
-    PyObject *hyphen = PyUnicode_FromString("-");
+    PyObject *hyphen = PyUnicode_FromOrdinal('-');
     if (hyphen == NULL) {
         goto error;
     }
@@ -575,101 +625,79 @@ get_translated_group(PyObject *unicode,
 }
 
 static PyObject *
-join_translated_parts(PyObject *parts, PyObject *indices)
+join_translated_parts(PyObject *module, PyObject *strings, PyObject *indices)
 {
-#define LOAD_STAR_INDEX(var, k) \
-    do { \
-        ind = PyList_GET_ITEM(indices, (k)); \
-        var = PyLong_AsSsize_t(ind); \
-        if (var < 0) { \
-            goto abort; \
-        } \
-    } while (0)
-
 #define WRITE_SUBSTRING(i, j) \
     do { \
         if ((i) < (j)) { \
-            if (_PyUnicodeWriter_WriteSubstring(_writer, parts, (i), (j)) < 0) { \
+            if (_PyUnicodeWriter_WriteSubstring(_writer, strings, (i), (j)) < 0) { \
                 goto abort; \
             } \
         } \
     } while (0)
 
-#define WRITE_WILDCARD() \
-    do { \
-        if (_PyUnicodeWriter_WriteASCIIString(_writer, ".*", 2) < 0) { \
-            goto abort; \
-        } \
-    } while (0)
-
-#define WRITE_ATOMIC_SUBSTRING(i, j) \
-    do { \
-        if ((_PyUnicodeWriter_WriteASCIIString(_writer, "(?>.*?", 6) < 0) || \
-            (_PyUnicodeWriter_WriteSubstring(_writer, parts, (i), (j)) < 0) || \
-            (_PyUnicodeWriter_WriteChar(_writer, ')') < 0)) \
-        { \
-            goto abort; \
-        } \
-    } while (0)
-
     const Py_ssize_t m = PyList_GET_SIZE(indices);
     if (m == 0) {
         // just write fr'(?s:{parts} + ")\Z"
-        return PyUnicode_FromFormat("(?s:%S)\\Z", parts);
+        return PyUnicode_FromFormat("(?s:%S)\\Z", strings);
     }
-
-    PyUnicodeWriter *writer = PyUnicodeWriter_Create(0);
-    if (writer == NULL) {
-        return NULL;
-    }
-    _PyUnicodeWriter *_writer = (_PyUnicodeWriter *)(writer);
-
     /*
      * Special cases: indices[0] == 0 or indices[-1] + 1 == n
      *
-     * If indices[0] == 0       write (?>.*?group_1) instead of abcdef
+     * If indices[0] == 0       write (?>.*?abcdef) instead of abcdef
      * If indices[-1] == n - 1  write '.*' instead of empty string
      */
     PyObject *ind;
-    Py_ssize_t i, j, n = PyUnicode_GetLength(parts);
-    // handle the first group
-    LOAD_STAR_INDEX(i, 0);
-    if (i == 0) {
-        if (m == 1) { // pattern = '*TAIL'
-            WRITE_WILDCARD();
-            WRITE_SUBSTRING(1, n); // write TAIL part
-            goto finalize;
-        }
-        else { // pattern = '*BODY*...'
-            LOAD_STAR_INDEX(j, 1);
-            WRITE_ATOMIC_SUBSTRING(i + 1, j);
-            i = j + 1;
-        }
+    Py_ssize_t i = 0, j, n = PyUnicode_GET_LENGTH(strings);
+    /*
+     * If the pattern starts with '*', we will write everything
+     * before it. So we will write at least indices[0] characters.
+     *
+     * For the inner groups 'STAR STRING ...' we always surround
+     * the STRING by "(?>.*?" and ")", and thus we will write at
+     * least 7 + len(STRING) characters.
+     *
+     * We write one additional '.*' if indices[-1] + 1 = n.
+     *
+     * Since the result is surrounded by "(?s:" and ")\Z", we
+     * write at least "indices[0] + 7m + n + 6" characters,
+     * where 'm' is the number of stars and 'n' the length
+     * of the translated pattern.
+     */
+    PyObject *jobj = PyList_GET_ITEM(indices, 0);
+    j = PyLong_AsSsize_t(jobj);  // get the first position of '*'
+    if (j < 0) {
+        return NULL;
     }
-    else {
-        if (m == 1) { // pattern = 'HEAD*' or 'HEAD*TAIL'
-            WRITE_SUBSTRING(0, i); // write HEAD part
-            WRITE_WILDCARD();
-            WRITE_SUBSTRING(i + 1, n); // write TAIL part (if any)
-            goto finalize;
-        }
-        else { // pattern = 'HEAD*STRING*...'
-            WRITE_SUBSTRING(0, i);  // write HEAD part
-            i++;
-        }
+    Py_ssize_t estimate = j + 7 * m + n + 6;
+    PyUnicodeWriter *writer = PyUnicodeWriter_Create(estimate);
+    if (writer == NULL) {
+        return NULL;
     }
-    // handle the inner groups
-    for (Py_ssize_t k = 1; k < m - 1; ++k) {
-        LOAD_STAR_INDEX(j, k + 1);
-        assert(i < j);
-        WRITE_ATOMIC_SUBSTRING(i, j);
+    _PyUnicodeWriter *_writer = (_PyUnicodeWriter *) (writer);
+
+    WRITE_SUBSTRING(i, j);  // write stuff before '*' if needed
+    i = j + 1;              // jump after the star
+    for (Py_ssize_t k = 1; k < m; ++k) {
+        ind = PyList_GET_ITEM(indices, k);
+        j = PyLong_AsSsize_t(ind);
+        assert(j < 0 || i > j);
+        if (j < 0 ||
+            (_PyUnicodeWriter_WriteASCIIString(_writer, "(?>.*?", 6) < 0) ||
+            (_PyUnicodeWriter_WriteSubstring(_writer, strings, i, j) < 0) ||
+            (_PyUnicodeWriter_WriteChar(_writer, ')') < 0)) {
+            goto abort;
+        }
         i = j + 1;
     }
     // handle the last group
-    WRITE_WILDCARD();
-    WRITE_SUBSTRING(i, n); // write TAIL part (
-finalize:
-    ; // empty statement for allowing a label before a declaration
+    if (_PyUnicodeWriter_WriteASCIIString(_writer, ".*", 2) < 0) {
+        goto abort;
+    }
+    WRITE_SUBSTRING(i, n); // write TAIL part
+
+#undef WRITE_SUBSTRING
+
     PyObject *res = PyUnicodeWriter_Finish(writer);
     if (res == NULL) {
         return NULL;
@@ -681,163 +709,156 @@ join_translated_parts(PyObject *parts, PyObject *indices)
 }
 
 static PyObject *
-translate(PyObject *module, PyObject *unicode)
+translate(PyObject *module, PyObject *pattern)
 /* new reference */
 {
+#define READ(ind) PyUnicode_READ(kind, data, (ind))
+
+#define ADVANCE_IF_CHAR(ch, ind, maxind) \
+    do { \
+        if ((ind) < (maxind) && READ(ind) == (ch)) { \
+            ++(ind); \
+        } \
+    } while (0)
+
+#define _WHILE_READ_CMP(ch, ind, maxind, cmp) \
+    do { \
+        while ((ind) < (maxind) && READ(ind) cmp (ch)) { \
+            ++(ind); \
+        } \
+    } while (0)
+
+#define ADVANCE_TO_NEXT(ch, from, maxind) _WHILE_READ_CMP(ch, from, maxind, !=)
+#define DROP_DUPLICATES(ch, from, maxind) _WHILE_READ_CMP(ch, from, maxind, ==)
+
     fnmatchmodule_state *state = get_fnmatchmodulestate_state(module);
     PyObject *re = state->re_module;
-
-    Py_ssize_t estimate = 0;
-    PyUnicodeWriter *writer = PyUnicodeWriter_Create(estimate);
+    const Py_ssize_t n = PyUnicode_GET_LENGTH(pattern);
+    // We would write less data if there are successive '*', which should
+    // not be the case in general. Otherwise, we write >= n characters
+    // since escaping them would always add more characters so we will
+    // overestimate a bit the number of characters to write.
+    //
+    // TODO(picnixz): should we limit the estimation or not?
+    PyUnicodeWriter *writer = PyUnicodeWriter_Create((Py_ssize_t) (1.05 * n));
     if (writer == NULL) {
         return NULL;
     }
     _PyUnicodeWriter *_writer = (_PyUnicodeWriter *) (writer);
-
     // list containing the indices where '*' has a special meaning
     PyObject *indices = PyList_New(0);
     if (indices == NULL) {
         goto abort;
     }
-
-    Py_ssize_t n = PyUnicode_GetLength(unicode);
-    if (n < 0) {
-        goto abort;
-    }
+    const int kind = PyUnicode_KIND(pattern);
+    const void *data = PyUnicode_DATA(pattern);
     Py_ssize_t h = 0, i = 0;
-    PyObject *peek = NULL;
     while (i < n) {
-        PyObject *chr = PySequence_GetItem(unicode, i);
-        if (chr == NULL) {
-            goto abort;
-        }
-        if (PyUnicode_CompareWithASCIIString(chr, "*") == 0) {
-            Py_DECREF(chr);
-            if (_PyUnicodeWriter_WriteChar(_writer, '*') < 0) {
-                goto abort;
-            }
-            // drop all other '*' that can be found afterwards
-            while (++i < n) {
-                peek = PySequence_GetItem(unicode, i);
-                if (peek == NULL) {
-                    goto abort;
-                }
-                if (PyUnicode_CompareWithASCIIString(peek, "*") != 0) {
-                    Py_DECREF(peek);
-                    break;
-                }
-                Py_DECREF(peek);
-            }
-            PyObject *index = PyLong_FromLong(h++);
-            if (index == NULL) {
-                goto abort;
-            }
-            int rc = PyList_Append(indices, index);
-            Py_DECREF(index);
-            if (rc < 0) {
-                goto abort;
-            }
-        }
-        else if (PyUnicode_CompareWithASCIIString(chr, "?") == 0)  {
-            Py_DECREF(chr);
-            // translate optional '?' (fnmatch) into optional '.' (regex)
-            if (_PyUnicodeWriter_WriteChar(_writer, '.') < 0) {
-                goto abort;
-            }
-            ++i; // advance for the next iteration
-            ++h; // increase the expected result's length
-        }
-        else if (PyUnicode_CompareWithASCIIString(chr, "[") == 0)  {
-            Py_DECREF(chr);
-            // check the next characters (peek)
-            Py_ssize_t j = ++i;
-            if (j < n) {
-                peek = PySequence_GetItem(unicode, j);
-                if (peek == NULL) {
+        // read and advance to the next character
+        Py_UCS4 chr = READ(i++);
+        switch (chr) {
+            case '*': {
+                if (_PyUnicodeWriter_WriteChar(_writer, chr) < 0) {
                     goto abort;
                 }
-                if (PyUnicode_CompareWithASCIIString(peek, "!") == 0) {// [!
-                    ++j;
-                }
-                Py_DECREF(peek);
-            }
-            if (j < n) {
-                peek = PySequence_GetItem(unicode, j);
-                if (peek == NULL) {
+                DROP_DUPLICATES('*', i, n);
+                PyObject *index = PyLong_FromSsize_t(h++);
+                if (index == NULL) {
                     goto abort;
                 }
-                if (PyUnicode_CompareWithASCIIString(peek, "]") == 0) { // [!] or []
-                    ++j;
-                }
-                Py_DECREF(peek);
-            }
-            while (j < n) {
-                peek = PySequence_GetItem(unicode, j);
-                if (peek == NULL) {
+                int rc = PyList_Append(indices, index);
+                Py_DECREF(index);
+                if (rc < 0) {
                     goto abort;
                 }
-                // locate the closing ']'
-                if (PyUnicode_CompareWithASCIIString(peek, "]") != 0) {
-                    ++j;
-                }
-                Py_DECREF(peek);
+                break;
             }
-            if (j >= n) {
-                if (_PyUnicodeWriter_WriteASCIIString(_writer, "\\[", 2) < 0) {
+            case '?': {
+                // translate optional '?' (fnmatch) into optional '.' (regex)
+                if (_PyUnicodeWriter_WriteChar(_writer, '.') < 0) {
                     goto abort;
                 }
-                h += 2; // we just wrote 2 characters
+                ++h; // increase the expected result's length
+                break;
             }
-            else {
-                //              v--- pattern[j] (exclusive)
-                // '[' * ... * ']'
-                //     ^----- pattern[i] (inclusive)
-                PyObject *s1 = NULL, *s2 = NULL;
-                if (PyUnicode_FindChar(unicode, '-', i, j, 1) >= 0) {
-                    PyObject *group = PyUnicode_Substring(unicode, i, j);
-                    if (group == NULL) {
+            case '[': {
+                Py_ssize_t j = i;           // 'i' is already at next char
+                ADVANCE_IF_CHAR('!', j, n); // [!
+                ADVANCE_IF_CHAR(']', j, n); // [!] or []
+                ADVANCE_TO_NEXT(']', j, n); // locate closing ']'
+                if (j >= n) {
+                    if (_PyUnicodeWriter_WriteASCIIString(_writer, "\\[", 2) < 0) {
                         goto abort;
                     }
-                    s1 = PyObject_CallMethod(group, "replace", "ss", "\\", "\\\\");
-                    Py_DECREF(group);
+                    h += 2; // we just wrote 2 characters
+                    break;  // early break for clarity
                 }
                 else {
-                    s1 = get_translated_group(unicode, i, j);
-                }
-                if (s1 == NULL) {
-                    goto abort;
+                    //              v--- pattern[j] (exclusive)
+                    // '[' * ... * ']'
+                    //     ^----- pattern[i] (inclusive)
+                    PyObject *s1 = NULL, *s2 = NULL;
+                    int rc = PyUnicode_FindChar(pattern, '-', i, j, 1);
+                    if (rc == -2) {
+                        goto abort;
+                    }
+                    if (rc == -1) {
+                        PyObject *group = PyUnicode_Substring(pattern, i, j);
+                        if (group == NULL) {
+                            goto abort;
+                        }
+                        s1 = _PyObject_CallMethod(group, &_Py_ID(replace), "ss", "\\", "\\\\");
+                        Py_DECREF(group);
+                    }
+                    else {
+                        assert(rc >= 0);
+                        s1 = get_translated_group(pattern, i, j);
+                    }
+                    if (s1 == NULL) {
+                        goto abort;
+                    }
+                    s2 = _PyObject_CallMethod(re, &_Py_ID(sub), "ssO", "([&~|])", "\\\\\\1", s1);
+                    Py_DECREF(s1);
+                    if (s2 == NULL) {
+                        goto abort;
+                    }
+                    int difflen = write_translated_group(_writer, s2);
+                    Py_DECREF(s2);
+                    if (difflen < 0) {
+                        goto abort;
+                    }
+                    h += difflen;
+                    i = j + 1;  // jump to the character after ']'
+                    break;      // early break for clarity
                 }
-                s2 = PyObject_CallMethod(re, "sub", "ssO", "([&~|])", "\\\\\\1", s1);
-                Py_DECREF(s1);
-                if (s2 == NULL) {
+            }
+            default: {
+                PyObject *str = get_unicode_character(chr);
+                if (str == NULL) {
                     goto abort;
                 }
-                int difflen = write_translated_group(_writer, s2);
-                Py_DECREF(s2);
+                int difflen = write_escaped_string(re, _writer, str);
+                Py_DECREF(str);
                 if (difflen < 0) {
                     goto abort;
                 }
                 h += difflen;
-                i = j + 1; // jump to the character after ']'
-            }
-        }
-        else {
-            int difflen = write_normal_character(re, _writer, chr);
-            Py_DECREF(chr);
-            if (difflen < 0) {
-                goto abort;
+                break;
             }
-            h += difflen;
-            ++i;
         }
     }
+#undef DROP_DUPLICATES
+#undef ADVANCE_TO_NEXT
+#undef _WHILE_READ_CMP
+#undef ADVANCE_IF_CHAR
+#undef READ
     PyObject *parts = PyUnicodeWriter_Finish(writer);
     if (parts == NULL) {
         Py_DECREF(indices);
         return NULL;
     }
     assert(h == PyUnicode_GET_LENGTH(parts));
-    PyObject *res = join_translated_parts(parts, indices);
+    PyObject *res = join_translated_parts(module, parts, indices);
     Py_DECREF(parts);
     Py_DECREF(indices);
     return res;

From 36432e82044ecd3f39469a6270df7313a4c082de Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?=
 <10796600+picnixz@users.noreply.github.com>
Date: Tue, 9 Jul 2024 11:55:05 +0200
Subject: [PATCH 18/97] update generated objects

---
 Include/internal/pycore_global_objects_fini_generated.h | 2 ++
 Include/internal/pycore_global_strings.h                | 2 ++
 Include/internal/pycore_runtime_init_generated.h        | 2 ++
 Include/internal/pycore_unicodeobject_generated.h       | 8 ++++++++
 4 files changed, 14 insertions(+)

diff --git a/Include/internal/pycore_global_objects_fini_generated.h b/Include/internal/pycore_global_objects_fini_generated.h
index 8e3d405fc7c04b..fc82cfc1536feb 100644
--- a/Include/internal/pycore_global_objects_fini_generated.h
+++ b/Include/internal/pycore_global_objects_fini_generated.h
@@ -945,6 +945,7 @@ _PyStaticObjects_CheckRefcnt(PyInterpreterState *interp) {
     _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(filter));
     _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(filters));
     _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(final));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(find));
     _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(find_class));
     _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(fix_imports));
     _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(flags));
@@ -1229,6 +1230,7 @@ _PyStaticObjects_CheckRefcnt(PyInterpreterState *interp) {
     _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(strict));
     _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(strict_mode));
     _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(string));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(sub));
     _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(sub_key));
     _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(symmetric_difference_update));
     _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(tabsize));
diff --git a/Include/internal/pycore_global_strings.h b/Include/internal/pycore_global_strings.h
index 7cbc1941ffa0ee..78f56bdd2f6238 100644
--- a/Include/internal/pycore_global_strings.h
+++ b/Include/internal/pycore_global_strings.h
@@ -434,6 +434,7 @@ struct _Py_global_strings {
         STRUCT_FOR_ID(filter)
         STRUCT_FOR_ID(filters)
         STRUCT_FOR_ID(final)
+        STRUCT_FOR_ID(find)
         STRUCT_FOR_ID(find_class)
         STRUCT_FOR_ID(fix_imports)
         STRUCT_FOR_ID(flags)
@@ -718,6 +719,7 @@ struct _Py_global_strings {
         STRUCT_FOR_ID(strict)
         STRUCT_FOR_ID(strict_mode)
         STRUCT_FOR_ID(string)
+        STRUCT_FOR_ID(sub)
         STRUCT_FOR_ID(sub_key)
         STRUCT_FOR_ID(symmetric_difference_update)
         STRUCT_FOR_ID(tabsize)
diff --git a/Include/internal/pycore_runtime_init_generated.h b/Include/internal/pycore_runtime_init_generated.h
index 4164c6cae7a8bf..2c965ecd99fcf5 100644
--- a/Include/internal/pycore_runtime_init_generated.h
+++ b/Include/internal/pycore_runtime_init_generated.h
@@ -943,6 +943,7 @@ extern "C" {
     INIT_ID(filter), \
     INIT_ID(filters), \
     INIT_ID(final), \
+    INIT_ID(find), \
     INIT_ID(find_class), \
     INIT_ID(fix_imports), \
     INIT_ID(flags), \
@@ -1227,6 +1228,7 @@ extern "C" {
     INIT_ID(strict), \
     INIT_ID(strict_mode), \
     INIT_ID(string), \
+    INIT_ID(sub), \
     INIT_ID(sub_key), \
     INIT_ID(symmetric_difference_update), \
     INIT_ID(tabsize), \
diff --git a/Include/internal/pycore_unicodeobject_generated.h b/Include/internal/pycore_unicodeobject_generated.h
index b15845cd16e814..0307d1f4806ba7 100644
--- a/Include/internal/pycore_unicodeobject_generated.h
+++ b/Include/internal/pycore_unicodeobject_generated.h
@@ -1536,6 +1536,10 @@ _PyUnicode_InitStaticStrings(PyInterpreterState *interp) {
     _PyUnicode_InternStatic(interp, &string);
     assert(_PyUnicode_CheckConsistency(string, 1));
     assert(PyUnicode_GET_LENGTH(string) != 1);
+    string = &_Py_ID(find);
+    _PyUnicode_InternStatic(interp, &string);
+    assert(_PyUnicode_CheckConsistency(string, 1));
+    assert(PyUnicode_GET_LENGTH(string) != 1);
     string = &_Py_ID(find_class);
     _PyUnicode_InternStatic(interp, &string);
     assert(_PyUnicode_CheckConsistency(string, 1));
@@ -2672,6 +2676,10 @@ _PyUnicode_InitStaticStrings(PyInterpreterState *interp) {
     _PyUnicode_InternStatic(interp, &string);
     assert(_PyUnicode_CheckConsistency(string, 1));
     assert(PyUnicode_GET_LENGTH(string) != 1);
+    string = &_Py_ID(sub);
+    _PyUnicode_InternStatic(interp, &string);
+    assert(_PyUnicode_CheckConsistency(string, 1));
+    assert(PyUnicode_GET_LENGTH(string) != 1);
     string = &_Py_ID(sub_key);
     _PyUnicode_InternStatic(interp, &string);
     assert(_PyUnicode_CheckConsistency(string, 1));

From 4a369809c65673e2d571574962049dcb8fa28409 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?=
 <10796600+picnixz@users.noreply.github.com>
Date: Tue, 9 Jul 2024 12:01:54 +0200
Subject: [PATCH 19/97] FIX BUILD

---
 Modules/_fnmatchmodule.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/Modules/_fnmatchmodule.c b/Modules/_fnmatchmodule.c
index 251b71ea5f5cfd..82458b9499ee35 100644
--- a/Modules/_fnmatchmodule.c
+++ b/Modules/_fnmatchmodule.c
@@ -389,7 +389,7 @@ _fnmatch_fnmatchcase_impl(PyObject *module, PyObject *name, PyObject *pat)
 static PyObject *
 get_unicode_character(Py_UCS4 ch)
 {
-    assert(ch <= MAX_UNICODE);
+    assert(ch <= 0x10ffff);
     if (ch < 256) {
         PyObject *o = _Py_LATIN1_CHR(ch);
         assert(_Py_IsImmortal(o));
@@ -552,9 +552,9 @@ get_translated_group(PyObject *pattern,
     Py_ssize_t c = chunkscount;
     while (--c) {
         PyObject *c1 = PyList_GET_ITEM(chunks, c - 1);
-        assert(c1len > 0);
-        Py_ssize_t c1len = PyUnicode_GET_LENGTH(c1);
         assert(c1 != NULL);
+        Py_ssize_t c1len = PyUnicode_GET_LENGTH(c1);
+        assert(c1len > 0);
 
         PyObject *c2 = PyList_GET_ITEM(chunks, c);
         assert(c2 != NULL);

From 4881f1cbb548497ef4acf526f44e2a2e9458de3b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?=
 <10796600+picnixz@users.noreply.github.com>
Date: Tue, 9 Jul 2024 12:14:03 +0200
Subject: [PATCH 20/97] remove interned strings

---
 Modules/_fnmatchmodule.c | 30 ++++++++----------------------
 1 file changed, 8 insertions(+), 22 deletions(-)

diff --git a/Modules/_fnmatchmodule.c b/Modules/_fnmatchmodule.c
index 82458b9499ee35..60528f18b1b658 100644
--- a/Modules/_fnmatchmodule.c
+++ b/Modules/_fnmatchmodule.c
@@ -20,10 +20,6 @@ typedef struct {
     PyObject *os_module; // 'os' module
 
     PyObject *lru_cache; // optional cache for regex patterns, if needed
-
-    PyObject *str_atomic_bgroup;    // (?>.*?
-    PyObject *str_atomic_egroup;    // )
-    PyObject *str_wildcard;         // *
 } fnmatchmodule_state;
 
 static inline fnmatchmodule_state *
@@ -41,10 +37,6 @@ fnmatchmodule_clear(PyObject *m)
     Py_CLEAR(st->os_module);
     Py_CLEAR(st->re_module);
     Py_CLEAR(st->lru_cache);
-
-    Py_CLEAR(st->str_atomic_bgroup);
-    Py_CLEAR(st->str_atomic_egroup);
-    Py_CLEAR(st->str_wildcard);
     return 0;
 }
 
@@ -55,10 +47,6 @@ fnmatchmodule_traverse(PyObject *m, visitproc visit, void *arg)
     Py_VISIT(st->os_module);
     Py_VISIT(st->re_module);
     Py_VISIT(st->lru_cache);
-
-    Py_VISIT(st->str_atomic_bgroup);
-    Py_VISIT(st->str_atomic_egroup);
-    Py_VISIT(st->str_wildcard);
     return 0;
 }
 
@@ -100,12 +88,6 @@ fnmatchmodule_exec(PyObject *m)
     }
     // todo: handle LRU cache
 
-    // interned strings
-    INTERN_STRING(str_atomic_bgroup, "(?>.*?");
-    INTERN_STRING(str_atomic_egroup, ")");
-    INTERN_STRING(str_wildcard, "*");
-
-#undef INTERN_STRING
 #undef IMPORT_MODULE
 
     return 0;
@@ -212,12 +194,13 @@ posix_fnmatch_filter(const char *pattern, PyObject *names,
 static PyObject *
 get_match_function(PyObject *module, PyObject *pattern)
 {
+    // TODO(picnixz): use LRU-cache
     PyObject *expr = _fnmatch_translate_impl(module, pattern);
     if (expr == NULL) {
         return NULL;
     }
     fnmatchmodule_state *st = get_fnmatchmodulestate_state(module);
-    PyObject *compiled = PyObject_CallMethod(st->re_module, "compile", "O", expr);
+    PyObject *compiled = _PyObject_CallMethod(st->re_module, &_Py_ID(compile), "O", expr);
     Py_DECREF(expr);
     if (compiled == NULL) {
         return NULL;
@@ -678,12 +661,15 @@ join_translated_parts(PyObject *module, PyObject *strings, PyObject *indices)
 
     WRITE_SUBSTRING(i, j);  // write stuff before '*' if needed
     i = j + 1;              // jump after the star
+
+    fnmatchmodule_state *state = get_fnmatchmodulestate_state(module);
     for (Py_ssize_t k = 1; k < m; ++k) {
         ind = PyList_GET_ITEM(indices, k);
         j = PyLong_AsSsize_t(ind);
-        assert(j < 0 || i > j);
-        if (j < 0 ||
-            (_PyUnicodeWriter_WriteASCIIString(_writer, "(?>.*?", 6) < 0) ||
+        if (j < 0 || i > j) {
+            goto abort;
+        }
+        if ((_PyUnicodeWriter_WriteASCIIString(_writer, "(?>.*?", 6) < 0) ||
             (_PyUnicodeWriter_WriteSubstring(_writer, strings, i, j) < 0) ||
             (_PyUnicodeWriter_WriteChar(_writer, ')') < 0)) {
             goto abort;

From ec5a922aa7a7333f8841cfccbd3e058eba01cfc6 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?=
 <10796600+picnixz@users.noreply.github.com>
Date: Tue, 9 Jul 2024 16:14:03 +0200
Subject: [PATCH 21/97] revert addition

---
 Modules/Setup | 1 -
 1 file changed, 1 deletion(-)

diff --git a/Modules/Setup b/Modules/Setup
index acb542b70946ea..e4acf6bc7de8ea 100644
--- a/Modules/Setup
+++ b/Modules/Setup
@@ -137,7 +137,6 @@ PYTHONPATH=$(COREPYTHONPATH)
 #_datetime _datetimemodule.c
 #_decimal _decimal/_decimal.c
 #_heapq _heapqmodule.c
-_fnmatch _fnmatchmodule.c
 #_interpchannels _interpchannelsmodule.c
 #_interpqueues _interpqueuesmodule.c
 #_interpreters _interpretersmodule.c

From b29ccb49042626ef3d1d205146d533b7de968de8 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?=
 <10796600+picnixz@users.noreply.github.com>
Date: Tue, 9 Jul 2024 17:49:16 +0200
Subject: [PATCH 22/97] make it fail on Windows for now

---
 PCbuild/pythoncore.vcxproj         | 1 -
 PCbuild/pythoncore.vcxproj.filters | 3 ---
 2 files changed, 4 deletions(-)

diff --git a/PCbuild/pythoncore.vcxproj b/PCbuild/pythoncore.vcxproj
index db9f960c61ce6c..f36fcb8caece33 100644
--- a/PCbuild/pythoncore.vcxproj
+++ b/PCbuild/pythoncore.vcxproj
@@ -449,7 +449,6 @@
     <ClCompile Include="..\Modules\cmathmodule.c" />
     <ClCompile Include="..\Modules\_datetimemodule.c" />
     <ClCompile Include="..\Modules\errnomodule.c" />
-    <ClCompile Include="..\Modules\_fnmatchmodule.c" />
     <ClCompile Include="..\Modules\faulthandler.c" />
     <ClCompile Include="..\Modules\gcmodule.c" />
     <ClCompile Include="..\Modules\getbuildinfo.c" />
diff --git a/PCbuild/pythoncore.vcxproj.filters b/PCbuild/pythoncore.vcxproj.filters
index 24384e355f46ec..a1b43addf9e36a 100644
--- a/PCbuild/pythoncore.vcxproj.filters
+++ b/PCbuild/pythoncore.vcxproj.filters
@@ -995,9 +995,6 @@
     <ClCompile Include="..\Modules\errnomodule.c">
       <Filter>Modules</Filter>
     </ClCompile>
-    <ClCompile Include="..\Modules\_fnmatchmodule.c">
-      <Filter>Modules</Filter>
-    </ClCompile>
     <ClCompile Include="..\Modules\faulthandler.c">
       <Filter>Modules</Filter>
     </ClCompile>

From a91f689db9c418e259c5be0da5b4194bfb7f1b87 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?=
 <10796600+picnixz@users.noreply.github.com>
Date: Tue, 9 Jul 2024 18:33:07 +0200
Subject: [PATCH 23/97] Update configurations?

---
 Makefile.pre.in            | 9 +++++++++
 Modules/Setup.bootstrap.in | 3 +++
 configure.ac               | 2 ++
 3 files changed, 14 insertions(+)

diff --git a/Makefile.pre.in b/Makefile.pre.in
index 94cfb74138a3d9..97aa13bcc34409 100644
--- a/Makefile.pre.in
+++ b/Makefile.pre.in
@@ -350,6 +350,12 @@ IO_OBJS=	\
 		Modules/_io/bytesio.o \
 		Modules/_io/stringio.o
 
+FNMATCH_H=	Modules/_fnmatch/_fnmatchmodule.h
+
+FNMATCH_OBJS=	\
+		Modules/_fnmatch/_fnmatchmodule.o \
+		Modules/_fnmatch/posix.o \
+		Modules/_fnmatch/regex.o \
 
 ##########################################################################
 # mimalloc
@@ -1740,6 +1746,8 @@ Python/sysmodule.o: $(srcdir)/Python/sysmodule.c Makefile $(srcdir)/Include/pydt
 		$(MULTIARCH_CPPFLAGS) \
 		-o $@ $(srcdir)/Python/sysmodule.c
 
+$(FNMATCH_OBJS): $(FNMATCH_H)
+
 $(IO_OBJS): $(IO_H)
 
 .PHONY: regen-pegen-metaparser
@@ -3131,6 +3139,7 @@ MODULE__CTYPES_TEST_DEPS=$(srcdir)/Modules/_ctypes/_ctypes_test_generated.c.h
 MODULE__CTYPES_MALLOC_CLOSURE=@MODULE__CTYPES_MALLOC_CLOSURE@
 MODULE__DECIMAL_DEPS=$(srcdir)/Modules/_decimal/docstrings.h @LIBMPDEC_INTERNAL@
 MODULE__ELEMENTTREE_DEPS=$(srcdir)/Modules/pyexpat.c @LIBEXPAT_INTERNAL@
+MODULE__FNMATCH_DEPS=$(srcdir)/Modules/_fnmatch/_fnmatchmodule.h
 MODULE__HASHLIB_DEPS=$(srcdir)/Modules/hashlib.h
 MODULE__IO_DEPS=$(srcdir)/Modules/_io/_iomodule.h
 MODULE__MD5_DEPS=$(srcdir)/Modules/hashlib.h $(LIBHACL_HEADERS) Modules/_hacl/Hacl_Hash_MD5.h Modules/_hacl/Hacl_Hash_MD5.c
diff --git a/Modules/Setup.bootstrap.in b/Modules/Setup.bootstrap.in
index aa4e60e272653b..c54cd207aec57d 100644
--- a/Modules/Setup.bootstrap.in
+++ b/Modules/Setup.bootstrap.in
@@ -34,5 +34,8 @@ _operator _operator.c
 _stat _stat.c
 _symtable symtablemodule.c
 
+# miscellaneous accelerators
+_fnmatch _fnmatch/_fnmatchmodule.c _fnmatch/posix.c _fnmatch/regex.c _fnmatch/translate.c
+
 # for systems without $HOME env, used by site._getuserbase()
 @MODULE_PWD_TRUE@pwd pwdmodule.c
diff --git a/configure.ac b/configure.ac
index d4fdb81d34890a..6093c994bd13af 100644
--- a/configure.ac
+++ b/configure.ac
@@ -7025,6 +7025,7 @@ SRCDIRS="\
   Modules/_ctypes \
   Modules/_decimal \
   Modules/_decimal/libmpdec \
+  Modules/_fnmatch \
   Modules/_hacl \
   Modules/_io \
   Modules/_multiprocessing \
@@ -7701,6 +7702,7 @@ AC_DEFUN([PY_STDLIB_MOD_SIMPLE], [
 ])
 
 dnl static modules in Modules/Setup.bootstrap
+PY_STDLIB_MOD_SIMPLE([_fnmatch], [-I\$(srcdir)/Modules/_fnmatch], [])
 PY_STDLIB_MOD_SIMPLE([_io], [-I\$(srcdir)/Modules/_io], [])
 PY_STDLIB_MOD_SIMPLE([time], [], [$TIMEMODULE_LIB])
 

From 9e93b589ba21e3a3f5da7517c8a233530b1ffa57 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?=
 <10796600+picnixz@users.noreply.github.com>
Date: Tue, 9 Jul 2024 18:39:44 +0200
Subject: [PATCH 24/97] split implementation

---
 Modules/_fnmatch/_fnmatchmodule.c             | 259 +++++
 Modules/_fnmatch/_fnmatchmodule.h             |  73 ++
 Modules/_fnmatch/clinic/_fnmatchmodule.c.h    | 185 ++++
 Modules/_fnmatch/posix.c                      |  82 ++
 Modules/_fnmatch/regex.c                      |  67 ++
 .../translate.c}                              | 979 ++++++------------
 6 files changed, 967 insertions(+), 678 deletions(-)
 create mode 100644 Modules/_fnmatch/_fnmatchmodule.c
 create mode 100644 Modules/_fnmatch/_fnmatchmodule.h
 create mode 100644 Modules/_fnmatch/clinic/_fnmatchmodule.c.h
 create mode 100644 Modules/_fnmatch/posix.c
 create mode 100644 Modules/_fnmatch/regex.c
 rename Modules/{_fnmatchmodule.c => _fnmatch/translate.c} (50%)

diff --git a/Modules/_fnmatch/_fnmatchmodule.c b/Modules/_fnmatch/_fnmatchmodule.c
new file mode 100644
index 00000000000000..6e566991188861
--- /dev/null
+++ b/Modules/_fnmatch/_fnmatchmodule.c
@@ -0,0 +1,259 @@
+/*
+ * C accelerator for the 'fnmatch' module (POSIX only).
+ *
+ * Most functions expect string or bytes instances, and thus the Python
+ * implementation should first pre-process path-like objects, possibly
+ * applying normalizations depending on the platform if needed.
+ */
+
+#include "Python.h"
+#include "pycore_call.h" // for _PyObject_CallMethod
+
+#include "_fnmatchmodule.h"
+#include "clinic/_fnmatchmodule.c.h"
+
+#define INVALID_PATTERN_TYPE "pattern must be a string or a bytes object"
+
+// module state functions
+
+static int
+fnmatchmodule_clear(PyObject *m)
+{
+    fnmatchmodule_state *st = get_fnmatchmodulestate_state(m);
+    Py_CLEAR(st->os_module);
+    Py_CLEAR(st->re_module);
+    Py_CLEAR(st->lru_cache);
+    return 0;
+}
+
+static int
+fnmatchmodule_traverse(PyObject *m, visitproc visit, void *arg)
+{
+    fnmatchmodule_state *st = get_fnmatchmodulestate_state(m);
+    Py_VISIT(st->os_module);
+    Py_VISIT(st->re_module);
+    Py_VISIT(st->lru_cache);
+    return 0;
+}
+
+static void
+fnmatchmodule_free(void *m)
+{
+    fnmatchmodule_clear((PyObject *) m);
+}
+
+static int
+fnmatchmodule_exec(PyObject *m)
+{
+#define IMPORT_MODULE(attr, name) \
+    do { \
+        state->attr = PyImport_ImportModule((name)); \
+        if (state->attr == NULL) { \
+            return -1; \
+        } \
+    } while (0)
+
+#define INTERN_STRING(attr, str) \
+    do { \
+        state->attr = PyUnicode_InternFromString((str)); \
+        if (state->attr == NULL) { \
+            return -1; \
+        } \
+    } while (0)
+    // Populate the per-module state; return -1 as soon as a lookup fails.
+    fnmatchmodule_state *state = get_fnmatchmodulestate_state(m);
+
+    // imports: each macro call stores the module in 'state' or returns -1
+    IMPORT_MODULE(os_module, "os");
+    IMPORT_MODULE(re_module, "re");
+
+    // helpers
+    state->lru_cache = _PyImport_GetModuleAttrString("functools", "lru_cache");
+    if (state->lru_cache == NULL) {
+        return -1;
+    }
+    // TODO: 'lru_cache' is only stored here; nothing in this file calls it yet
+
+#undef IMPORT_MODULE
+#undef INTERN_STRING // NOTE(review): INTERN_STRING is never used above
+
+    return 0;
+}
+
+/*[clinic input]
+module _fnmatch
+[clinic start generated code]*/
+/*[clinic end generated code: output=da39a3ee5e6b4b0d input=356e324d57d93f08]*/
+
+static PyObject *
+get_match_function(PyObject *module, PyObject *pattern)
+{
+    // TODO(picnixz): use LRU-cache
+    PyObject *expr = _fnmatch_translate_impl(module, pattern);
+    if (expr == NULL) {
+        return NULL;
+    }
+    fnmatchmodule_state *st = get_fnmatchmodulestate_state(module);
+    PyObject *compiled = _PyObject_CallMethod(st->re_module, &_Py_ID(compile), "O", expr);
+    Py_DECREF(expr);
+    if (compiled == NULL) {
+        return NULL;
+    }
+    PyObject *matcher = PyObject_GetAttr(compiled, &_Py_ID(match));
+    Py_DECREF(compiled);
+    return matcher;
+}
+
+static PyMethodDef get_match_function_method_def = {
+    "get_match_function",
+    _PyCFunction_CAST(get_match_function),
+    METH_O,
+    NULL
+};
+
+/*[clinic input]
+_fnmatch.filter -> object
+
+    names: object
+    pat: object
+
+[clinic start generated code]*/
+
+static PyObject *
+_fnmatch_filter_impl(PyObject *module, PyObject *names, PyObject *pat)
+/*[clinic end generated code: output=7f11aa68436d05fc input=1d233174e1c4157a]*/
+{
+#ifndef Py_HAVE_FNMATCH
+    PyObject *matcher = get_match_function(module, pat);
+    if (matcher == NULL) {
+        return NULL;
+    }
+    PyObject *result = _regex_fnmatch_filter(matcher, names);
+    Py_DECREF(matcher);
+    return result;
+#else
+    // Like the non-NT Python filter(), os.fspath() is not called on 'names'.
+    if (PyBytes_Check(pat)) {
+        const char *pattern = PyBytes_AS_STRING(pat);
+        return _posix_fnmatch_filter(pattern, names, &_posix_fnmatch_encoded);
+    }
+    if (!PyUnicode_Check(pat)) {
+        PyErr_SetString(PyExc_TypeError, INVALID_PATTERN_TYPE);
+        return NULL;
+    }
+    const char *pattern = PyUnicode_AsUTF8(pat); // NULL + exception on error
+    return pattern == NULL ? NULL
+        : _posix_fnmatch_filter(pattern, names, &_posix_fnmatch_unicode);
+#endif
+}
+
+/*[clinic input]
+_fnmatch.fnmatchcase -> bool
+
+    name: object
+    pat: object
+
+Test whether `name` matches `pattern`, including case.
+
+This is a version of fnmatch() which doesn't case-normalize
+its arguments.
+
+[clinic start generated code]*/
+
+static int
+_fnmatch_fnmatchcase_impl(PyObject *module, PyObject *name, PyObject *pat)
+/*[clinic end generated code: output=4d1283b1b1fc7cb8 input=b02a6a5c8c5a46e2]*/
+{
+#ifndef Py_HAVE_FNMATCH
+    PyObject *matcher = get_match_function(module, pat);
+    if (matcher == NULL) {
+        return -1;
+    }
+    int res = _regex_fnmatch_generic(matcher, name);
+    Py_DECREF(matcher);
+    return res;
+#else
+    // This function does not transform path-like objects, nor does it
+    // case-normalize 'name' or 'pattern' (whether it is the Python or
+    // the C implementation).
+    if (PyBytes_Check(pat)) {
+        const char *pattern = PyBytes_AS_STRING(pat);
+        return _posix_fnmatch_encoded(pattern, name);
+    }
+    if (!PyUnicode_Check(pat)) {
+        PyErr_SetString(PyExc_TypeError, INVALID_PATTERN_TYPE);
+        return -1;
+    }
+    const char *pattern = PyUnicode_AsUTF8(pat); // NULL + exception on error
+    return pattern == NULL ? -1 : _posix_fnmatch_unicode(pattern, name);
+#endif
+}
+
+/*[clinic input]
+_fnmatch.translate -> object
+
+    pat as pattern: object
+
+[clinic start generated code]*/
+
+static PyObject *
+_fnmatch_translate_impl(PyObject *module, PyObject *pattern)
+/*[clinic end generated code: output=2d9e3bbcbcc6e90e input=56e39f7beea97810]*/
+{
+    if (PyBytes_Check(pattern)) {
+        PyObject *unicode = PyUnicode_DecodeLatin1(PyBytes_AS_STRING(pattern),
+                                                   PyBytes_GET_SIZE(pattern),
+                                                   "strict");
+        if (unicode == NULL) {
+            return NULL;
+        }
+        // translated regular expression as a str object
+        PyObject *str_expr = translate(module, unicode);
+        Py_DECREF(unicode);
+        if (str_expr == NULL) {
+            return NULL;
+        }
+        PyObject *expr = PyUnicode_AsLatin1String(str_expr);
+        Py_DECREF(str_expr);
+        return expr;
+    }
+    else if (PyUnicode_Check(pattern)) {
+        return translate(module, pattern);
+    }
+    else {
+        PyErr_SetString(PyExc_TypeError, INVALID_PATTERN_TYPE);
+        return NULL;
+    }
+}
+
+static PyMethodDef fnmatchmodule_methods[] = {
+    _FNMATCH_FILTER_METHODDEF
+    _FNMATCH_FNMATCHCASE_METHODDEF
+    _FNMATCH_TRANSLATE_METHODDEF
+    {NULL, NULL}
+};
+
+static struct PyModuleDef_Slot fnmatchmodule_slots[] = {
+    {Py_mod_exec, fnmatchmodule_exec},
+    {Py_mod_multiple_interpreters, Py_MOD_PER_INTERPRETER_GIL_SUPPORTED},
+    {Py_mod_gil, Py_MOD_GIL_NOT_USED},
+    {0, NULL},
+};
+
+static struct PyModuleDef _fnmatchmodule = {
+    PyModuleDef_HEAD_INIT,
+    "_fnmatch",
+    NULL,
+    .m_size = sizeof(fnmatchmodule_state),
+    .m_methods = fnmatchmodule_methods,
+    .m_slots = fnmatchmodule_slots,
+    .m_traverse = fnmatchmodule_traverse,
+    .m_clear = fnmatchmodule_clear,
+    .m_free = fnmatchmodule_free,
+};
+
+PyMODINIT_FUNC
+PyInit__fnmatch(void)
+{
+    return PyModuleDef_Init(&_fnmatchmodule);
+}
diff --git a/Modules/_fnmatch/_fnmatchmodule.h b/Modules/_fnmatch/_fnmatchmodule.h
new file mode 100644
index 00000000000000..af271703791be3
--- /dev/null
+++ b/Modules/_fnmatch/_fnmatchmodule.h
@@ -0,0 +1,73 @@
+#ifndef _FNMATCHMODULE_H
+#define _FNMATCHMODULE_H
+
+#include "Python.h"
+
+typedef struct {
+    PyObject *re_module; // 're' module
+    PyObject *os_module; // 'os' module
+
+    PyObject *lru_cache; // optional cache for regex patterns, if needed
+} fnmatchmodule_state;
+
+static inline fnmatchmodule_state *
+get_fnmatchmodulestate_state(PyObject *module)
+{
+    void *state = PyModule_GetState(module);
+    assert(state != NULL);
+    return (fnmatchmodule_state *)state;
+}
+
+/*
+ * The filter() function works differently depending on whether fnmatch(3)
+ * is present or not.
+ *
+ * If fnmatch(3) is present, the match is performed without using regular
+ * expressions, by _posix_fnmatch_encoded() and _posix_fnmatch_unicode().
+ *
+ * If fnmatch(3) is not present, the match is performed using regular
+ * expressions.
+ */
+
+#ifdef Py_HAVE_FNMATCH
+/*
+ * Type for a matching function.
+ *
+ * The function must take as input a pattern and a name,
+ * and is used to determine whether the name matches the
+ * pattern or not.
+ *
+ * If the pattern is obtained from str() types, then 'name'
+ * must be a string (it is left to the matcher the task for
+ * validating this part).
+ */
+typedef int (*Matcher)(const char *, PyObject *);
+
+extern PyObject *
+_posix_fnmatch_filter(const char *pattern, PyObject *names, Matcher match);
+
+/*
+ * Perform a case-sensitive match using fnmatch(3).
+ *
+ * Parameters
+ *
+ *      pattern  A UNIX shell pattern.
+ *      string   The string to match (bytes object).
+ *
+ * Returns 1 if the 'string' matches the 'pattern' and 0 otherwise.
+ *
+ * Returns -1 if (1) 'string' is not a `bytes` object, and
+ * sets a TypeError exception, or (2) something went wrong.
+ */
+extern int _posix_fnmatch_encoded(const char *pattern, PyObject *string);
+/* Same as _posix_fnmatch_encoded() but for unicode inputs. */
+extern int _posix_fnmatch_unicode(const char *pattern, PyObject *string);
+#else
+extern int _regex_fnmatch_generic(PyObject *matcher, PyObject *name);
+extern PyObject *
+_regex_fnmatch_filter(PyObject *matcher, PyObject *names);
+#endif
+
+extern PyObject *translate(PyObject *module, PyObject *pattern);
+
+#endif // _FNMATCHMODULE_H
diff --git a/Modules/_fnmatch/clinic/_fnmatchmodule.c.h b/Modules/_fnmatch/clinic/_fnmatchmodule.c.h
new file mode 100644
index 00000000000000..4b12f33113d3fb
--- /dev/null
+++ b/Modules/_fnmatch/clinic/_fnmatchmodule.c.h
@@ -0,0 +1,185 @@
+/*[clinic input]
+preserve
+[clinic start generated code]*/
+
+#if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE)
+#  include "pycore_gc.h"          // PyGC_Head
+#  include "pycore_runtime.h"     // _Py_ID()
+#endif
+#include "pycore_modsupport.h"    // _PyArg_UnpackKeywords()
+
+PyDoc_STRVAR(_fnmatch_filter__doc__,
+"filter($module, /, names, pat)\n"
+"--\n"
+"\n");
+
+#define _FNMATCH_FILTER_METHODDEF    \
+    {"filter", _PyCFunction_CAST(_fnmatch_filter), METH_FASTCALL|METH_KEYWORDS, _fnmatch_filter__doc__},
+
+static PyObject *
+_fnmatch_filter_impl(PyObject *module, PyObject *names, PyObject *pat);
+
+static PyObject *
+_fnmatch_filter(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames)
+{
+    PyObject *return_value = NULL;
+    #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE)
+
+    #define NUM_KEYWORDS 2
+    static struct {
+        PyGC_Head _this_is_not_used;
+        PyObject_VAR_HEAD
+        PyObject *ob_item[NUM_KEYWORDS];
+    } _kwtuple = {
+        .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS)
+        .ob_item = { &_Py_ID(names), &_Py_ID(pat), },
+    };
+    #undef NUM_KEYWORDS
+    #define KWTUPLE (&_kwtuple.ob_base.ob_base)
+
+    #else  // !Py_BUILD_CORE
+    #  define KWTUPLE NULL
+    #endif  // !Py_BUILD_CORE
+
+    static const char * const _keywords[] = {"names", "pat", NULL};
+    static _PyArg_Parser _parser = {
+        .keywords = _keywords,
+        .fname = "filter",
+        .kwtuple = KWTUPLE,
+    };
+    #undef KWTUPLE
+    PyObject *argsbuf[2];
+    PyObject *names;
+    PyObject *pat;
+
+    args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, 2, 2, 0, argsbuf);
+    if (!args) {
+        goto exit;
+    }
+    names = args[0];
+    pat = args[1];
+    return_value = _fnmatch_filter_impl(module, names, pat);
+
+exit:
+    return return_value;
+}
+
+PyDoc_STRVAR(_fnmatch_fnmatchcase__doc__,
+"fnmatchcase($module, /, name, pat)\n"
+"--\n"
+"\n"
+"Test whether `name` matches `pattern`, including case.\n"
+"\n"
+"This is a version of fnmatch() which doesn\'t case-normalize\n"
+"its arguments.");
+
+#define _FNMATCH_FNMATCHCASE_METHODDEF    \
+    {"fnmatchcase", _PyCFunction_CAST(_fnmatch_fnmatchcase), METH_FASTCALL|METH_KEYWORDS, _fnmatch_fnmatchcase__doc__},
+
+static int
+_fnmatch_fnmatchcase_impl(PyObject *module, PyObject *name, PyObject *pat);
+
+static PyObject *
+_fnmatch_fnmatchcase(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames)
+{
+    PyObject *return_value = NULL;
+    #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE)
+
+    #define NUM_KEYWORDS 2
+    static struct {
+        PyGC_Head _this_is_not_used;
+        PyObject_VAR_HEAD
+        PyObject *ob_item[NUM_KEYWORDS];
+    } _kwtuple = {
+        .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS)
+        .ob_item = { &_Py_ID(name), &_Py_ID(pat), },
+    };
+    #undef NUM_KEYWORDS
+    #define KWTUPLE (&_kwtuple.ob_base.ob_base)
+
+    #else  // !Py_BUILD_CORE
+    #  define KWTUPLE NULL
+    #endif  // !Py_BUILD_CORE
+
+    static const char * const _keywords[] = {"name", "pat", NULL};
+    static _PyArg_Parser _parser = {
+        .keywords = _keywords,
+        .fname = "fnmatchcase",
+        .kwtuple = KWTUPLE,
+    };
+    #undef KWTUPLE
+    PyObject *argsbuf[2];
+    PyObject *name;
+    PyObject *pat;
+    int _return_value;
+
+    args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, 2, 2, 0, argsbuf);
+    if (!args) {
+        goto exit;
+    }
+    name = args[0];
+    pat = args[1];
+    _return_value = _fnmatch_fnmatchcase_impl(module, name, pat);
+    if ((_return_value == -1) && PyErr_Occurred()) {
+        goto exit;
+    }
+    return_value = PyBool_FromLong((long)_return_value);
+
+exit:
+    return return_value;
+}
+
+PyDoc_STRVAR(_fnmatch_translate__doc__,
+"translate($module, /, pat)\n"
+"--\n"
+"\n");
+
+#define _FNMATCH_TRANSLATE_METHODDEF    \
+    {"translate", _PyCFunction_CAST(_fnmatch_translate), METH_FASTCALL|METH_KEYWORDS, _fnmatch_translate__doc__},
+
+static PyObject *
+_fnmatch_translate_impl(PyObject *module, PyObject *pattern);
+
+static PyObject *
+_fnmatch_translate(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames)
+{
+    PyObject *return_value = NULL;
+    #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE)
+
+    #define NUM_KEYWORDS 1
+    static struct {
+        PyGC_Head _this_is_not_used;
+        PyObject_VAR_HEAD
+        PyObject *ob_item[NUM_KEYWORDS];
+    } _kwtuple = {
+        .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS)
+        .ob_item = { &_Py_ID(pat), },
+    };
+    #undef NUM_KEYWORDS
+    #define KWTUPLE (&_kwtuple.ob_base.ob_base)
+
+    #else  // !Py_BUILD_CORE
+    #  define KWTUPLE NULL
+    #endif  // !Py_BUILD_CORE
+
+    static const char * const _keywords[] = {"pat", NULL};
+    static _PyArg_Parser _parser = {
+        .keywords = _keywords,
+        .fname = "translate",
+        .kwtuple = KWTUPLE,
+    };
+    #undef KWTUPLE
+    PyObject *argsbuf[1];
+    PyObject *pattern;
+
+    args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, 1, 1, 0, argsbuf);
+    if (!args) {
+        goto exit;
+    }
+    pattern = args[0];
+    return_value = _fnmatch_translate_impl(module, pattern);
+
+exit:
+    return return_value;
+}
+/*[clinic end generated code: output=b0366b259b101bdf input=a9049054013a1b77]*/
diff --git a/Modules/_fnmatch/posix.c b/Modules/_fnmatch/posix.c
new file mode 100644
index 00000000000000..30d0845d7bae88
--- /dev/null
+++ b/Modules/_fnmatch/posix.c
@@ -0,0 +1,82 @@
+#include "Python.h"         // first, as required before system headers
+
+#ifdef Py_HAVE_FNMATCH
+
+#include <fnmatch.h>        // for fnmatch(3)
+#include "_fnmatchmodule.h" // for Matcher
+
+#define INVALID_TYPE_FOR_NAME "name must be a %s object, got %.200s"
+
+#define VERIFY_NAME_ARG_TYPE(name, check, expecting) \
+    do { \
+        if (!check) { \
+            PyErr_Format(PyExc_TypeError, INVALID_TYPE_FOR_NAME, \
+                         expecting, Py_TYPE(name)->tp_name); \
+            return -1; \
+        } \
+    } while (0)
+
+// Return 1 on a match and 0 otherwise (including fnmatch(3) failures);
+// return -1 if 'str' is NULL, which is how PyUnicode_AsUTF8() reports an
+// error (with an exception set), so that fnmatch(3) never sees NULL.
+#define PROCESS_MATCH_RESULT(pattern, str) \
+    do { \
+        const char *s = (str); /* evaluate 'str' only once */ \
+        return s == NULL ? -1 : fnmatch((pattern), s, 0) == 0; \
+    } while (0)
+
+inline int
+_posix_fnmatch_encoded(const char *pattern, PyObject *string)
+{
+    VERIFY_NAME_ARG_TYPE(string, PyBytes_Check(string), "bytes");
+    PROCESS_MATCH_RESULT(pattern, PyBytes_AS_STRING(string));
+}
+
+inline int
+_posix_fnmatch_unicode(const char *pattern, PyObject *string)
+{
+    VERIFY_NAME_ARG_TYPE(string, PyUnicode_Check(string), "string");
+    PROCESS_MATCH_RESULT(pattern, PyUnicode_AsUTF8(string));
+}
+
+PyObject *
+_posix_fnmatch_filter(const char *pattern, PyObject *names, Matcher match)
+{
+    PyObject *iter = PyObject_GetIter(names);
+    if (iter == NULL) {
+        return NULL;
+    }
+
+    PyObject *res = PyList_New(0);
+    if (res == NULL) {
+        Py_DECREF(iter);
+        return NULL;
+    }
+    // Collect, in iteration order, the names accepted by 'match'.
+    PyObject *name = NULL;
+    while ((name = PyIter_Next(iter))) {
+        int rc = match(pattern, name);
+        if (rc < 0) {
+            goto abort;
+        }
+        if (rc == 1) {
+            if (PyList_Append(res, name) < 0) {
+                goto abort;
+            }
+        }
+        Py_DECREF(name);
+    }
+    // PyIter_Next() returns NULL both when the iterator is exhausted and
+    // when it failed; only the latter leaves an exception set.
+    if (PyErr_Occurred()) {
+        goto abort;
+    }
+    Py_DECREF(iter);
+    return res;
+abort:
+    Py_XDECREF(name);
+    Py_DECREF(iter);
+    Py_DECREF(res);
+    return NULL;
+}
+#endif
diff --git a/Modules/_fnmatch/regex.c b/Modules/_fnmatch/regex.c
new file mode 100644
index 00000000000000..5ba96a214bc267
--- /dev/null
+++ b/Modules/_fnmatch/regex.c
@@ -0,0 +1,67 @@
+#include "Python.h"
+
+/*
+ * Perform a case-sensitive match using regular expressions.
+ *
+ * Parameters
+ *
+ *      matcher     A callable behaving like re.compile(...).match.
+ *      name        The filename to match.
+ *
+ * Returns 1 if calling 'matcher' on 'name' does not yield None, else 0.
+ * Returns -1, with an exception set, if the call failed.
+ */
+int
+_regex_fnmatch_generic(PyObject *matcher, PyObject *name)
+{
+    // 'name' is type-checked by the matcher itself. It is passed as a single
+    // argument: PyObject_CallFunction(..., "O", name) would unpack tuples.
+    PyObject *match = PyObject_CallOneArg(matcher, name);
+    if (match == NULL) {
+        return -1;
+    }
+    int matching = match != Py_None;
+    Py_DECREF(match);
+    return matching;
+}
+
+PyObject *
+_regex_fnmatch_filter(PyObject *matcher, PyObject *names)
+{
+    PyObject *iter = PyObject_GetIter(names);
+    if (iter == NULL) {
+        return NULL;
+    }
+
+    PyObject *res = PyList_New(0);
+    if (res == NULL) {
+        Py_DECREF(iter);
+        return NULL;
+    }
+    // Collect, in iteration order, the names accepted by 'matcher'.
+    PyObject *name = NULL;
+    while ((name = PyIter_Next(iter))) {
+        int rc = _regex_fnmatch_generic(matcher, name);
+        if (rc < 0) {
+            goto abort;
+        }
+        if (rc == 1) {
+            if (PyList_Append(res, name) < 0) {
+                goto abort;
+            }
+        }
+        Py_DECREF(name);
+    }
+    // PyIter_Next() returns NULL both when the iterator is exhausted and
+    // when it failed; only the latter leaves an exception set.
+    if (PyErr_Occurred()) {
+        goto abort;
+    }
+    Py_DECREF(iter);
+    return res;
+abort:
+    Py_XDECREF(name);
+    Py_DECREF(iter);
+    Py_DECREF(res);
+    return NULL;
+}
diff --git a/Modules/_fnmatchmodule.c b/Modules/_fnmatch/translate.c
similarity index 50%
rename from Modules/_fnmatchmodule.c
rename to Modules/_fnmatch/translate.c
index 60528f18b1b658..8ac45d546826fc 100644
--- a/Modules/_fnmatchmodule.c
+++ b/Modules/_fnmatch/translate.c
@@ -1,375 +1,228 @@
 /*
- * C accelerator for the 'fnmatch' module (POSIX only).
- *
- * Most functions expect string or bytes instances, and thus the Python
- * implementation should first pre-process path-like objects, possibly
- * applying normalizations depending on the platform if needed.
+ * C accelerator for the translation function from UNIX shell patterns
+ * to RE patterns. This accelerator is platform-independent but can be
+ * disabled on demand.
  */
 
 #include "Python.h"
-#include "pycore_call.h" // for _PyObject_CallMethod
-
-#include "clinic/_fnmatchmodule.c.h"
+#include "pycore_call.h"    // for _PyObject_CallMethod()
 
-#define INVALID_PATTERN_TYPE "pattern must be a string or a bytes object"
+#include "_fnmatchmodule.h" // for get_fnmatchmodulestate_state()
 
-// module state functions
+// ==== Helper declarations ==================================================
 
-typedef struct {
-    PyObject *re_module; // 're' module
-    PyObject *os_module; // 'os' module
+/*
+ * Creates a new Unicode object from a Py_UCS4 character.
+ *
+ * Note: this is 'unicode_char' taken from Objects/unicodeobject.c.
+ */
+static PyObject *
+get_unicode_character(Py_UCS4 ch);
 
-    PyObject *lru_cache; // optional cache for regex patterns, if needed
-} fnmatchmodule_state;
+/*
+ * Construct a regular expression out of a UNIX-style expression.
+ *
+ * The expression to translate is the content of an '[(BLOCK)]' expression
+ * or '[!(BLOCK)]' expression. The BLOCK contains single unicode characters
+ * or character ranges (e.g., 'a-z').
+ *
+ * By convention 'start' and 'stop' represent the INCLUSIVE start index
+ * and EXCLUSIVE stop index of BLOCK in the full 'pattern'. Note that
+ * we always have pattern[stop] == ']' and pattern[start] == BLOCK[0].
+ *
+ * For instance, for "ab[c-f]g[!1-5]", the values of 'start' and 'stop'
+ * for the sub-pattern '[c-f]' are 3 and 6 respectively, whereas their
+ * values for '[!1-5]' are 10 (not 9) and 13 respectively.
+ */
+static PyObject *
+translate_expression(PyObject *pattern, Py_ssize_t start, Py_ssize_t stop);
 
-static inline fnmatchmodule_state *
-get_fnmatchmodulestate_state(PyObject *module)
-{
-    void *state = PyModule_GetState(module);
-    assert(state != NULL);
-    return (fnmatchmodule_state *) state;
-}
+/*
+ * Write an escaped string using re.escape().
+ *
+ * This returns the number of written characters, or -1 if an error occurred.
+ */
+static Py_ssize_t
+write_literal(fnmatchmodule_state *state,
+              _PyUnicodeWriter *writer,
+              PyObject *unicode);
 
-static int
-fnmatchmodule_clear(PyObject *m)
-{
-    fnmatchmodule_state *st = get_fnmatchmodulestate_state(m);
-    Py_CLEAR(st->os_module);
-    Py_CLEAR(st->re_module);
-    Py_CLEAR(st->lru_cache);
-    return 0;
-}
+/*
+ * Write the translated pattern obtained by translate_expression().
+ *
+ * This returns the number of written characters, or -1 if an error occurred.
+ */
+static Py_ssize_t
+write_expression(_PyUnicodeWriter *writer, PyObject *expression);
 
-static int
-fnmatchmodule_traverse(PyObject *m, visitproc visit, void *arg)
-{
-    fnmatchmodule_state *st = get_fnmatchmodulestate_state(m);
-    Py_VISIT(st->os_module);
-    Py_VISIT(st->re_module);
-    Py_VISIT(st->lru_cache);
-    return 0;
-}
+/*
+ * Build the final regular expression by processing the wildcards.
+ *
+ * The position of each wildcard in 'strings' is given by 'indices'.
+ */
+static PyObject *
+process_wildcards(PyObject *pattern, PyObject *indices);
 
-static void
-fnmatchmodule_free(void *m)
-{
-    fnmatchmodule_clear((PyObject *) m);
-}
+// ==== API implementation ====================================================
 
-static int
-fnmatchmodule_exec(PyObject *m)
+PyObject *
+translate(PyObject *module, PyObject *pattern)
 {
-#define IMPORT_MODULE(attr, name) \
-    do { \
-        state->attr = PyImport_ImportModule((name)); \
-        if (state->attr == NULL) { \
-            return -1; \
-        } \
-    } while (0)
-
-#define INTERN_STRING(attr, str) \
-    do { \
-        state->attr = PyUnicode_InternFromString((str)); \
-        if (state->attr == NULL) { \
-            return -1; \
-        } \
-    } while (0)
-
-    fnmatchmodule_state *state = get_fnmatchmodulestate_state(m);
-
-    // imports
-    IMPORT_MODULE(os_module, "os");
-    IMPORT_MODULE(re_module, "re");
-
-    // helpers
-    state->lru_cache = _PyImport_GetModuleAttrString("functools", "lru_cache");
-    if (state->lru_cache == NULL) {
-        return -1;
-    }
-    // todo: handle LRU cache
-
-#undef IMPORT_MODULE
-
-    return 0;
-}
-
-/*[clinic input]
-module _fnmatch
-[clinic start generated code]*/
-/*[clinic end generated code: output=da39a3ee5e6b4b0d input=356e324d57d93f08]*/
-
-#ifdef Py_HAVE_FNMATCH
-#include <fnmatch.h>
-
-#define VERIFY_NAME_ARG_TYPE(name, check, expecting) \
+#define READ(ind) PyUnicode_READ(kind, data, (ind))
+#define ADVANCE_IF_CHAR(ch, ind, maxind) \
     do { \
-        if (!check) { \
-            PyErr_Format(PyExc_TypeError, \
-                         "name must be a %s object, got %.200s", \
-                         expecting, Py_TYPE(name)->tp_name); \
-            return -1; \
+        if ((ind) < (maxind) && READ(ind) == (ch)) { \
+            ++(ind); \
         } \
     } while (0)
-
-#define PROCESS_MATCH_RESULT(r) \
+#define _WHILE_READ_CMP(ch, ind, maxind, cmp) \
     do { \
-        int res = (r); /* avoid variable capture */ \
-        if (res < 0) { \
-            return res; \
+        while ((ind) < (maxind) && READ(ind) cmp (ch)) { \
+            ++(ind); \
         } \
-        return res != FNM_NOMATCH; \
     } while (0)
+#define ADVANCE_TO_NEXT(ch, from, maxind) _WHILE_READ_CMP(ch, from, maxind, !=)
+#define DROP_DUPLICATES(ch, from, maxind) _WHILE_READ_CMP(ch, from, maxind, ==)
 
-/*
- * Perform a case-sensitive match using fnmatch(3).
- *
- * Parameters
- *
- *      pattern     A UNIX shell pattern.
- *      name        The filename to match (bytes object).
- *
- * Returns 1 if the 'name' matches the 'pattern' and 0 otherwise.
- *
- * Returns -1 if (1) 'name' is not a `bytes` object, and
- * sets a TypeError exception, or (2) something went wrong.
- */
-static inline int
-posix_fnmatch_encoded(const char *pattern, PyObject *name)
-{
-    VERIFY_NAME_ARG_TYPE(name, PyBytes_Check(name), "bytes");
-    PROCESS_MATCH_RESULT(fnmatch(pattern, PyBytes_AS_STRING(name), 0));
-}
-
-/* Same as `posix_fnmatch_encoded` but for string-like objects. */
-static inline int
-posix_fnmatch_unicode(const char *pattern, PyObject *name)
-{
-    VERIFY_NAME_ARG_TYPE(name, PyUnicode_Check(name), "string");
-    PROCESS_MATCH_RESULT(fnmatch(pattern, PyUnicode_AsUTF8(name), 0));
-}
-
-static PyObject *
-posix_fnmatch_filter(const char *pattern, PyObject *names,
-                     int (*match)(const char *, PyObject *))
-{
-    PyObject *iter = PyObject_GetIter(names);
-    if (iter == NULL) {
+    fnmatchmodule_state *state = get_fnmatchmodulestate_state(module);
+    PyObject *re = state->re_module;
+    const Py_ssize_t n = PyUnicode_GET_LENGTH(pattern);
+    // We would write less data if there are successive '*', which should
+    // not be the case in general. Otherwise, we write >= n characters
+    // since escaping them would always add more characters so we will
+    // overestimate a bit the number of characters to write.
+    //
+    // TODO(picnixz): should we limit the estimation or not?
+    PyUnicodeWriter *writer = PyUnicodeWriter_Create((Py_ssize_t)(1.05 * n));
+    if (writer == NULL) {
         return NULL;
     }
-
-    PyObject *res = PyList_New(0);
-    if (res == NULL) {
-        Py_DECREF(iter);
-        return NULL;
+    _PyUnicodeWriter *_writer = (_PyUnicodeWriter *)(writer);
+    // list containing the indices where '*' has a special meaning
+    PyObject *indices = PyList_New(0);
+    if (indices == NULL) {
+        goto abort;
     }
-
-    PyObject *name = NULL;
-    while ((name = PyIter_Next(iter))) {
-        int rc = match(pattern, name);
-        if (rc < 0) {
-            goto abort;
-        }
-        if (rc == 1) {
-            if (PyList_Append(res, name) < 0) {
-                goto abort;
+    const int kind = PyUnicode_KIND(pattern);
+    const void *data = PyUnicode_DATA(pattern);
+    Py_ssize_t h = 0, i = 0;
+    while (i < n) {
+        // read and advance to the next character
+        Py_UCS4 chr = READ(i++);
+        switch (chr) {
+            case '*': {
+                if (_PyUnicodeWriter_WriteChar(_writer, chr) < 0) {
+                    goto abort;
+                }
+                DROP_DUPLICATES('*', i, n);
+                PyObject *index = PyLong_FromSsize_t(h++);
+                if (index == NULL) {
+                    goto abort;
+                }
+                int rc = PyList_Append(indices, index);
+                Py_DECREF(index);
+                if (rc < 0) {
+                    goto abort;
+                }
+                break;
+            }
+            case '?': {
+                // translate optional '?' (fnmatch) into optional '.' (regex)
+                if (_PyUnicodeWriter_WriteChar(_writer, '.') < 0) {
+                    goto abort;
+                }
+                ++h; // increase the expected result's length
+                break;
+            }
+            case '[': {
+                Py_ssize_t j = i;           // 'i' is already at next char
+                ADVANCE_IF_CHAR('!', j, n); // [!
+                ADVANCE_IF_CHAR(']', j, n); // [!] or []
+                ADVANCE_TO_NEXT(']', j, n); // locate closing ']'
+                if (j >= n) {
+                    if (_PyUnicodeWriter_WriteASCIIString(_writer, "\\[", 2) < 0) {
+                        goto abort;
+                    }
+                    h += 2; // we just wrote 2 characters
+                    break;  // early break for clarity
+                }
+                else {
+                    //              v--- pattern[j] (exclusive)
+                    // '[' * ... * ']'
+                    //     ^----- pattern[i] (inclusive)
+                    int rc = PyUnicode_FindChar(pattern, '-', i, j, 1);
+                    if (rc == -2) {
+                        goto abort;
+                    }
+                    PyObject *s1 = NULL, *s2 = NULL;
+                    if (rc == -1) {
+                        PyObject *group = PyUnicode_Substring(pattern, i, j);
+                        if (group == NULL) {
+                            goto abort;
+                        }
+                        s1 = _PyObject_CallMethod(group, &_Py_ID(replace), "ss", "\\", "\\\\");
+                        Py_DECREF(group);
+                    }
+                    else {
+                        assert(rc >= 0);
+                        s1 = translate_expression(pattern, i, j);
+                    }
+                    if (s1 == NULL) {
+                        goto abort;
+                    }
+                    s2 = _PyObject_CallMethod(re, &_Py_ID(sub), "ssO", "([&~|])", "\\\\\\1", s1);
+                    Py_DECREF(s1);
+                    if (s2 == NULL) {
+                        goto abort;
+                    }
+                    int difflen = write_expression(_writer, s2);
+                    Py_DECREF(s2);
+                    if (difflen < 0) {
+                        goto abort;
+                    }
+                    h += difflen;
+                    i = j + 1;  // jump to the character after ']'
+                    break;      // early break for clarity
+                }
+            }
+            default: {
+                PyObject *str = get_unicode_character(chr);
+                if (str == NULL) {
+                    goto abort;
+                }
+                int difflen = write_literal(state, _writer, str);
+                Py_DECREF(str);
+                if (difflen < 0) {
+                    goto abort;
+                }
+                h += difflen;
+                break;
             }
         }
-        Py_DECREF(name);
-        if (PyErr_Occurred()) {
-            Py_DECREF(res);
-            Py_DECREF(iter);
-            return NULL;
-        }
-    }
-    Py_DECREF(iter);
-    return res;
-abort:
-    Py_XDECREF(name);
-    Py_DECREF(iter);
-    Py_DECREF(res);
-    return NULL;
-}
-#else
-
-static PyObject *
-get_match_function(PyObject *module, PyObject *pattern)
-{
-    // TODO(picnixz): use LRU-cache
-    PyObject *expr = _fnmatch_translate_impl(module, pattern);
-    if (expr == NULL) {
-        return NULL;
-    }
-    fnmatchmodule_state *st = get_fnmatchmodulestate_state(module);
-    PyObject *compiled = _PyObject_CallMethod(st->re_module, &_Py_ID(compile), "O", expr);
-    Py_DECREF(expr);
-    if (compiled == NULL) {
-        return NULL;
-    }
-    PyObject *matcher = PyObject_GetAttr(compiled, &_Py_ID(match));
-    Py_DECREF(compiled);
-    return matcher;
-}
-
-static PyMethodDef get_match_function_method_def = {
-    "get_match_function",
-    _PyCFunction_CAST(get_match_function),
-    METH_O,
-    NULL
-};
-
-/*
- * Perform a case-sensitive match using regular expressions.
- *
- * Parameters
- *
- *      pattern     A translated regular expression.
- *      name        The filename to match.
- *
- * Returns 1 if the 'name' matches the 'pattern' and 0 otherwise.
- * Returns -1 if something went wrong.
- */
-static inline int
-regex_fnmatch_generic(PyObject *matcher, PyObject *name)
-{
-    // If 'name' is of incorrect type, it will be detected when calling
-    // the matcher function (we emulate 're.compile(...).match(name)').
-    PyObject *match = PyObject_CallFunction(matcher, "O", name);
-    if (match == NULL) {
-        return -1;
-    }
-    int matching = match != Py_None;
-    Py_DECREF(match);
-    return matching;
-}
-
-static PyObject *
-regex_fnmatch_filter(PyObject *matcher, PyObject *names)
-{
-    PyObject *iter = PyObject_GetIter(names);
-    if (iter == NULL) {
-        return NULL;
     }
-
-    PyObject *res = PyList_New(0);
-    if (res == NULL) {
-        Py_DECREF(iter);
+#undef DROP_DUPLICATES
+#undef ADVANCE_TO_NEXT
+#undef _WHILE_READ_CMP
+#undef ADVANCE_IF_CHAR
+#undef READ
+    PyObject *translated = PyUnicodeWriter_Finish(writer);
+    if (translated == NULL) {
+        Py_DECREF(indices);
         return NULL;
     }
-
-    PyObject *name = NULL;
-    while ((name = PyIter_Next(iter))) {
-        int rc = regex_fnmatch_generic(matcher, name);
-        if (rc < 0) {
-            goto abort;
-        }
-        if (rc == 1) {
-            if (PyList_Append(res, name) < 0) {
-                goto abort;
-            }
-        }
-        Py_DECREF(name);
-        if (PyErr_Occurred()) {
-            Py_DECREF(res);
-            Py_DECREF(iter);
-            return NULL;
-        }
-    }
-    Py_DECREF(iter);
+    PyObject *res = process_wildcards(translated, indices);
+    Py_DECREF(translated);
+    Py_DECREF(indices);
     return res;
 abort:
-    Py_XDECREF(name);
-    Py_DECREF(iter);
-    Py_DECREF(res);
-    return NULL;
-}
-#endif
-
-/*[clinic input]
-_fnmatch.filter -> object
-
-    names: object
-    pat: object
-
-[clinic start generated code]*/
-
-static PyObject *
-_fnmatch_filter_impl(PyObject *module, PyObject *names, PyObject *pat)
-/*[clinic end generated code: output=7f11aa68436d05fc input=1d233174e1c4157a]*/
-{
-#ifndef Py_HAVE_FNMATCH
-    PyObject *matcher = get_match_function(module, pat);
-    if (matcher == NULL) {
-        return NULL;
-    }
-    PyObject *result = regex_fnmatch_filter(matcher, names);
-    Py_DECREF(matcher);
-    return result;
-#else
-    // Note that the Python implementation of fnmatch.filter() does not
-    // call os.fspath() on the names being matched, whereas it does on NT.
-    if (PyBytes_Check(pat)) {
-        const char *pattern = PyBytes_AS_STRING(pat);
-        return posix_fnmatch_filter(pattern, names, &posix_fnmatch_encoded);
-    }
-    if (PyUnicode_Check(pat)) {
-        const char *pattern = PyUnicode_AsUTF8(pat);
-        return posix_fnmatch_filter(pattern, names, &posix_fnmatch_unicode);
-    }
-    PyErr_SetString(PyExc_TypeError, INVALID_PATTERN_TYPE);
+    PyUnicodeWriter_Discard(writer);
+    Py_XDECREF(indices);
     return NULL;
-#endif
 }
 
-/*[clinic input]
-_fnmatch.fnmatchcase -> bool
-
-    name: object
-    pat: object
-
-Test whether `name` matches `pattern`, including case.
+// ==== Helper implementations ================================================
 
-This is a version of fnmatch() which doesn't case-normalize
-its arguments.
-
-[clinic start generated code]*/
-
-static int
-_fnmatch_fnmatchcase_impl(PyObject *module, PyObject *name, PyObject *pat)
-/*[clinic end generated code: output=4d1283b1b1fc7cb8 input=b02a6a5c8c5a46e2]*/
-{
-#ifndef Py_HAVE_FNMATCH
-    PyObject *matcher = get_match_function(module, pat);
-    if (matcher == NULL) {
-        return -1;
-    }
-    int res = regex_fnmatch_generic(matcher, name);
-    Py_DECREF(matcher);
-    return res;
-#else
-    // This function does not transform path-like objects, nor does it
-    // case-normalize 'name' or 'pattern' (whether it is the Python or
-    // the C implementation).
-    if (PyBytes_Check(pat)) {
-        const char *pattern = PyBytes_AS_STRING(pat);
-        return posix_fnmatch_encoded(pattern, name);
-    }
-    if (PyUnicode_Check(pat)) {
-        const char *pattern = PyUnicode_AsUTF8(pat);
-        return posix_fnmatch_unicode(pattern, name);
-    }
-    PyErr_SetString(PyExc_TypeError, INVALID_PATTERN_TYPE);
-    return -1;
-#endif
-}
-
-/*
- * Convert Py_UCS4 to (PyObject *).
- *
- * This creates a new reference.
- *
- * Note: this is 'unicode_char' taken from Objects/unicodeobject.c.
- */
-static PyObject *
+PyObject *
 get_unicode_character(Py_UCS4 ch)
 {
     assert(ch <= 0x10ffff);
@@ -382,98 +235,20 @@ get_unicode_character(Py_UCS4 ch)
     if (unicode == NULL) {
         return NULL;
     }
-    assert(PyUnicode_KIND(unicode) != PyUnicode_1BYTE_KIND);
-    if (PyUnicode_KIND(unicode) == PyUnicode_2BYTE_KIND) {
-        PyUnicode_2BYTE_DATA(unicode)[0] = (Py_UCS2) ch;
-    }
-    else {
-        assert(PyUnicode_KIND(unicode) == PyUnicode_4BYTE_KIND);
-        PyUnicode_4BYTE_DATA(unicode)[0] = ch;
-    }
-    assert(_PyUnicode_CheckConsistency(unicode, 1));
-    return unicode;
-}
-
-static Py_ssize_t /* number of written characters or -1 on error */
-write_escaped_string(PyObject *re, _PyUnicodeWriter *writer, PyObject *str)
-{
-    PyObject *escaped = PyObject_CallMethodOneArg(re, &_Py_ID(escape), str);
-    if (escaped == NULL) {
-        return -1;
-    }
-    Py_ssize_t written = PyUnicode_GET_LENGTH(escaped);
-    int rc = _PyUnicodeWriter_WriteStr(writer, escaped);
-    Py_DECREF(escaped);
-    if (rc < 0) {
-        return -1;
-    }
-    assert(written > 0);
-    return written;
-}
-
-static Py_ssize_t /* number of written characters or -1 on error */
-write_translated_group(_PyUnicodeWriter *writer, PyObject *group)
-{
-#define WRITE_ASCII(str, len) \
-    do { \
-        if (_PyUnicodeWriter_WriteASCIIString(writer, (str), (len)) < 0) { \
-            return -1; \
-        } \
-    } while (0)
-
-#define WRITE_CHAR(c) \
-    do { \
-        if (_PyUnicodeWriter_WriteChar(writer, (c)) < 0) { \
-            return -1; \
-        } \
-    } while (0)
-
-    Py_ssize_t grouplen;
-    const char *buffer = PyUnicode_AsUTF8AndSize(group, &grouplen);
-    if (grouplen == 0) {
-        /* empty range: never match */
-        WRITE_ASCII("(?!)", 4);
-        return 4;
-    }
-    else if (grouplen == 1 && buffer[0] == '!') {
-        /* negated empty range: match any character */
-        WRITE_CHAR('.');
-        return 1;
-    }
-    else {
-        Py_ssize_t extra = 2; // '[' and ']'
-        WRITE_CHAR('[');
-        switch (buffer[0]) {
-            case '!': {
-                WRITE_CHAR('^');
-                if (_PyUnicodeWriter_WriteSubstring(writer, group, 1, grouplen) < 0) {
-                    return -1;
-                }
-                break;
-            }
-            case '^':
-            case '[': {
-                WRITE_CHAR('\\');
-                extra++;
-                break;
-            }
-            default:
-                if (_PyUnicodeWriter_WriteStr(writer, group) < 0) {
-                    return -1;
-                }
-                break;
-        }
-        WRITE_CHAR(']');
-        return grouplen + extra;
+    assert(PyUnicode_KIND(unicode) != PyUnicode_1BYTE_KIND);
+    if (PyUnicode_KIND(unicode) == PyUnicode_2BYTE_KIND) {
+        PyUnicode_2BYTE_DATA(unicode)[0] = (Py_UCS2)ch;
     }
-#undef WRITE_CHAR
-#undef WRITE_ASCII
+    else {
+        assert(PyUnicode_KIND(unicode) == PyUnicode_4BYTE_KIND);
+        PyUnicode_4BYTE_DATA(unicode)[0] = ch;
+    }
+    assert(_PyUnicode_CheckConsistency(unicode, 1));
+    return unicode;
 }
 
-static PyObject *
-get_translated_group(PyObject *pattern,
-                     Py_ssize_t i /* pattern[i-1] == '[' (incl.) */,
-                     Py_ssize_t j /* pattern[j]   == ']' (excl.) */)
+PyObject *
+translate_expression(PyObject *pattern, Py_ssize_t i, Py_ssize_t j)
 {
     PyObject *chunks = PyList_New(0);
     if (chunks == NULL) {
@@ -482,7 +257,7 @@ get_translated_group(PyObject *pattern,
     Py_ssize_t k = (PyUnicode_READ_CHAR(pattern, i) == '!') ? i + 2 : i + 1;
     Py_ssize_t chunkscount = 0;
     while (k < j) {
-        PyObject *eobj = _PyObject_CallMethod(pattern, &_Py_ID(find), "ii", k, j);
+        PyObject *eobj = _PyObject_CallMethod(pattern, &_Py_ID(find), "sii", "-", k, j);
         if (eobj == NULL) {
             goto error;
         }
@@ -607,13 +382,93 @@ get_translated_group(PyObject *pattern,
     return NULL;
 }
 
-static PyObject *
-join_translated_parts(PyObject *module, PyObject *strings, PyObject *indices)
+Py_ssize_t /* length of the escaped text handed to the writer, or -1 on failure */
+write_literal(fnmatchmodule_state *state,
+              _PyUnicodeWriter *writer,
+              PyObject *unicode)
+{
+    PyObject *escaped = PyObject_CallMethodOneArg(state->re_module, /* calls the 'escape' method of state->re_module on 'unicode' */
+                                                  &_Py_ID(escape),
+                                                  unicode);
+    if (escaped == NULL) {
+        return -1;
+    }
+    Py_ssize_t written = PyUnicode_GET_LENGTH(escaped); /* read the length while we still own 'escaped' */
+    int rc = _PyUnicodeWriter_WriteStr(writer, escaped);
+    Py_DECREF(escaped); /* released on both the success and the failure path */
+    if (rc < 0) {
+        return -1;
+    }
+    assert(written > 0); /* the code expects the escaped text to be non-empty */
+    return written;
+}
+
+Py_ssize_t /* number of characters written, or -1 if a writer call failed */
+write_expression(_PyUnicodeWriter *writer, PyObject *expression)
+{
+#define WRITE_ASCII(str, len) \
+    do { \
+        if (_PyUnicodeWriter_WriteASCIIString(writer, (str), (len)) < 0) { \
+            return -1; \
+        } \
+    } while (0)
+
+#define WRITE_CHAR(c) \
+    do { \
+        if (_PyUnicodeWriter_WriteChar(writer, (c)) < 0) { \
+            return -1; \
+        } \
+    } while (0)
+    /* Lengths are in code points (not UTF-8 bytes): they index 'expression'. */
+    const Py_ssize_t grouplen = PyUnicode_GET_LENGTH(expression);
+    const Py_UCS4 first = grouplen > 0 ? PyUnicode_READ_CHAR(expression, 0) : 0;
+    if (grouplen == 0) {
+        /* empty range: never match */
+        WRITE_ASCII("(?!)", 4);
+        return 4;
+    }
+    else if (grouplen == 1 && first == '!') {
+        /* negated empty range: match any character */
+        WRITE_CHAR('.');
+        return 1;
+    }
+    else {
+        Py_ssize_t extra = 2; // '[' and ']'
+        WRITE_CHAR('[');
+        switch (first) {
+            case '!': {
+                WRITE_CHAR('^'); // '^' replaces '!', so the length is unchanged
+                if (_PyUnicodeWriter_WriteSubstring(writer, expression, 1, grouplen) < 0) {
+                    return -1;
+                }
+                break;
+            }
+            case '^':
+            case '[':
+                /* a leading '^' or '[' is escaped so that it stays literal */
+                WRITE_CHAR('\\');
+                extra++;
+                /* fall through */
+            default:
+                if (_PyUnicodeWriter_WriteStr(writer, expression) < 0) {
+                    return -1;
+                }
+                break;
+        }
+        WRITE_CHAR(']');
+        return grouplen + extra;
+    }
+#undef WRITE_CHAR
+#undef WRITE_ASCII
+}
+
+PyObject *
+process_wildcards(PyObject *pattern, PyObject *indices)
 {
 #define WRITE_SUBSTRING(i, j) \
     do { \
-        if ((i) < (j)) { \
-            if (_PyUnicodeWriter_WriteSubstring(_writer, strings, (i), (j)) < 0) { \
+        if ((i) < (j)) { /* write the substring if non-empty */ \
+            if (_PyUnicodeWriter_WriteSubstring(_writer, pattern, (i), (j)) < 0) { \
                 goto abort; \
             } \
         } \
@@ -622,7 +477,7 @@ join_translated_parts(PyObject *module, PyObject *strings, PyObject *indices)
     const Py_ssize_t m = PyList_GET_SIZE(indices);
     if (m == 0) {
         // just write fr'(?s:{parts} + ")\Z"
-        return PyUnicode_FromFormat("(?s:%S)\\Z", strings);
+        return PyUnicode_FromFormat("(?s:%S)\\Z", pattern);
     }
     /*
      * Special cases: indices[0] == 0 or indices[-1] + 1 == n
@@ -630,8 +485,7 @@ join_translated_parts(PyObject *module, PyObject *strings, PyObject *indices)
      * If indices[0] == 0       write (?>.*?abcdef) instead of abcdef
      * If indices[-1] == n - 1  write '.*' instead of empty string
      */
-    PyObject *ind;
-    Py_ssize_t i = 0, j, n = PyUnicode_GET_LENGTH(strings);
+    Py_ssize_t i = 0, j, n = PyUnicode_GET_LENGTH(pattern);
     /*
      * If the pattern starts with '*', we will write everything
      * before it. So we will write at least indices[0] characters.
@@ -657,21 +511,19 @@ join_translated_parts(PyObject *module, PyObject *strings, PyObject *indices)
     if (writer == NULL) {
         return NULL;
     }
-    _PyUnicodeWriter *_writer = (_PyUnicodeWriter *) (writer);
+    _PyUnicodeWriter *_writer = (_PyUnicodeWriter *)(writer);
 
     WRITE_SUBSTRING(i, j);  // write stuff before '*' if needed
-    i = j + 1;              // jump after the star
-
-    fnmatchmodule_state *state = get_fnmatchmodulestate_state(module);
+    i = j + 1;              // jump after the '*'
     for (Py_ssize_t k = 1; k < m; ++k) {
-        ind = PyList_GET_ITEM(indices, k);
+        PyObject *ind = PyList_GET_ITEM(indices, k);
         j = PyLong_AsSsize_t(ind);
-        if (j < 0 || i > j) {
-            goto abort;
-        }
-        if ((_PyUnicodeWriter_WriteASCIIString(_writer, "(?>.*?", 6) < 0) ||
-            (_PyUnicodeWriter_WriteSubstring(_writer, strings, i, j) < 0) ||
-            (_PyUnicodeWriter_WriteChar(_writer, ')') < 0)) {
+        assert(j < 0 || i < j);
+        if (j < 0 ||
+            (_PyUnicodeWriter_WriteASCIIString(_writer, "(?>.*?", 6) < 0) ||
+            (_PyUnicodeWriter_WriteSubstring(_writer, pattern, i, j) < 0) ||
+            (_PyUnicodeWriter_WriteChar(_writer, ')') < 0))
+        {
             goto abort;
         }
         i = j + 1;
@@ -680,245 +532,16 @@ join_translated_parts(PyObject *module, PyObject *strings, PyObject *indices)
     if (_PyUnicodeWriter_WriteASCIIString(_writer, ".*", 2) < 0) {
         goto abort;
     }
-    WRITE_SUBSTRING(i, n); // write TAIL part
-
+    WRITE_SUBSTRING(i, n); // write the remaining substring
 #undef WRITE_SUBSTRING
-
     PyObject *res = PyUnicodeWriter_Finish(writer);
     if (res == NULL) {
         return NULL;
     }
-    return PyUnicode_FromFormat("(?s:%S)\\Z", res);
-abort:
-    PyUnicodeWriter_Discard(writer);
-    return NULL;
-}
-
-static PyObject *
-translate(PyObject *module, PyObject *pattern)
-/* new reference */
-{
-#define READ(ind) PyUnicode_READ(kind, data, (ind))
-
-#define ADVANCE_IF_CHAR(ch, ind, maxind) \
-    do { \
-        if ((ind) < (maxind) && READ(ind) == (ch)) { \
-            ++(ind); \
-        } \
-    } while (0)
-
-#define _WHILE_READ_CMP(ch, ind, maxind, cmp) \
-    do { \
-        while ((ind) < (maxind) && READ(ind) cmp (ch)) { \
-            ++(ind); \
-        } \
-    } while (0)
-
-#define ADVANCE_TO_NEXT(ch, from, maxind) _WHILE_READ_CMP(ch, from, maxind, !=)
-#define DROP_DUPLICATES(ch, from, maxind) _WHILE_READ_CMP(ch, from, maxind, ==)
-
-    fnmatchmodule_state *state = get_fnmatchmodulestate_state(module);
-    PyObject *re = state->re_module;
-    const Py_ssize_t n = PyUnicode_GET_LENGTH(pattern);
-    // We would write less data if there are successive '*', which should
-    // not be the case in general. Otherwise, we write >= n characters
-    // since escaping them would always add more characters so we will
-    // overestimate a bit the number of characters to write.
-    //
-    // TODO(picnixz): should we limit the estimation or not?
-    PyUnicodeWriter *writer = PyUnicodeWriter_Create((Py_ssize_t) (1.05 * n));
-    if (writer == NULL) {
-        return NULL;
-    }
-    _PyUnicodeWriter *_writer = (_PyUnicodeWriter *) (writer);
-    // list containing the indices where '*' has a special meaning
-    PyObject *indices = PyList_New(0);
-    if (indices == NULL) {
-        goto abort;
-    }
-    const int kind = PyUnicode_KIND(pattern);
-    const void *data = PyUnicode_DATA(pattern);
-    Py_ssize_t h = 0, i = 0;
-    while (i < n) {
-        // read and advance to the next character
-        Py_UCS4 chr = READ(i++);
-        switch (chr) {
-            case '*': {
-                if (_PyUnicodeWriter_WriteChar(_writer, chr) < 0) {
-                    goto abort;
-                }
-                DROP_DUPLICATES('*', i, n);
-                PyObject *index = PyLong_FromSsize_t(h++);
-                if (index == NULL) {
-                    goto abort;
-                }
-                int rc = PyList_Append(indices, index);
-                Py_DECREF(index);
-                if (rc < 0) {
-                    goto abort;
-                }
-                break;
-            }
-            case '?': {
-                // translate optional '?' (fnmatch) into optional '.' (regex)
-                if (_PyUnicodeWriter_WriteChar(_writer, '.') < 0) {
-                    goto abort;
-                }
-                ++h; // increase the expected result's length
-                break;
-            }
-            case '[': {
-                Py_ssize_t j = i;           // 'i' is already at next char
-                ADVANCE_IF_CHAR('!', j, n); // [!
-                ADVANCE_IF_CHAR(']', j, n); // [!] or []
-                ADVANCE_TO_NEXT(']', j, n); // locate closing ']'
-                if (j >= n) {
-                    if (_PyUnicodeWriter_WriteASCIIString(_writer, "\\[", 2) < 0) {
-                        goto abort;
-                    }
-                    h += 2; // we just wrote 2 characters
-                    break;  // early break for clarity
-                }
-                else {
-                    //              v--- pattern[j] (exclusive)
-                    // '[' * ... * ']'
-                    //     ^----- pattern[i] (inclusive)
-                    PyObject *s1 = NULL, *s2 = NULL;
-                    int rc = PyUnicode_FindChar(pattern, '-', i, j, 1);
-                    if (rc == -2) {
-                        goto abort;
-                    }
-                    if (rc == -1) {
-                        PyObject *group = PyUnicode_Substring(pattern, i, j);
-                        if (group == NULL) {
-                            goto abort;
-                        }
-                        s1 = _PyObject_CallMethod(group, &_Py_ID(replace), "ss", "\\", "\\\\");
-                        Py_DECREF(group);
-                    }
-                    else {
-                        assert(rc >= 0);
-                        s1 = get_translated_group(pattern, i, j);
-                    }
-                    if (s1 == NULL) {
-                        goto abort;
-                    }
-                    s2 = _PyObject_CallMethod(re, &_Py_ID(sub), "ssO", "([&~|])", "\\\\\\1", s1);
-                    Py_DECREF(s1);
-                    if (s2 == NULL) {
-                        goto abort;
-                    }
-                    int difflen = write_translated_group(_writer, s2);
-                    Py_DECREF(s2);
-                    if (difflen < 0) {
-                        goto abort;
-                    }
-                    h += difflen;
-                    i = j + 1;  // jump to the character after ']'
-                    break;      // early break for clarity
-                }
-            }
-            default: {
-                PyObject *str = get_unicode_character(chr);
-                if (str == NULL) {
-                    goto abort;
-                }
-                int difflen = write_escaped_string(re, _writer, str);
-                Py_DECREF(str);
-                if (difflen < 0) {
-                    goto abort;
-                }
-                h += difflen;
-                break;
-            }
-        }
-    }
-#undef DROP_DUPLICATES
-#undef ADVANCE_TO_NEXT
-#undef _WHILE_READ_CMP
-#undef ADVANCE_IF_CHAR
-#undef READ
-    PyObject *parts = PyUnicodeWriter_Finish(writer);
-    if (parts == NULL) {
-        Py_DECREF(indices);
-        return NULL;
-    }
-    assert(h == PyUnicode_GET_LENGTH(parts));
-    PyObject *res = join_translated_parts(module, parts, indices);
-    Py_DECREF(parts);
-    Py_DECREF(indices);
-    return res;
+    PyObject *formatted = PyUnicode_FromFormat("(?s:%S)\\Z", res);
+    Py_DECREF(res);
+    return formatted;
 abort:
-    Py_XDECREF(indices);
     PyUnicodeWriter_Discard(writer);
     return NULL;
 }
-
-/*[clinic input]
-_fnmatch.translate -> object
-
-    pat as pattern: object
-
-[clinic start generated code]*/
-
-static PyObject *
-_fnmatch_translate_impl(PyObject *module, PyObject *pattern)
-/*[clinic end generated code: output=2d9e3bbcbcc6e90e input=56e39f7beea97810]*/
-{
-    if (PyBytes_Check(pattern)) {
-        PyObject *unicode = PyUnicode_DecodeLatin1(PyBytes_AS_STRING(pattern),
-                                                   PyBytes_GET_SIZE(pattern),
-                                                   "strict");
-        if (unicode == NULL) {
-            return NULL;
-        }
-        // translated regular expression as a str object
-        PyObject *str_expr = translate(module, unicode);
-        Py_DECREF(unicode);
-        if (str_expr == NULL) {
-            return NULL;
-        }
-        PyObject *expr = PyUnicode_AsLatin1String(str_expr);
-        Py_DECREF(str_expr);
-        return expr;
-    }
-    else if (PyUnicode_Check(pattern)) {
-        return translate(module, pattern);
-    }
-    else {
-        PyErr_SetString(PyExc_TypeError, INVALID_PATTERN_TYPE);
-        return NULL;
-    }
-}
-
-static PyMethodDef fnmatchmodule_methods[] = {
-    _FNMATCH_FILTER_METHODDEF
-    _FNMATCH_FNMATCHCASE_METHODDEF
-    _FNMATCH_TRANSLATE_METHODDEF
-    {NULL, NULL}
-};
-
-static struct PyModuleDef_Slot fnmatchmodule_slots[] = {
-    {Py_mod_exec, fnmatchmodule_exec},
-    {Py_mod_multiple_interpreters, Py_MOD_PER_INTERPRETER_GIL_SUPPORTED},
-    {Py_mod_gil, Py_MOD_GIL_NOT_USED},
-    {0, NULL},
-};
-
-static struct PyModuleDef _fnmatchmodule = {
-    PyModuleDef_HEAD_INIT,
-    "_fnmatch",
-    NULL,
-    .m_size = sizeof(fnmatchmodule_state),
-    .m_methods = fnmatchmodule_methods,
-    .m_slots = fnmatchmodule_slots,
-    .m_traverse = fnmatchmodule_traverse,
-    .m_clear = fnmatchmodule_clear,
-    .m_free = fnmatchmodule_free,
-};
-
-PyMODINIT_FUNC
-PyInit__fnmatch(void)
-{
-    return PyModuleDef_Init(&_fnmatchmodule);
-}

From 5cfd5801b78291796eedf22ab053ebc139c2be8e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?=
 <10796600+picnixz@users.noreply.github.com>
Date: Tue, 9 Jul 2024 18:39:54 +0200
Subject: [PATCH 25/97] regenerated objects

---
 Include/internal/pycore_global_objects_fini_generated.h | 1 +
 Include/internal/pycore_global_strings.h                | 1 +
 Include/internal/pycore_runtime_init_generated.h        | 1 +
 Include/internal/pycore_unicodeobject_generated.h       | 4 ++++
 4 files changed, 7 insertions(+)

diff --git a/Include/internal/pycore_global_objects_fini_generated.h b/Include/internal/pycore_global_objects_fini_generated.h
index fc82cfc1536feb..ec11eec5eec27d 100644
--- a/Include/internal/pycore_global_objects_fini_generated.h
+++ b/Include/internal/pycore_global_objects_fini_generated.h
@@ -863,6 +863,7 @@ _PyStaticObjects_CheckRefcnt(PyInterpreterState *interp) {
     _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(col_offset));
     _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(command));
     _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(comment_factory));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(compile));
     _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(compile_mode));
     _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(consts));
     _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(context));
diff --git a/Include/internal/pycore_global_strings.h b/Include/internal/pycore_global_strings.h
index 78f56bdd2f6238..f27bdeb0183aec 100644
--- a/Include/internal/pycore_global_strings.h
+++ b/Include/internal/pycore_global_strings.h
@@ -352,6 +352,7 @@ struct _Py_global_strings {
         STRUCT_FOR_ID(col_offset)
         STRUCT_FOR_ID(command)
         STRUCT_FOR_ID(comment_factory)
+        STRUCT_FOR_ID(compile)
         STRUCT_FOR_ID(compile_mode)
         STRUCT_FOR_ID(consts)
         STRUCT_FOR_ID(context)
diff --git a/Include/internal/pycore_runtime_init_generated.h b/Include/internal/pycore_runtime_init_generated.h
index 2c965ecd99fcf5..ab94af0cfb90c9 100644
--- a/Include/internal/pycore_runtime_init_generated.h
+++ b/Include/internal/pycore_runtime_init_generated.h
@@ -861,6 +861,7 @@ extern "C" {
     INIT_ID(col_offset), \
     INIT_ID(command), \
     INIT_ID(comment_factory), \
+    INIT_ID(compile), \
     INIT_ID(compile_mode), \
     INIT_ID(consts), \
     INIT_ID(context), \
diff --git a/Include/internal/pycore_unicodeobject_generated.h b/Include/internal/pycore_unicodeobject_generated.h
index 0307d1f4806ba7..a0e532edc1bfc6 100644
--- a/Include/internal/pycore_unicodeobject_generated.h
+++ b/Include/internal/pycore_unicodeobject_generated.h
@@ -1208,6 +1208,10 @@ _PyUnicode_InitStaticStrings(PyInterpreterState *interp) {
     _PyUnicode_InternStatic(interp, &string);
     assert(_PyUnicode_CheckConsistency(string, 1));
     assert(PyUnicode_GET_LENGTH(string) != 1);
+    string = &_Py_ID(compile);
+    _PyUnicode_InternStatic(interp, &string);
+    assert(_PyUnicode_CheckConsistency(string, 1));
+    assert(PyUnicode_GET_LENGTH(string) != 1);
     string = &_Py_ID(compile_mode);
     _PyUnicode_InternStatic(interp, &string);
     assert(_PyUnicode_CheckConsistency(string, 1));

From 891a36820164714dfdc8d5c93adc0bfd2688eb81 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?=
 <10796600+picnixz@users.noreply.github.com>
Date: Tue, 9 Jul 2024 18:47:49 +0200
Subject: [PATCH 26/97] maybe it's like that?

---
 PC/config.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/PC/config.c b/PC/config.c
index f08a847a3f1206..8f49d9255b4fbe 100644
--- a/PC/config.c
+++ b/PC/config.c
@@ -92,8 +92,8 @@ struct _inittab _PyImport_Inittab[] = {
     {"binascii", PyInit_binascii},
     {"cmath", PyInit_cmath},
     {"errno", PyInit_errno},
+    {"_fnmatch", PyInit_fnmatch},
     {"faulthandler", PyInit_faulthandler},
-    {"fnmatch", PyInit_fnmatch},
     {"gc", PyInit_gc},
     {"math", PyInit_math},
     {"nt", PyInit_nt}, /* Use the NT os functions, not posix */

From adc18bd7f58fc9bc0f8f2b79193096a7023d883a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?=
 <10796600+picnixz@users.noreply.github.com>
Date: Tue, 9 Jul 2024 19:00:54 +0200
Subject: [PATCH 27/97] maybe like this...?

---
 Makefile.pre.in | 1 +
 1 file changed, 1 insertion(+)

diff --git a/Makefile.pre.in b/Makefile.pre.in
index 97aa13bcc34409..ec99023f36b2b3 100644
--- a/Makefile.pre.in
+++ b/Makefile.pre.in
@@ -356,6 +356,7 @@ FNMATCH_OBJS=	\
 		Modules/_fnmatch/_fnmatchmodule.o \
 		Modules/_fnmatch/posix.o \
 		Modules/_fnmatch/regex.o \
+		Modules/_fnmatch/translate.o
 
 ##########################################################################
 # mimalloc

From 658fb81ad684c40254d7356111c393d767da7f9f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?=
 <10796600+picnixz@users.noreply.github.com>
Date: Wed, 10 Jul 2024 10:14:01 +0200
Subject: [PATCH 28/97] regenerate configure script

---
 configure | 83 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 83 insertions(+)

diff --git a/configure b/configure
index 131ca5f7f897a7..0fefae0032587f 100755
--- a/configure
+++ b/configure
@@ -817,6 +817,8 @@ MODULE_TIME_FALSE
 MODULE_TIME_TRUE
 MODULE__IO_FALSE
 MODULE__IO_TRUE
+MODULE__FNMATCH_FALSE
+MODULE__FNMATCH_TRUE
 MODULE_BUILDTYPE
 TEST_MODULES
 LIBB2_LIBS
@@ -14042,6 +14044,60 @@ printf "%s\n" "#define Py_HAVE_C_COMPLEX 1" >>confdefs.h
 
 fi
 
+# check for fnmatch(3) support
+#
+# We test for the plain POSIX implementation (case-sensitive match).
+#
+# To ensure that the implementation of fnmatch(3) is compliant
+# we run some tests to make sure that everything works well.
+#
+# Note that MSVC does not support fnmatch(3).
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for case-sensititve fnmatch(3)" >&5
+printf %s "checking for case-sensititve fnmatch(3)... " >&6; }
+if test ${ac_cv_fnmatch_supported+y}
+then :
+  printf %s "(cached) " >&6
+else $as_nop
+  if test "$cross_compiling" = yes
+then :
+  ac_cv_fnmatch_supported=no
+
+else $as_nop
+  cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+#include <fnmatch.h>
+int
+main (void)
+{
+
+    exit(!(
+        fnmatch("a*", "abc", 0) != FNM_NOMATCH &&
+        fnmatch("a*", "Abc", 0) == FNM_NOMATCH
+    ));
+
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_run "$LINENO"
+then :
+  ac_cv_fnmatch_supported=yes
+else $as_nop
+  ac_cv_fnmatch_supported=no
+fi
+rm -f core *.core core.conftest.* gmon.out bb.out conftest$ac_exeext \
+  conftest.$ac_objext conftest.beam conftest.$ac_ext
+fi
+
+fi
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_fnmatch_supported" >&5
+printf "%s\n" "$ac_cv_fnmatch_supported" >&6; }
+if test "$ac_cv_fnmatch_supported" = "yes"; then
+
+printf "%s\n" "#define Py_HAVE_FNMATCH 1" >>confdefs.h
+
+fi
+
 # check for systems that require aligned memory access
 { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking aligned memory access is required" >&5
 printf %s "checking aligned memory access is required... " >&6; }
@@ -27681,6 +27737,7 @@ SRCDIRS="\
   Modules/_ctypes \
   Modules/_decimal \
   Modules/_decimal/libmpdec \
+  Modules/_fnmatch \
   Modules/_hacl \
   Modules/_io \
   Modules/_multiprocessing \
@@ -29013,6 +29070,28 @@ MODULE_BLOCK=
 
 
 
+        if test "$py_cv_module__fnmatch" != "n/a"
+then :
+  py_cv_module__fnmatch=yes
+fi
+   if test "$py_cv_module__fnmatch" = yes; then
+  MODULE__FNMATCH_TRUE=
+  MODULE__FNMATCH_FALSE='#'
+else
+  MODULE__FNMATCH_TRUE='#'
+  MODULE__FNMATCH_FALSE=
+fi
+
+  as_fn_append MODULE_BLOCK "MODULE__FNMATCH_STATE=$py_cv_module__fnmatch$as_nl"
+  if test "x$py_cv_module__fnmatch" = xyes
+then :
+
+    as_fn_append MODULE_BLOCK "MODULE__FNMATCH_CFLAGS=-I\$(srcdir)/Modules/_fnmatch$as_nl"
+
+
+fi
+
+
         if test "$py_cv_module__io" != "n/a"
 then :
   py_cv_module__io=yes
@@ -31744,6 +31823,10 @@ LTLIBOBJS=$ac_ltlibobjs
 
 
 
+if test -z "${MODULE__FNMATCH_TRUE}" && test -z "${MODULE__FNMATCH_FALSE}"; then
+  as_fn_error $? "conditional \"MODULE__FNMATCH\" was never defined.
+Usually this means the macro was only invoked conditionally." "$LINENO" 5
+fi
 if test -z "${MODULE__IO_TRUE}" && test -z "${MODULE__IO_FALSE}"; then
   as_fn_error $? "conditional \"MODULE__IO\" was never defined.
 Usually this means the macro was only invoked conditionally." "$LINENO" 5

From cb29bd30546191c06d82799462e59f3607757eae Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?=
 <10796600+picnixz@users.noreply.github.com>
Date: Wed, 10 Jul 2024 18:52:00 +0200
Subject: [PATCH 29/97] update tests

---
 Lib/test/test_fnmatch.py | 104 ++++++++++++++++++++++++++++++---------
 1 file changed, 80 insertions(+), 24 deletions(-)

diff --git a/Lib/test/test_fnmatch.py b/Lib/test/test_fnmatch.py
index f7e9391722ac38..6d72df182af862 100644
--- a/Lib/test/test_fnmatch.py
+++ b/Lib/test/test_fnmatch.py
@@ -1,4 +1,6 @@
 """Test cases for the fnmatch module."""
+import itertools
+
 import os
 import string
 import unittest
@@ -6,25 +8,22 @@
 
 import test.support.import_helper
 
-c_fnmatch = test.support.import_helper.import_fresh_module("_fnmatch", blocked=["fnmatch"])
+c_fnmatch = test.support.import_helper.import_fresh_module("_fnmatch")
 py_fnmatch = test.support.import_helper.import_fresh_module("fnmatch", blocked=["_fnmatch"])
 
-fnmatch = py_fnmatch.fnmatch
-fnmatchcase = py_fnmatch.fnmatchcase
-translate  = py_fnmatch.translate
-filter = py_fnmatch.filter
+class FnmatchTestCaseMixin:
+    fnmatch = None
 
-class FnmatchTestCase(unittest.TestCase):
+    def check_match(self, filename, pattern, should_match=True, func=None):
+        if func is None:
+            func = self.fnmatch.fnmatch
 
-    def check_match(self, filename, pattern, should_match=True, fn=fnmatch):
-        if should_match:
-            self.assertTrue(fn(filename, pattern),
-                         "expected %r to match pattern %r"
-                         % (filename, pattern))
-        else:
-            self.assertFalse(fn(filename, pattern),
-                         "expected %r not to match pattern %r"
-                         % (filename, pattern))
+        with self.subTest(fn=func, name=filename, pattern=pattern):
+            res = func(filename, pattern)
+            if should_match:
+                self.assertTrue(res, f"expected {filename!r} to match pattern {pattern!r}")
+            else:
+                self.assertFalse(res, f"expected {filename!r} not to match pattern {pattern!r}")
 
     def test_fnmatch(self):
         check = self.check_match
@@ -61,13 +60,17 @@ def test_slow_fnmatch(self):
         check('a' * 50 + 'b', '*a*a*a*a*a*a*a*a*a*a', False)
 
     def test_mix_bytes_str(self):
+        fnmatch = self.fnmatch.fnmatch
         self.assertRaises(TypeError, fnmatch, 'test', b'*')
         self.assertRaises(TypeError, fnmatch, b'test', '*')
+
+        fnmatchcase = self.fnmatch.fnmatchcase
         self.assertRaises(TypeError, fnmatchcase, 'test', b'*')
         self.assertRaises(TypeError, fnmatchcase, b'test', '*')
 
     def test_fnmatchcase(self):
         check = self.check_match
+        fnmatchcase = self.fnmatch.fnmatchcase
         check('abc', 'abc', True, fnmatchcase)
         check('AbC', 'abc', False, fnmatchcase)
         check('abc', 'AbC', False, fnmatchcase)
@@ -223,11 +226,18 @@ def test_warnings(self):
             check(',', '[a-z+--A-Z]')
             check('.', '[a-z--/A-Z]')
 
+class PurePythonFnmatchTestCase(FnmatchTestCaseMixin, unittest.TestCase):
+    fnmatch = py_fnmatch
+
+class CPythonFnmatchTestCase(FnmatchTestCaseMixin, unittest.TestCase):
+    fnmatch = c_fnmatch
 
-class TranslateTestCase(unittest.TestCase):
+class TranslateTestCaseMixin:
+    fnmatch = None
 
     def test_translate(self):
         import re
+        translate = self.fnmatch.translate
         self.assertEqual(translate('*'), r'(?s:.*)\Z')
         self.assertEqual(translate('?'), r'(?s:.)\Z')
         self.assertEqual(translate('a?b*'), r'(?s:a.b.*)\Z')
@@ -257,6 +267,11 @@ def test_translate(self):
         self.assertTrue(re.match(fatre, 'cbabcaxc'))
         self.assertFalse(re.match(fatre, 'dabccbad'))
 
+class PurePythonTranslateTestCase(TranslateTestCaseMixin, unittest.TestCase):
+    fnmatch = py_fnmatch
+
+class CPythonTranslateTestCase(TranslateTestCaseMixin, unittest.TestCase):
+    fnmatch = c_fnmatch
 
 class FilterTestCaseMixin:
     fnmatch = None
@@ -268,31 +283,72 @@ def test_filter(self):
         self.assertEqual(filter([b'Python', b'Ruby', b'Perl', b'Tcl'], b'P*'),
                          [b'Python', b'Perl'])
 
-    def test_mix_bytes_str(self):
-        filter = self.fnmatch.filter
-        self.assertRaises(TypeError, filter, ['test'], b'*')
-        self.assertRaises(TypeError, filter, [b'test'], '*')
-
-class PurePythonFilterTestCase(FilterTestCaseMixin, unittest.TestCase):
-    fnmatch = py_fnmatch
-
     def test_case(self):
         ignorecase = os.path.normcase('P') == os.path.normcase('p')
+        filter = self.fnmatch.filter
         self.assertEqual(filter(['Test.py', 'Test.rb', 'Test.PL'], '*.p*'),
                          ['Test.py', 'Test.PL'] if ignorecase else ['Test.py'])
         self.assertEqual(filter(['Test.py', 'Test.rb', 'Test.PL'], '*.P*'),
                          ['Test.py', 'Test.PL'] if ignorecase else ['Test.PL'])
 
     def test_sep(self):
+        filter = self.fnmatch.filter
         normsep = os.path.normcase('\\') == os.path.normcase('/')
         self.assertEqual(filter(['usr/bin', 'usr', 'usr\\lib'], 'usr/*'),
                          ['usr/bin', 'usr\\lib'] if normsep else ['usr/bin'])
         self.assertEqual(filter(['usr/bin', 'usr', 'usr\\lib'], 'usr\\*'),
                          ['usr/bin', 'usr\\lib'] if normsep else ['usr\\lib'])
 
+    def test_mix_bytes_str(self):
+        filter = self.fnmatch.filter
+        self.assertRaises(TypeError, filter, ['test'], b'*')
+        self.assertRaises(TypeError, filter, [b'test'], '*')
+
+class PurePythonFilterTestCase(FilterTestCaseMixin, unittest.TestCase):
+    fnmatch = py_fnmatch
+
 class CPythonFilterTestCase(FilterTestCaseMixin, unittest.TestCase):
     fnmatch = c_fnmatch
 
+    @staticmethod
+    def translate_func(pattern):
+        STAR = object()
+        parts = py_fnmatch._translate(pattern, STAR, '.')
+        return py_fnmatch._join_translated_parts(parts, STAR)
+
+    def test_translate(self):
+        # We want to check that the C implementation is EXACTLY the same
+        # as the Python implementation. For that, we will need to cover
+        # a lot of cases.
+        translate = self.fnmatch.translate
+
+        for choice in itertools.combinations_with_replacement('*?.', 5):
+            for suffix in ['', '!']:
+                pat = suffix + ''.join(choice)
+                with self.subTest(pattern=pat):
+                    self.assertEqual(translate(pat), self.translate_func(pat))
+
+        for pat in [
+            '',
+            '!!a*', '!\\!a*', '!a*', '*', '**', '*******?', '*******c', '*****??', '**/',
+            '*.js', '*/man*/bash.*', '*???', '?', '?*****??', '?*****?c', '?***?****',
+            '?***?****?', '?***?****c', '?*?', '??', '???', '???*', '[!\\]',
+            '[*', '[-abc]', '[[]b', '[[a]b', '[\\\\]', '[\\]', '[]-]', '[][!]',
+            '[]]b', '[]a[]b', '[^a-c]*', '[a-\\z]',
+            '[a-c]b*', '[a-y]*[^c]', '[abc-]', '\\*',
+            '[0-4-3-2]', '[b-ac-z9-1]', '[!b-ac-z9-1]', '[!]b-ac-z9-1]',
+            '[]b-ac-z9-1]', '[]b-ac-z9-1]*', '*[]b-ac-z9-1]',
+            '\\**', '\\*\\*', 'a*', 'a*****?c', 'a****c**?**??*****', 'a***c',
+            'a**?**cd**?**??***k', 'a**?**cd**?**??***k**', 'a**?**cd**?**??k',
+            'a**?**cd**?**??k***', 'a*[^c]',
+            'a*cd**?**??k', 'a/*', 'a/**', 'a/**/b',
+            'a/**/b/**/c', 'a/.*/c', 'a/?', 'a/??', 'a[X-]b', 'a[\\.]c',
+            'a[\\b]c', 'a[bc', 'a\\*?/*', 'a\\*b/*',
+            'ab[!de]', 'ab[cd]', 'ab[cd]ef', 'abc', 'b*/', 'foo*',
+            'man/man1/bash.1'
+        ]:
+            with self.subTest(pattern=pat):
+                self.assertEqual(translate(pat), self.translate_func(pat))
 
 if __name__ == "__main__":
     unittest.main()

From c1fae2425b13c34aef2b4b267f2ca5bb1bb45578 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?=
 <10796600+picnixz@users.noreply.github.com>
Date: Wed, 10 Jul 2024 18:52:07 +0200
Subject: [PATCH 30/97] update files

---
 Modules/_fnmatch/_fnmatchmodule.c          | 277 +++++++++++++--------
 Modules/_fnmatch/_fnmatchmodule.h          | 113 ++++++---
 Modules/_fnmatch/clinic/_fnmatchmodule.c.h |  63 ++++-
 Modules/_fnmatch/posix.c                   | 161 ++++++++++--
 Modules/_fnmatch/regex.c                   |  22 +-
 Modules/_fnmatch/translate.c               | 243 +++++++++---------
 6 files changed, 596 insertions(+), 283 deletions(-)

diff --git a/Modules/_fnmatch/_fnmatchmodule.c b/Modules/_fnmatch/_fnmatchmodule.c
index 6e566991188861..6e4b23d83cfb53 100644
--- a/Modules/_fnmatch/_fnmatchmodule.c
+++ b/Modules/_fnmatch/_fnmatchmodule.c
@@ -1,115 +1,166 @@
-/*
- * C accelerator for the 'fnmatch' module (POSIX only).
- *
- * Most functions expect string or bytes instances, and thus the Python
- * implementation should first pre-process path-like objects, possibly
- * applying normalizations depending on the platform if needed.
- */
-
 #include "Python.h"
 #include "pycore_call.h" // for _PyObject_CallMethod
 
 #include "_fnmatchmodule.h"
 #include "clinic/_fnmatchmodule.c.h"
 
-#define INVALID_PATTERN_TYPE "pattern must be a string or a bytes object"
+#define COMPILED_CACHE_SIZE     32768
+#define INVALID_PATTERN_TYPE    "pattern must be a string or a bytes object"
+
+// ==== Helper implementations ================================================
+
+/*
+ * Compile a UNIX shell pattern into a RE pattern
+ * and return the corresponding 'match()' method.
+ *
+ * This function is LRU-cached by the module itself.
+ */
+static PyObject *
+fnmatchmodule_get_matcher_function(PyObject *module, PyObject *pattern)
+{
+    // translate the pattern into a RE pattern
+    assert(module != NULL);
+    PyObject *expr = _fnmatch_translate_impl(module, pattern);
+    if (expr == NULL) {
+        return NULL;
+    }
+    fnmatchmodule_state *st = get_fnmatchmodulestate_state(module);
+    // compile the pattern
+    PyObject *compiled = _PyObject_CallMethod(st->re_module, &_Py_ID(compile), "O", expr);
+    Py_DECREF(expr);
+    if (compiled == NULL) {
+        return NULL;
+    }
+    // get the compiled pattern matcher function
+    PyObject *matcher = PyObject_GetAttr(compiled, &_Py_ID(match));
+    Py_DECREF(compiled);
+    return matcher;
+}
 
-// module state functions
+static PyMethodDef get_matcher_function_def = {
+    "get_matcher_function",
+    (PyCFunction)(fnmatchmodule_get_matcher_function),
+    METH_O,
+    NULL
+};
 
 static int
-fnmatchmodule_clear(PyObject *m)
+fnmatchmodule_load_lru_cache(PyObject *module, fnmatchmodule_state *st)
 {
-    fnmatchmodule_state *st = get_fnmatchmodulestate_state(m);
-    Py_CLEAR(st->os_module);
-    Py_CLEAR(st->re_module);
-    Py_CLEAR(st->lru_cache);
+    st->lru_cache = _PyImport_GetModuleAttrString("functools", "lru_cache");
+    if (st->lru_cache == NULL) {
+        return -1;
+    }
     return 0;
 }
 
 static int
-fnmatchmodule_traverse(PyObject *m, visitproc visit, void *arg)
+fnmatchmodule_load_translator(PyObject *module, fnmatchmodule_state *st)
 {
-    fnmatchmodule_state *st = get_fnmatchmodulestate_state(m);
-    Py_VISIT(st->os_module);
-    Py_VISIT(st->re_module);
-    Py_VISIT(st->lru_cache);
+    assert(st->lru_cache != NULL);
+    PyObject *maxsize = PyLong_FromLong(COMPILED_CACHE_SIZE);
+    if (maxsize == NULL) {
+        return -1;
+    }
+    PyObject *args[] = {NULL, maxsize, Py_True};  // args[0] is the spare vectorcall slot
+    size_t nargsf = 2 | PY_VECTORCALL_ARGUMENTS_OFFSET;
+    PyObject *decorator = PyObject_Vectorcall(st->lru_cache, args + 1, nargsf, NULL);
+    Py_DECREF(maxsize);
+    if (decorator == NULL) {
+        return -1;
+    }
+    // TODO(picnixz): should INCREF the refcount of 'module'?
+    assert(module != NULL);
+    PyObject *decorated = PyCFunction_New(&get_matcher_function_def, module);
+    PyObject *translator = decorated ? PyObject_CallOneArg(decorator, decorated) : NULL;
+    Py_XDECREF(decorated);  // NULL (with an exception set) if PyCFunction_New() failed
+    Py_DECREF(decorator);
+    if (translator == NULL) {
+        return -1;
+    }
+    // reference on 'translator' will be removed upon module cleanup
+    st->translator = translator;
     return 0;
 }
 
-static void
-fnmatchmodule_free(void *m)
+static inline PyObject *
+get_matcher_function(PyObject *module, PyObject *pattern)
 {
-    fnmatchmodule_clear((PyObject *) m);
+    assert(module != NULL);
+    assert(pattern != NULL);
+    fnmatchmodule_state *st = get_fnmatchmodulestate_state(module);
+    assert(st->translator != NULL);
+    // not a vectorcall with ARGUMENTS_OFFSET: '&pattern' has no writable args[-1] slot
+    return PyObject_CallOneArg(st->translator, pattern);
 }
 
-static int
-fnmatchmodule_exec(PyObject *m)
-{
-#define IMPORT_MODULE(attr, name) \
-    do { \
-        state->attr = PyImport_ImportModule((name)); \
-        if (state->attr == NULL) { \
-            return -1; \
-        } \
-    } while (0)
+// ==== Module state functions ================================================
 
-#define INTERN_STRING(attr, str) \
+#define IMPORT_MODULE(state, attribute, name) \
     do { \
-        state->attr = PyUnicode_InternFromString((str)); \
-        if (state->attr == NULL) { \
+        state->attribute = NULL; \
+        state->attribute = PyImport_ImportModule((name)); \
+        if (state->attribute == NULL) { \
             return -1; \
         } \
     } while (0)
 
-    fnmatchmodule_state *state = get_fnmatchmodulestate_state(m);
-
-    // imports
-    IMPORT_MODULE(os_module, "os");
-    IMPORT_MODULE(re_module, "re");
-
-    // helpers
-    state->lru_cache = _PyImport_GetModuleAttrString("functools", "lru_cache");
-    if (state->lru_cache == NULL) {
+static int
+fnmatchmodule_exec(PyObject *module)
+{
+    fnmatchmodule_state *st = get_fnmatchmodulestate_state(module);
+    st->py_module = NULL;
+    IMPORT_MODULE(st, py_module, "fnmatch");
+    st->os_module = NULL;
+    IMPORT_MODULE(st, os_module, "os");
+    st->re_module = NULL;
+    IMPORT_MODULE(st, re_module, "re");
+    st->lru_cache = NULL;
+    if (fnmatchmodule_load_lru_cache(module, st) < 0) {
         return -1;
     }
-    // todo: handle LRU cache
-
+    st->translator = NULL;
+    if (fnmatchmodule_load_translator(module, st) < 0) {
+        return -1;
+    }
+    return 0;
+}
 #undef IMPORT_MODULE
-#undef INTERN_STRING
 
+static int
+fnmatchmodule_traverse(PyObject *m, visitproc visit, void *arg)
+{
+    fnmatchmodule_state *st = get_fnmatchmodulestate_state(m);
+    Py_VISIT(st->py_module);
+    Py_VISIT(st->os_module);
+    Py_VISIT(st->re_module);
+    Py_VISIT(st->lru_cache);
+    Py_VISIT(st->translator);
     return 0;
 }
 
-/*[clinic input]
-module _fnmatch
-[clinic start generated code]*/
-/*[clinic end generated code: output=da39a3ee5e6b4b0d input=356e324d57d93f08]*/
+static int
+fnmatchmodule_clear(PyObject *m)
+{
+    fnmatchmodule_state *st = get_fnmatchmodulestate_state(m);
+    Py_CLEAR(st->py_module);
+    Py_CLEAR(st->os_module);
+    Py_CLEAR(st->re_module);
+    Py_CLEAR(st->lru_cache);
+    Py_CLEAR(st->translator);
+    return 0;
+}
 
-static PyObject *
-get_match_function(PyObject *module, PyObject *pattern)
+static void
+fnmatchmodule_free(void *m)
 {
-    // TODO(picnixz): use LRU-cache
-    PyObject *expr = _fnmatch_translate_impl(module, pattern);
-    if (expr == NULL) {
-        return NULL;
-    }
-    fnmatchmodule_state *st = get_fnmatchmodulestate_state(module);
-    PyObject *compiled = _PyObject_CallMethod(st->re_module, &_Py_ID(compile), "O", expr);
-    Py_DECREF(expr);
-    if (compiled == NULL) {
-        return NULL;
-    }
-    PyObject *matcher = PyObject_GetAttr(compiled, &_Py_ID(match));
-    Py_DECREF(compiled);
-    return matcher;
+    (void)fnmatchmodule_clear((PyObject *)m);
 }
 
-static PyMethodDef get_match_function_method_def = {
-    "get_match_function",
-    _PyCFunction_CAST(get_match_function),
-    METH_O,
-    NULL
-};
+/*[clinic input]
+module _fnmatch
+[clinic start generated code]*/
+/*[clinic end generated code: output=da39a3ee5e6b4b0d input=356e324d57d93f08]*/
 
 /*[clinic input]
 _fnmatch.filter -> object
@@ -123,30 +174,55 @@ static PyObject *
 _fnmatch_filter_impl(PyObject *module, PyObject *names, PyObject *pat)
 /*[clinic end generated code: output=7f11aa68436d05fc input=1d233174e1c4157a]*/
 {
-#ifndef Py_HAVE_FNMATCH
-    PyObject *matcher = get_match_function(module, pat);
-    if (matcher == NULL) {
-        return NULL;
-    }
-    PyObject *result = _regex_fnmatch_filter(matcher, names);
-    Py_DECREF(matcher);
-    return result;
-#else
+#if defined(Py_HAVE_FNMATCH) && !defined(Py_USE_FNMATCH_FALLBACK)
     // Note that the Python implementation of fnmatch.filter() does not
     // call os.fspath() on the names being matched, whereas it does on NT.
     if (PyBytes_Check(pat)) {
         const char *pattern = PyBytes_AS_STRING(pat);
-        return _posix_fnmatch_filter(pattern, names, &_posix_fnmatch_encoded);
+        return _posix_fnmatch_encoded_filter_cached(pattern, names);
     }
     if (PyUnicode_Check(pat)) {
         const char *pattern = PyUnicode_AsUTF8(pat);
-        return _posix_fnmatch_filter(pattern, names, &_posix_fnmatch_unicode);
+        return _posix_fnmatch_unicode_filter_cached(pattern, names);
     }
     PyErr_SetString(PyExc_TypeError, INVALID_PATTERN_TYPE);
     return NULL;
+#else
+    PyObject *matcher = get_matcher_function(module, pat);
+    if (matcher == NULL) {
+        return NULL;
+    }
+    PyObject *result = _regex_fnmatch_filter(matcher, names);
+    Py_DECREF(matcher);
+    return result;
 #endif
 }
 
+/*[clinic input]
+_fnmatch.fnmatch -> bool
+
+    name: object
+    pat: object
+
+[clinic start generated code]*/
+
+static int
+_fnmatch_fnmatch_impl(PyObject *module, PyObject *name, PyObject *pat)
+/*[clinic end generated code: output=b4cd0bd911e8bc93 input=c45e0366489540b8]*/
+{
+    fnmatchmodule_state *st = get_fnmatchmodulestate_state(module);
+    PyObject *res = _PyObject_CallMethod(st->py_module, &_Py_ID(fnmatch), "OO", name, pat);
+    if (res == NULL) {
+        return -1;
+    }
+    int matching = PyLong_AsLong(res);
+    Py_DECREF(res);  // release before the error check so that no path leaks 'res'
+    if (matching < 0) {
+        return -1;
+    }
+    return matching;
+}
+
 /*[clinic input]
 _fnmatch.fnmatchcase -> bool
 
@@ -164,28 +240,28 @@ static int
 _fnmatch_fnmatchcase_impl(PyObject *module, PyObject *name, PyObject *pat)
 /*[clinic end generated code: output=4d1283b1b1fc7cb8 input=b02a6a5c8c5a46e2]*/
 {
-#ifndef Py_HAVE_FNMATCH
-    PyObject *matcher = get_match_function(module, pat);
-    if (matcher == NULL) {
-        return -1;
-    }
-    int res = _regex_fnmatch_generic(matcher, name);
-    Py_DECREF(matcher);
-    return res;
-#else
+#if defined(Py_HAVE_FNMATCH) && !defined(Py_USE_FNMATCH_FALLBACK)
     // This function does not transform path-like objects, nor does it
     // case-normalize 'name' or 'pattern' (whether it is the Python or
     // the C implementation).
     if (PyBytes_Check(pat)) {
         const char *pattern = PyBytes_AS_STRING(pat);
-        return _posix_fnmatch_encoded(pattern, name);
+        return _posix_fnmatch_encoded_cached(pattern, name);
     }
     if (PyUnicode_Check(pat)) {
         const char *pattern = PyUnicode_AsUTF8(pat);
-        return _posix_fnmatch_unicode(pattern, name);
+        return _posix_fnmatch_unicode_cached(pattern, name);
     }
     PyErr_SetString(PyExc_TypeError, INVALID_PATTERN_TYPE);
     return -1;
+#else
+    PyObject *matcher = get_matcher_function(module, pat);
+    if (matcher == NULL) {
+        return -1;
+    }
+    int res = _regex_fnmatch_generic(matcher, name);
+    Py_DECREF(matcher);
+    return res;
 #endif
 }
 
@@ -208,7 +284,7 @@ _fnmatch_translate_impl(PyObject *module, PyObject *pattern)
             return NULL;
         }
         // translated regular expression as a str object
-        PyObject *str_expr = translate(module, unicode);
+        PyObject *str_expr = _regex_translate(module, unicode);
         Py_DECREF(unicode);
         if (str_expr == NULL) {
             return NULL;
@@ -218,7 +294,7 @@ _fnmatch_translate_impl(PyObject *module, PyObject *pattern)
         return expr;
     }
     else if (PyUnicode_Check(pattern)) {
-        return translate(module, pattern);
+        return _regex_translate(module, pattern);
     }
     else {
         PyErr_SetString(PyExc_TypeError, INVALID_PATTERN_TYPE);
@@ -228,6 +304,7 @@ _fnmatch_translate_impl(PyObject *module, PyObject *pattern)
 
 static PyMethodDef fnmatchmodule_methods[] = {
     _FNMATCH_FILTER_METHODDEF
+    _FNMATCH_FNMATCH_METHODDEF
     _FNMATCH_FNMATCHCASE_METHODDEF
     _FNMATCH_TRANSLATE_METHODDEF
     {NULL, NULL}
@@ -242,8 +319,8 @@ static struct PyModuleDef_Slot fnmatchmodule_slots[] = {
 
 static struct PyModuleDef _fnmatchmodule = {
     PyModuleDef_HEAD_INIT,
-    "_fnmatch",
-    NULL,
+    .m_name = "_fnmatch",
+    .m_doc = NULL,
     .m_size = sizeof(fnmatchmodule_state),
     .m_methods = fnmatchmodule_methods,
     .m_slots = fnmatchmodule_slots,
diff --git a/Modules/_fnmatch/_fnmatchmodule.h b/Modules/_fnmatch/_fnmatchmodule.h
index af271703791be3..2311e35efe691e 100644
--- a/Modules/_fnmatch/_fnmatchmodule.h
+++ b/Modules/_fnmatch/_fnmatchmodule.h
@@ -1,13 +1,35 @@
+/*
+ * C accelerator for the 'fnmatch' module (POSIX only).
+ *
+ * Most functions expect string or bytes instances, and thus the Python
+ * implementation should first pre-process path-like objects, possibly
+ * applying normalizations depending on the platform if needed.
+ */
+
 #ifndef _FNMATCHMODULE_H
 #define _FNMATCHMODULE_H
 
 #include "Python.h"
 
+#undef Py_USE_FNMATCH_FALLBACK
+/*
+ * For now, only test the C acceleration of the Python implementation.
+ *
+ * TODO(picnixz): Currently, I don't know how to handle backslashes
+ * TODO(picnixz): in fnmatch(3) so that they are treated correctly
+ * TODO(picnixz): depending on whether the string was a raw string
+ * TODO(picnixz): or not. To see the bug, uncomment the following
+ * TODO(picnixz): macro and run the tests.
+ */
+#define Py_USE_FNMATCH_FALLBACK 1
+
 typedef struct {
-    PyObject *re_module; // 're' module
-    PyObject *os_module; // 'os' module
+    PyObject *py_module;    // 'fnmatch' module
+    PyObject *re_module;    // 're' module
+    PyObject *os_module;    // 'os' module
 
-    PyObject *lru_cache; // optional cache for regex patterns, if needed
+    PyObject *lru_cache;    // the LRU cache decorator
+    PyObject *translator;   // the translation unit whose calls are cached
 } fnmatchmodule_state;
 
 static inline fnmatchmodule_state *
@@ -18,33 +40,22 @@ get_fnmatchmodulestate_state(PyObject *module)
     return (fnmatchmodule_state *)state;
 }
 
+#if defined(Py_HAVE_FNMATCH) && !defined(Py_USE_FNMATCH_FALLBACK)
 /*
- * The filter() function works differently depending on whether fnmatch(3)
- * is present or not.
- *
- * If fnmatch(3) is present, the match is performed without using regular
- * expressions. The functions being used are
- *
- * If fnmatch(3) is not present, the match is performed using regular
- * expressions.
- */
-
-#ifdef Py_HAVE_FNMATCH
-/*
- * Type for a matching function.
- *
- * The function must take as input a pattern and a name,
- * and is used to determine whether the name matches the
- * pattern or not.
- *
- * If the pattern is obtained from str() types, then 'name'
- * must be a string (it is left to the matcher the task for
- * validating this part).
+ * Construct a list of filtered names using fnmatch(3).
  */
-typedef int (*Matcher)(const char *, PyObject *);
+extern PyObject *
+_posix_fnmatch_encoded_filter(PyObject *pattern, PyObject *names);
+/* Same as _posix_fnmatch_encoded_filter() but for unicode inputs. */
+extern PyObject *
+_posix_fnmatch_unicode_filter(PyObject *pattern, PyObject *names);
 
+/* cached 'pattern' version of _posix_fnmatch_encoded_filter() */
+extern PyObject *
+_posix_fnmatch_encoded_filter_cached(const char *pattern, PyObject *names);
+/* cached 'pattern' version of _posix_fnmatch_unicode_filter() */
 extern PyObject *
-_posix_fnmatch_filter(const char *pattern, PyObject *names, Matcher match);
+_posix_fnmatch_unicode_filter_cached(const char *pattern, PyObject *names);
 
 /*
  * Perform a case-sensitive match using fnmatch(3).
@@ -59,15 +70,53 @@ _posix_fnmatch_filter(const char *pattern, PyObject *names, Matcher match);
  * Returns -1 if (1) 'string' is not a `bytes` object, and
  * sets a TypeError exception, or (2) something went wrong.
  */
-extern int _posix_fnmatch_encoded(const char *pattern, PyObject *string);
+extern int
+_posix_fnmatch_encoded(PyObject *pattern, PyObject *string);
 /* Same as _posix_fnmatch_encoded() but for unicode inputs. */
-extern int _posix_fnmatch_unicode(const char *pattern, PyObject *string);
-#else
-extern int _regex_fnmatch_generic(PyObject *matcher, PyObject *name);
+extern int
+_posix_fnmatch_unicode(PyObject *pattern, PyObject *string);
+
+/* cached 'pattern' version of _posix_fnmatch_encoded() */
+extern int
+_posix_fnmatch_encoded_cached(const char *pattern, PyObject *names);
+/* cached 'pattern' version of _posix_fnmatch_unicode() */
+extern int
+_posix_fnmatch_unicode_cached(const char *pattern, PyObject *names);
+#endif
+
+/*
+ * Test whether a name matches a compiled RE pattern.
+ *
+ * Parameters
+ *
+ *      matcher  A reference to the 'match()' method of a compiled pattern.
+ *      string   The string to match (str or bytes object).
+ *
+ * Returns 1 if the 'string' matches the pattern and 0 otherwise.
+ *
+ * Returns -1 if (1) 'string' is not a `str` or a `bytes` object,
+ * and sets a TypeError exception, or (2) something went wrong.
+ */
+extern int
+_regex_fnmatch_generic(PyObject *matcher, PyObject *string);
+
+/*
+ * Perform a case-sensitive match using compiled RE patterns.
+ *
+ * Parameters
+ *
+ *      matcher  A reference to the 'match()' method of a compiled pattern.
+ *      names    An iterable of strings (str or bytes objects) to match.
+ *
+ * Returns a list of matched names, or NULL if an error occurred.
+ */
 extern PyObject *
 _regex_fnmatch_filter(PyObject *matcher, PyObject *names);
-#endif
 
-extern PyObject *translate(PyObject *module, PyObject *pattern);
+/*
+ * C accelerator for translating UNIX shell patterns into RE patterns.
+ */
+extern PyObject *
+_regex_translate(PyObject *module, PyObject *pattern);
 
 #endif // _FNMATCHMODULE_H
diff --git a/Modules/_fnmatch/clinic/_fnmatchmodule.c.h b/Modules/_fnmatch/clinic/_fnmatchmodule.c.h
index 4b12f33113d3fb..5250bddbecc273 100644
--- a/Modules/_fnmatch/clinic/_fnmatchmodule.c.h
+++ b/Modules/_fnmatch/clinic/_fnmatchmodule.c.h
@@ -64,6 +64,67 @@ _fnmatch_filter(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObj
     return return_value;
 }
 
+PyDoc_STRVAR(_fnmatch_fnmatch__doc__,
+"fnmatch($module, /, name, pat)\n"
+"--\n"
+"\n");
+
+#define _FNMATCH_FNMATCH_METHODDEF    \
+    {"fnmatch", _PyCFunction_CAST(_fnmatch_fnmatch), METH_FASTCALL|METH_KEYWORDS, _fnmatch_fnmatch__doc__},
+
+static int
+_fnmatch_fnmatch_impl(PyObject *module, PyObject *name, PyObject *pat);
+
+static PyObject *
+_fnmatch_fnmatch(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames)
+{
+    PyObject *return_value = NULL;
+    #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE)
+
+    #define NUM_KEYWORDS 2
+    static struct {
+        PyGC_Head _this_is_not_used;
+        PyObject_VAR_HEAD
+        PyObject *ob_item[NUM_KEYWORDS];
+    } _kwtuple = {
+        .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS)
+        .ob_item = { &_Py_ID(name), &_Py_ID(pat), },
+    };
+    #undef NUM_KEYWORDS
+    #define KWTUPLE (&_kwtuple.ob_base.ob_base)
+
+    #else  // !Py_BUILD_CORE
+    #  define KWTUPLE NULL
+    #endif  // !Py_BUILD_CORE
+
+    static const char * const _keywords[] = {"name", "pat", NULL};
+    static _PyArg_Parser _parser = {
+        .keywords = _keywords,
+        .fname = "fnmatch",
+        .kwtuple = KWTUPLE,
+    };
+    #undef KWTUPLE
+    PyObject *argsbuf[2];
+    PyObject *name;
+    PyObject *pat;
+    int _return_value;
+
+    args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, 2, 2, 0, argsbuf);
+    if (!args) {
+        goto exit;
+    }
+    name = args[0];
+    pat = args[1];
+    _return_value = _fnmatch_fnmatch_impl(module, name, pat);
+    if ((_return_value == -1) && PyErr_Occurred()) {
+        goto exit;
+    }
+    return_value = PyBool_FromLong((long)_return_value);
+
+exit:
+    return return_value;
+}
+
 PyDoc_STRVAR(_fnmatch_fnmatchcase__doc__,
 "fnmatchcase($module, /, name, pat)\n"
 "--\n"
@@ -182,4 +243,4 @@ _fnmatch_translate(PyObject *module, PyObject *const *args, Py_ssize_t nargs, Py
 exit:
     return return_value;
 }
-/*[clinic end generated code: output=b0366b259b101bdf input=a9049054013a1b77]*/
+/*[clinic end generated code: output=d9bb3df00c5c2b5e input=a9049054013a1b77]*/
diff --git a/Modules/_fnmatch/posix.c b/Modules/_fnmatch/posix.c
index 30d0845d7bae88..d4fdbb42ba0210 100644
--- a/Modules/_fnmatch/posix.c
+++ b/Modules/_fnmatch/posix.c
@@ -1,24 +1,95 @@
-#ifdef Py_HAVE_FNMATCH
+#include "Python.h"
+
+#include "_fnmatchmodule.h" // for pre-declarations
+
+#if defined(Py_HAVE_FNMATCH) && !defined(Py_USE_FNMATCH_FALLBACK)
 
 #include <fnmatch.h>        // for fnmatch(3)
 
-#include "Python.h"
-#include "_fnmatchmodule.h" // for PosixMatcher
+#define INVALID_PATTERN_TYPE "pattern must be a %s object, got %.200s"
+#define INVALID_NAME_TYPE    "name must be a %s object, got %.200s"
 
-#define INVALID_TYPE_FOR_NAME "name must be a %s object, got %.200s"
+// ==== Helper declarations ===================================================
 
-#define VERIFY_NAME_ARG_TYPE(name, check, expecting) \
-    do { \
-        if (!check) { \
-            PyErr_Format(PyExc_TypeError, INVALID_TYPE_FOR_NAME, \
-                         expecting, Py_TYPE(name)->tp_name); \
-            return -1; \
-        } \
-    } while (0)
+/*
+ * Return a bytes object as a "const char *", or NULL on error.
+ *
+ * The 'error' message is either INVALID_PATTERN_TYPE or INVALID_NAME_TYPE,
+ * and is used to set a TypeError if 'arg' is of incorrect type.
+ */
+static inline const char *
+from_encoded(PyObject *arg, const char *error);
+
+/*
+ * Return a str object as a UTF-8 encoded "const char *", or NULL on error.
+ *
+ * The 'error' message is either INVALID_PATTERN_TYPE or INVALID_NAME_TYPE
+ * and is used to set a TypeError if 'arg' is of incorrect type.
+ */
+static inline const char *
+from_unicode(PyObject *arg, const char *error);
+
+/* The type of from_encoded() or from_unicode() conversion functions. */
+typedef const char *(*Converter)(PyObject *string, const char *error);
+
+static inline PyObject *
+_posix_fnmatch_filter(PyObject *pattern, PyObject *names, Converter converter);
+
+/* cached 'pattern' version of _posix_fnmatch_filter()  */
+static /* not inline */ PyObject *
+_posix_fnmatch_filter_cached(const char *pattern, PyObject *names, Converter converter);
+
+// ==== API implementation ====================================================
+
+inline PyObject *
+_posix_fnmatch_encoded_filter(PyObject *pattern, PyObject *names)
+{
+    return _posix_fnmatch_filter(pattern, names, &from_encoded);
+}
+
+inline PyObject *
+_posix_fnmatch_unicode_filter(PyObject *pattern, PyObject *names)
+{
+    return _posix_fnmatch_filter(pattern, names, &from_unicode);
+}
+
+inline PyObject *
+_posix_fnmatch_encoded_filter_cached(const char *pattern, PyObject *names)
+{
+    assert(pattern != NULL);
+    return _posix_fnmatch_filter_cached(pattern, names, &from_encoded);
+}
+
+inline PyObject *
+_posix_fnmatch_unicode_filter_cached(const char *pattern, PyObject *names)
+{
+    assert(pattern != NULL);
+    return _posix_fnmatch_filter_cached(pattern, names, &from_unicode);
+}
+
+inline int
+_posix_fnmatch_encoded(PyObject *pattern, PyObject *string)
+{
+    const char *p = from_encoded(pattern, INVALID_PATTERN_TYPE);
+    if (p == NULL) {
+        return -1;
+    }
+    return _posix_fnmatch_encoded_cached(p, string);
+}
+
+inline int
+_posix_fnmatch_unicode(PyObject *pattern, PyObject *string)
+{
+    const char *p = from_unicode(pattern, INVALID_PATTERN_TYPE);
+    if (p == NULL) {
+        return -1;
+    }
+    return _posix_fnmatch_unicode_cached(p, string);
+}
 
 #define PROCESS_MATCH_RESULT(r) \
     do { \
-        int res = (r); /* avoid variable capture */ \
+        int res = (r); \
         if (res < 0) { \
             return res; \
         } \
@@ -26,40 +97,75 @@
     } while (0)
 
 inline int
-_posix_fnmatch_encoded(const char *pattern, PyObject *string)
+_posix_fnmatch_encoded_cached(const char *pattern, PyObject *string)
 {
-    VERIFY_NAME_ARG_TYPE(string, PyBytes_Check(string), "bytes");
-    PROCESS_MATCH_RESULT(fnmatch(pattern, PyBytes_AS_STRING(string), 0));
+    assert(pattern != NULL);
+    const char *s = from_encoded(string, INVALID_NAME_TYPE);
+    if (s == NULL) {
+        return -1;
+    }
+    PROCESS_MATCH_RESULT(fnmatch(pattern, s, 0));
 }
 
 inline int
-_posix_fnmatch_unicode(const char *pattern, PyObject *string)
+_posix_fnmatch_unicode_cached(const char *pattern, PyObject *string)
+{
+    assert(pattern != NULL);
+    const char *s = from_unicode(string, INVALID_NAME_TYPE);
+    if (s == NULL) {
+        return -1;
+    }
+    PROCESS_MATCH_RESULT(fnmatch(pattern, s, 0));
+}
+
+#undef PROCESS_MATCH_RESULT
+
+// ==== Helper implementations ================================================
+
+#define GENERATE_CONVERTER(function, predicate, converter, expecting) \
+    static inline const char * \
+    function(PyObject *arg, const char *error) \
+    { \
+        if (!predicate(arg)) { \
+            PyErr_Format(PyExc_TypeError, error, expecting, Py_TYPE(arg)->tp_name); \
+            return NULL; \
+        } \
+        return converter(arg); \
+    }
+GENERATE_CONVERTER(from_encoded, PyBytes_Check, PyBytes_AS_STRING, "bytes")
+GENERATE_CONVERTER(from_unicode, PyUnicode_Check, PyUnicode_AsUTF8, "str")
+#undef GENERATE_CONVERTER
+
+static inline PyObject *
+_posix_fnmatch_filter(PyObject *pattern, PyObject *names, Converter converter)
 {
-    VERIFY_NAME_ARG_TYPE(string, PyUnicode_Check(string), "string");
-    PROCESS_MATCH_RESULT(fnmatch(pattern, PyUnicode_AsUTF8(string), 0));
+    const char *p = converter(pattern, INVALID_PATTERN_TYPE);
+    if (p == NULL) {
+        return NULL;
+    }
+    return _posix_fnmatch_filter_cached(p, names, converter);
 }
 
-PyObject *
-_posix_fnmatch_filter(const char *pattern, PyObject *names, Matcher match)
+static PyObject *
+_posix_fnmatch_filter_cached(const char *pattern, PyObject *names, Converter converter)
 {
+    assert(pattern != NULL);
     PyObject *iter = PyObject_GetIter(names);
     if (iter == NULL) {
         return NULL;
     }
-
     PyObject *res = PyList_New(0);
     if (res == NULL) {
         Py_DECREF(iter);
         return NULL;
     }
-
     PyObject *name = NULL;
     while ((name = PyIter_Next(iter))) {
-        int rc = match(pattern, name);
-        if (rc < 0) {
+        const char *n = converter(name, INVALID_NAME_TYPE);
+        if (n == NULL) {
             goto abort;
         }
-        if (rc == 1) {
+        if (fnmatch(pattern, n, 0) != FNM_NOMATCH) {
             if (PyList_Append(res, name) < 0) {
                 goto abort;
             }
@@ -79,4 +185,7 @@ _posix_fnmatch_filter(const char *pattern, PyObject *names, Matcher match)
     Py_DECREF(res);
     return NULL;
 }
+
+#undef INVALID_NAME_TYPE
+#undef INVALID_PATTERN_TYPE
 #endif
diff --git a/Modules/_fnmatch/regex.c b/Modules/_fnmatch/regex.c
index 5ba96a214bc267..807e773635a9da 100644
--- a/Modules/_fnmatch/regex.c
+++ b/Modules/_fnmatch/regex.c
@@ -1,26 +1,20 @@
 #include "Python.h"
 
-/*
- * Perform a case-sensitive match using regular expressions.
- *
- * Parameters
- *
- *      pattern     A translated regular expression.
- *      name        The filename to match.
- *
- * Returns 1 if the 'name' matches the 'pattern' and 0 otherwise.
- * Returns -1 if something went wrong.
- */
-int
+#include "_fnmatchmodule.h" // for pre-declarations
+
+// ==== API implementation ====================================================
+
+inline int
 _regex_fnmatch_generic(PyObject *matcher, PyObject *name)
 {
     // If 'name' is of incorrect type, it will be detected when calling
     // the matcher function (we emulate 're.compile(...).match(name)').
+    assert(PyCallable_Check(matcher));
     PyObject *match = PyObject_CallFunction(matcher, "O", name);
     if (match == NULL) {
         return -1;
     }
-    int matching = match != Py_None;
+    int matching = match == Py_None ? 0 : 1;
     Py_DECREF(match);
     return matching;
 }
@@ -28,6 +22,7 @@ _regex_fnmatch_generic(PyObject *matcher, PyObject *name)
 PyObject *
 _regex_fnmatch_filter(PyObject *matcher, PyObject *names)
 {
+    assert(PyCallable_Check(matcher));
     PyObject *iter = PyObject_GetIter(names);
     if (iter == NULL) {
         return NULL;
@@ -43,6 +38,7 @@ _regex_fnmatch_filter(PyObject *matcher, PyObject *names)
     while ((name = PyIter_Next(iter))) {
         int rc = _regex_fnmatch_generic(matcher, name);
         if (rc < 0) {
+            assert(PyErr_Occurred());
             goto abort;
         }
         if (rc == 1) {
diff --git a/Modules/_fnmatch/translate.c b/Modules/_fnmatch/translate.c
index 8ac45d546826fc..d4d63a2693b7de 100644
--- a/Modules/_fnmatch/translate.c
+++ b/Modules/_fnmatch/translate.c
@@ -11,6 +11,33 @@
 
 // ==== Helper declarations ==================================================
 
+#define _WRITE_OR_FAIL(writeop, onerror) \
+    do { \
+        if ((writeop) < 0) { \
+            onerror; \
+        } \
+    } while (0)
+
+#define _WRITE_CHAR      _PyUnicodeWriter_WriteChar
+#define _WRITE_CHAR_OR(_writer, ch, onerror) \
+    _WRITE_OR_FAIL(_WRITE_CHAR((_writer), (ch)), onerror)
+
+#define _WRITE_ASCII     _PyUnicodeWriter_WriteASCIIString
+#define _WRITE_ASCII_OR(_writer, ascii, length, onerror) \
+    _WRITE_OR_FAIL(_WRITE_ASCII((_writer), (ascii), (length)), onerror)
+
+#define _WRITE_STRING    _PyUnicodeWriter_WriteStr
+#define _WRITE_STRING_OR(_writer, string, onerror) \
+    _WRITE_OR_FAIL(_WRITE_STRING((_writer), (string)), onerror)
+
+#define _WRITE_BLOCK    _PyUnicodeWriter_WriteSubstring
+#define _WRITE_BLOCK_OR(_writer, string, i, j, onerror) \
+    do { \
+        if ((i) < (j) && _WRITE_BLOCK((_writer), (string), (i), (j)) < 0) { \
+            onerror; \
+        } \
+    } while (0)
+
 /*
  * Creates a new Unicode object from a Py_UCS4 character.
  *
@@ -66,23 +93,23 @@ process_wildcards(PyObject *pattern, PyObject *indices);
 // ==== API implementation ====================================================
 
 PyObject *
-translate(PyObject *module, PyObject *pattern)
+_regex_translate(PyObject *module, PyObject *pattern)
 {
 #define READ(ind) PyUnicode_READ(kind, data, (ind))
 #define ADVANCE_IF_CHAR(ch, ind, maxind) \
     do { \
         if ((ind) < (maxind) && READ(ind) == (ch)) { \
-            ++(ind); \
+            ++ind; \
         } \
     } while (0)
 #define _WHILE_READ_CMP(ch, ind, maxind, cmp) \
     do { \
         while ((ind) < (maxind) && READ(ind) cmp (ch)) { \
-            ++(ind); \
+            ++ind; \
         } \
     } while (0)
 #define ADVANCE_TO_NEXT(ch, from, maxind) _WHILE_READ_CMP(ch, from, maxind, !=)
-#define DROP_DUPLICATES(ch, from, maxind) _WHILE_READ_CMP(ch, from, maxind, ==)
+#define SKIP_DUPLICATES(ch, from, maxind) _WHILE_READ_CMP(ch, from, maxind, ==)
 
     fnmatchmodule_state *state = get_fnmatchmodulestate_state(module);
     PyObject *re = state->re_module;
@@ -111,10 +138,8 @@ translate(PyObject *module, PyObject *pattern)
         Py_UCS4 chr = READ(i++);
         switch (chr) {
             case '*': {
-                if (_PyUnicodeWriter_WriteChar(_writer, chr) < 0) {
-                    goto abort;
-                }
-                DROP_DUPLICATES('*', i, n);
+                _WRITE_CHAR_OR(_writer, chr, goto abort);
+                SKIP_DUPLICATES('*', i, n);
                 PyObject *index = PyLong_FromSsize_t(h++);
                 if (index == NULL) {
                     goto abort;
@@ -128,9 +153,7 @@ translate(PyObject *module, PyObject *pattern)
             }
             case '?': {
                 // translate optional '?' (fnmatch) into optional '.' (regex)
-                if (_PyUnicodeWriter_WriteChar(_writer, '.') < 0) {
-                    goto abort;
-                }
+                _WRITE_CHAR_OR(_writer, '.', goto abort);
                 ++h; // increase the expected result's length
                 break;
             }
@@ -140,9 +163,7 @@ translate(PyObject *module, PyObject *pattern)
                 ADVANCE_IF_CHAR(']', j, n); // [!] or []
                 ADVANCE_TO_NEXT(']', j, n); // locate closing ']'
                 if (j >= n) {
-                    if (_PyUnicodeWriter_WriteASCIIString(_writer, "\\[", 2) < 0) {
-                        goto abort;
-                    }
+                    _WRITE_ASCII_OR(_writer, "\\[", 2, goto abort);
                     h += 2; // we just wrote 2 characters
                     break;  // early break for clarity
                 }
@@ -165,6 +186,7 @@ translate(PyObject *module, PyObject *pattern)
                     }
                     else {
                         assert(rc >= 0);
+                        assert(READ(j) == ']');
                         s1 = translate_expression(pattern, i, j);
                     }
                     if (s1 == NULL) {
@@ -200,7 +222,7 @@ translate(PyObject *module, PyObject *pattern)
             }
         }
     }
-#undef DROP_DUPLICATES
+#undef SKIP_DUPLICATES
 #undef ADVANCE_TO_NEXT
 #undef _WHILE_READ_CMP
 #undef ADVANCE_IF_CHAR
@@ -222,7 +244,7 @@ translate(PyObject *module, PyObject *pattern)
 
 // ==== Helper implementations ================================================
 
-PyObject *
+static PyObject *
 get_unicode_character(Py_UCS4 ch)
 {
     assert(ch <= 0x10ffff);
@@ -247,7 +269,7 @@ get_unicode_character(Py_UCS4 ch)
     return unicode;
 }
 
-PyObject *
+static PyObject *
 translate_expression(PyObject *pattern, Py_ssize_t i, Py_ssize_t j)
 {
     PyObject *chunks = PyList_New(0);
@@ -259,21 +281,26 @@ translate_expression(PyObject *pattern, Py_ssize_t i, Py_ssize_t j)
     while (k < j) {
         PyObject *eobj = _PyObject_CallMethod(pattern, &_Py_ID(find), "sii", "-", k, j);
         if (eobj == NULL) {
-            goto error;
+            goto abort;
         }
         Py_ssize_t t = PyLong_AsSsize_t(eobj);
         Py_DECREF(eobj);
         if (t < 0) {
-            goto error;
+            if (PyErr_Occurred()) {
+                goto abort;
+            }
+            // -1 here means that '-' was not found
+            assert(t == -1);
+            break;
         }
         PyObject *sub = PyUnicode_Substring(pattern, i, t);
         if (sub == NULL) {
-            goto error;
+            goto abort;
         }
         int rc = PyList_Append(chunks, sub);
         Py_DECREF(sub);
         if (rc < 0) {
-            goto error;
+            goto abort;
         }
         chunkscount += 1;
         i = t + 1;
@@ -282,27 +309,28 @@ translate_expression(PyObject *pattern, Py_ssize_t i, Py_ssize_t j)
     if (i >= j) {
         assert(chunkscount > 0);
         PyObject *chunk = PyList_GET_ITEM(chunks, chunkscount - 1);
+        assert(chunk != NULL);
         PyObject *hyphen = PyUnicode_FromOrdinal('-');
         if (hyphen == NULL) {
-            goto error;
+            goto abort;
         }
         PyObject *repl = PyUnicode_Concat(chunk, hyphen);
         Py_DECREF(hyphen);
-        int rc = PyList_SetItem(chunks, chunkscount - 1, repl);
-        Py_DECREF(repl);
-        if (rc < 0) {
-            goto error;
+        // PyList_SetItem() steals the reference to 'repl' (even on failure),
+        // so 'repl' must not be decref'ed after the call.
+        if (repl == NULL || PyList_SetItem(chunks, chunkscount - 1, repl) < 0) {
+            goto abort;
         }
     }
     else {
         PyObject *sub = PyUnicode_Substring(pattern, i, j);
         if (sub == NULL) {
-            goto error;
+            goto abort;
         }
         int rc = PyList_Append(chunks, sub);
         Py_DECREF(sub);
         if (rc < 0) {
-            goto error;
+            goto abort;
         }
         chunkscount += 1;
     }
@@ -327,62 +355,60 @@ translate_expression(PyObject *pattern, Py_ssize_t i, Py_ssize_t j)
             if (c1sub == NULL || c2sub == NULL) {
                 Py_XDECREF(c1sub);
                 Py_XDECREF(c2sub);
-                goto error;
+                goto abort;
             }
             PyObject *merged = PyUnicode_Concat(c1sub, c2sub);
             Py_DECREF(c1sub);
             Py_DECREF(c2sub);
             if (merged == NULL) {
-                goto error;
+                goto abort;
             }
             int rc = PyList_SetItem(chunks, c - 1, merged);
-            Py_DECREF(merged);
             if (rc < 0) {
-                goto error;
+                goto abort;
             }
             if (PySequence_DelItem(chunks, c) < 0) {
-                goto error;
+                goto abort;
             }
             chunkscount--;
         }
     }
+    assert(chunkscount == PyList_GET_SIZE(chunks));
     // Escape backslashes and hyphens for set difference (--),
     // but hyphens that create ranges should not be escaped.
     for (c = 0; c < chunkscount; ++c) {
-        PyObject *s0 = PyList_GetItem(chunks, c);
-        if (s0 == NULL) {
-            goto error;
-        }
-        PyObject *s1 = PyObject_CallMethod(s0, "replace", "ss", "\\", "\\\\");
+        PyObject *s0 = PyList_GET_ITEM(chunks, c);
+        assert(s0 != NULL);
+        PyObject *s1 = _PyObject_CallMethod(s0, &_Py_ID(replace), "ss", "\\", "\\\\");
         if (s1 == NULL) {
-            goto error;
+            goto abort;
         }
-        PyObject *s2 = PyObject_CallMethod(s1, "replace", "ss", "-", "\\-");
+        PyObject *s2 = _PyObject_CallMethod(s1, &_Py_ID(replace), "ss", "-", "\\-");
         Py_DECREF(s1);
         if (s2 == NULL) {
-            goto error;
+            goto abort;
         }
         if (PyList_SetItem(chunks, c, s2) < 0) {
-            goto error;
+            goto abort;
         }
     }
     PyObject *hyphen = PyUnicode_FromOrdinal('-');
     if (hyphen == NULL) {
-        goto error;
+        goto abort;
     }
     PyObject *res = PyUnicode_Join(hyphen, chunks);
     Py_DECREF(hyphen);
     if (res == NULL) {
-        goto error;
+        goto abort;
     }
     Py_DECREF(chunks);
     return res;
-error:
+abort:
     Py_XDECREF(chunks);
     return NULL;
 }
 
-Py_ssize_t
+static Py_ssize_t
 write_literal(fnmatchmodule_state *state,
               _PyUnicodeWriter *writer,
               PyObject *unicode)
@@ -403,77 +429,56 @@ write_literal(fnmatchmodule_state *state,
     return written;
 }
 
-Py_ssize_t
+static Py_ssize_t
 write_expression(_PyUnicodeWriter *writer, PyObject *expression)
 {
-#define WRITE_ASCII(str, len) \
-    do { \
-        if (_PyUnicodeWriter_WriteASCIIString(writer, (str), (len)) < 0) { \
-            return -1; \
-        } \
-    } while (0)
-
-#define WRITE_CHAR(c) \
-    do { \
-        if (_PyUnicodeWriter_WriteChar(writer, (c)) < 0) { \
-            return -1; \
-        } \
-    } while (0)
-
-    Py_ssize_t grouplen;
-    const char *buffer = PyUnicode_AsUTF8AndSize(expression, &grouplen);
+#define WRITE_CHAR(c)           _WRITE_CHAR_OR(writer, c, return -1)
+#define WRITE_ASCII(s, n)       _WRITE_ASCII_OR(writer, s, n, return -1)
+#define WRITE_BLOCK(s, i, j)    _WRITE_BLOCK_OR(writer, s, i, j, return -1)
+#define WRITE_STRING(s)         _WRITE_STRING_OR(writer, s, return -1)
+    Py_ssize_t grouplen = PyUnicode_GET_LENGTH(expression);
     if (grouplen == 0) {
         /* empty range: never match */
         WRITE_ASCII("(?!)", 4);
         return 4;
     }
-    else if (grouplen == 1 && buffer[0] == '!') {
+    Py_UCS4 token = PyUnicode_READ_CHAR(expression, 0);
+    if (grouplen == 1 && token == '!') {
         /* negated empty range: match any character */
         WRITE_CHAR('.');
         return 1;
     }
-    else {
-        Py_ssize_t extra = 2; // '[' and ']'
-        WRITE_CHAR('[');
-        switch (buffer[0]) {
-            case '!': {
-                WRITE_CHAR('^');
-                if (_PyUnicodeWriter_WriteSubstring(writer, expression, 1, grouplen) < 0) {
-                    return -1;
-                }
-                break;
-            }
-            case '^':
-            case '[': {
-                WRITE_CHAR('\\');
-                extra++;
-                break;
-            }
-            default:
-                if (_PyUnicodeWriter_WriteStr(writer, expression) < 0) {
-                    return -1;
-                }
-                break;
+    Py_ssize_t extra = 2; // '[' and ']'
+    WRITE_CHAR('[');
+    switch (token) {
+        case '!': {
+            WRITE_CHAR('^');
+            WRITE_BLOCK(expression, 1, grouplen);
+            break;
+        }
+        case '^':
+        case '[': {
+            WRITE_CHAR('\\');
+            ++extra;
+            WRITE_STRING(expression);
+            break;
+        }
+        default: {
+            WRITE_STRING(expression);
+            break;
         }
-        WRITE_CHAR(']');
-        return grouplen + extra;
     }
-#undef WRITE_CHAR
+    WRITE_CHAR(']');
+    return grouplen + extra;
+#undef WRITE_STRING
+#undef WRITE_BLOCK
 #undef WRITE_ASCII
+#undef WRITE_CHAR
 }
 
-PyObject *
+static PyObject *
 process_wildcards(PyObject *pattern, PyObject *indices)
 {
-#define WRITE_SUBSTRING(i, j) \
-    do { \
-        if ((i) < (j)) { /* write the substring if non-empty */ \
-            if (_PyUnicodeWriter_WriteSubstring(_writer, pattern, (i), (j)) < 0) { \
-                goto abort; \
-            } \
-        } \
-    } while (0)
-
     const Py_ssize_t m = PyList_GET_SIZE(indices);
     if (m == 0) {
         // just write fr'(?s:{parts} + ")\Z"
@@ -502,6 +507,7 @@ process_wildcards(PyObject *pattern, PyObject *indices)
      * of the translated pattern.
      */
     PyObject *jobj = PyList_GET_ITEM(indices, 0);
+    assert(jobj != NULL);
     j = PyLong_AsSsize_t(jobj);  // get the first position of '*'
     if (j < 0) {
         return NULL;
@@ -513,27 +519,32 @@ process_wildcards(PyObject *pattern, PyObject *indices)
     }
     _PyUnicodeWriter *_writer = (_PyUnicodeWriter *)(writer);
 
-    WRITE_SUBSTRING(i, j);  // write stuff before '*' if needed
+#define WRITE_BLOCK(i, j)       _WRITE_BLOCK_OR(_writer, pattern, i, j, goto abort)
+#define WRITE_ATOMIC_BEGIN()    _WRITE_ASCII_OR(_writer, "(?>.*?", 6, goto abort)
+#define WRITE_ATOMIC_END()      _WRITE_CHAR_OR(_writer, ')', goto abort)
+
+    WRITE_BLOCK(i, j);  // write stuff before '*' if needed
     i = j + 1;              // jump after the '*'
     for (Py_ssize_t k = 1; k < m; ++k) {
         PyObject *ind = PyList_GET_ITEM(indices, k);
+        assert(ind != NULL);
         j = PyLong_AsSsize_t(ind);
-        assert(j < 0 || i < j);
-        if (j < 0 ||
-            (_PyUnicodeWriter_WriteASCIIString(_writer, "(?>.*?", 6) < 0) ||
-            (_PyUnicodeWriter_WriteSubstring(_writer, pattern, i, j) < 0) ||
-            (_PyUnicodeWriter_WriteChar(_writer, ')') < 0))
-        {
+        if (j < 0) {
             goto abort;
         }
+        assert(i < j);
+        // atomic group begin
+        WRITE_ATOMIC_BEGIN();
+        WRITE_BLOCK(i, j);
+        WRITE_ATOMIC_END();
         i = j + 1;
     }
     // handle the last group
-    if (_PyUnicodeWriter_WriteASCIIString(_writer, ".*", 2) < 0) {
-        goto abort;
-    }
-    WRITE_SUBSTRING(i, n); // write the remaining substring
-#undef WRITE_SUBSTRING
+    _WRITE_ASCII_OR(_writer, ".*", 2, goto abort);
+    WRITE_BLOCK(i, n); // write the remaining substring
+#undef WRITE_BLOCK
+#undef WRITE_ATOMIC_END
+#undef WRITE_ATOMIC_BEGIN
     PyObject *res = PyUnicodeWriter_Finish(writer);
     if (res == NULL) {
         return NULL;
@@ -545,3 +556,13 @@ process_wildcards(PyObject *pattern, PyObject *indices)
     PyUnicodeWriter_Discard(writer);
     return NULL;
 }
+
+#undef _WRITE_BLOCK_OR
+#undef _WRITE_BLOCK
+#undef _WRITE_STRING_OR
+#undef _WRITE_STRING
+#undef _WRITE_ASCII_OR
+#undef _WRITE_ASCII
+#undef _WRITE_CHAR_OR
+#undef _WRITE_CHAR
+#undef _WRITE_OR_FAIL

From f9343f3d545a84ec691b08322546403caff7e327 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?=
 <10796600+picnixz@users.noreply.github.com>
Date: Wed, 10 Jul 2024 18:52:11 +0200
Subject: [PATCH 31/97] update generated objects

---
 Include/internal/pycore_global_objects_fini_generated.h | 1 +
 Include/internal/pycore_global_strings.h                | 1 +
 Include/internal/pycore_runtime_init_generated.h        | 1 +
 Include/internal/pycore_unicodeobject_generated.h       | 4 ++++
 4 files changed, 7 insertions(+)

diff --git a/Include/internal/pycore_global_objects_fini_generated.h b/Include/internal/pycore_global_objects_fini_generated.h
index ec11eec5eec27d..44820e0ce13fad 100644
--- a/Include/internal/pycore_global_objects_fini_generated.h
+++ b/Include/internal/pycore_global_objects_fini_generated.h
@@ -951,6 +951,7 @@ _PyStaticObjects_CheckRefcnt(PyInterpreterState *interp) {
     _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(fix_imports));
     _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(flags));
     _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(flush));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(fnmatch));
     _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(fold));
     _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(follow_symlinks));
     _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(format));
diff --git a/Include/internal/pycore_global_strings.h b/Include/internal/pycore_global_strings.h
index f27bdeb0183aec..c21492376395e4 100644
--- a/Include/internal/pycore_global_strings.h
+++ b/Include/internal/pycore_global_strings.h
@@ -440,6 +440,7 @@ struct _Py_global_strings {
         STRUCT_FOR_ID(fix_imports)
         STRUCT_FOR_ID(flags)
         STRUCT_FOR_ID(flush)
+        STRUCT_FOR_ID(fnmatch)
         STRUCT_FOR_ID(fold)
         STRUCT_FOR_ID(follow_symlinks)
         STRUCT_FOR_ID(format)
diff --git a/Include/internal/pycore_runtime_init_generated.h b/Include/internal/pycore_runtime_init_generated.h
index ab94af0cfb90c9..9a99b3645fb717 100644
--- a/Include/internal/pycore_runtime_init_generated.h
+++ b/Include/internal/pycore_runtime_init_generated.h
@@ -949,6 +949,7 @@ extern "C" {
     INIT_ID(fix_imports), \
     INIT_ID(flags), \
     INIT_ID(flush), \
+    INIT_ID(fnmatch), \
     INIT_ID(fold), \
     INIT_ID(follow_symlinks), \
     INIT_ID(format), \
diff --git a/Include/internal/pycore_unicodeobject_generated.h b/Include/internal/pycore_unicodeobject_generated.h
index a0e532edc1bfc6..83ece722c6fa9d 100644
--- a/Include/internal/pycore_unicodeobject_generated.h
+++ b/Include/internal/pycore_unicodeobject_generated.h
@@ -1560,6 +1560,10 @@ _PyUnicode_InitStaticStrings(PyInterpreterState *interp) {
     _PyUnicode_InternStatic(interp, &string);
     assert(_PyUnicode_CheckConsistency(string, 1));
     assert(PyUnicode_GET_LENGTH(string) != 1);
+    string = &_Py_ID(fnmatch);
+    _PyUnicode_InternStatic(interp, &string);
+    assert(_PyUnicode_CheckConsistency(string, 1));
+    assert(PyUnicode_GET_LENGTH(string) != 1);
     string = &_Py_ID(fold);
     _PyUnicode_InternStatic(interp, &string);
     assert(_PyUnicode_CheckConsistency(string, 1));

From 46d7744399a314393e41d450608a0904d0107cc7 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?=
 <10796600+picnixz@users.noreply.github.com>
Date: Wed, 10 Jul 2024 19:02:50 +0200
Subject: [PATCH 32/97] update module names

---
 Python/stdlib_module_names.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/Python/stdlib_module_names.h b/Python/stdlib_module_names.h
index 9686d10563aa4d..4b20baaf24d0c8 100644
--- a/Python/stdlib_module_names.h
+++ b/Python/stdlib_module_names.h
@@ -32,6 +32,7 @@ static const char* _Py_stdlib_module_names[] = {
 "_dbm",
 "_decimal",
 "_elementtree",
+"_fnmatch",
 "_frozen_importlib",
 "_frozen_importlib_external",
 "_functools",

From 5bc902e23a2e05c63be7478e726c5fe0349be1d2 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?=
 <10796600+picnixz@users.noreply.github.com>
Date: Wed, 10 Jul 2024 19:17:33 +0200
Subject: [PATCH 33/97] fix smelly names

---
 Modules/_fnmatch/_fnmatchmodule.c | 16 +++++++--------
 Modules/_fnmatch/_fnmatchmodule.h | 34 +++++++++++++++----------------
 Modules/_fnmatch/posix.c          | 20 +++++++++---------
 Modules/_fnmatch/regex.c          |  6 +++---
 Modules/_fnmatch/translate.c      |  2 +-
 5 files changed, 39 insertions(+), 39 deletions(-)

diff --git a/Modules/_fnmatch/_fnmatchmodule.c b/Modules/_fnmatch/_fnmatchmodule.c
index 6e4b23d83cfb53..77afbb9f819272 100644
--- a/Modules/_fnmatch/_fnmatchmodule.c
+++ b/Modules/_fnmatch/_fnmatchmodule.c
@@ -179,11 +179,11 @@ _fnmatch_filter_impl(PyObject *module, PyObject *names, PyObject *pat)
     // call os.fspath() on the names being matched, whereas it does on NT.
     if (PyBytes_Check(pat)) {
         const char *pattern = PyBytes_AS_STRING(pat);
-        return _posix_fnmatch_encoded_filter_cached(pattern, names);
+        return _Py_posix_fnmatch_encoded_filter_cached(pattern, names);
     }
     if (PyUnicode_Check(pat)) {
         const char *pattern = PyUnicode_AsUTF8(pat);
-        return _posix_fnmatch_unicode_filter_cached(pattern, names);
+        return _Py_posix_fnmatch_unicode_filter_cached(pattern, names);
     }
     PyErr_SetString(PyExc_TypeError, INVALID_PATTERN_TYPE);
     return NULL;
@@ -192,7 +192,7 @@ _fnmatch_filter_impl(PyObject *module, PyObject *names, PyObject *pat)
     if (matcher == NULL) {
         return NULL;
     }
-    PyObject *result = _regex_fnmatch_filter(matcher, names);
+    PyObject *result = _Py_regex_fnmatch_filter(matcher, names);
     Py_DECREF(matcher);
     return result;
 #endif
@@ -246,11 +246,11 @@ _fnmatch_fnmatchcase_impl(PyObject *module, PyObject *name, PyObject *pat)
     // the C implementation).
     if (PyBytes_Check(pat)) {
         const char *pattern = PyBytes_AS_STRING(pat);
-        return _posix_fnmatch_encoded_cached(pattern, name);
+        return _Py_posix_fnmatch_encoded_cached(pattern, name);
     }
     if (PyUnicode_Check(pat)) {
         const char *pattern = PyUnicode_AsUTF8(pat);
-        return _posix_fnmatch_unicode_cached(pattern, name);
+        return _Py_posix_fnmatch_unicode_cached(pattern, name);
     }
     PyErr_SetString(PyExc_TypeError, INVALID_PATTERN_TYPE);
     return -1;
@@ -259,7 +259,7 @@ _fnmatch_fnmatchcase_impl(PyObject *module, PyObject *name, PyObject *pat)
     if (matcher == NULL) {
         return -1;
     }
-    int res = _regex_fnmatch_generic(matcher, name);
+    int res = _Py_regex_fnmatch_generic(matcher, name);
     Py_DECREF(matcher);
     return res;
 #endif
@@ -284,7 +284,7 @@ _fnmatch_translate_impl(PyObject *module, PyObject *pattern)
             return NULL;
         }
         // translated regular expression as a str object
-        PyObject *str_expr = _regex_translate(module, unicode);
+        PyObject *str_expr = _Py_regex_translate(module, unicode);
         Py_DECREF(unicode);
         if (str_expr == NULL) {
             return NULL;
@@ -294,7 +294,7 @@ _fnmatch_translate_impl(PyObject *module, PyObject *pattern)
         return expr;
     }
     else if (PyUnicode_Check(pattern)) {
-        return _regex_translate(module, pattern);
+        return _Py_regex_translate(module, pattern);
     }
     else {
         PyErr_SetString(PyExc_TypeError, INVALID_PATTERN_TYPE);
diff --git a/Modules/_fnmatch/_fnmatchmodule.h b/Modules/_fnmatch/_fnmatchmodule.h
index 2311e35efe691e..9a2128e6e005ae 100644
--- a/Modules/_fnmatch/_fnmatchmodule.h
+++ b/Modules/_fnmatch/_fnmatchmodule.h
@@ -45,17 +45,17 @@ get_fnmatchmodulestate_state(PyObject *module)
  * Construct a list of filtered names using fnmatch(3).
  */
 extern PyObject *
-_posix_fnmatch_encoded_filter(PyObject *pattern, PyObject *names);
-/* Same as _posix_fnmatch_encoded_filter() but for unicode inputs. */
+_Py_posix_fnmatch_encoded_filter(PyObject *pattern, PyObject *names);
+/* Same as _Py_posix_fnmatch_encoded_filter() but for unicode inputs. */
 extern PyObject *
-_posix_fnmatch_unicode_filter(PyObject *pattern, PyObject *names);
+_Py_posix_fnmatch_unicode_filter(PyObject *pattern, PyObject *names);
 
-/* cached 'pattern' version of _posix_fnmatch_encoded_filter() */
+/* cached 'pattern' version of _Py_posix_fnmatch_encoded_filter() */
 extern PyObject *
-_posix_fnmatch_encoded_filter_cached(const char *pattern, PyObject *names);
-/* cached 'pattern' version of _posix_fnmatch_unicode_filter() */
+_Py_posix_fnmatch_encoded_filter_cached(const char *pattern, PyObject *names);
+/* cached 'pattern' version of _Py_posix_fnmatch_unicode_filter() */
 extern PyObject *
-_posix_fnmatch_unicode_filter_cached(const char *pattern, PyObject *names);
+_Py_posix_fnmatch_unicode_filter_cached(const char *pattern, PyObject *names);
 
 /*
  * Perform a case-sensitive match using fnmatch(3).
@@ -71,17 +71,17 @@ _posix_fnmatch_unicode_filter_cached(const char *pattern, PyObject *names);
  * sets a TypeError exception, or (2) something went wrong.
  */
 extern int
-_posix_fnmatch_encoded(PyObject *pattern, PyObject *string);
-/* Same as _posix_fnmatch_encoded() but for unicode inputs. */
+_Py_posix_fnmatch_encoded(PyObject *pattern, PyObject *string);
+/* Same as _Py_posix_fnmatch_encoded() but for unicode inputs. */
 extern int
-_posix_fnmatch_unicode(PyObject *pattern, PyObject *string);
+_Py_posix_fnmatch_unicode(PyObject *pattern, PyObject *string);
 
-/* cached 'pattern' version of _posix_fnmatch_encoded() */
+/* cached 'pattern' version of _Py_posix_fnmatch_encoded() */
 extern int
-_posix_fnmatch_encoded_cached(const char *pattern, PyObject *names);
-/* cached 'pattern' version of _posix_fnmatch_encoded() */
+_Py_posix_fnmatch_encoded_cached(const char *pattern, PyObject *names);
+/* cached 'pattern' version of _Py_posix_fnmatch_encoded() */
 extern int
-_posix_fnmatch_unicode_cached(const char *pattern, PyObject *names);
+_Py_posix_fnmatch_unicode_cached(const char *pattern, PyObject *names);
 #endif
 
 /*
@@ -98,7 +98,7 @@ _posix_fnmatch_unicode_cached(const char *pattern, PyObject *names);
  * and sets a TypeError exception, or (2) something went wrong.
  */
 extern int
-_regex_fnmatch_generic(PyObject *matcher, PyObject *string);
+_Py_regex_fnmatch_generic(PyObject *matcher, PyObject *string);
 
 /*
  * Perform a case-sensitive match using compiled RE patterns.
@@ -111,12 +111,12 @@ _regex_fnmatch_generic(PyObject *matcher, PyObject *string);
  * Returns a list of matched names, or NULL if an error occurred.
  */
 extern PyObject *
-_regex_fnmatch_filter(PyObject *matcher, PyObject *names);
+_Py_regex_fnmatch_filter(PyObject *matcher, PyObject *names);
 
 /*
  * C accelerator for translating UNIX shell patterns into RE patterns.
  */
 extern PyObject *
-_regex_translate(PyObject *module, PyObject *pattern);
+_Py_regex_translate(PyObject *module, PyObject *pattern);
 
 #endif // _FNMATCHMODULE_H
diff --git a/Modules/_fnmatch/posix.c b/Modules/_fnmatch/posix.c
index d4fdbb42ba0210..45fe88b5440f74 100644
--- a/Modules/_fnmatch/posix.c
+++ b/Modules/_fnmatch/posix.c
@@ -42,49 +42,49 @@ _posix_fnmatch_filter_cached(const char *pattern, PyObject *names, Converter con
 // ==== API implementation ====================================================
 
 inline PyObject *
-_posix_fnmatch_encoded_filter(PyObject *pattern, PyObject *names)
+_Py_posix_fnmatch_encoded_filter(PyObject *pattern, PyObject *names)
 {
     return _posix_fnmatch_filter(pattern, names, &from_encoded);
 }
 
 inline PyObject *
-_posix_fnmatch_unicode_filter(PyObject *pattern, PyObject *names)
+_Py_posix_fnmatch_unicode_filter(PyObject *pattern, PyObject *names)
 {
     return _posix_fnmatch_filter(pattern, names, &from_unicode);
 }
 
 inline PyObject *
-_posix_fnmatch_encoded_filter_cached(const char *pattern, PyObject *names)
+_Py_posix_fnmatch_encoded_filter_cached(const char *pattern, PyObject *names)
 {
     assert(pattern != NULL);
     return _posix_fnmatch_filter_cached(pattern, names, &from_encoded);
 }
 
 inline PyObject *
-_posix_fnmatch_unicode_filter_cached(const char *pattern, PyObject *names)
+_Py_posix_fnmatch_unicode_filter_cached(const char *pattern, PyObject *names)
 {
     assert(pattern != NULL);
     return _posix_fnmatch_filter_cached(pattern, names, &from_unicode);
 }
 
 inline int
-_posix_fnmatch_encoded(PyObject *pattern, PyObject *string)
+_Py_posix_fnmatch_encoded(PyObject *pattern, PyObject *string)
 {
     const char *p = from_encoded(pattern, INVALID_PATTERN_TYPE);
     if (p == NULL) {
         return -1;
     }
-    return _posix_fnmatch_encoded_cached(p, string);
+    return _Py_posix_fnmatch_encoded_cached(p, string);
 }
 
 inline int
-_posix_fnmatch_unicode(PyObject *pattern, PyObject *string)
+_Py_posix_fnmatch_unicode(PyObject *pattern, PyObject *string)
 {
     const char *p = from_unicode(pattern, INVALID_PATTERN_TYPE);
     if (p == NULL) {
         return -1;
     }
-    return _posix_fnmatch_unicode_cached(p, string);
+    return _Py_posix_fnmatch_unicode_cached(p, string);
 }
 
 #define PROCESS_MATCH_RESULT(r) \
@@ -97,7 +97,7 @@ _posix_fnmatch_unicode(PyObject *pattern, PyObject *string)
     } while (0)
 
 inline int
-_posix_fnmatch_encoded_cached(const char *pattern, PyObject *string)
+_Py_posix_fnmatch_encoded_cached(const char *pattern, PyObject *string)
 {
     assert(pattern != NULL);
     const char *s = from_encoded(string, INVALID_NAME_TYPE);
@@ -108,7 +108,7 @@ _posix_fnmatch_encoded_cached(const char *pattern, PyObject *string)
 }
 
 inline int
-_posix_fnmatch_unicode_cached(const char *pattern, PyObject *string)
+_Py_posix_fnmatch_unicode_cached(const char *pattern, PyObject *string)
 {
     assert(pattern != NULL);
     const char *s = from_unicode(string, INVALID_NAME_TYPE);
diff --git a/Modules/_fnmatch/regex.c b/Modules/_fnmatch/regex.c
index 807e773635a9da..b6715bb33283b0 100644
--- a/Modules/_fnmatch/regex.c
+++ b/Modules/_fnmatch/regex.c
@@ -5,7 +5,7 @@
 // ==== API implementation ====================================================
 
 inline int
-_regex_fnmatch_generic(PyObject *matcher, PyObject *name)
+_Py_regex_fnmatch_generic(PyObject *matcher, PyObject *name)
 {
     // If 'name' is of incorrect type, it will be detected when calling
     // the matcher function (we emulate 're.compile(...).match(name)').
@@ -20,7 +20,7 @@ _regex_fnmatch_generic(PyObject *matcher, PyObject *name)
 }
 
 PyObject *
-_regex_fnmatch_filter(PyObject *matcher, PyObject *names)
+_Py_regex_fnmatch_filter(PyObject *matcher, PyObject *names)
 {
     assert(PyCallable_Check(matcher));
     PyObject *iter = PyObject_GetIter(names);
@@ -36,7 +36,7 @@ _regex_fnmatch_filter(PyObject *matcher, PyObject *names)
 
     PyObject *name = NULL;
     while ((name = PyIter_Next(iter))) {
-        int rc = _regex_fnmatch_generic(matcher, name);
+        int rc = _Py_regex_fnmatch_generic(matcher, name);
         if (rc < 0) {
             assert(PyErr_Occurred());
             goto abort;
diff --git a/Modules/_fnmatch/translate.c b/Modules/_fnmatch/translate.c
index d4d63a2693b7de..40c42beebbec26 100644
--- a/Modules/_fnmatch/translate.c
+++ b/Modules/_fnmatch/translate.c
@@ -93,7 +93,7 @@ process_wildcards(PyObject *pattern, PyObject *indices);
 // ==== API implementation ====================================================
 
 PyObject *
-_regex_translate(PyObject *module, PyObject *pattern)
+_Py_regex_translate(PyObject *module, PyObject *pattern)
 {
 #define READ(ind) PyUnicode_READ(kind, data, (ind))
 #define ADVANCE_IF_CHAR(ch, ind, maxind) \

From 78140286e55ee4429708f371f761132f22ac91da Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?=
 <10796600+picnixz@users.noreply.github.com>
Date: Thu, 11 Jul 2024 14:46:06 +0200
Subject: [PATCH 34/97] fix translation unit

- update comments
- remove calls to private API in functions
- more macro protection
- fix refleaks
---
 Modules/_fnmatch/translate.c | 162 +++++++++++++++++++----------------
 1 file changed, 90 insertions(+), 72 deletions(-)

diff --git a/Modules/_fnmatch/translate.c b/Modules/_fnmatch/translate.c
index 40c42beebbec26..8900800f0c933c 100644
--- a/Modules/_fnmatch/translate.c
+++ b/Modules/_fnmatch/translate.c
@@ -11,6 +11,8 @@
 
 // ==== Helper declarations ==================================================
 
+typedef fnmatchmodule_state State;
+
 #define _WRITE_OR_FAIL(writeop, onerror) \
     do { \
         if ((writeop) < 0) { \
@@ -18,22 +20,27 @@
         } \
     } while (0)
 
-#define _WRITE_CHAR      _PyUnicodeWriter_WriteChar
-#define _WRITE_CHAR_OR(_writer, ch, onerror) \
-    _WRITE_OR_FAIL(_WRITE_CHAR((_writer), (ch)), onerror)
+#define _WRITE_CHAR(writer, ch) \
+    _PyUnicodeWriter_WriteChar((_PyUnicodeWriter *)(writer), (ch))
+#define _WRITE_CHAR_OR(writer, ch, onerror) \
+    _WRITE_OR_FAIL(_WRITE_CHAR((writer), (ch)), onerror)
 
-#define _WRITE_ASCII     _PyUnicodeWriter_WriteASCIIString
-#define _WRITE_ASCII_OR(_writer, ascii, length, onerror) \
-    _WRITE_OR_FAIL(_WRITE_ASCII((_writer), (ascii), (length)), onerror)
+#define _WRITE_ASCII(writer, ascii, length) \
+    _PyUnicodeWriter_WriteASCIIString((_PyUnicodeWriter *)(writer), (ascii), (length))
+#define _WRITE_ASCII_OR(writer, ascii, length, onerror) \
+    _WRITE_OR_FAIL(_WRITE_ASCII((writer), (ascii), (length)), onerror)
 
-#define _WRITE_STRING    _PyUnicodeWriter_WriteStr
-#define _WRITE_STRING_OR(_writer, string, onerror) \
-    _WRITE_OR_FAIL(_WRITE_STRING((_writer), (string)), onerror)
+#define _WRITE_STRING(writer, string) \
+    _PyUnicodeWriter_WriteStr((_PyUnicodeWriter *)(writer), (string))
+#define _WRITE_STRING_OR(writer, string, onerror) \
+    _WRITE_OR_FAIL(_WRITE_STRING((writer), (string)), onerror)
 
-#define _WRITE_BLOCK    _PyUnicodeWriter_WriteSubstring
-#define _WRITE_BLOCK_OR(_writer, string, i, j, onerror) \
+#define _WRITE_BLOCK(writer, string, i, j) \
+    _PyUnicodeWriter_WriteSubstring((_PyUnicodeWriter *)(writer), (string), (i), (j))
+#define _WRITE_BLOCK_OR(writer, string, i, j, onerror) \
     do { \
-        if ((i) < (j) && _WRITE_BLOCK((_writer), (string), (i), (j)) < 0) { \
+        Py_ssize_t _i = (i), _j = (j); /* to allow in-place operators on i or j */ \
+        if (_i < _j && _WRITE_BLOCK((writer), (string), _i, _j) < 0) { \
             onerror; \
         } \
     } while (0)
@@ -70,9 +77,7 @@ translate_expression(PyObject *pattern, Py_ssize_t start, Py_ssize_t stop);
  * This returns the number of written characters, or -1 if an error occurred.
  */
 static Py_ssize_t
-write_literal(fnmatchmodule_state *state,
-              _PyUnicodeWriter *writer,
-              PyObject *unicode);
+write_literal(State *state, PyUnicodeWriter *writer, PyObject *unicode);
 
 /*
  * Write the translated pattern obtained by translate_expression().
@@ -80,12 +85,12 @@ write_literal(fnmatchmodule_state *state,
  * This returns the number of written characters, or -1 if an error occurred.
  */
 static Py_ssize_t
-write_expression(_PyUnicodeWriter *writer, PyObject *expression);
+write_expression(PyUnicodeWriter *writer, PyObject *expression);
 
 /*
  * Build the final regular expression by processing the wildcards.
  *
- * The position of each wildcard in 'strings' is given by 'indices'.
+ * The position of each wildcard in 'pattern' is given by 'indices'.
  */
 static PyObject *
 process_wildcards(PyObject *pattern, PyObject *indices);
@@ -93,38 +98,52 @@ process_wildcards(PyObject *pattern, PyObject *indices);
 // ==== API implementation ====================================================
 
 PyObject *
-_Py_regex_translate(PyObject *module, PyObject *pattern)
+_Py_fnmatch_translate(PyObject *module, PyObject *pattern)
 {
 #define READ(ind) PyUnicode_READ(kind, data, (ind))
 #define ADVANCE_IF_CHAR(ch, ind, maxind) \
     do { \
+        /* the following forces ind to be a variable name */ \
+        Py_ssize_t *Py_UNUSED(_ind) = &ind; \
         if ((ind) < (maxind) && READ(ind) == (ch)) { \
             ++ind; \
         } \
     } while (0)
 #define _WHILE_READ_CMP(ch, ind, maxind, cmp) \
     do { \
+        /* the following forces ind to be a variable name */ \
+        Py_ssize_t *Py_UNUSED(_ind) = &ind; \
         while ((ind) < (maxind) && READ(ind) cmp (ch)) { \
             ++ind; \
         } \
     } while (0)
-#define ADVANCE_TO_NEXT(ch, from, maxind) _WHILE_READ_CMP(ch, from, maxind, !=)
-#define SKIP_DUPLICATES(ch, from, maxind) _WHILE_READ_CMP(ch, from, maxind, ==)
+#define ADVANCE_TO_NEXT(ch, from, maxind) _WHILE_READ_CMP((ch), (from), (maxind), !=)
+#define SKIP_DUPLICATES(ch, from, maxind) _WHILE_READ_CMP((ch), (from), (maxind), ==)
 
-    fnmatchmodule_state *state = get_fnmatchmodulestate_state(module);
+    State *state = get_fnmatchmodulestate_state(module);
     PyObject *re = state->re_module;
     const Py_ssize_t n = PyUnicode_GET_LENGTH(pattern);
-    // We would write less data if there are successive '*', which should
-    // not be the case in general. Otherwise, we write >= n characters
-    // since escaping them would always add more characters so we will
-    // overestimate a bit the number of characters to write.
+    // We would write less data if there are successive '*',
+    // which should not be the case in general. Otherwise,
+    // we write >= n characters since escaping them always
+    // adds more characters.
+    //
+    // Note that only b'()[]{}?*+-|^$\\.&~# \t\n\r\v\f' need to
+    // be escaped when translated to RE patterns and '*' and '?'
+    // are already handled without being escaped.
+    //
+    // In general, UNIX style patterns are more likely to contain
+    // wildcards than characters to be escaped, with the exception
+    // of '-', '\' and '~' (we usually want to match filenames),
+    // and there is a sparse number of them. Therefore, we only
+    // estimate the number of characters to be written to be the
+    // same as the number of characters in the pattern.
     //
-    // TODO(picnixz): should we limit the estimation or not?
-    PyUnicodeWriter *writer = PyUnicodeWriter_Create((Py_ssize_t)(1.05 * n));
+    // TODO: (picnixz): should we limit the estimation in case of a failure?
+    PyUnicodeWriter *writer = PyUnicodeWriter_Create(n);
     if (writer == NULL) {
         return NULL;
     }
-    _PyUnicodeWriter *_writer = (_PyUnicodeWriter *)(writer);
     // list containing the indices where '*' has a special meaning
     PyObject *indices = PyList_New(0);
     if (indices == NULL) {
@@ -138,7 +157,7 @@ _Py_regex_translate(PyObject *module, PyObject *pattern)
         Py_UCS4 chr = READ(i++);
         switch (chr) {
             case '*': {
-                _WRITE_CHAR_OR(_writer, chr, goto abort);
+                _WRITE_CHAR_OR(writer, chr, goto abort);
                 SKIP_DUPLICATES('*', i, n);
                 PyObject *index = PyLong_FromSsize_t(h++);
                 if (index == NULL) {
@@ -153,7 +172,7 @@ _Py_regex_translate(PyObject *module, PyObject *pattern)
             }
             case '?': {
                 // translate optional '?' (fnmatch) into optional '.' (regex)
-                _WRITE_CHAR_OR(_writer, '.', goto abort);
+                _WRITE_CHAR_OR(writer, '.', goto abort);
                 ++h; // increase the expected result's length
                 break;
             }
@@ -163,7 +182,7 @@ _Py_regex_translate(PyObject *module, PyObject *pattern)
                 ADVANCE_IF_CHAR(']', j, n); // [!] or []
                 ADVANCE_TO_NEXT(']', j, n); // locate closing ']'
                 if (j >= n) {
-                    _WRITE_ASCII_OR(_writer, "\\[", 2, goto abort);
+                    _WRITE_ASCII_OR(writer, "\\[", 2, goto abort);
                     h += 2; // we just wrote 2 characters
                     break;  // early break for clarity
                 }
@@ -171,21 +190,21 @@ _Py_regex_translate(PyObject *module, PyObject *pattern)
                     //              v--- pattern[j] (exclusive)
                     // '[' * ... * ']'
                     //     ^----- pattern[i] (inclusive)
-                    int rc = PyUnicode_FindChar(pattern, '-', i, j, 1);
-                    if (rc == -2) {
+                    int pos = PyUnicode_FindChar(pattern, '-', i, j, 1);
+                    if (pos == -2) {
                         goto abort;
                     }
                     PyObject *s1 = NULL, *s2 = NULL;
-                    if (rc == -1) {
-                        PyObject *group = PyUnicode_Substring(pattern, i, j);
-                        if (group == NULL) {
+                    if (pos == -1) {
+                        PyObject *s0 = PyUnicode_Substring(pattern, i, j);
+                        if (s0 == NULL) {
                             goto abort;
                         }
-                        s1 = _PyObject_CallMethod(group, &_Py_ID(replace), "ss", "\\", "\\\\");
-                        Py_DECREF(group);
+                        s1 = _PyObject_CallMethod(s0, &_Py_ID(replace), "ss", "\\", "\\\\");
+                        Py_DECREF(s0);
                     }
                     else {
-                        assert(rc >= 0);
+                        assert(pos >= 0);
                         assert(READ(j) == ']');
                         s1 = translate_expression(pattern, i, j);
                     }
@@ -197,7 +216,7 @@ _Py_regex_translate(PyObject *module, PyObject *pattern)
                     if (s2 == NULL) {
                         goto abort;
                     }
-                    int difflen = write_expression(_writer, s2);
+                    int difflen = write_expression(writer, s2);
                     Py_DECREF(s2);
                     if (difflen < 0) {
                         goto abort;
@@ -212,7 +231,7 @@ _Py_regex_translate(PyObject *module, PyObject *pattern)
                 if (str == NULL) {
                     goto abort;
                 }
-                int difflen = write_literal(state, _writer, str);
+                int difflen = write_literal(state, writer, str);
                 Py_DECREF(str);
                 if (difflen < 0) {
                     goto abort;
@@ -317,8 +336,10 @@ translate_expression(PyObject *pattern, Py_ssize_t i, Py_ssize_t j)
         PyObject *repl = PyUnicode_Concat(chunk, hyphen);
         Py_DECREF(hyphen);
         // PyList_SetItem() does not create a new reference on 'repl'
-        // so we should not decref 'repl' after the call (I think?)
+        // so we should not decref 'repl' after the call, unless there
+        // is an issue while setting the item.
         if (repl == NULL || PyList_SetItem(chunks, chunkscount - 1, repl) < 0) {
+            Py_XDECREF(repl);
             goto abort;
         }
     }
@@ -360,11 +381,11 @@ translate_expression(PyObject *pattern, Py_ssize_t i, Py_ssize_t j)
             PyObject *merged = PyUnicode_Concat(c1sub, c2sub);
             Py_DECREF(c1sub);
             Py_DECREF(c2sub);
-            if (merged == NULL) {
-                goto abort;
-            }
-            int rc = PyList_SetItem(chunks, c - 1, merged);
-            if (rc < 0) {
+            // PyList_SetItem() does not create a new reference on 'merged'
+            // so we should not decref 'merged' after the call, unless there
+            // is an issue while setting the item.
+            if (merged == NULL || PyList_SetItem(chunks, c - 1, merged) < 0) {
+                Py_XDECREF(merged);
                 goto abort;
             }
             if (PySequence_DelItem(chunks, c) < 0) {
@@ -385,10 +406,11 @@ translate_expression(PyObject *pattern, Py_ssize_t i, Py_ssize_t j)
         }
         PyObject *s2 = _PyObject_CallMethod(s1, &_Py_ID(replace), "ss", "-", "\\-");
         Py_DECREF(s1);
-        if (s2 == NULL) {
-            goto abort;
-        }
-        if (PyList_SetItem(chunks, c, s2) < 0) {
+        // PyList_SetItem() does not create a new reference on 's2'
+        // so we should not decref 's2' after the call, unless there
+        // is an issue while setting the item.
+        if (s2 == NULL || PyList_SetItem(chunks, c, s2) < 0) {
+            Py_XDECREF(s2);
             goto abort;
         }
     }
@@ -409,9 +431,7 @@ translate_expression(PyObject *pattern, Py_ssize_t i, Py_ssize_t j)
 }
 
 static Py_ssize_t
-write_literal(fnmatchmodule_state *state,
-              _PyUnicodeWriter *writer,
-              PyObject *unicode)
+write_literal(State *state, PyUnicodeWriter *writer, PyObject *unicode)
 {
     PyObject *escaped = PyObject_CallMethodOneArg(state->re_module,
                                                   &_Py_ID(escape),
@@ -420,7 +440,8 @@ write_literal(fnmatchmodule_state *state,
         return -1;
     }
     Py_ssize_t written = PyUnicode_GET_LENGTH(escaped);
-    int rc = _PyUnicodeWriter_WriteStr(writer, escaped);
+    assert(written >= 0);
+    int rc = _WRITE_STRING(writer, escaped);
     Py_DECREF(escaped);
     if (rc < 0) {
         return -1;
@@ -430,12 +451,12 @@ write_literal(fnmatchmodule_state *state,
 }
 
 static Py_ssize_t
-write_expression(_PyUnicodeWriter *writer, PyObject *expression)
+write_expression(PyUnicodeWriter *writer, PyObject *expression)
 {
-#define WRITE_CHAR(c)           _WRITE_CHAR_OR(writer, c, return -1)
-#define WRITE_ASCII(s, n)       _WRITE_ASCII_OR(writer, s, n, return -1)
-#define WRITE_BLOCK(s, i, j)    _WRITE_BLOCK_OR(writer, s, i, j, return -1)
-#define WRITE_STRING(s)         _WRITE_STRING_OR(writer, s, return -1)
+#define WRITE_CHAR(c)           _WRITE_CHAR_OR(writer, (c), return -1)
+#define WRITE_ASCII(s, n)       _WRITE_ASCII_OR(writer, (s), (n), return -1)
+#define WRITE_BLOCK(s, i, j)    _WRITE_BLOCK_OR(writer, (s), (i), (j), return -1)
+#define WRITE_STRING(s)         _WRITE_STRING_OR(writer, (s), return -1)
     Py_ssize_t grouplen = PyUnicode_GET_LENGTH(expression);
     if (grouplen == 0) {
         /* empty range: never match */
@@ -452,14 +473,14 @@ write_expression(_PyUnicodeWriter *writer, PyObject *expression)
     WRITE_CHAR('[');
     switch (token) {
         case '!': {
-            WRITE_CHAR('^');
+            WRITE_CHAR('^'); // replace '!' by '^'
             WRITE_BLOCK(expression, 1, grouplen);
             break;
         }
         case '^':
         case '[': {
             WRITE_CHAR('\\');
-            ++extra;
+            ++extra; // because we wrote '\\'
             WRITE_STRING(expression);
             break;
         }
@@ -499,7 +520,7 @@ process_wildcards(PyObject *pattern, PyObject *indices)
      * the STRING by "(?>.*?" and ")", and thus we will write at
      * least 7 + len(STRING) characters.
      *
-     * We write one additional '.*' if indices[-1] + 1 = n.
+     * We write one additional '.*' if indices[-1] + 1 == n.
      *
      * Since the result is surrounded by "(?s:" and ")\Z", we
      * write at least "indices[0] + 7m + n + 6" characters,
@@ -517,12 +538,9 @@ process_wildcards(PyObject *pattern, PyObject *indices)
     if (writer == NULL) {
         return NULL;
     }
-    _PyUnicodeWriter *_writer = (_PyUnicodeWriter *)(writer);
-
-#define WRITE_BLOCK(i, j)       _WRITE_BLOCK_OR(_writer, pattern, i, j, goto abort)
-#define WRITE_ATOMIC_BEGIN()    _WRITE_ASCII_OR(_writer, "(?>.*?", 6, goto abort)
-#define WRITE_ATOMIC_END()      _WRITE_CHAR_OR(_writer, ')', goto abort)
-
+#define WRITE_BLOCK(i, j)       _WRITE_BLOCK_OR(writer, pattern, (i), (j), goto abort)
+#define WRITE_ATOMIC_BEGIN()    _WRITE_ASCII_OR(writer, "(?>.*?", 6, goto abort)
+#define WRITE_ATOMIC_END()      _WRITE_CHAR_OR(writer, ')', goto abort)
     WRITE_BLOCK(i, j);  // write stuff before '*' if needed
     i = j + 1;              // jump after the '*'
     for (Py_ssize_t k = 1; k < m; ++k) {
@@ -533,15 +551,15 @@ process_wildcards(PyObject *pattern, PyObject *indices)
             goto abort;
         }
         assert(i < j);
-        // atomic group begin
+        // write the atomic RE group
         WRITE_ATOMIC_BEGIN();
         WRITE_BLOCK(i, j);
         WRITE_ATOMIC_END();
         i = j + 1;
     }
     // handle the last group
-    _WRITE_ASCII_OR(_writer, ".*", 2, goto abort);
-    WRITE_BLOCK(i, n); // write the remaining substring
+    _WRITE_ASCII_OR(writer, ".*", 2, goto abort);
+    WRITE_BLOCK(i, n); // write the remaining substring (if non-empty)
 #undef WRITE_BLOCK
 #undef WRITE_ATOMIC_END
 #undef WRITE_ATOMIC_BEGIN

From 3f075bbc2233e98fb49d68965e9a4bed27607199 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?=
 <10796600+picnixz@users.noreply.github.com>
Date: Thu, 11 Jul 2024 14:46:58 +0200
Subject: [PATCH 35/97] remove fnmatch(3) detection

---
 configure.ac | 24 ------------------------
 1 file changed, 24 deletions(-)

diff --git a/configure.ac b/configure.ac
index 6093c994bd13af..bd1440fdd5d6df 100644
--- a/configure.ac
+++ b/configure.ac
@@ -3854,30 +3854,6 @@ if test "$ac_cv_c_complex_supported" = "yes"; then
               [Defined if _Complex C type is available.])
 fi
 
-# check for fnmatch(3) support
-#
-# We test for the plain POSIX implementation (case-sensitive match).
-#
-# To ensure that the implementation of fnmatch(3) is compliant
-# we run some tests to make sure that everything works well.
-#
-# Note that MSVC does not support fnmatch(3).
-AC_CACHE_CHECK([for case-sensititve fnmatch(3)], [ac_cv_fnmatch_supported],
-[AC_RUN_IFELSE(
-  [AC_LANG_PROGRAM([@%:@include <fnmatch.h>], [[
-    exit(!(
-        fnmatch("a*", "abc", 0) != FNM_NOMATCH &&
-        fnmatch("a*", "Abc", 0) == FNM_NOMATCH
-    ));
-  ]])], [ac_cv_fnmatch_supported=yes],
-  [ac_cv_fnmatch_supported=no],
-  [ac_cv_fnmatch_supported=no]
-)])
-if test "$ac_cv_fnmatch_supported" = "yes"; then
-  AC_DEFINE([Py_HAVE_FNMATCH], [1],
-            [Defined if case-sensitive fnmatch(3) is supported.])
-fi
-
 # check for systems that require aligned memory access
 AC_CACHE_CHECK([aligned memory access is required], [ac_cv_aligned_required],
 [AC_RUN_IFELSE([AC_LANG_SOURCE([[

From c78a813b0e988012718ce1f83ca1edb84d5aaecd Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?=
 <10796600+picnixz@users.noreply.github.com>
Date: Thu, 11 Jul 2024 14:47:42 +0200
Subject: [PATCH 36/97] remove fnmatch(3) layout

---
 Makefile.pre.in                   |   1 -
 Modules/Setup.bootstrap.in        |   2 +-
 Modules/_fnmatch/_fnmatchmodule.c |  39 +-----
 Modules/_fnmatch/_fnmatchmodule.h |  70 +----------
 Modules/_fnmatch/posix.c          | 191 ------------------------------
 5 files changed, 11 insertions(+), 292 deletions(-)
 delete mode 100644 Modules/_fnmatch/posix.c

diff --git a/Makefile.pre.in b/Makefile.pre.in
index ec99023f36b2b3..b751da584fb956 100644
--- a/Makefile.pre.in
+++ b/Makefile.pre.in
@@ -354,7 +354,6 @@ FNMATCH_H=	Modules/_fnmatch/_fnmatchmodule.h
 
 FNMATCH_OBJS=	\
 		Modules/_fnmatch/_fnmatchmodule.o \
-		Modules/_fnmatch/posix.o \
 		Modules/_fnmatch/regex.o \
 		Modules/_fnmatch/translate.o
 
diff --git a/Modules/Setup.bootstrap.in b/Modules/Setup.bootstrap.in
index c54cd207aec57d..4001650e77682c 100644
--- a/Modules/Setup.bootstrap.in
+++ b/Modules/Setup.bootstrap.in
@@ -35,7 +35,7 @@ _stat _stat.c
 _symtable symtablemodule.c
 
 # miscellaneous accelerators
-_fnmatch _fnmatch/_fnmatchmodule.c _fnmatch/posix.c _fnmatch/regex.c _fnmatch/translate.c
+_fnmatch _fnmatch/_fnmatchmodule.c _fnmatch/regex.c _fnmatch/translate.c
 
 # for systems without $HOME env, used by site._getuserbase()
 @MODULE_PWD_TRUE@pwd pwdmodule.c
diff --git a/Modules/_fnmatch/_fnmatchmodule.c b/Modules/_fnmatch/_fnmatchmodule.c
index 77afbb9f819272..d397785172eee8 100644
--- a/Modules/_fnmatch/_fnmatchmodule.c
+++ b/Modules/_fnmatch/_fnmatchmodule.c
@@ -174,28 +174,13 @@ static PyObject *
 _fnmatch_filter_impl(PyObject *module, PyObject *names, PyObject *pat)
 /*[clinic end generated code: output=7f11aa68436d05fc input=1d233174e1c4157a]*/
 {
-#if defined(Py_HAVE_FNMATCH) && !defined(Py_USE_FNMATCH_FALLBACK)
-    // Note that the Python implementation of fnmatch.filter() does not
-    // call os.fspath() on the names being matched, whereas it does on NT.
-    if (PyBytes_Check(pat)) {
-        const char *pattern = PyBytes_AS_STRING(pat);
-        return _Py_posix_fnmatch_encoded_filter_cached(pattern, names);
-    }
-    if (PyUnicode_Check(pat)) {
-        const char *pattern = PyUnicode_AsUTF8(pat);
-        return _Py_posix_fnmatch_unicode_filter_cached(pattern, names);
-    }
-    PyErr_SetString(PyExc_TypeError, INVALID_PATTERN_TYPE);
-    return NULL;
-#else
     PyObject *matcher = get_matcher_function(module, pat);
     if (matcher == NULL) {
         return NULL;
     }
-    PyObject *result = _Py_regex_fnmatch_filter(matcher, names);
+    PyObject *result = _Py_fnmatch_filter(matcher, names);
     Py_DECREF(matcher);
     return result;
-#endif
 }
 
 /*[clinic input]
@@ -240,29 +225,13 @@ static int
 _fnmatch_fnmatchcase_impl(PyObject *module, PyObject *name, PyObject *pat)
 /*[clinic end generated code: output=4d1283b1b1fc7cb8 input=b02a6a5c8c5a46e2]*/
 {
-#if defined(Py_HAVE_FNMATCH) && !defined(Py_USE_FNMATCH_FALLBACK)
-    // This function does not transform path-like objects, nor does it
-    // case-normalize 'name' or 'pattern' (whether it is the Python or
-    // the C implementation).
-    if (PyBytes_Check(pat)) {
-        const char *pattern = PyBytes_AS_STRING(pat);
-        return _Py_posix_fnmatch_encoded_cached(pattern, name);
-    }
-    if (PyUnicode_Check(pat)) {
-        const char *pattern = PyUnicode_AsUTF8(pat);
-        return _Py_posix_fnmatch_unicode_cached(pattern, name);
-    }
-    PyErr_SetString(PyExc_TypeError, INVALID_PATTERN_TYPE);
-    return -1;
-#else
     PyObject *matcher = get_matcher_function(module, pat);
     if (matcher == NULL) {
         return -1;
     }
-    int res = _Py_regex_fnmatch_generic(matcher, name);
+    int res = _Py_fnmatch_fnmatch(matcher, name);
     Py_DECREF(matcher);
     return res;
-#endif
 }
 
 /*[clinic input]
@@ -284,7 +253,7 @@ _fnmatch_translate_impl(PyObject *module, PyObject *pattern)
             return NULL;
         }
         // translated regular expression as a str object
-        PyObject *str_expr = _Py_regex_translate(module, unicode);
+        PyObject *str_expr = _Py_fnmatch_translate(module, unicode);
         Py_DECREF(unicode);
         if (str_expr == NULL) {
             return NULL;
@@ -294,7 +263,7 @@ _fnmatch_translate_impl(PyObject *module, PyObject *pattern)
         return expr;
     }
     else if (PyUnicode_Check(pattern)) {
-        return _Py_regex_translate(module, pattern);
+        return _Py_fnmatch_translate(module, pattern);
     }
     else {
         PyErr_SetString(PyExc_TypeError, INVALID_PATTERN_TYPE);
diff --git a/Modules/_fnmatch/_fnmatchmodule.h b/Modules/_fnmatch/_fnmatchmodule.h
index 9a2128e6e005ae..cbedaccf80c905 100644
--- a/Modules/_fnmatch/_fnmatchmodule.h
+++ b/Modules/_fnmatch/_fnmatchmodule.h
@@ -1,9 +1,5 @@
 /*
-* C accelerator for the 'fnmatch' module (POSIX only).
- *
- * Most functions expect string or bytes instances, and thus the Python
- * implementation should first pre-process path-like objects, possibly
- * applying normalizations depending on the platform if needed.
+ * C accelerator for the 'fnmatch' module.
  */
 
 #ifndef _FNMATCHMODULE_H
@@ -11,18 +7,6 @@
 
 #include "Python.h"
 
-#undef Py_USE_FNMATCH_FALLBACK
-/*
- * For now, only test the C acceleration of the Python implementation.
- *
- * TODO(picnixz): Currently, I don't know how to handle backslashes
- * TODO(picnixz): in fnmatch(3) so that they are treated correctly
- * TODO(picnixz): depending on whether the string was a raw string
- * TODO(picnixz): or not. To see the bug, uncomment the following
- * TODO(picnixz): macro and run the tests.
- */
-#define Py_USE_FNMATCH_FALLBACK 1
-
 typedef struct {
     PyObject *py_module;    // 'fnmatch' module
     PyObject *re_module;    // 're' module
@@ -40,50 +24,6 @@ get_fnmatchmodulestate_state(PyObject *module)
     return (fnmatchmodule_state *)state;
 }
 
-#if defined(Py_HAVE_FNMATCH) && !defined(Py_USE_FNMATCH_FALLBACK)
-/*
- * Construct a list of filtered names using fnmatch(3).
- */
-extern PyObject *
-_Py_posix_fnmatch_encoded_filter(PyObject *pattern, PyObject *names);
-/* Same as _Py_posix_fnmatch_encoded_filter() but for unicode inputs. */
-extern PyObject *
-_Py_posix_fnmatch_unicode_filter(PyObject *pattern, PyObject *names);
-
-/* cached 'pattern' version of _Py_posix_fnmatch_encoded_filter() */
-extern PyObject *
-_Py_posix_fnmatch_encoded_filter_cached(const char *pattern, PyObject *names);
-/* cached 'pattern' version of _Py_posix_fnmatch_unicode_filter() */
-extern PyObject *
-_Py_posix_fnmatch_unicode_filter_cached(const char *pattern, PyObject *names);
-
-/*
- * Perform a case-sensitive match using fnmatch(3).
- *
- * Parameters
- *
- *      pattern  A UNIX shell pattern.
- *      string   The string to match (bytes object).
- *
- * Returns 1 if the 'string' matches the 'pattern' and 0 otherwise.
- *
- * Returns -1 if (1) 'string' is not a `bytes` object, and
- * sets a TypeError exception, or (2) something went wrong.
- */
-extern int
-_Py_posix_fnmatch_encoded(PyObject *pattern, PyObject *string);
-/* Same as _Py_posix_fnmatch_encoded() but for unicode inputs. */
-extern int
-_Py_posix_fnmatch_unicode(PyObject *pattern, PyObject *string);
-
-/* cached 'pattern' version of _Py_posix_fnmatch_encoded() */
-extern int
-_Py_posix_fnmatch_encoded_cached(const char *pattern, PyObject *names);
-/* cached 'pattern' version of _Py_posix_fnmatch_encoded() */
-extern int
-_Py_posix_fnmatch_unicode_cached(const char *pattern, PyObject *names);
-#endif
-
 /*
  * Test whether a name matches a compiled RE pattern.
  *
@@ -98,7 +38,7 @@ _Py_posix_fnmatch_unicode_cached(const char *pattern, PyObject *names);
  * and sets a TypeError exception, or (2) something went wrong.
  */
 extern int
-_Py_regex_fnmatch_generic(PyObject *matcher, PyObject *string);
+_Py_fnmatch_fnmatch(PyObject *matcher, PyObject *string);
 
 /*
  * Perform a case-sensitive match using compiled RE patterns.
@@ -111,12 +51,14 @@ _Py_regex_fnmatch_generic(PyObject *matcher, PyObject *string);
  * Returns a list of matched names, or NULL if an error occurred.
  */
 extern PyObject *
-_Py_regex_fnmatch_filter(PyObject *matcher, PyObject *names);
+_Py_fnmatch_filter(PyObject *matcher, PyObject *names);
 
 /*
  * C accelerator for translating UNIX shell patterns into RE patterns.
+ *
+ * Note: this is the C implementation of fnmatch.translate().
  */
 extern PyObject *
-_Py_regex_translate(PyObject *module, PyObject *pattern);
+_Py_fnmatch_translate(PyObject *module, PyObject *pattern);
 
 #endif // _FNMATCHMODULE_H
diff --git a/Modules/_fnmatch/posix.c b/Modules/_fnmatch/posix.c
deleted file mode 100644
index 45fe88b5440f74..00000000000000
--- a/Modules/_fnmatch/posix.c
+++ /dev/null
@@ -1,191 +0,0 @@
-#include "Python.h"
-
-#include "_fnmatchmodule.h" // for pre-declarations
-
-#if defined(Py_HAVE_FNMATCH) && !defined(Py_USE_FNMATCH_FALLBACK)
-
-#include <fnmatch.h>        // for fnmatch(3)
-
-#define INVALID_PATTERN_TYPE "pattern must be a %s object, got %.200s"
-#define INVALID_NAME_TYPE    "name must be a %s object, got %.200s"
-
-// ==== Helper declarations ===================================================
-
-/*
- * Return a bytes object as a "const char *", or NULL on error.
- *
- * The 'error' message is either INVALID_PATTERN_TYPE or INVALID_NAME_TYPE,
- * and is used to set a TypeError if 'arg' is of incorrect type.
- */
-static inline const char *
-from_encoded(PyObject *arg, const char *error);
-
-/*
- * Return a str object as a "const char *", or NULL on error.
- *
- * The 'error' message is either INVALID_PATTERN_TYPE or INVALID_NAME_TYPE
- * and is used to set a TypeError if 'arg' is of incorrect type.
- */
-static inline const char *
-from_unicode(PyObject *arg, const char *error);
-
-/* The type of from_encoded() or from_unicode() conversion functions. */
-typedef const char *(*Converter)(PyObject *string, const char *error);
-
-static inline PyObject *
-_posix_fnmatch_filter(PyObject *pattern, PyObject *names, Converter converter);
-
-/* cached 'pattern' version of _posix_fnmatch_filter()  */
-static /* not inline */ PyObject *
-_posix_fnmatch_filter_cached(const char *pattern, PyObject *names, Converter converter);
-
-// ==== API implementation ====================================================
-
-inline PyObject *
-_Py_posix_fnmatch_encoded_filter(PyObject *pattern, PyObject *names)
-{
-    return _posix_fnmatch_filter(pattern, names, &from_encoded);
-}
-
-inline PyObject *
-_Py_posix_fnmatch_unicode_filter(PyObject *pattern, PyObject *names)
-{
-    return _posix_fnmatch_filter(pattern, names, &from_unicode);
-}
-
-inline PyObject *
-_Py_posix_fnmatch_encoded_filter_cached(const char *pattern, PyObject *names)
-{
-    assert(pattern != NULL);
-    return _posix_fnmatch_filter_cached(pattern, names, &from_encoded);
-}
-
-inline PyObject *
-_Py_posix_fnmatch_unicode_filter_cached(const char *pattern, PyObject *names)
-{
-    assert(pattern != NULL);
-    return _posix_fnmatch_filter_cached(pattern, names, &from_unicode);
-}
-
-inline int
-_Py_posix_fnmatch_encoded(PyObject *pattern, PyObject *string)
-{
-    const char *p = from_encoded(pattern, INVALID_PATTERN_TYPE);
-    if (p == NULL) {
-        return -1;
-    }
-    return _Py_posix_fnmatch_encoded_cached(p, string);
-}
-
-inline int
-_Py_posix_fnmatch_unicode(PyObject *pattern, PyObject *string)
-{
-    const char *p = from_unicode(pattern, INVALID_PATTERN_TYPE);
-    if (p == NULL) {
-        return -1;
-    }
-    return _Py_posix_fnmatch_unicode_cached(p, string);
-}
-
-#define PROCESS_MATCH_RESULT(r) \
-    do { \
-        int res = (r); \
-        if (res < 0) { \
-            return res; \
-        } \
-        return res != FNM_NOMATCH; \
-    } while (0)
-
-inline int
-_Py_posix_fnmatch_encoded_cached(const char *pattern, PyObject *string)
-{
-    assert(pattern != NULL);
-    const char *s = from_encoded(string, INVALID_NAME_TYPE);
-    if (s == NULL) {
-        return -1;
-    }
-    PROCESS_MATCH_RESULT(fnmatch(pattern, s, 0));
-}
-
-inline int
-_Py_posix_fnmatch_unicode_cached(const char *pattern, PyObject *string)
-{
-    assert(pattern != NULL);
-    const char *s = from_unicode(string, INVALID_NAME_TYPE);
-    if (s == NULL) {
-        return -1;
-    }
-    PROCESS_MATCH_RESULT(fnmatch(pattern, s, 0));
-}
-
-#undef PROCESS_MATCH_RESULT
-
-// ==== Helper implementations ================================================
-
-#define GENERATE_CONVERTER(function, predicate, converter, expecting) \
-    static inline const char * \
-    function(PyObject *arg, const char *error) \
-    { \
-        if (!predicate(arg)) { \
-            PyErr_Format(PyExc_TypeError, error, expecting, Py_TYPE(arg)->tp_name); \
-            return NULL; \
-        } \
-        return converter(arg); \
-    }
-GENERATE_CONVERTER(from_encoded, PyBytes_Check, PyBytes_AS_STRING, "bytes")
-GENERATE_CONVERTER(from_unicode, PyUnicode_Check, PyUnicode_AsUTF8, "str")
-#undef GENERATE_CONVERTER
-
-static inline PyObject *
-_posix_fnmatch_filter(PyObject *pattern, PyObject *names, Converter converter)
-{
-    const char *p = converter(pattern, INVALID_PATTERN_TYPE);
-    if (p == NULL) {
-        return NULL;
-    }
-    return _posix_fnmatch_filter_cached(p, names, converter);
-}
-
-static PyObject *
-_posix_fnmatch_filter_cached(const char *pattern, PyObject *names, Converter converter)
-{
-    assert(pattern != NULL);
-    PyObject *iter = PyObject_GetIter(names);
-    if (iter == NULL) {
-        return NULL;
-    }
-    PyObject *res = PyList_New(0);
-    if (res == NULL) {
-        Py_DECREF(iter);
-        return NULL;
-    }
-    PyObject *name = NULL;
-    while ((name = PyIter_Next(iter))) {
-        const char *n = converter(name, INVALID_NAME_TYPE);
-        if (n == NULL) {
-            goto abort;
-        }
-        if (fnmatch(pattern, n, 0) != FNM_NOMATCH) {
-            if (PyList_Append(res, name) < 0) {
-                goto abort;
-            }
-        }
-        Py_DECREF(name);
-        if (PyErr_Occurred()) {
-            Py_DECREF(res);
-            Py_DECREF(iter);
-            return NULL;
-        }
-    }
-    Py_DECREF(iter);
-    return res;
-abort:
-    Py_XDECREF(name);
-    Py_DECREF(iter);
-    Py_DECREF(res);
-    return NULL;
-}
-
-#undef INVALID_NAME_TYPE
-#undef INVALID_PATTERN_TYPE
-#endif

From 2e166cce9adaba4a0cf5116c0523d7049486325f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?=
 <10796600+picnixz@users.noreply.github.com>
Date: Thu, 11 Jul 2024 14:49:13 +0200
Subject: [PATCH 37/97] update configuration scripts

---
 configure     | 54 ---------------------------------------------------
 pyconfig.h.in |  3 ---
 2 files changed, 57 deletions(-)

diff --git a/configure b/configure
index 0fefae0032587f..7d3934825cfd6e 100755
--- a/configure
+++ b/configure
@@ -14044,60 +14044,6 @@ printf "%s\n" "#define Py_HAVE_C_COMPLEX 1" >>confdefs.h
 
 fi
 
-# check for fnmatch(3) support
-#
-# We test for the plain POSIX implementation (case-sensitive match).
-#
-# To ensure that the implementation of fnmatch(3) is compliant
-# we run some tests to make sure that everything works well.
-#
-# Note that MSVC does not support fnmatch(3).
-{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for case-sensititve fnmatch(3)" >&5
-printf %s "checking for case-sensititve fnmatch(3)... " >&6; }
-if test ${ac_cv_fnmatch_supported+y}
-then :
-  printf %s "(cached) " >&6
-else $as_nop
-  if test "$cross_compiling" = yes
-then :
-  ac_cv_fnmatch_supported=no
-
-else $as_nop
-  cat confdefs.h - <<_ACEOF >conftest.$ac_ext
-/* end confdefs.h.  */
-#include <fnmatch.h>
-int
-main (void)
-{
-
-    exit(!(
-        fnmatch("a*", "abc", 0) != FNM_NOMATCH &&
-        fnmatch("a*", "Abc", 0) == FNM_NOMATCH
-    ));
-
-  ;
-  return 0;
-}
-_ACEOF
-if ac_fn_c_try_run "$LINENO"
-then :
-  ac_cv_fnmatch_supported=yes
-else $as_nop
-  ac_cv_fnmatch_supported=no
-fi
-rm -f core *.core core.conftest.* gmon.out bb.out conftest$ac_exeext \
-  conftest.$ac_objext conftest.beam conftest.$ac_ext
-fi
-
-fi
-{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_fnmatch_supported" >&5
-printf "%s\n" "$ac_cv_fnmatch_supported" >&6; }
-if test "$ac_cv_fnmatch_supported" = "yes"; then
-
-printf "%s\n" "#define Py_HAVE_FNMATCH 1" >>confdefs.h
-
-fi
-
 # check for systems that require aligned memory access
 { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking aligned memory access is required" >&5
 printf %s "checking aligned memory access is required... " >&6; }
diff --git a/pyconfig.h.in b/pyconfig.h.in
index 0997722334867c..8fbba7ed3b949e 100644
--- a/pyconfig.h.in
+++ b/pyconfig.h.in
@@ -1689,9 +1689,6 @@
 /* Defined if _Complex C type is available. */
 #undef Py_HAVE_C_COMPLEX
 
-/* Defined if case-sensitive fnmatch(3) is supported. */
-#undef Py_HAVE_FNMATCH
-
 /* Define if year with century should be normalized for strftime. */
 #undef Py_NORMALIZE_CENTURY
 

From 14cd1fde9bdb16bcf295d07ce6408df88bb17bb1 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?=
 <10796600+picnixz@users.noreply.github.com>
Date: Thu, 11 Jul 2024 14:51:31 +0200
Subject: [PATCH 38/97] update function names

---
 Modules/_fnmatch/regex.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/Modules/_fnmatch/regex.c b/Modules/_fnmatch/regex.c
index b6715bb33283b0..9ebf0c90dbf746 100644
--- a/Modules/_fnmatch/regex.c
+++ b/Modules/_fnmatch/regex.c
@@ -5,7 +5,7 @@
 // ==== API implementation ====================================================
 
 inline int
-_Py_regex_fnmatch_generic(PyObject *matcher, PyObject *name)
+_Py_fnmatch_fnmatch(PyObject *matcher, PyObject *name)
 {
     // If 'name' is of incorrect type, it will be detected when calling
     // the matcher function (we emulate 're.compile(...).match(name)').
@@ -20,7 +20,7 @@ _Py_regex_fnmatch_generic(PyObject *matcher, PyObject *name)
 }
 
 PyObject *
-_Py_regex_fnmatch_filter(PyObject *matcher, PyObject *names)
+_Py_fnmatch_filter(PyObject *matcher, PyObject *names)
 {
     assert(PyCallable_Check(matcher));
     PyObject *iter = PyObject_GetIter(names);
@@ -36,7 +36,7 @@ _Py_regex_fnmatch_filter(PyObject *matcher, PyObject *names)
 
     PyObject *name = NULL;
     while ((name = PyIter_Next(iter))) {
-        int rc = _Py_regex_fnmatch_generic(matcher, name);
+        int rc = _Py_fnmatch_fnmatch(matcher, name);
         if (rc < 0) {
             assert(PyErr_Occurred());
             goto abort;

From 2a718f4d71b0733ab04ff29a6e950d6b67329c27 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?=
 <10796600+picnixz@users.noreply.github.com>
Date: Thu, 11 Jul 2024 16:57:28 +0200
Subject: [PATCH 39/97] make the C interface equivalent to the Python one

---
 Modules/_fnmatch/_fnmatchmodule.c | 99 +++++++++++++++++++++----------
 Modules/_fnmatch/_fnmatchmodule.h |  9 ++-
 Modules/_fnmatch/regex.c          | 56 +++++++++++++++--
 3 files changed, 125 insertions(+), 39 deletions(-)

diff --git a/Modules/_fnmatch/_fnmatchmodule.c b/Modules/_fnmatch/_fnmatchmodule.c
index d397785172eee8..0363fb98830883 100644
--- a/Modules/_fnmatch/_fnmatchmodule.c
+++ b/Modules/_fnmatch/_fnmatchmodule.c
@@ -26,7 +26,7 @@ fnmatchmodule_get_matcher_function(PyObject *module, PyObject *pattern)
     }
     fnmatchmodule_state *st = get_fnmatchmodulestate_state(module);
     // compile the pattern
-    PyObject *compiled = _PyObject_CallMethod(st->re_module, &_Py_ID(compile), "O", expr);
+    PyObject *compiled = PyObject_CallMethodOneArg(st->re_module, &_Py_ID(compile), expr);
     Py_DECREF(expr);
     if (compiled == NULL) {
         return NULL;
@@ -62,9 +62,7 @@ fnmatchmodule_load_translator(PyObject *module, fnmatchmodule_state *st)
     if (maxsize == NULL) {
         return -1;
     }
-    PyObject *args[] = {NULL, maxsize, Py_True};
-    size_t nargsf = 2 | PY_VECTORCALL_ARGUMENTS_OFFSET;
-    PyObject *decorator = PyObject_Vectorcall(st->lru_cache, args + 1, nargsf, NULL);
+    PyObject *decorator = PyObject_CallFunctionObjArgs(st->lru_cache, maxsize, Py_True, NULL);
     Py_DECREF(maxsize);
     if (decorator == NULL) {
         return -1;
@@ -86,35 +84,30 @@ fnmatchmodule_load_translator(PyObject *module, fnmatchmodule_state *st)
 static inline PyObject *
 get_matcher_function(PyObject *module, PyObject *pattern)
 {
-    assert(module != NULL);
-    assert(pattern != NULL);
     fnmatchmodule_state *st = get_fnmatchmodulestate_state(module);
     assert(st->translator != NULL);
-    size_t nargsf = 1 | PY_VECTORCALL_ARGUMENTS_OFFSET;
-    return PyObject_Vectorcall(st->translator, &pattern, nargsf, NULL);
+    return PyObject_CallOneArg(st->translator, pattern);
 }
 
 // ==== Module state functions ================================================
 
-#define IMPORT_MODULE(state, attribute, name) \
+static int
+fnmatchmodule_exec(PyObject *module)
+{
+#define IMPORT_MODULE(attribute, name) \
     do { \
-        state->attribute = NULL; \
-        state->attribute = PyImport_ImportModule((name)); \
-        if (state->attribute == NULL) { \
+        st->attribute = NULL; \
+        st->attribute = PyImport_ImportModule((name)); \
+        if (st->attribute == NULL) { \
             return -1; \
         } \
     } while (0)
 
-static int
-fnmatchmodule_exec(PyObject *module)
-{
     fnmatchmodule_state *st = get_fnmatchmodulestate_state(module);
-    st->py_module = NULL;
-    IMPORT_MODULE(st, py_module, "fnmatch");
-    st->os_module = NULL;
-    IMPORT_MODULE(st, os_module, "os");
-    st->re_module = NULL;
-    IMPORT_MODULE(st, re_module, "re");
+    IMPORT_MODULE(os_module, "os");
+    IMPORT_MODULE(posixpath_module, "posixpath");
+    IMPORT_MODULE(re_module, "re");
+#undef IMPORT_MODULE
     st->lru_cache = NULL;
     if (fnmatchmodule_load_lru_cache(module, st) < 0) {
         return -1;
@@ -125,14 +118,13 @@ fnmatchmodule_exec(PyObject *module)
     }
     return 0;
 }
-#undef IMPORT_MODULE
 
 static int
 fnmatchmodule_traverse(PyObject *m, visitproc visit, void *arg)
 {
     fnmatchmodule_state *st = get_fnmatchmodulestate_state(m);
-    Py_VISIT(st->py_module);
     Py_VISIT(st->os_module);
+    Py_VISIT(st->posixpath_module);
     Py_VISIT(st->re_module);
     Py_VISIT(st->lru_cache);
     Py_VISIT(st->translator);
@@ -143,8 +135,8 @@ static int
 fnmatchmodule_clear(PyObject *m)
 {
     fnmatchmodule_state *st = get_fnmatchmodulestate_state(m);
-    Py_CLEAR(st->py_module);
     Py_CLEAR(st->os_module);
+    Py_CLEAR(st->posixpath_module);
     Py_CLEAR(st->re_module);
     Py_CLEAR(st->lru_cache);
     Py_CLEAR(st->translator);
@@ -174,12 +166,40 @@ static PyObject *
 _fnmatch_filter_impl(PyObject *module, PyObject *names, PyObject *pat)
 /*[clinic end generated code: output=7f11aa68436d05fc input=1d233174e1c4157a]*/
 {
-    PyObject *matcher = get_matcher_function(module, pat);
+    fnmatchmodule_state *st = get_fnmatchmodulestate_state(module);
+    PyObject *os_path = PyObject_GetAttr(st->os_module, &_Py_ID(path));
+    if (os_path == NULL) {
+        return NULL;
+    }
+    // filter() always calls os.path.normcase() on the pattern,
+    // but not on the names being matched if os.path is posixpath
+    // XXX: maybe this should be changed in Python as well?
+    // Note: the Python implementation uses the *runtime* os.path.normcase.
+    PyObject *normcase = PyObject_GetAttr(os_path, &_Py_ID(normcase));
+    if (normcase == NULL) {
+        Py_DECREF(os_path);
+        return NULL;
+    }
+    PyObject *patobj = PyObject_CallOneArg(normcase, pat);
+    if (patobj == NULL) {
+        Py_DECREF(normcase);
+        Py_DECREF(os_path);
+        return NULL;
+    }
+    int isposix = Py_Is(os_path, st->posixpath_module);
+    Py_DECREF(os_path);
+    // the matcher is cached with respect to the *normalized* pattern
+    PyObject *matcher = get_matcher_function(module, patobj);
+    Py_DECREF(patobj);
     if (matcher == NULL) {
+        Py_DECREF(normcase);
         return NULL;
     }
-    PyObject *result = _Py_fnmatch_filter(matcher, names);
+    PyObject *result = isposix
+        ? _Py_fnmatch_filter(matcher, names)
+        : _Py_fnmatch_filter_normalized(matcher, names, normcase);
     Py_DECREF(matcher);
+    Py_DECREF(normcase);
     return result;
 }
 
@@ -196,15 +216,31 @@ _fnmatch_fnmatch_impl(PyObject *module, PyObject *name, PyObject *pat)
 /*[clinic end generated code: output=b4cd0bd911e8bc93 input=c45e0366489540b8]*/
 {
     fnmatchmodule_state *st = get_fnmatchmodulestate_state(module);
-    PyObject *res = _PyObject_CallMethod(st->py_module, &_Py_ID(fnmatch), "OO", name, pat);
-    if (res == NULL) {
+    // use the runtime 'os.path' value and not a cached one
+    PyObject *os_path = PyObject_GetAttr(st->os_module, &_Py_ID(path));
+    if (os_path == NULL) {
+        return -1;
+    }
+    PyObject *normcase = PyObject_GetAttr(os_path, &_Py_ID(normcase));
+    Py_DECREF(os_path);
+    if (normcase == NULL) {
+        return -1;
+    }
+    // apply case normalization on both arguments
+    PyObject *nameobj = PyObject_CallOneArg(normcase, name);
+    if (nameobj == NULL) {
+        Py_DECREF(normcase);
         return -1;
     }
-    int matching = PyLong_AsLong(res);
-    if (matching < 0) {
+    PyObject *patobj = PyObject_CallOneArg(normcase, pat);
+    Py_DECREF(normcase);
+    if (patobj == NULL) {
+        Py_DECREF(nameobj);
         return -1;
     }
-    Py_DECREF(res);
+    int matching = _fnmatch_fnmatchcase_impl(module, nameobj, patobj);
+    Py_DECREF(patobj);
+    Py_DECREF(nameobj);
     return matching;
 }
 
@@ -225,6 +261,7 @@ static int
 _fnmatch_fnmatchcase_impl(PyObject *module, PyObject *name, PyObject *pat)
 /*[clinic end generated code: output=4d1283b1b1fc7cb8 input=b02a6a5c8c5a46e2]*/
 {
+    // fnmatchcase() does not apply any case normalization on the inputs
     PyObject *matcher = get_matcher_function(module, pat);
     if (matcher == NULL) {
         return -1;
diff --git a/Modules/_fnmatch/_fnmatchmodule.h b/Modules/_fnmatch/_fnmatchmodule.h
index cbedaccf80c905..b9601e59b6b9fa 100644
--- a/Modules/_fnmatch/_fnmatchmodule.h
+++ b/Modules/_fnmatch/_fnmatchmodule.h
@@ -8,9 +8,9 @@
 #include "Python.h"
 
 typedef struct {
-    PyObject *py_module;    // 'fnmatch' module
-    PyObject *re_module;    // 're' module
-    PyObject *os_module;    // 'os' module
+    PyObject *os_module;            // 'os' module
+    PyObject *posixpath_module;     // 'posixpath' module
+    PyObject *re_module;            // 're' module
 
     PyObject *lru_cache;    // the LRU cache decorator
     PyObject *translator;   // the translation unit whose calls are cached
@@ -52,6 +52,9 @@ _Py_fnmatch_fnmatch(PyObject *matcher, PyObject *string);
  */
 extern PyObject *
 _Py_fnmatch_filter(PyObject *matcher, PyObject *names);
+/* same as _Py_fnmatch_filter() but calls os.path.normcase() on each name */
+extern PyObject *
+_Py_fnmatch_filter_normalized(PyObject *matcher, PyObject *names, PyObject *normcase);
 
 /*
  * C accelerator for translating UNIX shell patterns into RE patterns.
diff --git a/Modules/_fnmatch/regex.c b/Modules/_fnmatch/regex.c
index 9ebf0c90dbf746..73f4a338927bb1 100644
--- a/Modules/_fnmatch/regex.c
+++ b/Modules/_fnmatch/regex.c
@@ -10,11 +10,11 @@ _Py_fnmatch_fnmatch(PyObject *matcher, PyObject *name)
     // If 'name' is of incorrect type, it will be detected when calling
     // the matcher function (we emulate 're.compile(...).match(name)').
     assert(PyCallable_Check(matcher));
-    PyObject *match = PyObject_CallFunction(matcher, "O", name);
+    PyObject *match = PyObject_CallOneArg(matcher, name);
     if (match == NULL) {
         return -1;
     }
-    int matching = match == Py_None ? 0 : 1;
+    int matching = Py_IsNone(match) ? 0 : 1;
     Py_DECREF(match);
     return matching;
 }
@@ -48,15 +48,61 @@ _Py_fnmatch_filter(PyObject *matcher, PyObject *names)
         }
         Py_DECREF(name);
         if (PyErr_Occurred()) {
-            Py_DECREF(res);
-            Py_DECREF(iter);
-            return NULL;
+            goto error;
         }
     }
     Py_DECREF(iter);
     return res;
 abort:
     Py_XDECREF(name);
+error:
+    Py_DECREF(iter);
+    Py_DECREF(res);
+    return NULL;
+}
+
+PyObject *
+_Py_fnmatch_filter_normalized(PyObject *matcher, PyObject *names, PyObject *normcase)
+{
+    assert(PyCallable_Check(matcher));
+    PyObject *iter = PyObject_GetIter(names);
+    if (iter == NULL) {
+        return NULL;
+    }
+
+    PyObject *res = PyList_New(0);
+    if (res == NULL) {
+        Py_DECREF(iter);
+        return NULL;
+    }
+
+    PyObject *name = NULL;
+    while ((name = PyIter_Next(iter))) {
+        PyObject *normalized = PyObject_CallOneArg(normcase, name);
+        if (normalized == NULL) {
+            goto abort;
+        }
+        int rc = _Py_fnmatch_fnmatch(matcher, normalized);
+        Py_DECREF(normalized);
+        if (rc < 0) {
+            assert(PyErr_Occurred());
+            goto abort;
+        }
+        if (rc == 1) {
+            if (PyList_Append(res, name) < 0) {
+                goto abort;
+            }
+        }
+        Py_DECREF(name);
+        if (PyErr_Occurred()) {
+            goto error;
+        }
+    }
+    Py_DECREF(iter);
+    return res;
+abort:
+    Py_XDECREF(name);
+error:
     Py_DECREF(iter);
     Py_DECREF(res);
     return NULL;

From c7a06854bdbb08219e25357c7e927f050cb8ed6e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?=
 <10796600+picnixz@users.noreply.github.com>
Date: Thu, 11 Jul 2024 16:57:44 +0200
Subject: [PATCH 40/97] fixups

---
 Modules/_fnmatch/_fnmatchmodule.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Modules/_fnmatch/_fnmatchmodule.c b/Modules/_fnmatch/_fnmatchmodule.c
index 0363fb98830883..ffc3c4ca23a6f3 100644
--- a/Modules/_fnmatch/_fnmatchmodule.c
+++ b/Modules/_fnmatch/_fnmatchmodule.c
@@ -1,5 +1,5 @@
 #include "Python.h"
-#include "pycore_call.h" // for _PyObject_CallMethod
+#include "pycore_call.h"
 
 #include "_fnmatchmodule.h"
 #include "clinic/_fnmatchmodule.c.h"

From 1340fd25735f011c33e48560ad21ec03ecbccddf Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?=
 <10796600+picnixz@users.noreply.github.com>
Date: Thu, 11 Jul 2024 16:57:50 +0200
Subject: [PATCH 41/97] update generated objects

---
 Include/internal/pycore_global_objects_fini_generated.h | 2 +-
 Include/internal/pycore_global_strings.h                | 2 +-
 Include/internal/pycore_runtime_init_generated.h        | 2 +-
 Include/internal/pycore_unicodeobject_generated.h       | 8 ++++----
 4 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/Include/internal/pycore_global_objects_fini_generated.h b/Include/internal/pycore_global_objects_fini_generated.h
index 44820e0ce13fad..99447e2dc06df3 100644
--- a/Include/internal/pycore_global_objects_fini_generated.h
+++ b/Include/internal/pycore_global_objects_fini_generated.h
@@ -951,7 +951,6 @@ _PyStaticObjects_CheckRefcnt(PyInterpreterState *interp) {
     _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(fix_imports));
     _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(flags));
     _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(flush));
-    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(fnmatch));
     _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(fold));
     _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(follow_symlinks));
     _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(format));
@@ -1105,6 +1104,7 @@ _PyStaticObjects_CheckRefcnt(PyInterpreterState *interp) {
     _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(nlocals));
     _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(node_depth));
     _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(node_offset));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(normcase));
     _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(ns));
     _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(nstype));
     _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(nt));
diff --git a/Include/internal/pycore_global_strings.h b/Include/internal/pycore_global_strings.h
index c21492376395e4..ff99456786f0d4 100644
--- a/Include/internal/pycore_global_strings.h
+++ b/Include/internal/pycore_global_strings.h
@@ -440,7 +440,6 @@ struct _Py_global_strings {
         STRUCT_FOR_ID(fix_imports)
         STRUCT_FOR_ID(flags)
         STRUCT_FOR_ID(flush)
-        STRUCT_FOR_ID(fnmatch)
         STRUCT_FOR_ID(fold)
         STRUCT_FOR_ID(follow_symlinks)
         STRUCT_FOR_ID(format)
@@ -594,6 +593,7 @@ struct _Py_global_strings {
         STRUCT_FOR_ID(nlocals)
         STRUCT_FOR_ID(node_depth)
         STRUCT_FOR_ID(node_offset)
+        STRUCT_FOR_ID(normcase)
         STRUCT_FOR_ID(ns)
         STRUCT_FOR_ID(nstype)
         STRUCT_FOR_ID(nt)
diff --git a/Include/internal/pycore_runtime_init_generated.h b/Include/internal/pycore_runtime_init_generated.h
index 9a99b3645fb717..3c07832b03e270 100644
--- a/Include/internal/pycore_runtime_init_generated.h
+++ b/Include/internal/pycore_runtime_init_generated.h
@@ -949,7 +949,6 @@ extern "C" {
     INIT_ID(fix_imports), \
     INIT_ID(flags), \
     INIT_ID(flush), \
-    INIT_ID(fnmatch), \
     INIT_ID(fold), \
     INIT_ID(follow_symlinks), \
     INIT_ID(format), \
@@ -1103,6 +1102,7 @@ extern "C" {
     INIT_ID(nlocals), \
     INIT_ID(node_depth), \
     INIT_ID(node_offset), \
+    INIT_ID(normcase), \
     INIT_ID(ns), \
     INIT_ID(nstype), \
     INIT_ID(nt), \
diff --git a/Include/internal/pycore_unicodeobject_generated.h b/Include/internal/pycore_unicodeobject_generated.h
index 83ece722c6fa9d..cfc503079aed57 100644
--- a/Include/internal/pycore_unicodeobject_generated.h
+++ b/Include/internal/pycore_unicodeobject_generated.h
@@ -1560,10 +1560,6 @@ _PyUnicode_InitStaticStrings(PyInterpreterState *interp) {
     _PyUnicode_InternStatic(interp, &string);
     assert(_PyUnicode_CheckConsistency(string, 1));
     assert(PyUnicode_GET_LENGTH(string) != 1);
-    string = &_Py_ID(fnmatch);
-    _PyUnicode_InternStatic(interp, &string);
-    assert(_PyUnicode_CheckConsistency(string, 1));
-    assert(PyUnicode_GET_LENGTH(string) != 1);
     string = &_Py_ID(fold);
     _PyUnicode_InternStatic(interp, &string);
     assert(_PyUnicode_CheckConsistency(string, 1));
@@ -2176,6 +2172,10 @@ _PyUnicode_InitStaticStrings(PyInterpreterState *interp) {
     _PyUnicode_InternStatic(interp, &string);
     assert(_PyUnicode_CheckConsistency(string, 1));
     assert(PyUnicode_GET_LENGTH(string) != 1);
+    string = &_Py_ID(normcase);
+    _PyUnicode_InternStatic(interp, &string);
+    assert(_PyUnicode_CheckConsistency(string, 1));
+    assert(PyUnicode_GET_LENGTH(string) != 1);
     string = &_Py_ID(ns);
     _PyUnicode_InternStatic(interp, &string);
     assert(_PyUnicode_CheckConsistency(string, 1));

From 2b6fe4f521db8a57aa50f64243ce033afb671e00 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?=
 <10796600+picnixz@users.noreply.github.com>
Date: Thu, 11 Jul 2024 17:05:04 +0200
Subject: [PATCH 42/97] reflect `__all__` ordering

---
 Lib/fnmatch.py | 61 ++++++++++++++++++++++++++------------------------
 1 file changed, 32 insertions(+), 29 deletions(-)

diff --git a/Lib/fnmatch.py b/Lib/fnmatch.py
index ffa15825954f5a..bfdbc78ffce3a6 100644
--- a/Lib/fnmatch.py
+++ b/Lib/fnmatch.py
@@ -16,35 +16,6 @@
 
 __all__ = ["filter", "fnmatch", "fnmatchcase", "translate"]
 
-def fnmatch(name, pat):
-    """Test whether FILENAME matches PATTERN.
-
-    Patterns are Unix shell style:
-
-    *       matches everything
-    ?       matches any single character
-    [seq]   matches any character in seq
-    [!seq]  matches any char not in seq
-
-    An initial period in FILENAME is not special.
-    Both FILENAME and PATTERN are first case-normalized
-    if the operating system requires it.
-    If you don't want this, use fnmatchcase(FILENAME, PATTERN).
-    """
-    name = os.path.normcase(name)
-    pat = os.path.normcase(pat)
-    return fnmatchcase(name, pat)
-
-@functools.lru_cache(maxsize=32768, typed=True)
-def _compile_pattern(pat):
-    if isinstance(pat, bytes):
-        pat_str = str(pat, 'ISO-8859-1')
-        res_str = translate(pat_str)
-        res = bytes(res_str, 'ISO-8859-1')
-    else:
-        res = translate(pat)
-    return re.compile(res).match
-
 try:
     from _fnmatch import filter
 except ImportError:
@@ -64,6 +35,28 @@ def filter(names, pat):
                     result.append(name)
         return result
 
+try:
+    from _fnmatch import fnmatch
+except ImportError:
+    def fnmatch(name, pat):
+        """Test whether FILENAME matches PATTERN.
+
+        Patterns are Unix shell style:
+
+        *       matches everything
+        ?       matches any single character
+        [seq]   matches any character in seq
+        [!seq]  matches any char not in seq
+
+        An initial period in FILENAME is not special.
+        Both FILENAME and PATTERN are first case-normalized
+        if the operating system requires it.
+        If you don't want this, use fnmatchcase(FILENAME, PATTERN).
+        """
+        name = os.path.normcase(name)
+        pat = os.path.normcase(pat)
+        return fnmatchcase(name, pat)
+
 try:
     from _fnmatch import fnmatchcase
 except ImportError:
@@ -89,6 +82,16 @@ def translate(pat):
         parts = _translate(pat, STAR, '.')
         return _join_translated_parts(parts, STAR)
 
+@functools.lru_cache(maxsize=32768, typed=True)
+def _compile_pattern(pat):
+    if isinstance(pat, bytes):
+        pat_str = str(pat, 'ISO-8859-1')
+        res_str = translate(pat_str)
+        res = bytes(res_str, 'ISO-8859-1')
+    else:
+        res = translate(pat)
+    return re.compile(res).match
+
 def _translate(pat, STAR, QUESTION_MARK):
     res = []
     add = res.append

From 124f8f86d633bfef0db4c25d485872607542a61c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?=
 <10796600+picnixz@users.noreply.github.com>
Date: Thu, 11 Jul 2024 17:23:35 +0200
Subject: [PATCH 43/97] update comments

---
 Lib/test/test_fnmatch.py          |  3 ++-
 Modules/_fnmatch/_fnmatchmodule.c |  2 ++
 Modules/_fnmatch/_fnmatchmodule.h |  2 ++
 Modules/_fnmatch/regex.c          | 13 +++++++------
 Modules/_fnmatch/translate.c      | 12 ++++++------
 5 files changed, 19 insertions(+), 13 deletions(-)

diff --git a/Lib/test/test_fnmatch.py b/Lib/test/test_fnmatch.py
index 6d72df182af862..19f12db4fa2160 100644
--- a/Lib/test/test_fnmatch.py
+++ b/Lib/test/test_fnmatch.py
@@ -1,6 +1,6 @@
 """Test cases for the fnmatch module."""
-import itertools
 
+import itertools
 import os
 import string
 import unittest
@@ -312,6 +312,7 @@ class CPythonFilterTestCase(FilterTestCaseMixin, unittest.TestCase):
 
     @staticmethod
     def translate_func(pattern):
+        # Pure Python implementation of translate()
         STAR = object()
         parts = py_fnmatch._translate(pattern, STAR, '.')
         return py_fnmatch._join_translated_parts(parts, STAR)
diff --git a/Modules/_fnmatch/_fnmatchmodule.c b/Modules/_fnmatch/_fnmatchmodule.c
index ffc3c4ca23a6f3..d83a2d5a0e7405 100644
--- a/Modules/_fnmatch/_fnmatchmodule.c
+++ b/Modules/_fnmatch/_fnmatchmodule.c
@@ -308,6 +308,8 @@ _fnmatch_translate_impl(PyObject *module, PyObject *pattern)
     }
 }
 
+// ==== Module specs ==========================================================
+
 static PyMethodDef fnmatchmodule_methods[] = {
     _FNMATCH_FILTER_METHODDEF
     _FNMATCH_FNMATCH_METHODDEF
diff --git a/Modules/_fnmatch/_fnmatchmodule.h b/Modules/_fnmatch/_fnmatchmodule.h
index b9601e59b6b9fa..10a5811b15cea0 100644
--- a/Modules/_fnmatch/_fnmatchmodule.h
+++ b/Modules/_fnmatch/_fnmatchmodule.h
@@ -24,6 +24,8 @@ get_fnmatchmodulestate_state(PyObject *module)
     return (fnmatchmodule_state *)state;
 }
 
+// ==== Helper prototypes =====================================================
+
 /*
  * Test whether a name matches a compiled RE pattern.
  *
diff --git a/Modules/_fnmatch/regex.c b/Modules/_fnmatch/regex.c
index 73f4a338927bb1..524dde992ccfc1 100644
--- a/Modules/_fnmatch/regex.c
+++ b/Modules/_fnmatch/regex.c
@@ -36,12 +36,12 @@ _Py_fnmatch_filter(PyObject *matcher, PyObject *names)
 
     PyObject *name = NULL;
     while ((name = PyIter_Next(iter))) {
-        int rc = _Py_fnmatch_fnmatch(matcher, name);
-        if (rc < 0) {
+        int matching = _Py_fnmatch_fnmatch(matcher, name);
+        if (matching < 0) {
             assert(PyErr_Occurred());
             goto abort;
         }
-        if (rc == 1) {
+        if (matching == 1) {
             if (PyList_Append(res, name) < 0) {
                 goto abort;
             }
@@ -82,13 +82,14 @@ _Py_fnmatch_filter_normalized(PyObject *matcher, PyObject *names, PyObject *norm
         if (normalized == NULL) {
             goto abort;
         }
-        int rc = _Py_fnmatch_fnmatch(matcher, normalized);
+        int matching = _Py_fnmatch_fnmatch(matcher, normalized);
         Py_DECREF(normalized);
-        if (rc < 0) {
+        if (matching < 0) {
             assert(PyErr_Occurred());
             goto abort;
         }
-        if (rc == 1) {
+        if (matching == 1) {
+            // add the non-normalized name if its normalization matches
             if (PyList_Append(res, name) < 0) {
                 goto abort;
             }
diff --git a/Modules/_fnmatch/translate.c b/Modules/_fnmatch/translate.c
index 8900800f0c933c..dea389bd2f7a89 100644
--- a/Modules/_fnmatch/translate.c
+++ b/Modules/_fnmatch/translate.c
@@ -9,9 +9,7 @@
 
 #include "_fnmatchmodule.h" // for get_fnmatchmodulestate_state()
 
-// ==== Helper declarations ==================================================
-
-typedef fnmatchmodule_state State;
+// ==== Macro definitions =====================================================
 
 #define _WRITE_OR_FAIL(writeop, onerror) \
     do { \
@@ -45,6 +43,8 @@ typedef fnmatchmodule_state State;
         } \
     } while (0)
 
+// ==== Helper declarations ===================================================
+
 /*
  * Creates a new Unicode object from a Py_UCS4 character.
  *
@@ -77,7 +77,7 @@ translate_expression(PyObject *pattern, Py_ssize_t start, Py_ssize_t stop);
  * This returns the number of written characters, or -1 if an error occurred.
  */
 static Py_ssize_t
-write_literal(State *state, PyUnicodeWriter *writer, PyObject *unicode);
+write_literal(fnmatchmodule_state *state, PyUnicodeWriter *writer, PyObject *unicode);
 
 /*
  * Write the translated pattern obtained by translate_expression().
@@ -120,7 +120,7 @@ _Py_fnmatch_translate(PyObject *module, PyObject *pattern)
 #define ADVANCE_TO_NEXT(ch, from, maxind) _WHILE_READ_CMP((ch), (from), (maxind), !=)
 #define SKIP_DUPLICATES(ch, from, maxind) _WHILE_READ_CMP((ch), (from), (maxind), ==)
 
-    State *state = get_fnmatchmodulestate_state(module);
+    fnmatchmodule_state *state = get_fnmatchmodulestate_state(module);
     PyObject *re = state->re_module;
     const Py_ssize_t n = PyUnicode_GET_LENGTH(pattern);
     // We would write less data if there are successive '*',
@@ -431,7 +431,7 @@ translate_expression(PyObject *pattern, Py_ssize_t i, Py_ssize_t j)
 }
 
 static Py_ssize_t
-write_literal(State *state, PyUnicodeWriter *writer, PyObject *unicode)
+write_literal(fnmatchmodule_state *state, PyUnicodeWriter *writer, PyObject *unicode)
 {
     PyObject *escaped = PyObject_CallMethodOneArg(state->re_module,
                                                   &_Py_ID(escape),

From 7621d6c0248728340bc32025efd28080b6afc0bf Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?=
 <10796600+picnixz@users.noreply.github.com>
Date: Fri, 12 Jul 2024 09:24:48 +0200
Subject: [PATCH 44/97] blurb

---
 .../next/Library/2024-07-12-09-24-38.gh-issue-121445.KYtNOZ.rst | 2 ++
 1 file changed, 2 insertions(+)
 create mode 100644 Misc/NEWS.d/next/Library/2024-07-12-09-24-38.gh-issue-121445.KYtNOZ.rst

diff --git a/Misc/NEWS.d/next/Library/2024-07-12-09-24-38.gh-issue-121445.KYtNOZ.rst b/Misc/NEWS.d/next/Library/2024-07-12-09-24-38.gh-issue-121445.KYtNOZ.rst
new file mode 100644
index 00000000000000..639af4fb31ff93
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2024-07-12-09-24-38.gh-issue-121445.KYtNOZ.rst
@@ -0,0 +1,2 @@
+Improve the performances of :func:`fnmatch.translate` by 50% and of
+:func:`fnmatch.filter` by 10%.  Patch by Bénédikt Tran.

From 9b94fe6e75b172bc21d57390e47bf5c2ad54bedc Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?=
 <10796600+picnixz@users.noreply.github.com>
Date: Fri, 12 Jul 2024 09:31:24 +0200
Subject: [PATCH 45/97] add the _fnmatch sources to the MSVC project files

---
 PCbuild/pythoncore.vcxproj         |  4 ++++
 PCbuild/pythoncore.vcxproj.filters | 12 ++++++++++++
 2 files changed, 16 insertions(+)

diff --git a/PCbuild/pythoncore.vcxproj b/PCbuild/pythoncore.vcxproj
index f36fcb8caece33..dbd27c2f0c44d4 100644
--- a/PCbuild/pythoncore.vcxproj
+++ b/PCbuild/pythoncore.vcxproj
@@ -368,6 +368,7 @@
     <ClInclude Include="..\Modules\_math.h" />
     <ClInclude Include="..\Modules\hashtable.h" />
     <ClInclude Include="..\Modules\rotatingtree.h" />
+    <ClInclude Include="..\Modules\_fnmatch\_fnmatchmodule.h" />
     <ClInclude Include="..\Modules\_io\_iomodule.h" />
     <ClInclude Include="..\Modules\cjkcodecs\alg_jisx0201.h" />
     <ClInclude Include="..\Modules\cjkcodecs\cjkcodecs.h" />
@@ -473,6 +474,9 @@
     <ClCompile Include="..\Modules\_typingmodule.c" />
     <ClCompile Include="..\Modules\timemodule.c" />
     <ClCompile Include="..\Modules\xxsubtype.c" />
+    <ClCompile Include="..\Modules\_fnmatch\_fnmatchmodule.c" />
+    <ClCompile Include="..\Modules\_fnmatch\regex.c" />
+    <ClCompile Include="..\Modules\_fnmatch\translate.c" />
     <ClCompile Include="..\Modules\_interpretersmodule.c" />
     <ClCompile Include="..\Modules\_interpchannelsmodule.c" />
     <ClCompile Include="..\Modules\_interpqueuesmodule.c" />
diff --git a/PCbuild/pythoncore.vcxproj.filters b/PCbuild/pythoncore.vcxproj.filters
index a1b43addf9e36a..00300074a1e5ed 100644
--- a/PCbuild/pythoncore.vcxproj.filters
+++ b/PCbuild/pythoncore.vcxproj.filters
@@ -252,6 +252,9 @@
     <ClInclude Include="..\Modules\rotatingtree.h">
       <Filter>Modules</Filter>
     </ClInclude>
+    <ClInclude Include="..\Modules\_fnmatch\_fnmatchmodule.h">
+      <Filter>Modules\_fnmatch</Filter>
+    </ClInclude>
     <ClInclude Include="..\Modules\_io\_iomodule.h">
       <Filter>Modules\_io</Filter>
     </ClInclude>
@@ -1058,6 +1061,15 @@
     <ClCompile Include="..\Modules\zlibmodule.c">
       <Filter>Modules</Filter>
     </ClCompile>
+    <ClCompile Include="..\Modules\_fnmatch\_fnmatchmodule.c">
+      <Filter>Modules\_fnmatch</Filter>
+    </ClCompile>
+    <ClCompile Include="..\Modules\_fnmatch\regex.c">
+      <Filter>Modules\_fnmatch</Filter>
+    </ClCompile>
+    <ClCompile Include="..\Modules\_fnmatch\translate.c">
+      <Filter>Modules\_fnmatch</Filter>
+    </ClCompile>
     <ClCompile Include="..\Modules\_io\fileio.c">
       <Filter>Modules\_io</Filter>
     </ClCompile>

From 3903987e830befad80ee4c4a626c2e649aaf1590 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?=
 <10796600+picnixz@users.noreply.github.com>
Date: Fri, 12 Jul 2024 09:37:06 +0200
Subject: [PATCH 46/97] fix MSVC warnings

---
 Modules/_fnmatch/translate.c | 19 ++++++++++---------
 1 file changed, 10 insertions(+), 9 deletions(-)

diff --git a/Modules/_fnmatch/translate.c b/Modules/_fnmatch/translate.c
index dea389bd2f7a89..3b0427480a54f1 100644
--- a/Modules/_fnmatch/translate.c
+++ b/Modules/_fnmatch/translate.c
@@ -151,7 +151,8 @@ _Py_fnmatch_translate(PyObject *module, PyObject *pattern)
     }
     const int kind = PyUnicode_KIND(pattern);
     const void *data = PyUnicode_DATA(pattern);
-    Py_ssize_t h = 0, i = 0;
+    // i is the current index, wi is the index of a wildcard
+    Py_ssize_t i = 0, wi = 0;
     while (i < n) {
         // read and advance to the next character
         Py_UCS4 chr = READ(i++);
@@ -159,7 +160,7 @@ _Py_fnmatch_translate(PyObject *module, PyObject *pattern)
             case '*': {
                 _WRITE_CHAR_OR(writer, chr, goto abort);
                 SKIP_DUPLICATES('*', i, n);
-                PyObject *index = PyLong_FromSsize_t(h++);
+                PyObject *index = PyLong_FromSsize_t(wi++);
                 if (index == NULL) {
                     goto abort;
                 }
@@ -173,7 +174,7 @@ _Py_fnmatch_translate(PyObject *module, PyObject *pattern)
             case '?': {
                 // translate optional '?' (fnmatch) into optional '.' (regex)
                 _WRITE_CHAR_OR(writer, '.', goto abort);
-                ++h; // increase the expected result's length
+                ++wi; // increase the expected result's length
                 break;
             }
             case '[': {
@@ -183,14 +184,14 @@ _Py_fnmatch_translate(PyObject *module, PyObject *pattern)
                 ADVANCE_TO_NEXT(']', j, n); // locate closing ']'
                 if (j >= n) {
                     _WRITE_ASCII_OR(writer, "\\[", 2, goto abort);
-                    h += 2; // we just wrote 2 characters
+                    wi += 2; // we just wrote 2 characters
                     break;  // early break for clarity
                 }
                 else {
                     //              v--- pattern[j] (exclusive)
                     // '[' * ... * ']'
                     //     ^----- pattern[i] (inclusive)
-                    int pos = PyUnicode_FindChar(pattern, '-', i, j, 1);
+                    Py_ssize_t pos = PyUnicode_FindChar(pattern, '-', i, j, 1);
                     if (pos == -2) {
                         goto abort;
                     }
@@ -216,12 +217,12 @@ _Py_fnmatch_translate(PyObject *module, PyObject *pattern)
                     if (s2 == NULL) {
                         goto abort;
                     }
-                    int difflen = write_expression(writer, s2);
+                    Py_ssize_t difflen = write_expression(writer, s2);
                     Py_DECREF(s2);
                     if (difflen < 0) {
                         goto abort;
                     }
-                    h += difflen;
+                    wi += difflen;
                     i = j + 1;  // jump to the character after ']'
                     break;      // early break for clarity
                 }
@@ -231,12 +232,12 @@ _Py_fnmatch_translate(PyObject *module, PyObject *pattern)
                 if (str == NULL) {
                     goto abort;
                 }
-                int difflen = write_literal(state, writer, str);
+                Py_ssize_t difflen = write_literal(state, writer, str);
                 Py_DECREF(str);
                 if (difflen < 0) {
                     goto abort;
                 }
-                h += difflen;
+                wi += difflen;
                 break;
             }
         }

From c7422a5b282d095de5638cff505b7bbf575b2dfe Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?=
 <10796600+picnixz@users.noreply.github.com>
Date: Fri, 12 Jul 2024 09:56:27 +0200
Subject: [PATCH 47/97] fix typo in PC/config.c: PyInit_fnmatch -> PyInit__fnmatch

---
 PC/config.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/PC/config.c b/PC/config.c
index 8f49d9255b4fbe..7c7c2540118cf2 100644
--- a/PC/config.c
+++ b/PC/config.c
@@ -10,9 +10,9 @@ extern PyObject* PyInit_array(void);
 extern PyObject* PyInit_binascii(void);
 extern PyObject* PyInit_cmath(void);
 extern PyObject* PyInit_errno(void);
+extern PyObject* PyInit__fnmatch(void);
 extern PyObject* PyInit_faulthandler(void);
 extern PyObject* PyInit__tracemalloc(void);
-extern PyObject* PyInit_fnmatch(void);
 extern PyObject* PyInit_gc(void);
 extern PyObject* PyInit_math(void);
 extern PyObject* PyInit__md5(void);
@@ -92,7 +92,7 @@ struct _inittab _PyImport_Inittab[] = {
     {"binascii", PyInit_binascii},
     {"cmath", PyInit_cmath},
     {"errno", PyInit_errno},
-    {"_fnmatch", PyInit_fnmatch},
+    {"_fnmatch", PyInit__fnmatch},
     {"faulthandler", PyInit_faulthandler},
     {"gc", PyInit_gc},
     {"math", PyInit_math},

From 7dbe55c46c4f0e2f76aeb3874c685e5c2a1f2d5e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?=
 <10796600+picnixz@users.noreply.github.com>
Date: Fri, 12 Jul 2024 10:16:03 +0200
Subject: [PATCH 48/97] `get_fnmatchmodulestate_state` ->
 `get_fnmatchmodule_state`

---
 Modules/_fnmatch/_fnmatchmodule.c | 14 +++++++-------
 Modules/_fnmatch/_fnmatchmodule.h |  2 +-
 Modules/_fnmatch/translate.c      |  2 +-
 3 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/Modules/_fnmatch/_fnmatchmodule.c b/Modules/_fnmatch/_fnmatchmodule.c
index d83a2d5a0e7405..c4e402de7c46ee 100644
--- a/Modules/_fnmatch/_fnmatchmodule.c
+++ b/Modules/_fnmatch/_fnmatchmodule.c
@@ -24,7 +24,7 @@ fnmatchmodule_get_matcher_function(PyObject *module, PyObject *pattern)
     if (expr == NULL) {
         return NULL;
     }
-    fnmatchmodule_state *st = get_fnmatchmodulestate_state(module);
+    fnmatchmodule_state *st = get_fnmatchmodule_state(module);
     // compile the pattern
     PyObject *compiled = PyObject_CallMethodOneArg(st->re_module, &_Py_ID(compile), expr);
     Py_DECREF(expr);
@@ -84,7 +84,7 @@ fnmatchmodule_load_translator(PyObject *module, fnmatchmodule_state *st)
 static inline PyObject *
 get_matcher_function(PyObject *module, PyObject *pattern)
 {
-    fnmatchmodule_state *st = get_fnmatchmodulestate_state(module);
+    fnmatchmodule_state *st = get_fnmatchmodule_state(module);
     assert(st->translator != NULL);
     return PyObject_CallOneArg(st->translator, pattern);
 }
@@ -103,7 +103,7 @@ fnmatchmodule_exec(PyObject *module)
         } \
     } while (0)
 
-    fnmatchmodule_state *st = get_fnmatchmodulestate_state(module);
+    fnmatchmodule_state *st = get_fnmatchmodule_state(module);
     IMPORT_MODULE(os_module, "os");
     IMPORT_MODULE(posixpath_module, "posixpath");
     IMPORT_MODULE(re_module, "re");
@@ -122,7 +122,7 @@ fnmatchmodule_exec(PyObject *module)
 static int
 fnmatchmodule_traverse(PyObject *m, visitproc visit, void *arg)
 {
-    fnmatchmodule_state *st = get_fnmatchmodulestate_state(m);
+    fnmatchmodule_state *st = get_fnmatchmodule_state(m);
     Py_VISIT(st->os_module);
     Py_VISIT(st->posixpath_module);
     Py_VISIT(st->re_module);
@@ -134,7 +134,7 @@ fnmatchmodule_traverse(PyObject *m, visitproc visit, void *arg)
 static int
 fnmatchmodule_clear(PyObject *m)
 {
-    fnmatchmodule_state *st = get_fnmatchmodulestate_state(m);
+    fnmatchmodule_state *st = get_fnmatchmodule_state(m);
     Py_CLEAR(st->os_module);
     Py_CLEAR(st->posixpath_module);
     Py_CLEAR(st->re_module);
@@ -166,7 +166,7 @@ static PyObject *
 _fnmatch_filter_impl(PyObject *module, PyObject *names, PyObject *pat)
 /*[clinic end generated code: output=7f11aa68436d05fc input=1d233174e1c4157a]*/
 {
-    fnmatchmodule_state *st = get_fnmatchmodulestate_state(module);
+    fnmatchmodule_state *st = get_fnmatchmodule_state(module);
     PyObject *os_path = PyObject_GetAttr(st->os_module, &_Py_ID(path));
     if (os_path == NULL) {
         return NULL;
@@ -215,7 +215,7 @@ static int
 _fnmatch_fnmatch_impl(PyObject *module, PyObject *name, PyObject *pat)
 /*[clinic end generated code: output=b4cd0bd911e8bc93 input=c45e0366489540b8]*/
 {
-    fnmatchmodule_state *st = get_fnmatchmodulestate_state(module);
+    fnmatchmodule_state *st = get_fnmatchmodule_state(module);
     // use the runtime 'os.path' value and not a cached one
     PyObject *os_path = PyObject_GetAttr(st->os_module, &_Py_ID(path));
     if (os_path == NULL) {
diff --git a/Modules/_fnmatch/_fnmatchmodule.h b/Modules/_fnmatch/_fnmatchmodule.h
index 10a5811b15cea0..cde36e56dc901f 100644
--- a/Modules/_fnmatch/_fnmatchmodule.h
+++ b/Modules/_fnmatch/_fnmatchmodule.h
@@ -17,7 +17,7 @@ typedef struct {
 } fnmatchmodule_state;
 
 static inline fnmatchmodule_state *
-get_fnmatchmodulestate_state(PyObject *module)
+get_fnmatchmodule_state(PyObject *module)
 {
     void *state = PyModule_GetState(module);
     assert(state != NULL);
diff --git a/Modules/_fnmatch/translate.c b/Modules/_fnmatch/translate.c
index 3b0427480a54f1..5223f699a5bc82 100644
--- a/Modules/_fnmatch/translate.c
+++ b/Modules/_fnmatch/translate.c
@@ -120,7 +120,7 @@ _Py_fnmatch_translate(PyObject *module, PyObject *pattern)
 #define ADVANCE_TO_NEXT(ch, from, maxind) _WHILE_READ_CMP((ch), (from), (maxind), !=)
 #define SKIP_DUPLICATES(ch, from, maxind) _WHILE_READ_CMP((ch), (from), (maxind), ==)
 
-    fnmatchmodule_state *state = get_fnmatchmodulestate_state(module);
+    fnmatchmodule_state *state = get_fnmatchmodule_state(module);
     PyObject *re = state->re_module;
     const Py_ssize_t n = PyUnicode_GET_LENGTH(pattern);
     // We would write less data if there are successive '*',

From 4a879112249bb9f045b337686838dcf87c919d2d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?=
 <10796600+picnixz@users.noreply.github.com>
Date: Fri, 12 Jul 2024 12:08:13 +0200
Subject: [PATCH 49/97] remove redundant includes

---
 Modules/_fnmatch/translate.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/Modules/_fnmatch/translate.c b/Modules/_fnmatch/translate.c
index 5223f699a5bc82..7dd5fae64ebe90 100644
--- a/Modules/_fnmatch/translate.c
+++ b/Modules/_fnmatch/translate.c
@@ -4,11 +4,10 @@
  * disabled on demand.
  */
 
-#include "Python.h"
-#include "pycore_call.h"    // for _PyObject_CallMethod()
-
 #include "_fnmatchmodule.h" // for get_fnmatchmodulestate_state()
 
+#include "pycore_call.h"
+
 // ==== Macro definitions =====================================================
 
 #define _WRITE_OR_FAIL(writeop, onerror) \

From aca2b1b60cf40287d7e2ef214539d9504059f152 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?=
 <10796600+picnixz@users.noreply.github.com>
Date: Fri, 12 Jul 2024 12:08:31 +0200
Subject: [PATCH 50/97] update module state (add hyphen_str) and header comments

---
 Modules/_fnmatch/_fnmatchmodule.h | 23 ++++++++++++++++-------
 1 file changed, 16 insertions(+), 7 deletions(-)

diff --git a/Modules/_fnmatch/_fnmatchmodule.h b/Modules/_fnmatch/_fnmatchmodule.h
index cde36e56dc901f..9e70e800de36ae 100644
--- a/Modules/_fnmatch/_fnmatchmodule.h
+++ b/Modules/_fnmatch/_fnmatchmodule.h
@@ -8,12 +8,14 @@
 #include "Python.h"
 
 typedef struct {
-    PyObject *os_module;            // 'os' module
-    PyObject *posixpath_module;     // 'posixpath' module
-    PyObject *re_module;            // 're' module
+    PyObject *os_module;        // import os
+    PyObject *posixpath_module; // import posixpath
+    PyObject *re_module;        // import re
 
-    PyObject *lru_cache;    // the LRU cache decorator
-    PyObject *translator;   // the translation unit whose calls are cached
+    PyObject *lru_cache;        // functools.lru_cache() inner decorator
+    PyObject *translator;       // the translation unit whose calls are cached
+
+    PyObject *hyphen_str;       // interned hyphen glyph '-'
 } fnmatchmodule_state;
 
 static inline fnmatchmodule_state *
@@ -51,10 +53,17 @@ _Py_fnmatch_fnmatch(PyObject *matcher, PyObject *string);
  *      names    An iterable of strings (str or bytes objects) to match.
  *
  * Returns a list of matched names, or NULL if an error occurred.
- */
+*/
 extern PyObject *
 _Py_fnmatch_filter(PyObject *matcher, PyObject *names);
-/* same as _Py_fnmatch_filter() but calls os.path.normcase() on each name */
+
+/*
+ * Similar to _Py_fnmatch_filter() but matches os.path.normcase(name)
+ * instead. The returned values are however a sub-sequence of 'names'.
+ *
+ * The 'normcase' argument is a callable implementing os.path.normcase().
+ *
+ */
 extern PyObject *
 _Py_fnmatch_filter_normalized(PyObject *matcher, PyObject *names, PyObject *normcase);
 

From 2ef61ad556f9afb3ce08aced5340d8af0eb14059 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?=
 <10796600+picnixz@users.noreply.github.com>
Date: Fri, 12 Jul 2024 12:08:52 +0200
Subject: [PATCH 51/97] simplify the filter loops in regex.c

---
 Modules/_fnmatch/regex.c | 39 +++++----------------------------------
 1 file changed, 5 insertions(+), 34 deletions(-)

diff --git a/Modules/_fnmatch/regex.c b/Modules/_fnmatch/regex.c
index 524dde992ccfc1..1b4d55943ef7a5 100644
--- a/Modules/_fnmatch/regex.c
+++ b/Modules/_fnmatch/regex.c
@@ -1,5 +1,3 @@
-#include "Python.h"
-
 #include "_fnmatchmodule.h" // for pre-declarations
 
 // ==== API implementation ====================================================
@@ -9,7 +7,6 @@ _Py_fnmatch_fnmatch(PyObject *matcher, PyObject *name)
 {
     // If 'name' is of incorrect type, it will be detected when calling
     // the matcher function (we emulate 're.compile(...).match(name)').
-    assert(PyCallable_Check(matcher));
     PyObject *match = PyObject_CallOneArg(matcher, name);
     if (match == NULL) {
         return -1;
@@ -22,40 +19,27 @@ _Py_fnmatch_fnmatch(PyObject *matcher, PyObject *name)
 PyObject *
 _Py_fnmatch_filter(PyObject *matcher, PyObject *names)
 {
-    assert(PyCallable_Check(matcher));
     PyObject *iter = PyObject_GetIter(names);
     if (iter == NULL) {
         return NULL;
     }
-
     PyObject *res = PyList_New(0);
     if (res == NULL) {
         Py_DECREF(iter);
         return NULL;
     }
-
     PyObject *name = NULL;
     while ((name = PyIter_Next(iter))) {
         int matching = _Py_fnmatch_fnmatch(matcher, name);
-        if (matching < 0) {
-            assert(PyErr_Occurred());
+        if (matching < 0 || (matching == 1 && PyList_Append(res, name) < 0)) {
             goto abort;
         }
-        if (matching == 1) {
-            if (PyList_Append(res, name) < 0) {
-                goto abort;
-            }
-        }
         Py_DECREF(name);
-        if (PyErr_Occurred()) {
-            goto error;
-        }
     }
     Py_DECREF(iter);
     return res;
 abort:
-    Py_XDECREF(name);
-error:
+    Py_DECREF(name);
     Py_DECREF(iter);
     Py_DECREF(res);
     return NULL;
@@ -64,18 +48,15 @@ _Py_fnmatch_filter(PyObject *matcher, PyObject *names)
 PyObject *
 _Py_fnmatch_filter_normalized(PyObject *matcher, PyObject *names, PyObject *normcase)
 {
-    assert(PyCallable_Check(matcher));
     PyObject *iter = PyObject_GetIter(names);
     if (iter == NULL) {
         return NULL;
     }
-
     PyObject *res = PyList_New(0);
     if (res == NULL) {
         Py_DECREF(iter);
         return NULL;
     }
-
     PyObject *name = NULL;
     while ((name = PyIter_Next(iter))) {
         PyObject *normalized = PyObject_CallOneArg(normcase, name);
@@ -84,26 +65,16 @@ _Py_fnmatch_filter_normalized(PyObject *matcher, PyObject *names, PyObject *norm
         }
         int matching = _Py_fnmatch_fnmatch(matcher, normalized);
         Py_DECREF(normalized);
-        if (matching < 0) {
-            assert(PyErr_Occurred());
+        // add the non-normalized name if its normalization matches
+        if (matching < 0 || (matching == 1 && PyList_Append(res, name) < 0)) {
             goto abort;
         }
-        if (matching == 1) {
-            // add the non-normalized name if its normalization matches
-            if (PyList_Append(res, name) < 0) {
-                goto abort;
-            }
-        }
         Py_DECREF(name);
-        if (PyErr_Occurred()) {
-            goto error;
-        }
     }
     Py_DECREF(iter);
     return res;
 abort:
-    Py_XDECREF(name);
-error:
+    Py_DECREF(name);
     Py_DECREF(iter);
     Py_DECREF(res);
     return NULL;

From 13dc17ec98ec170e58cbc53ef0182a3d68f58028 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?=
 <10796600+picnixz@users.noreply.github.com>
Date: Fri, 12 Jul 2024 14:23:42 +0200
Subject: [PATCH 52/97] harmonize docs

---
 Lib/fnmatch.py                     | 15 ++++++++-------
 Makefile.pre.in                    |  2 +-
 PCbuild/pythoncore.vcxproj         |  2 +-
 PCbuild/pythoncore.vcxproj.filters |  2 +-
 4 files changed, 11 insertions(+), 10 deletions(-)

diff --git a/Lib/fnmatch.py b/Lib/fnmatch.py
index bfdbc78ffce3a6..1dc52f2575ae6c 100644
--- a/Lib/fnmatch.py
+++ b/Lib/fnmatch.py
@@ -20,7 +20,7 @@
     from _fnmatch import filter
 except ImportError:
     def filter(names, pat):
-        """Construct a list from those elements of the iterable NAMES that match PAT."""
+        """Construct a list from the names in *names* matching *pat*."""
         result = []
         pat = os.path.normcase(pat)
         match = _compile_pattern(pat)
@@ -39,7 +39,7 @@ def filter(names, pat):
     from _fnmatch import fnmatch
 except ImportError:
     def fnmatch(name, pat):
-        """Test whether FILENAME matches PATTERN.
+        """Test whether *name* matches *pat*.
 
         Patterns are Unix shell style:
 
@@ -48,10 +48,11 @@ def fnmatch(name, pat):
         [seq]   matches any character in seq
         [!seq]  matches any char not in seq
 
-        An initial period in FILENAME is not special.
-        Both FILENAME and PATTERN are first case-normalized
+        An initial period in *name* is not special.
+        Both *name* and *pat* are first case-normalized
         if the operating system requires it.
-        If you don't want this, use fnmatchcase(FILENAME, PATTERN).
+
+        If you don't want this, use fnmatchcase(name, pat).
         """
         name = os.path.normcase(name)
         pat = os.path.normcase(pat)
@@ -61,7 +62,7 @@ def fnmatch(name, pat):
     from _fnmatch import fnmatchcase
 except ImportError:
     def fnmatchcase(name, pat):
-        """Test whether FILENAME matches PATTERN, including case.
+        """Test whether *name* matches *pat*, including case.
 
         This is a version of fnmatch() which doesn't case-normalize
         its arguments.
@@ -73,7 +74,7 @@ def fnmatchcase(name, pat):
     from _fnmatch import translate
 except ImportError:
     def translate(pat):
-        """Translate a shell PATTERN to a regular expression.
+        """Translate a shell pattern *pat* to a regular expression.
 
         There is no way to quote meta-characters.
         """
diff --git a/Makefile.pre.in b/Makefile.pre.in
index f01f2e852a1d6d..bd5e471c50bd33 100644
--- a/Makefile.pre.in
+++ b/Makefile.pre.in
@@ -355,7 +355,7 @@ FNMATCH_H=	Modules/_fnmatch/_fnmatchmodule.h
 
 FNMATCH_OBJS=	\
 		Modules/_fnmatch/_fnmatchmodule.o \
-		Modules/_fnmatch/regex.o \
+		Modules/_fnmatch/matcher.o \
 		Modules/_fnmatch/translate.o
 
 ##########################################################################
diff --git a/PCbuild/pythoncore.vcxproj b/PCbuild/pythoncore.vcxproj
index dbd27c2f0c44d4..ea52c20cc66db1 100644
--- a/PCbuild/pythoncore.vcxproj
+++ b/PCbuild/pythoncore.vcxproj
@@ -475,7 +475,7 @@
     <ClCompile Include="..\Modules\timemodule.c" />
     <ClCompile Include="..\Modules\xxsubtype.c" />
     <ClCompile Include="..\Modules\_fnmatch\_fnmatchmodule.c" />
-    <ClCompile Include="..\Modules\_fnmatch\regex.c" />
+    <ClCompile Include="..\Modules\_fnmatch\matcher.c" />
     <ClCompile Include="..\Modules\_fnmatch\translate.c" />
     <ClCompile Include="..\Modules\_interpretersmodule.c" />
     <ClCompile Include="..\Modules\_interpchannelsmodule.c" />
diff --git a/PCbuild/pythoncore.vcxproj.filters b/PCbuild/pythoncore.vcxproj.filters
index 00300074a1e5ed..912407b56ed783 100644
--- a/PCbuild/pythoncore.vcxproj.filters
+++ b/PCbuild/pythoncore.vcxproj.filters
@@ -1064,7 +1064,7 @@
     <ClCompile Include="..\Modules\_fnmatch\_fnmatchmodule.c">
       <Filter>Modules\_fnmatch</Filter>
     </ClCompile>
-    <ClCompile Include="..\Modules\_fnmatch\regex.c">
+    <ClCompile Include="..\Modules\_fnmatch\matcher.c">
       <Filter>Modules\_fnmatch</Filter>
     </ClCompile>
     <ClCompile Include="..\Modules\_fnmatch\translate.c">

From dba784b84197ddf1d7bd7217c5b728bd24f4a93f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?=
 <10796600+picnixz@users.noreply.github.com>
Date: Fri, 12 Jul 2024 14:25:17 +0200
Subject: [PATCH 53/97] improvements

- rename `regex.c` -> `matcher.c`
- use interned strings
- remove redundant macros
- add comments to local macros
- add some RFE notes
---
 Modules/Setup.bootstrap.in                 |   2 +-
 Modules/_fnmatch/_fnmatchmodule.c          | 288 ++++++++++++---------
 Modules/_fnmatch/_fnmatchmodule.h          |  42 +--
 Modules/_fnmatch/clinic/_fnmatchmodule.c.h |  88 ++++---
 Modules/_fnmatch/{regex.c => matcher.c}    |  12 +-
 Modules/_fnmatch/translate.c               | 160 +++++++-----
 6 files changed, 344 insertions(+), 248 deletions(-)
 rename Modules/_fnmatch/{regex.c => matcher.c} (85%)

diff --git a/Modules/Setup.bootstrap.in b/Modules/Setup.bootstrap.in
index 7201c857ddba0a..35198091329d01 100644
--- a/Modules/Setup.bootstrap.in
+++ b/Modules/Setup.bootstrap.in
@@ -36,7 +36,7 @@ _stat _stat.c
 _symtable symtablemodule.c
 
 # miscellaneous accelerators
-_fnmatch _fnmatch/_fnmatchmodule.c _fnmatch/regex.c _fnmatch/translate.c
+_fnmatch _fnmatch/_fnmatchmodule.c _fnmatch/matcher.c _fnmatch/translate.c
 
 # for systems without $HOME env, used by site._getuserbase()
 @MODULE_PWD_TRUE@pwd pwdmodule.c
diff --git a/Modules/_fnmatch/_fnmatchmodule.c b/Modules/_fnmatch/_fnmatchmodule.c
index c4e402de7c46ee..62a2e7b7f1ed40 100644
--- a/Modules/_fnmatch/_fnmatchmodule.c
+++ b/Modules/_fnmatch/_fnmatchmodule.c
@@ -1,13 +1,10 @@
-#include "Python.h"
-#include "pycore_call.h"
-
 #include "_fnmatchmodule.h"
 #include "clinic/_fnmatchmodule.c.h"
 
 #define COMPILED_CACHE_SIZE     32768
 #define INVALID_PATTERN_TYPE    "pattern must be a string or a bytes object"
 
-// ==== Helper implementations ================================================
+// ==== Cached translation unit ===============================================
 
 /*
  * Compile a UNIX shell pattern into a RE pattern
@@ -16,18 +13,20 @@
  * This function is LRU-cached by the module itself.
  */
 static PyObject *
-fnmatchmodule_get_matcher_function(PyObject *module, PyObject *pattern)
+get_matcher_function_impl(PyObject *module, PyObject *pattern)
 {
     // translate the pattern into a RE pattern
     assert(module != NULL);
-    PyObject *expr = _fnmatch_translate_impl(module, pattern);
-    if (expr == NULL) {
+    PyObject *translated = fnmatch_translate_impl(module, pattern);
+    if (translated == NULL) {
         return NULL;
     }
     fnmatchmodule_state *st = get_fnmatchmodule_state(module);
     // compile the pattern
-    PyObject *compiled = PyObject_CallMethodOneArg(st->re_module, &_Py_ID(compile), expr);
-    Py_DECREF(expr);
+    PyObject *compiled = PyObject_CallMethodOneArg(st->re_module,
+                                                   &_Py_ID(compile),
+                                                   translated);
+    Py_DECREF(translated);
     if (compiled == NULL) {
         return NULL;
     }
@@ -39,49 +38,42 @@ fnmatchmodule_get_matcher_function(PyObject *module, PyObject *pattern)
 
 static PyMethodDef get_matcher_function_def = {
     "get_matcher_function",
-    (PyCFunction)(fnmatchmodule_get_matcher_function),
+    (PyCFunction)(get_matcher_function_impl),
     METH_O,
     NULL
 };
 
-static int
-fnmatchmodule_load_lru_cache(PyObject *module, fnmatchmodule_state *st)
-{
-    st->lru_cache = _PyImport_GetModuleAttrString("functools", "lru_cache");
-    if (st->lru_cache == NULL) {
-        return -1;
-    }
-    return 0;
-}
-
 static int
 fnmatchmodule_load_translator(PyObject *module, fnmatchmodule_state *st)
 {
-    assert(st->lru_cache != NULL);
+    assert(st->translator == NULL);  // this function must be called only once
     PyObject *maxsize = PyLong_FromLong(COMPILED_CACHE_SIZE);
     if (maxsize == NULL) {
         return -1;
     }
-    PyObject *decorator = PyObject_CallFunctionObjArgs(st->lru_cache, maxsize, Py_True, NULL);
+    PyObject *lru_cache = _PyImport_GetModuleAttrString("functools", "lru_cache");
+    PyObject *decorator = (lru_cache == NULL) ? NULL  /* import failed */
+        : PyObject_CallFunctionObjArgs(lru_cache, maxsize, Py_True, NULL);
+    Py_XDECREF(lru_cache);
     Py_DECREF(maxsize);
     if (decorator == NULL) {
         return -1;
     }
-    // TODO(picnixz): should INCREF the refcount of 'module'?
     assert(module != NULL);
     PyObject *decorated = PyCFunction_New(&get_matcher_function_def, module);
-    PyObject *translator = PyObject_CallOneArg(decorator, decorated);
+    // reference on 'translator' will be removed upon module cleanup
+    st->translator = PyObject_CallOneArg(decorator, decorated);
     Py_DECREF(decorated);
     Py_DECREF(decorator);
-    if (translator == NULL) {
+    if (st->translator == NULL) {
         return -1;
     }
-    // reference on 'translator' will be removed upon module cleanup
-    st->translator = translator;
     return 0;
 }
 
-static inline PyObject *
+// ==== Module data getters ===================================================
+
+static inline PyObject * /* reference to re.compile(pattern).match() */
 get_matcher_function(PyObject *module, PyObject *pattern)
 {
     fnmatchmodule_state *st = get_fnmatchmodule_state(module);
@@ -89,45 +81,75 @@ get_matcher_function(PyObject *module, PyObject *pattern)
     return PyObject_CallOneArg(st->translator, pattern);
 }
 
+static inline PyObject * /* reference to os.path.normcase() */
+get_platform_normcase_function(PyObject *module, bool *isposix)
+{
+    fnmatchmodule_state *st = get_fnmatchmodule_state(module);
+    PyObject *os_path = PyObject_GetAttr(st->os_module, &_Py_ID(path));
+    if (os_path == NULL) {
+        return NULL;
+    }
+    PyObject *normcase = PyObject_GetAttr(os_path, &_Py_ID(normcase));
+    if (isposix != NULL) {
+        *isposix = (bool)Py_Is(os_path, st->posixpath_module);
+    }
+    Py_DECREF(os_path);
+    return normcase;
+}
+
 // ==== Module state functions ================================================
 
-static int
-fnmatchmodule_exec(PyObject *module)
-{
-#define IMPORT_MODULE(attribute, name) \
+#define IMPORT_MODULE(state, attribute, name) \
     do { \
-        st->attribute = NULL; \
-        st->attribute = PyImport_ImportModule((name)); \
-        if (st->attribute == NULL) { \
+        /* make sure that the attribute is initialized once */ \
+        assert((state)->attribute == NULL); \
+        (state)->attribute = PyImport_ImportModule((name)); \
+        if ((state)->attribute == NULL) { \
             return -1; \
         } \
     } while (0)
 
+#define INTERN_STRING(state, attribute, literal) \
+    do { \
+        /* make sure that the attribute is initialized once */ \
+        assert((state)->attribute == NULL); \
+        (state)->attribute = PyUnicode_InternFromString((literal)); \
+        if ((state)->attribute == NULL) { \
+            return -1; \
+        } \
+    } while (0)
+
+static int
+fnmatchmodule_exec(PyObject *module)
+{
     fnmatchmodule_state *st = get_fnmatchmodule_state(module);
-    IMPORT_MODULE(os_module, "os");
-    IMPORT_MODULE(posixpath_module, "posixpath");
-    IMPORT_MODULE(re_module, "re");
-#undef IMPORT_MODULE
-    st->lru_cache = NULL;
-    if (fnmatchmodule_load_lru_cache(module, st) < 0) {
-        return -1;
-    }
-    st->translator = NULL;
+    IMPORT_MODULE(st, os_module, "os");
+    IMPORT_MODULE(st, posixpath_module, "posixpath");
+    IMPORT_MODULE(st, re_module, "re");
     if (fnmatchmodule_load_translator(module, st) < 0) {
         return -1;
     }
+    INTERN_STRING(st, hyphen_str, "-");
+    INTERN_STRING(st, re_empty_range_str, "(?!)");
+    INTERN_STRING(st, re_atomic_bgroup_str, "(?>.*?");
+    INTERN_STRING(st, re_wildcard_str, ".*");
     return 0;
 }
+#undef INTERN_STRING
+#undef IMPORT_MODULE
 
 static int
 fnmatchmodule_traverse(PyObject *m, visitproc visit, void *arg)
 {
     fnmatchmodule_state *st = get_fnmatchmodule_state(m);
-    Py_VISIT(st->os_module);
-    Py_VISIT(st->posixpath_module);
-    Py_VISIT(st->re_module);
-    Py_VISIT(st->lru_cache);
+    Py_VISIT(st->re_wildcard_str);
+    Py_VISIT(st->re_atomic_bgroup_str);
+    Py_VISIT(st->re_empty_range_str);
+    Py_VISIT(st->hyphen_str);
     Py_VISIT(st->translator);
+    Py_VISIT(st->re_module);
+    Py_VISIT(st->posixpath_module);
+    Py_VISIT(st->os_module);
     return 0;
 }
 
@@ -135,169 +157,176 @@ static int
 fnmatchmodule_clear(PyObject *m)
 {
     fnmatchmodule_state *st = get_fnmatchmodule_state(m);
-    Py_CLEAR(st->os_module);
-    Py_CLEAR(st->posixpath_module);
-    Py_CLEAR(st->re_module);
-    Py_CLEAR(st->lru_cache);
+    Py_CLEAR(st->re_wildcard_str);
+    Py_CLEAR(st->re_atomic_bgroup_str);
+    Py_CLEAR(st->re_empty_range_str);
+    Py_CLEAR(st->hyphen_str);
     Py_CLEAR(st->translator);
+    Py_CLEAR(st->re_module);
+    Py_CLEAR(st->posixpath_module);
+    Py_CLEAR(st->os_module);
     return 0;
 }
 
-static void
+static inline void
 fnmatchmodule_free(void *m)
 {
     (void)fnmatchmodule_clear((PyObject *)m);
 }
 
 /*[clinic input]
-module _fnmatch
+module fnmatch
 [clinic start generated code]*/
-/*[clinic end generated code: output=da39a3ee5e6b4b0d input=356e324d57d93f08]*/
+/*[clinic end generated code: output=da39a3ee5e6b4b0d input=797aa965370a9ef2]*/
 
 /*[clinic input]
-_fnmatch.filter -> object
+fnmatch.filter -> object
 
     names: object
-    pat: object
+    pat as pattern: object
+
+Construct a list from the names in *names* matching *pat*.
 
 [clinic start generated code]*/
 
 static PyObject *
-_fnmatch_filter_impl(PyObject *module, PyObject *names, PyObject *pat)
-/*[clinic end generated code: output=7f11aa68436d05fc input=1d233174e1c4157a]*/
+fnmatch_filter_impl(PyObject *module, PyObject *names, PyObject *pattern)
+/*[clinic end generated code: output=1a68530a2e3cf7d0 input=7ac729daad3b1404]*/
 {
-    fnmatchmodule_state *st = get_fnmatchmodule_state(module);
-    PyObject *os_path = PyObject_GetAttr(st->os_module, &_Py_ID(path));
-    if (os_path == NULL) {
-        return NULL;
-    }
     // filter() always calls os.path.normcase() on the pattern,
     // but not on the names being mathed if os.path is posixmodule
     // XXX: maybe this should be changed in Python as well?
     // Note: the Python implementation uses the *runtime* os.path.normcase.
-    PyObject *normcase = PyObject_GetAttr(os_path, &_Py_ID(normcase));
+    bool isposix = 0;
+    PyObject *normcase = get_platform_normcase_function(module, &isposix);
     if (normcase == NULL) {
-        Py_DECREF(os_path);
         return NULL;
     }
-    PyObject *patobj = PyObject_CallOneArg(normcase, pat);
-    if (patobj == NULL) {
+    PyObject *normalized_pattern = PyObject_CallOneArg(normcase, pattern);
+    if (normalized_pattern == NULL) {
         Py_DECREF(normcase);
-        Py_DECREF(os_path);
         return NULL;
     }
-    int isposix = Py_Is(os_path, st->posixpath_module);
-    Py_DECREF(os_path);
     // the matcher is cached with respect to the *normalized* pattern
-    PyObject *matcher = get_matcher_function(module, patobj);
-    Py_DECREF(patobj);
+    PyObject *matcher = get_matcher_function(module, normalized_pattern);
+    Py_DECREF(normalized_pattern);
     if (matcher == NULL) {
         Py_DECREF(normcase);
         return NULL;
     }
-    PyObject *result = isposix
+    PyObject *filtered = isposix
         ? _Py_fnmatch_filter(matcher, names)
         : _Py_fnmatch_filter_normalized(matcher, names, normcase);
     Py_DECREF(matcher);
     Py_DECREF(normcase);
-    return result;
+    return filtered;
 }
 
 /*[clinic input]
-_fnmatch.fnmatch -> bool
+fnmatch.fnmatch -> bool
 
     name: object
-    pat: object
+    pat as pattern: object
+
+Test whether *name* matches *pat*.
+
+Patterns are Unix shell style:
+
+*       matches everything
+?       matches any single character
+[seq]   matches any character in seq
+[!seq]  matches any char not in seq
+
+An initial period in *name* is not special.
+Both *name* and *pat* are first case-normalized
+if the operating system requires it.
+
+If you don't want this, use fnmatchcase(name, pat).
 
 [clinic start generated code]*/
 
 static int
-_fnmatch_fnmatch_impl(PyObject *module, PyObject *name, PyObject *pat)
-/*[clinic end generated code: output=b4cd0bd911e8bc93 input=c45e0366489540b8]*/
+fnmatch_fnmatch_impl(PyObject *module, PyObject *name, PyObject *pattern)
+/*[clinic end generated code: output=c9dc542e8d6933b6 input=279a4a4f2ddea6a2]*/
 {
-    fnmatchmodule_state *st = get_fnmatchmodule_state(module);
     // use the runtime 'os.path' value and not a cached one
-    PyObject *os_path = PyObject_GetAttr(st->os_module, &_Py_ID(path));
-    if (os_path == NULL) {
-        return -1;
-    }
-    PyObject *normcase = PyObject_GetAttr(os_path, &_Py_ID(normcase));
-    Py_DECREF(os_path);
+    PyObject *normcase = get_platform_normcase_function(module, NULL);
     if (normcase == NULL) {
         return -1;
     }
     // apply case normalization on both arguments
-    PyObject *nameobj = PyObject_CallOneArg(normcase, name);
-    if (nameobj == NULL) {
+    PyObject *norm_name = PyObject_CallOneArg(normcase, name);
+    if (norm_name == NULL) {
         Py_DECREF(normcase);
         return -1;
     }
-    PyObject *patobj = PyObject_CallOneArg(normcase, pat);
+    PyObject *norm_pattern = PyObject_CallOneArg(normcase, pattern);
     Py_DECREF(normcase);
-    if (patobj == NULL) {
-        Py_DECREF(nameobj);
+    if (norm_pattern == NULL) {
+        Py_DECREF(norm_name);
         return -1;
     }
-    int matching = _fnmatch_fnmatchcase_impl(module, nameobj, patobj);
-    Py_DECREF(patobj);
-    Py_DECREF(nameobj);
+    int matching = fnmatch_fnmatchcase_impl(module, norm_name, norm_pattern);
+    Py_DECREF(norm_pattern);
+    Py_DECREF(norm_name);
     return matching;
 }
 
 /*[clinic input]
-_fnmatch.fnmatchcase -> bool
+fnmatch.fnmatchcase -> bool
 
     name: object
-    pat: object
+    pat as pattern: object
 
-Test whether `name` matches `pattern`, including case.
+Test whether *name* matches *pat*, including case.
 
 This is a version of fnmatch() which doesn't case-normalize
 its arguments.
-
 [clinic start generated code]*/
 
 static int
-_fnmatch_fnmatchcase_impl(PyObject *module, PyObject *name, PyObject *pat)
-/*[clinic end generated code: output=4d1283b1b1fc7cb8 input=b02a6a5c8c5a46e2]*/
+fnmatch_fnmatchcase_impl(PyObject *module, PyObject *name, PyObject *pattern)
+/*[clinic end generated code: output=4d6b268169001876 input=91d62999c08fd55e]*/
 {
     // fnmatchcase() does not apply any case normalization on the inputs
-    PyObject *matcher = get_matcher_function(module, pat);
+    PyObject *matcher = get_matcher_function(module, pattern);
     if (matcher == NULL) {
         return -1;
     }
-    int res = _Py_fnmatch_fnmatch(matcher, name);
+    int matching = _Py_fnmatch_match(matcher, name);
     Py_DECREF(matcher);
-    return res;
+    return matching;
 }
 
 /*[clinic input]
-_fnmatch.translate -> object
+fnmatch.translate -> object
 
     pat as pattern: object
 
+Translate a shell pattern *pat* to a regular expression.
+
+There is no way to quote meta-characters.
 [clinic start generated code]*/
 
 static PyObject *
-_fnmatch_translate_impl(PyObject *module, PyObject *pattern)
-/*[clinic end generated code: output=2d9e3bbcbcc6e90e input=56e39f7beea97810]*/
+fnmatch_translate_impl(PyObject *module, PyObject *pattern)
+/*[clinic end generated code: output=77e0f5de9fbb59bd input=2cc1203a34c571fd]*/
 {
     if (PyBytes_Check(pattern)) {
-        PyObject *unicode = PyUnicode_DecodeLatin1(PyBytes_AS_STRING(pattern),
+        PyObject *decoded = PyUnicode_DecodeLatin1(PyBytes_AS_STRING(pattern),
                                                    PyBytes_GET_SIZE(pattern),
                                                    "strict");
-        if (unicode == NULL) {
+        if (decoded == NULL) {
             return NULL;
         }
-        // translated regular expression as a str object
-        PyObject *str_expr = _Py_fnmatch_translate(module, unicode);
-        Py_DECREF(unicode);
-        if (str_expr == NULL) {
+        PyObject *translated = _Py_fnmatch_translate(module, decoded);
+        Py_DECREF(decoded);
+        if (translated == NULL) {
             return NULL;
         }
-        PyObject *expr = PyUnicode_AsLatin1String(str_expr);
-        Py_DECREF(str_expr);
-        return expr;
+        PyObject *res = PyUnicode_AsLatin1String(translated);
+        Py_DECREF(translated);
+        return res;
     }
     else if (PyUnicode_Check(pattern)) {
         return _Py_fnmatch_translate(module, pattern);
@@ -310,11 +339,30 @@ _fnmatch_translate_impl(PyObject *module, PyObject *pattern)
 
 // ==== Module specs ==========================================================
 
+/*[python input]
+import fnmatch
+import textwrap
+fmt = 'PyDoc_STRVAR(fnmatchmodule_doc,\n"%s");'
+print(fmt % '\\n\\\n'.join(fnmatch.__doc__.splitlines()))
+[python start generated code]*/
+PyDoc_STRVAR(fnmatchmodule_doc,
+"Filename matching with shell patterns.\n\
+\n\
+fnmatch(FILENAME, PATTERN) matches according to the local convention.\n\
+fnmatchcase(FILENAME, PATTERN) always takes case in account.\n\
+\n\
+The functions operate by translating the pattern into a regular\n\
+expression.  They cache the compiled regular expressions for speed.\n\
+\n\
+The function translate(PATTERN) returns a regular expression\n\
+corresponding to PATTERN.  (It does not compile it.)");
+/*[python end generated code: output=b5d0696157f04882 input=8dfe2add227b2686]*/
+
 static PyMethodDef fnmatchmodule_methods[] = {
-    _FNMATCH_FILTER_METHODDEF
-    _FNMATCH_FNMATCH_METHODDEF
-    _FNMATCH_FNMATCHCASE_METHODDEF
-    _FNMATCH_TRANSLATE_METHODDEF
+    FNMATCH_FILTER_METHODDEF
+    FNMATCH_FNMATCH_METHODDEF
+    FNMATCH_FNMATCHCASE_METHODDEF
+    FNMATCH_TRANSLATE_METHODDEF
     {NULL, NULL}
 };
 
@@ -328,7 +376,7 @@ static struct PyModuleDef_Slot fnmatchmodule_slots[] = {
 static struct PyModuleDef _fnmatchmodule = {
     PyModuleDef_HEAD_INIT,
     .m_name = "_fnmatch",
-    .m_doc = NULL,
+    .m_doc = fnmatchmodule_doc,
     .m_size = sizeof(fnmatchmodule_state),
     .m_methods = fnmatchmodule_methods,
     .m_slots = fnmatchmodule_slots,
diff --git a/Modules/_fnmatch/_fnmatchmodule.h b/Modules/_fnmatch/_fnmatchmodule.h
index 9e70e800de36ae..4169967e0961af 100644
--- a/Modules/_fnmatch/_fnmatchmodule.h
+++ b/Modules/_fnmatch/_fnmatchmodule.h
@@ -8,14 +8,17 @@
 #include "Python.h"
 
 typedef struct {
-    PyObject *os_module;        // import os
-    PyObject *posixpath_module; // import posixpath
-    PyObject *re_module;        // import re
+    PyObject *os_module;            // import os
+    PyObject *posixpath_module;     // import posixpath
+    PyObject *re_module;            // import re
 
-    PyObject *lru_cache;        // functools.lru_cache() inner decorator
-    PyObject *translator;       // the translation unit whose calls are cached
+    PyObject *translator;           // LRU-cached translation unit
 
-    PyObject *hyphen_str;       // interned hyphen glyph '-'
+    // strings used by translate.c
+    PyObject *hyphen_str;           // hyphen glyph '-'
+    PyObject *re_empty_range_str;      // RE empty range '(?!)'
+    PyObject *re_atomic_bgroup_str;    // RE atomic group begin '(?>.*?'
+    PyObject *re_wildcard_str;         // RE wildcard '.*'
 } fnmatchmodule_state;
 
 static inline fnmatchmodule_state *
@@ -33,27 +36,26 @@ get_fnmatchmodule_state(PyObject *module)
  *
  * Parameters
  *
- *      matcher  A reference to the 'match()' method of a compiled pattern.
- *      string   The string to match (str or bytes object).
+ *  matcher     A reference to the 'match()' method of a compiled pattern.
+ *  string      The string to match (str or bytes object).
  *
- * Returns 1 if the 'string' matches the pattern and 0 otherwise.
+ * Returns
  *
- * Returns -1 if (1) 'string' is not a `str` or a `bytes` object,
- * and sets a TypeError exception, or (2) something went wrong.
+ *  -1  if the call 'matcher(string)' failed (e.g., invalid type),
+ *   0  if the 'string' does NOT match the pattern,
+ *   1  if the 'string' matches the pattern.
  */
 extern int
-_Py_fnmatch_fnmatch(PyObject *matcher, PyObject *string);
+_Py_fnmatch_match(PyObject *matcher, PyObject *string);
 
 /*
- * Perform a case-sensitive match using compiled RE patterns.
+ * Returns a list of matched names, or NULL if an error occurred.
  *
  * Parameters
  *
- *      matcher  A reference to the 'match()' method of a compiled pattern.
- *      names    An iterable of strings (str or bytes objects) to match.
- *
- * Returns a list of matched names, or NULL if an error occurred.
-*/
+ *  matcher     A reference to the 'match()' method of a compiled pattern.
+ *  names       An iterable of strings (str or bytes objects) to match.
+ */
 extern PyObject *
 _Py_fnmatch_filter(PyObject *matcher, PyObject *names);
 
@@ -62,7 +64,6 @@ _Py_fnmatch_filter(PyObject *matcher, PyObject *names);
  * instead. The returned values are however a sub-sequence of 'names'.
  *
  * The 'normcase' argument is a callable implementing os.path.normcase().
- *
  */
 extern PyObject *
 _Py_fnmatch_filter_normalized(PyObject *matcher, PyObject *names, PyObject *normcase);
@@ -70,6 +71,9 @@ _Py_fnmatch_filter_normalized(PyObject *matcher, PyObject *names, PyObject *norm
 /*
  * C accelerator for translating UNIX shell patterns into RE patterns.
  *
+ * The 'pattern' must be a Unicode object (not a bytes object),
+ * and the translated pattern will be a Unicode object as well.
+ *
  * Note: this is the C implementation of fnmatch.translate().
  */
 extern PyObject *
diff --git a/Modules/_fnmatch/clinic/_fnmatchmodule.c.h b/Modules/_fnmatch/clinic/_fnmatchmodule.c.h
index 5250bddbecc273..c611f01673b326 100644
--- a/Modules/_fnmatch/clinic/_fnmatchmodule.c.h
+++ b/Modules/_fnmatch/clinic/_fnmatchmodule.c.h
@@ -8,19 +8,20 @@ preserve
 #endif
 #include "pycore_modsupport.h"    // _PyArg_UnpackKeywords()
 
-PyDoc_STRVAR(_fnmatch_filter__doc__,
+PyDoc_STRVAR(fnmatch_filter__doc__,
 "filter($module, /, names, pat)\n"
 "--\n"
-"\n");
+"\n"
+"Construct a list from the names in *names* matching *pat*.");
 
-#define _FNMATCH_FILTER_METHODDEF    \
-    {"filter", _PyCFunction_CAST(_fnmatch_filter), METH_FASTCALL|METH_KEYWORDS, _fnmatch_filter__doc__},
+#define FNMATCH_FILTER_METHODDEF    \
+    {"filter", _PyCFunction_CAST(fnmatch_filter), METH_FASTCALL|METH_KEYWORDS, fnmatch_filter__doc__},
 
 static PyObject *
-_fnmatch_filter_impl(PyObject *module, PyObject *names, PyObject *pat);
+fnmatch_filter_impl(PyObject *module, PyObject *names, PyObject *pattern);
 
 static PyObject *
-_fnmatch_filter(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames)
+fnmatch_filter(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames)
 {
     PyObject *return_value = NULL;
     #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE)
@@ -50,33 +51,47 @@ _fnmatch_filter(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObj
     #undef KWTUPLE
     PyObject *argsbuf[2];
     PyObject *names;
-    PyObject *pat;
+    PyObject *pattern;
 
     args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, 2, 2, 0, argsbuf);
     if (!args) {
         goto exit;
     }
     names = args[0];
-    pat = args[1];
-    return_value = _fnmatch_filter_impl(module, names, pat);
+    pattern = args[1];
+    return_value = fnmatch_filter_impl(module, names, pattern);
 
 exit:
     return return_value;
 }
 
-PyDoc_STRVAR(_fnmatch_fnmatch__doc__,
+PyDoc_STRVAR(fnmatch_fnmatch__doc__,
 "fnmatch($module, /, name, pat)\n"
 "--\n"
-"\n");
+"\n"
+"Test whether *name* matches *pat*.\n"
+"\n"
+"Patterns are Unix shell style:\n"
+"\n"
+"*       matches everything\n"
+"?       matches any single character\n"
+"[seq]   matches any character in seq\n"
+"[!seq]  matches any char not in seq\n"
+"\n"
+"An initial period in *name* is not special.\n"
+"Both *name* and *pat* are first case-normalized\n"
+"if the operating system requires it.\n"
+"\n"
+"If you don\'t want this, use fnmatchcase(name, pat).");
 
-#define _FNMATCH_FNMATCH_METHODDEF    \
-    {"fnmatch", _PyCFunction_CAST(_fnmatch_fnmatch), METH_FASTCALL|METH_KEYWORDS, _fnmatch_fnmatch__doc__},
+#define FNMATCH_FNMATCH_METHODDEF    \
+    {"fnmatch", _PyCFunction_CAST(fnmatch_fnmatch), METH_FASTCALL|METH_KEYWORDS, fnmatch_fnmatch__doc__},
 
 static int
-_fnmatch_fnmatch_impl(PyObject *module, PyObject *name, PyObject *pat);
+fnmatch_fnmatch_impl(PyObject *module, PyObject *name, PyObject *pattern);
 
 static PyObject *
-_fnmatch_fnmatch(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames)
+fnmatch_fnmatch(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames)
 {
     PyObject *return_value = NULL;
     #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE)
@@ -106,7 +121,7 @@ _fnmatch_fnmatch(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyOb
     #undef KWTUPLE
     PyObject *argsbuf[2];
     PyObject *name;
-    PyObject *pat;
+    PyObject *pattern;
     int _return_value;
 
     args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, 2, 2, 0, argsbuf);
@@ -114,8 +129,8 @@ _fnmatch_fnmatch(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyOb
         goto exit;
     }
     name = args[0];
-    pat = args[1];
-    _return_value = _fnmatch_fnmatch_impl(module, name, pat);
+    pattern = args[1];
+    _return_value = fnmatch_fnmatch_impl(module, name, pattern);
     if ((_return_value == -1) && PyErr_Occurred()) {
         goto exit;
     }
@@ -125,23 +140,23 @@ _fnmatch_fnmatch(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyOb
     return return_value;
 }
 
-PyDoc_STRVAR(_fnmatch_fnmatchcase__doc__,
+PyDoc_STRVAR(fnmatch_fnmatchcase__doc__,
 "fnmatchcase($module, /, name, pat)\n"
 "--\n"
 "\n"
-"Test whether `name` matches `pattern`, including case.\n"
+"Test whether *name* matches *pat*, including case.\n"
 "\n"
 "This is a version of fnmatch() which doesn\'t case-normalize\n"
 "its arguments.");
 
-#define _FNMATCH_FNMATCHCASE_METHODDEF    \
-    {"fnmatchcase", _PyCFunction_CAST(_fnmatch_fnmatchcase), METH_FASTCALL|METH_KEYWORDS, _fnmatch_fnmatchcase__doc__},
+#define FNMATCH_FNMATCHCASE_METHODDEF    \
+    {"fnmatchcase", _PyCFunction_CAST(fnmatch_fnmatchcase), METH_FASTCALL|METH_KEYWORDS, fnmatch_fnmatchcase__doc__},
 
 static int
-_fnmatch_fnmatchcase_impl(PyObject *module, PyObject *name, PyObject *pat);
+fnmatch_fnmatchcase_impl(PyObject *module, PyObject *name, PyObject *pattern);
 
 static PyObject *
-_fnmatch_fnmatchcase(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames)
+fnmatch_fnmatchcase(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames)
 {
     PyObject *return_value = NULL;
     #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE)
@@ -171,7 +186,7 @@ _fnmatch_fnmatchcase(PyObject *module, PyObject *const *args, Py_ssize_t nargs,
     #undef KWTUPLE
     PyObject *argsbuf[2];
     PyObject *name;
-    PyObject *pat;
+    PyObject *pattern;
     int _return_value;
 
     args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, 2, 2, 0, argsbuf);
@@ -179,8 +194,8 @@ _fnmatch_fnmatchcase(PyObject *module, PyObject *const *args, Py_ssize_t nargs,
         goto exit;
     }
     name = args[0];
-    pat = args[1];
-    _return_value = _fnmatch_fnmatchcase_impl(module, name, pat);
+    pattern = args[1];
+    _return_value = fnmatch_fnmatchcase_impl(module, name, pattern);
     if ((_return_value == -1) && PyErr_Occurred()) {
         goto exit;
     }
@@ -190,19 +205,22 @@ _fnmatch_fnmatchcase(PyObject *module, PyObject *const *args, Py_ssize_t nargs,
     return return_value;
 }
 
-PyDoc_STRVAR(_fnmatch_translate__doc__,
+PyDoc_STRVAR(fnmatch_translate__doc__,
 "translate($module, /, pat)\n"
 "--\n"
-"\n");
+"\n"
+"Translate a shell pattern *pat* to a regular expression.\n"
+"\n"
+"There is no way to quote meta-characters.");
 
-#define _FNMATCH_TRANSLATE_METHODDEF    \
-    {"translate", _PyCFunction_CAST(_fnmatch_translate), METH_FASTCALL|METH_KEYWORDS, _fnmatch_translate__doc__},
+#define FNMATCH_TRANSLATE_METHODDEF    \
+    {"translate", _PyCFunction_CAST(fnmatch_translate), METH_FASTCALL|METH_KEYWORDS, fnmatch_translate__doc__},
 
 static PyObject *
-_fnmatch_translate_impl(PyObject *module, PyObject *pattern);
+fnmatch_translate_impl(PyObject *module, PyObject *pattern);
 
 static PyObject *
-_fnmatch_translate(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames)
+fnmatch_translate(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames)
 {
     PyObject *return_value = NULL;
     #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE)
@@ -238,9 +256,9 @@ _fnmatch_translate(PyObject *module, PyObject *const *args, Py_ssize_t nargs, Py
         goto exit;
     }
     pattern = args[0];
-    return_value = _fnmatch_translate_impl(module, pattern);
+    return_value = fnmatch_translate_impl(module, pattern);
 
 exit:
     return return_value;
 }
-/*[clinic end generated code: output=d9bb3df00c5c2b5e input=a9049054013a1b77]*/
+/*[clinic end generated code: output=50f858ef4bfb569a input=a9049054013a1b77]*/
diff --git a/Modules/_fnmatch/regex.c b/Modules/_fnmatch/matcher.c
similarity index 85%
rename from Modules/_fnmatch/regex.c
rename to Modules/_fnmatch/matcher.c
index 1b4d55943ef7a5..899fe56ee063d3 100644
--- a/Modules/_fnmatch/regex.c
+++ b/Modules/_fnmatch/matcher.c
@@ -1,9 +1,11 @@
-#include "_fnmatchmodule.h" // for pre-declarations
+/*
+ * Provide the implementation of the high-level matcher-based functions.
+ */
 
-// ==== API implementation ====================================================
+#include "_fnmatchmodule.h"
 
 inline int
-_Py_fnmatch_fnmatch(PyObject *matcher, PyObject *name)
+_Py_fnmatch_match(PyObject *matcher, PyObject *name)
 {
     // If 'name' is of incorrect type, it will be detected when calling
     // the matcher function (we emulate 're.compile(...).match(name)').
@@ -30,7 +32,7 @@ _Py_fnmatch_filter(PyObject *matcher, PyObject *names)
     }
     PyObject *name = NULL;
     while ((name = PyIter_Next(iter))) {
-        int matching = _Py_fnmatch_fnmatch(matcher, name);
+        int matching = _Py_fnmatch_match(matcher, name);
         if (matching < 0 || (matching == 1 && PyList_Append(res, name) < 0)) {
             goto abort;
         }
@@ -63,7 +65,7 @@ _Py_fnmatch_filter_normalized(PyObject *matcher, PyObject *names, PyObject *norm
         if (normalized == NULL) {
             goto abort;
         }
-        int matching = _Py_fnmatch_fnmatch(matcher, normalized);
+        int matching = _Py_fnmatch_match(matcher, normalized);
         Py_DECREF(normalized);
         // add the non-normalized name if its normalization matches
         if (matching < 0 || (matching == 1 && PyList_Append(res, name) < 0)) {
diff --git a/Modules/_fnmatch/translate.c b/Modules/_fnmatch/translate.c
index 7dd5fae64ebe90..f20740bbbb4d37 100644
--- a/Modules/_fnmatch/translate.c
+++ b/Modules/_fnmatch/translate.c
@@ -1,7 +1,6 @@
 /*
  * C accelerator for the translation function from UNIX shell patterns
- * to RE patterns. This accelerator is platform-independent but can be
- * disabled on demand.
+ * to RE patterns.
  */
 
 #include "_fnmatchmodule.h" // for get_fnmatchmodulestate_state()
@@ -9,6 +8,10 @@
 #include "pycore_call.h"
 
 // ==== Macro definitions =====================================================
+//
+// The following _WRITE_* and _WRITE_*_OR macros do NOT check their inputs
+// since they directly delegate to the _PyUnicodeWriter_Write* underlying
+// function.
 
 #define _WRITE_OR_FAIL(writeop, onerror) \
     do { \
@@ -17,23 +20,31 @@
         } \
     } while (0)
 
+/* write a character 'ch' */
 #define _WRITE_CHAR(writer, ch) \
     _PyUnicodeWriter_WriteChar((_PyUnicodeWriter *)(writer), (ch))
+/* write a character 'ch', or execute 'onerror' if it fails */
 #define _WRITE_CHAR_OR(writer, ch, onerror) \
     _WRITE_OR_FAIL(_WRITE_CHAR((writer), (ch)), onerror)
 
+/* write the ASCII string 'ascii' of given 'length' */
 #define _WRITE_ASCII(writer, ascii, length) \
     _PyUnicodeWriter_WriteASCIIString((_PyUnicodeWriter *)(writer), (ascii), (length))
+/* write the ASCII string 'ascii' of given 'length', or execute 'onerror' if it fails */
 #define _WRITE_ASCII_OR(writer, ascii, length, onerror) \
     _WRITE_OR_FAIL(_WRITE_ASCII((writer), (ascii), (length)), onerror)
 
+/* write a 'string' */
 #define _WRITE_STRING(writer, string) \
     _PyUnicodeWriter_WriteStr((_PyUnicodeWriter *)(writer), (string))
+/* write a 'string', or execute 'onerror' if it fails */
 #define _WRITE_STRING_OR(writer, string, onerror) \
     _WRITE_OR_FAIL(_WRITE_STRING((writer), (string)), onerror)
 
+/* write the substring string[i:j] */
 #define _WRITE_BLOCK(writer, string, i, j) \
     _PyUnicodeWriter_WriteSubstring((_PyUnicodeWriter *)(writer), (string), (i), (j))
+/* write the substring string[i:j] if i < j, or execute 'onerror' if it fails */
 #define _WRITE_BLOCK_OR(writer, string, i, j, onerror) \
     do { \
         Py_ssize_t _i = (i), _j = (j); /* to allow in-place operators on i or j */ \
@@ -68,7 +79,8 @@ get_unicode_character(Py_UCS4 ch);
  * values for '[!1-5]' are 10 (not 9) and 13 respectively.
  */
 static PyObject *
-translate_expression(PyObject *pattern, Py_ssize_t start, Py_ssize_t stop);
+translate_expression(fnmatchmodule_state *state,
+                     PyObject *pattern, Py_ssize_t start, Py_ssize_t stop);
 
 /*
  * Write an escaped string using re.escape().
@@ -76,7 +88,8 @@ translate_expression(PyObject *pattern, Py_ssize_t start, Py_ssize_t stop);
  * This returns the number of written characters, or -1 if an error occurred.
  */
 static Py_ssize_t
-write_literal(fnmatchmodule_state *state, PyUnicodeWriter *writer, PyObject *unicode);
+write_literal(fnmatchmodule_state *state,
+              PyUnicodeWriter *writer, PyObject *literal);
 
 /*
  * Write the translated pattern obtained by translate_expression().
@@ -84,7 +97,8 @@ write_literal(fnmatchmodule_state *state, PyUnicodeWriter *writer, PyObject *uni
  * This returns the number of written characters, or -1 if an error occurred.
  */
 static Py_ssize_t
-write_expression(PyUnicodeWriter *writer, PyObject *expression);
+write_expression(fnmatchmodule_state *state,
+                 PyUnicodeWriter *writer, PyObject *expression);
 
 /*
  * Build the final regular expression by processing the wildcards.
@@ -92,33 +106,15 @@ write_expression(PyUnicodeWriter *writer, PyObject *expression);
  * The position of each wildcard in 'pattern' is given by 'indices'.
  */
 static PyObject *
-process_wildcards(PyObject *pattern, PyObject *indices);
+process_wildcards(fnmatchmodule_state *state,
+                  PyObject *pattern, PyObject *indices);
 
 // ==== API implementation ====================================================
 
 PyObject *
 _Py_fnmatch_translate(PyObject *module, PyObject *pattern)
 {
-#define READ(ind) PyUnicode_READ(kind, data, (ind))
-#define ADVANCE_IF_CHAR(ch, ind, maxind) \
-    do { \
-        /* the following forces ind to be a variable name */ \
-        Py_ssize_t *Py_UNUSED(_ind) = &ind; \
-        if ((ind) < (maxind) && READ(ind) == (ch)) { \
-            ++ind; \
-        } \
-    } while (0)
-#define _WHILE_READ_CMP(ch, ind, maxind, cmp) \
-    do { \
-        /* the following forces ind to be a variable name */ \
-        Py_ssize_t *Py_UNUSED(_ind) = &ind; \
-        while ((ind) < (maxind) && READ(ind) cmp (ch)) { \
-            ++ind; \
-        } \
-    } while (0)
-#define ADVANCE_TO_NEXT(ch, from, maxind) _WHILE_READ_CMP((ch), (from), (maxind), !=)
-#define SKIP_DUPLICATES(ch, from, maxind) _WHILE_READ_CMP((ch), (from), (maxind), ==)
-
+    assert(PyUnicode_Check(pattern));
     fnmatchmodule_state *state = get_fnmatchmodule_state(module);
     PyObject *re = state->re_module;
     const Py_ssize_t n = PyUnicode_GET_LENGTH(pattern);
@@ -138,7 +134,7 @@ _Py_fnmatch_translate(PyObject *module, PyObject *pattern)
     // estimate the number of characters to be written to be the
     // same as the number of characters in the pattern.
     //
-    // TODO: (picnixz): should we limit the estimation in case of a failure?
+    // TODO: (picnixz): should we limit the estimation?
     PyUnicodeWriter *writer = PyUnicodeWriter_Create(n);
     if (writer == NULL) {
         return NULL;
@@ -150,14 +146,38 @@ _Py_fnmatch_translate(PyObject *module, PyObject *pattern)
     }
     const int kind = PyUnicode_KIND(pattern);
     const void *data = PyUnicode_DATA(pattern);
-    // i is the current index, wi is the index of a wildcard
-    Py_ssize_t i = 0, wi = 0;
+    /* declaration of some local helping macros */
+#define READ(ind) PyUnicode_READ(kind, data, (ind))
+    /* advance 'ind' if the character is 'ch' */
+#define ADVANCE_IF_CHAR(ch, ind, maxind) \
+    do { \
+        /* the following forces ind to be a variable name */ \
+        Py_ssize_t *Py_UNUSED(_ind) = &ind; \
+        if ((ind) < (maxind) && READ(ind) == (ch)) { \
+            ++ind; \
+        } \
+    } while (0)
+    /* advance 'ind' while 'READ(ind) CMPOP ch' holds and 'ind' < 'maxind' */
+#define _WHILE_READ_CMP(ch, ind, maxind, CMPOP) \
+    do { \
+        /* the following forces ind to be a variable name */ \
+        Py_ssize_t *Py_UNUSED(_ind) = &ind; \
+        while ((ind) < (maxind) && READ(ind) CMPOP (ch)) { \
+            ++ind; \
+        } \
+    } while (0)
+    /* advance 'from' as long as READ(from) != ch */
+#define ADVANCE_TO_NEXT(ch, from, maxind) _WHILE_READ_CMP((ch), (from), (maxind), !=)
+    /* advance 'from' as long as READ(from) == ch */
+#define SKIP_DUPLICATES(ch, from, maxind) _WHILE_READ_CMP((ch), (from), (maxind), ==)
+    Py_ssize_t i = 0;   // current index
+    Py_ssize_t wi = 0;  // number of characters written
     while (i < n) {
         // read and advance to the next character
         Py_UCS4 chr = READ(i++);
         switch (chr) {
             case '*': {
-                _WRITE_CHAR_OR(writer, chr, goto abort);
+                _WRITE_CHAR_OR(writer, '*', goto abort);
                 SKIP_DUPLICATES('*', i, n);
                 PyObject *index = PyLong_FromSsize_t(wi++);
                 if (index == NULL) {
@@ -182,7 +202,8 @@ _Py_fnmatch_translate(PyObject *module, PyObject *pattern)
                 ADVANCE_IF_CHAR(']', j, n); // [!] or []
                 ADVANCE_TO_NEXT(']', j, n); // locate closing ']'
                 if (j >= n) {
-                    _WRITE_ASCII_OR(writer, "\\[", 2, goto abort);
+                    _WRITE_CHAR_OR(writer, '\\', goto abort);
+                    _WRITE_CHAR_OR(writer, '[', goto abort);
                     wi += 2; // we just wrote 2 characters
                     break;  // early break for clarity
                 }
@@ -200,23 +221,27 @@ _Py_fnmatch_translate(PyObject *module, PyObject *pattern)
                         if (s0 == NULL) {
                             goto abort;
                         }
+                        // NOTE(picnixz): maybe cache the method and intern the arguments?
+                        // NOTE(picnixz): to be able to use PyObject_CallFunctionObjArgs()
                         s1 = _PyObject_CallMethod(s0, &_Py_ID(replace), "ss", "\\", "\\\\");
                         Py_DECREF(s0);
                     }
                     else {
                         assert(pos >= 0);
                         assert(READ(j) == ']');
-                        s1 = translate_expression(pattern, i, j);
+                        s1 = translate_expression(state, pattern, i, j);
                     }
                     if (s1 == NULL) {
                         goto abort;
                     }
+                    // NOTE(picnixz): maybe cache the method and intern the arguments?
+                    // NOTE(picnixz): to be able to use PyObject_CallFunctionObjArgs()
                     s2 = _PyObject_CallMethod(re, &_Py_ID(sub), "ssO", "([&~|])", "\\\\\\1", s1);
                     Py_DECREF(s1);
                     if (s2 == NULL) {
                         goto abort;
                     }
-                    Py_ssize_t difflen = write_expression(writer, s2);
+                    Py_ssize_t difflen = write_expression(state, writer, s2);
                     Py_DECREF(s2);
                     if (difflen < 0) {
                         goto abort;
@@ -251,7 +276,7 @@ _Py_fnmatch_translate(PyObject *module, PyObject *pattern)
         Py_DECREF(indices);
         return NULL;
     }
-    PyObject *res = process_wildcards(translated, indices);
+    PyObject *res = process_wildcards(state, translated, indices);
     Py_DECREF(translated);
     Py_DECREF(indices);
     return res;
@@ -289,7 +314,8 @@ get_unicode_character(Py_UCS4 ch)
 }
 
 static PyObject *
-translate_expression(PyObject *pattern, Py_ssize_t i, Py_ssize_t j)
+translate_expression(fnmatchmodule_state *state,
+                     PyObject *pattern, Py_ssize_t i, Py_ssize_t j)
 {
     PyObject *chunks = PyList_New(0);
     if (chunks == NULL) {
@@ -329,12 +355,7 @@ translate_expression(PyObject *pattern, Py_ssize_t i, Py_ssize_t j)
         assert(chunkscount > 0);
         PyObject *chunk = PyList_GET_ITEM(chunks, chunkscount - 1);
         assert(chunk != NULL);
-        PyObject *hyphen = PyUnicode_FromOrdinal('-');
-        if (hyphen == NULL) {
-            goto abort;
-        }
-        PyObject *repl = PyUnicode_Concat(chunk, hyphen);
-        Py_DECREF(hyphen);
+        PyObject *repl = PyUnicode_Concat(chunk, state->hyphen_str);
         // PyList_SetItem() does not create a new reference on 'repl'
         // so we should not decref 'repl' after the call, unless there
         // is an issue while setting the item.
@@ -400,10 +421,14 @@ translate_expression(PyObject *pattern, Py_ssize_t i, Py_ssize_t j)
     for (c = 0; c < chunkscount; ++c) {
         PyObject *s0 = PyList_GET_ITEM(chunks, c);
         assert(s0 != NULL);
+        // NOTE(picnixz): maybe cache the method and intern the arguments?
+        // NOTE(picnixz): to be able to use PyObject_CallFunctionObjArgs()
         PyObject *s1 = _PyObject_CallMethod(s0, &_Py_ID(replace), "ss", "\\", "\\\\");
         if (s1 == NULL) {
             goto abort;
         }
+        // NOTE(picnixz): maybe cache the method and intern the arguments?
+        // NOTE(picnixz): to be able to use PyObject_CallFunctionObjArgs()
         PyObject *s2 = _PyObject_CallMethod(s1, &_Py_ID(replace), "ss", "-", "\\-");
         Py_DECREF(s1);
         // PyList_SetItem() does not create a new reference on 's2'
@@ -431,11 +456,11 @@ translate_expression(PyObject *pattern, Py_ssize_t i, Py_ssize_t j)
 }
 
 static Py_ssize_t
-write_literal(fnmatchmodule_state *state, PyUnicodeWriter *writer, PyObject *unicode)
+write_literal(fnmatchmodule_state *state, PyUnicodeWriter *writer, PyObject *literal)
 {
     PyObject *escaped = PyObject_CallMethodOneArg(state->re_module,
                                                   &_Py_ID(escape),
-                                                  unicode);
+                                                  literal);
     if (escaped == NULL) {
         return -1;
     }
@@ -451,7 +476,8 @@ write_literal(fnmatchmodule_state *state, PyUnicodeWriter *writer, PyObject *uni
 }
 
 static Py_ssize_t
-write_expression(PyUnicodeWriter *writer, PyObject *expression)
+write_expression(fnmatchmodule_state *state,
+                 PyUnicodeWriter *writer, PyObject *expression)
 {
 #define WRITE_CHAR(c)           _WRITE_CHAR_OR(writer, (c), return -1)
 #define WRITE_ASCII(s, n)       _WRITE_ASCII_OR(writer, (s), (n), return -1)
@@ -460,7 +486,7 @@ write_expression(PyUnicodeWriter *writer, PyObject *expression)
     Py_ssize_t grouplen = PyUnicode_GET_LENGTH(expression);
     if (grouplen == 0) {
         /* empty range: never match */
-        WRITE_ASCII("(?!)", 4);
+        WRITE_STRING(state->re_empty_range_str);
         return 4;
     }
     Py_UCS4 token = PyUnicode_READ_CHAR(expression, 0);
@@ -498,11 +524,12 @@ write_expression(PyUnicodeWriter *writer, PyObject *expression)
 }
 
 static PyObject *
-process_wildcards(PyObject *pattern, PyObject *indices)
+process_wildcards(fnmatchmodule_state *state,
+                  PyObject *pattern, PyObject *indices)
 {
     const Py_ssize_t m = PyList_GET_SIZE(indices);
     if (m == 0) {
-        // just write fr'(?s:{parts} + ")\Z"
+        // "(?s:" + pattern + ")\Z"
         return PyUnicode_FromFormat("(?s:%S)\\Z", pattern);
     }
     /*
@@ -523,9 +550,9 @@ process_wildcards(PyObject *pattern, PyObject *indices)
      * We write one additional '.*' if indices[-1] + 1 == n.
      *
      * Since the result is surrounded by "(?s:" and ")\Z", we
-     * write at least "indices[0] + 7m + n + 6" characters,
+     * write at least "indices[0] + 7*m + n + 6" characters,
      * where 'm' is the number of stars and 'n' the length
-     * of the translated pattern.
+     * of the (translated) pattern.
      */
     PyObject *jobj = PyList_GET_ITEM(indices, 0);
     assert(jobj != NULL);
@@ -538,12 +565,10 @@ process_wildcards(PyObject *pattern, PyObject *indices)
     if (writer == NULL) {
         return NULL;
     }
-#define WRITE_BLOCK(i, j)       _WRITE_BLOCK_OR(writer, pattern, (i), (j), goto abort)
-#define WRITE_ATOMIC_BEGIN()    _WRITE_ASCII_OR(writer, "(?>.*?", 6, goto abort)
-#define WRITE_ATOMIC_END()      _WRITE_CHAR_OR(writer, ')', goto abort)
-    WRITE_BLOCK(i, j);  // write stuff before '*' if needed
+    _WRITE_BLOCK_OR(writer, pattern, i, j, goto abort);
     i = j + 1;              // jump after the '*'
     for (Py_ssize_t k = 1; k < m; ++k) {
+        // process all but the last wildcard.
         PyObject *ind = PyList_GET_ITEM(indices, k);
         assert(ind != NULL);
         j = PyLong_AsSsize_t(ind);
@@ -551,25 +576,24 @@ process_wildcards(PyObject *pattern, PyObject *indices)
             goto abort;
         }
         assert(i < j);
-        // write the atomic RE group
-        WRITE_ATOMIC_BEGIN();
-        WRITE_BLOCK(i, j);
-        WRITE_ATOMIC_END();
+        // write the atomic RE group '(?>.*?' + BLOCK + ')'
+        _WRITE_STRING_OR(writer, state->re_atomic_bgroup_str, goto abort);
+        _WRITE_BLOCK_OR(writer, pattern, i, j, goto abort);
+        _WRITE_CHAR_OR(writer, ')', goto abort);
         i = j + 1;
     }
-    // handle the last group
-    _WRITE_ASCII_OR(writer, ".*", 2, goto abort);
-    WRITE_BLOCK(i, n); // write the remaining substring (if non-empty)
-#undef WRITE_BLOCK
-#undef WRITE_ATOMIC_END
-#undef WRITE_ATOMIC_BEGIN
-    PyObject *res = PyUnicodeWriter_Finish(writer);
-    if (res == NULL) {
+    // handle the remaining wildcard
+    _WRITE_STRING_OR(writer, state->re_wildcard_str, goto abort);
+    // write the remaining substring (if non-empty)
+    _WRITE_BLOCK_OR(writer, pattern, i, n, goto abort);
+    PyObject *processed = PyUnicodeWriter_Finish(writer);
+    if (processed == NULL) {
         return NULL;
     }
-    PyObject *formatted = PyUnicode_FromFormat("(?s:%S)\\Z", res);
-    Py_DECREF(res);
-    return formatted;
+    // "(?s:" + processed + ")\Z"
+    PyObject *res = PyUnicode_FromFormat("(?s:%S)\\Z", processed);
+    Py_DECREF(processed);
+    return res;
 abort:
     PyUnicodeWriter_Discard(writer);
     return NULL;

From 5374ff45aee1a274120a6a60e6ee4974ff477321 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?=
 <10796600+picnixz@users.noreply.github.com>
Date: Fri, 12 Jul 2024 14:50:49 +0200
Subject: [PATCH 54/97] revert some improvements that were not improvements

---
 Modules/_fnmatch/_fnmatchmodule.c |  9 -------
 Modules/_fnmatch/_fnmatchmodule.h |  3 ---
 Modules/_fnmatch/translate.c      | 42 +++++++++++++------------------
 3 files changed, 17 insertions(+), 37 deletions(-)

diff --git a/Modules/_fnmatch/_fnmatchmodule.c b/Modules/_fnmatch/_fnmatchmodule.c
index 62a2e7b7f1ed40..114d3ca6dc252d 100644
--- a/Modules/_fnmatch/_fnmatchmodule.c
+++ b/Modules/_fnmatch/_fnmatchmodule.c
@@ -130,9 +130,6 @@ fnmatchmodule_exec(PyObject *module)
         return -1;
     }
     INTERN_STRING(st, hyphen_str, "-");
-    INTERN_STRING(st, re_empty_range_str, "(?!)");
-    INTERN_STRING(st, re_atomic_bgroup_str, "(?>.*?");
-    INTERN_STRING(st, re_wildcard_str, ".*");
     return 0;
 }
 #undef INTERN_STRING
@@ -142,9 +139,6 @@ static int
 fnmatchmodule_traverse(PyObject *m, visitproc visit, void *arg)
 {
     fnmatchmodule_state *st = get_fnmatchmodule_state(m);
-    Py_VISIT(st->re_wildcard_str);
-    Py_VISIT(st->re_atomic_bgroup_str);
-    Py_VISIT(st->re_empty_range_str);
     Py_VISIT(st->hyphen_str);
     Py_VISIT(st->translator);
     Py_VISIT(st->re_module);
@@ -157,9 +151,6 @@ static int
 fnmatchmodule_clear(PyObject *m)
 {
     fnmatchmodule_state *st = get_fnmatchmodule_state(m);
-    Py_CLEAR(st->re_wildcard_str);
-    Py_CLEAR(st->re_atomic_bgroup_str);
-    Py_CLEAR(st->re_empty_range_str);
     Py_CLEAR(st->hyphen_str);
     Py_CLEAR(st->translator);
     Py_CLEAR(st->re_module);
diff --git a/Modules/_fnmatch/_fnmatchmodule.h b/Modules/_fnmatch/_fnmatchmodule.h
index 4169967e0961af..bae2908969c7d0 100644
--- a/Modules/_fnmatch/_fnmatchmodule.h
+++ b/Modules/_fnmatch/_fnmatchmodule.h
@@ -16,9 +16,6 @@ typedef struct {
 
     // strings used by translate.c
     PyObject *hyphen_str;           // hyphen glyph '-'
-    PyObject *re_empty_range_str;      // RE empty range '(?!)'
-    PyObject *re_atomic_bgroup_str;    // RE atomic group begin '(?>.*?'
-    PyObject *re_wildcard_str;         // RE wildcard '.*'
 } fnmatchmodule_state;
 
 static inline fnmatchmodule_state *
diff --git a/Modules/_fnmatch/translate.c b/Modules/_fnmatch/translate.c
index f20740bbbb4d37..0c3d0757a62174 100644
--- a/Modules/_fnmatch/translate.c
+++ b/Modules/_fnmatch/translate.c
@@ -97,8 +97,7 @@ write_literal(fnmatchmodule_state *state,
  * This returns the number of written characters, or -1 if an error occurred.
  */
 static Py_ssize_t
-write_expression(fnmatchmodule_state *state,
-                 PyUnicodeWriter *writer, PyObject *expression);
+write_expression(PyUnicodeWriter *writer, PyObject *expression);
 
 /*
  * Build the final regular expression by processing the wildcards.
@@ -106,8 +105,7 @@ write_expression(fnmatchmodule_state *state,
  * The position of each wildcard in 'pattern' is given by 'indices'.
  */
 static PyObject *
-process_wildcards(fnmatchmodule_state *state,
-                  PyObject *pattern, PyObject *indices);
+process_wildcards(PyObject *pattern, PyObject *indices);
 
 // ==== API implementation ====================================================
 
@@ -202,8 +200,7 @@ _Py_fnmatch_translate(PyObject *module, PyObject *pattern)
                 ADVANCE_IF_CHAR(']', j, n); // [!] or []
                 ADVANCE_TO_NEXT(']', j, n); // locate closing ']'
                 if (j >= n) {
-                    _WRITE_CHAR_OR(writer, '\\', goto abort);
-                    _WRITE_CHAR_OR(writer, '[', goto abort);
+                    _WRITE_ASCII_OR(writer, "\\[", 2, goto abort);
                     wi += 2; // we just wrote 2 characters
                     break;  // early break for clarity
                 }
@@ -221,7 +218,7 @@ _Py_fnmatch_translate(PyObject *module, PyObject *pattern)
                         if (s0 == NULL) {
                             goto abort;
                         }
-                        // NOTE(picnixz): maybe cache the method and intern the arguments?
+                        // NOTE(picnixz): maybe cache the method and intern the arguments
                         // NOTE(picnixz): to be able to use PyObject_CallFunctionObjArgs()
                         s1 = _PyObject_CallMethod(s0, &_Py_ID(replace), "ss", "\\", "\\\\");
                         Py_DECREF(s0);
@@ -234,14 +231,14 @@ _Py_fnmatch_translate(PyObject *module, PyObject *pattern)
                     if (s1 == NULL) {
                         goto abort;
                     }
-                    // NOTE(picnixz): maybe cache the method and intern the arguments?
+                    // NOTE(picnixz): maybe cache the method and intern the arguments
                     // NOTE(picnixz): to be able to use PyObject_CallFunctionObjArgs()
                     s2 = _PyObject_CallMethod(re, &_Py_ID(sub), "ssO", "([&~|])", "\\\\\\1", s1);
                     Py_DECREF(s1);
                     if (s2 == NULL) {
                         goto abort;
                     }
-                    Py_ssize_t difflen = write_expression(state, writer, s2);
+                    Py_ssize_t difflen = write_expression(writer, s2);
                     Py_DECREF(s2);
                     if (difflen < 0) {
                         goto abort;
@@ -276,7 +273,7 @@ _Py_fnmatch_translate(PyObject *module, PyObject *pattern)
         Py_DECREF(indices);
         return NULL;
     }
-    PyObject *res = process_wildcards(state, translated, indices);
+    PyObject *res = process_wildcards(translated, indices);
     Py_DECREF(translated);
     Py_DECREF(indices);
     return res;
@@ -421,13 +418,13 @@ translate_expression(fnmatchmodule_state *state,
     for (c = 0; c < chunkscount; ++c) {
         PyObject *s0 = PyList_GET_ITEM(chunks, c);
         assert(s0 != NULL);
-        // NOTE(picnixz): maybe cache the method and intern the arguments?
+        // NOTE(picnixz): maybe cache the method and intern the arguments
         // NOTE(picnixz): to be able to use PyObject_CallFunctionObjArgs()
         PyObject *s1 = _PyObject_CallMethod(s0, &_Py_ID(replace), "ss", "\\", "\\\\");
         if (s1 == NULL) {
             goto abort;
         }
-        // NOTE(picnixz): maybe cache the method and intern the arguments?
+        // NOTE(picnixz): maybe cache the method and intern the arguments
         // NOTE(picnixz): to be able to use PyObject_CallFunctionObjArgs()
         PyObject *s2 = _PyObject_CallMethod(s1, &_Py_ID(replace), "ss", "-", "\\-");
         Py_DECREF(s1);
@@ -456,7 +453,8 @@ translate_expression(fnmatchmodule_state *state,
 }
 
 static Py_ssize_t
-write_literal(fnmatchmodule_state *state, PyUnicodeWriter *writer, PyObject *literal)
+write_literal(fnmatchmodule_state *state,
+              PyUnicodeWriter *writer, PyObject *literal)
 {
     PyObject *escaped = PyObject_CallMethodOneArg(state->re_module,
                                                   &_Py_ID(escape),
@@ -476,17 +474,14 @@ write_literal(fnmatchmodule_state *state, PyUnicodeWriter *writer, PyObject *lit
 }
 
 static Py_ssize_t
-write_expression(fnmatchmodule_state *state,
-                 PyUnicodeWriter *writer, PyObject *expression)
+write_expression(PyUnicodeWriter *writer, PyObject *expression)
 {
 #define WRITE_CHAR(c)           _WRITE_CHAR_OR(writer, (c), return -1)
-#define WRITE_ASCII(s, n)       _WRITE_ASCII_OR(writer, (s), (n), return -1)
-#define WRITE_BLOCK(s, i, j)    _WRITE_BLOCK_OR(writer, (s), (i), (j), return -1)
 #define WRITE_STRING(s)         _WRITE_STRING_OR(writer, (s), return -1)
     Py_ssize_t grouplen = PyUnicode_GET_LENGTH(expression);
     if (grouplen == 0) {
         /* empty range: never match */
-        WRITE_STRING(state->re_empty_range_str);
+        _WRITE_ASCII_OR(writer, "(?!)", 4, return -1);
         return 4;
     }
     Py_UCS4 token = PyUnicode_READ_CHAR(expression, 0);
@@ -500,7 +495,7 @@ write_expression(fnmatchmodule_state *state,
     switch (token) {
         case '!': {
             WRITE_CHAR('^'); // replace '!' by '^'
-            WRITE_BLOCK(expression, 1, grouplen);
+            _WRITE_BLOCK_OR(writer, expression, 1, grouplen, return -1);
             break;
         }
         case '^':
@@ -518,14 +513,11 @@ write_expression(fnmatchmodule_state *state,
     WRITE_CHAR(']');
     return grouplen + extra;
 #undef WRITE_STRING
-#undef WRITE_BLOCK
-#undef WRITE_ASCII
 #undef WRITE_CHAR
 }
 
 static PyObject *
-process_wildcards(fnmatchmodule_state *state,
-                  PyObject *pattern, PyObject *indices)
+process_wildcards(PyObject *pattern, PyObject *indices)
 {
     const Py_ssize_t m = PyList_GET_SIZE(indices);
     if (m == 0) {
@@ -577,13 +569,13 @@ process_wildcards(fnmatchmodule_state *state,
         }
         assert(i < j);
         // write the atomic RE group '(?>.*?' + BLOCK + ')'
-        _WRITE_STRING_OR(writer, state->re_atomic_bgroup_str, goto abort);
+        _WRITE_ASCII_OR(writer, "(?>.*?", 6, goto abort);
         _WRITE_BLOCK_OR(writer, pattern, i, j, goto abort);
         _WRITE_CHAR_OR(writer, ')', goto abort);
         i = j + 1;
     }
     // handle the remaining wildcard
-    _WRITE_STRING_OR(writer, state->re_wildcard_str, goto abort);
+    _WRITE_ASCII_OR(writer, ".*", 2, goto abort);
     // write the remaining substring (if non-empty)
     _WRITE_BLOCK_OR(writer, pattern, i, n, goto abort);
     PyObject *processed = PyUnicodeWriter_Finish(writer);

From 4ff4f370284b064abbfb50a1459aeda6efb6e8c5 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?=
 <10796600+picnixz@users.noreply.github.com>
Date: Sat, 13 Jul 2024 09:49:49 +0200
Subject: [PATCH 55/97] remove incorrect usage of clinic

---
 Modules/_fnmatch/_fnmatchmodule.c | 26 +++++++++-----------------
 1 file changed, 9 insertions(+), 17 deletions(-)

diff --git a/Modules/_fnmatch/_fnmatchmodule.c b/Modules/_fnmatch/_fnmatchmodule.c
index 114d3ca6dc252d..92a24a9681ab02 100644
--- a/Modules/_fnmatch/_fnmatchmodule.c
+++ b/Modules/_fnmatch/_fnmatchmodule.c
@@ -330,24 +330,16 @@ fnmatch_translate_impl(PyObject *module, PyObject *pattern)
 
 // ==== Module specs ==========================================================
 
-/*[python input]
-import fnmatch
-import textwrap
-fmt = 'PyDoc_STRVAR(fnmatchmodule_doc,\n"%s");'
-print(fmt % '\\n\\\n'.join(fnmatch.__doc__.splitlines()))
-[python start generated code]*/
+// fmt: off
 PyDoc_STRVAR(fnmatchmodule_doc,
-"Filename matching with shell patterns.\n\
-\n\
-fnmatch(FILENAME, PATTERN) matches according to the local convention.\n\
-fnmatchcase(FILENAME, PATTERN) always takes case in account.\n\
-\n\
-The functions operate by translating the pattern into a regular\n\
-expression.  They cache the compiled regular expressions for speed.\n\
-\n\
-The function translate(PATTERN) returns a regular expression\n\
-corresponding to PATTERN.  (It does not compile it.)");
-/*[python end generated code: output=b5d0696157f04882 input=8dfe2add227b2686]*/
+"Filename matching with shell patterns.\n"
+"fnmatch(FILENAME, PATTERN) matches according to the local convention.\n"
+"fnmatchcase(FILENAME, PATTERN) always takes case in account.\n\n"
+"The functions operate by translating the pattern into a regular\n"
+"expression.  They cache the compiled regular expressions for speed.\n\n"
+"The function translate(PATTERN) returns a regular expression\n"
+"corresponding to PATTERN.  (It does not compile it.)");
+// fmt: on
 
 static PyMethodDef fnmatchmodule_methods[] = {
     FNMATCH_FILTER_METHODDEF

From 178f2d3599ddc2cc79599ab005a845e7a288410c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?=
 <10796600+picnixz@users.noreply.github.com>
Date: Sat, 13 Jul 2024 10:12:31 +0200
Subject: [PATCH 56/97] update TODO note

---
 Modules/_fnmatch/translate.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Modules/_fnmatch/translate.c b/Modules/_fnmatch/translate.c
index 0c3d0757a62174..00ad81c030a9e5 100644
--- a/Modules/_fnmatch/translate.c
+++ b/Modules/_fnmatch/translate.c
@@ -132,7 +132,7 @@ _Py_fnmatch_translate(PyObject *module, PyObject *pattern)
     // estimate the number of characters to be written to be the
     // same as the number of characters in the pattern.
     //
-    // TODO: (picnixz): should we limit the estimation?
+    // TODO(picnixz): should we limit the estimation?
     PyUnicodeWriter *writer = PyUnicodeWriter_Create(n);
     if (writer == NULL) {
         return NULL;

From 9d237b13be2eac358e36f3cd5fbebe55a8cf3017 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?=
 <10796600+picnixz@users.noreply.github.com>
Date: Mon, 15 Jul 2024 10:31:08 +0200
Subject: [PATCH 57/97] use a dynamic module instead

---
 Modules/Setup.bootstrap.in | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/Modules/Setup.bootstrap.in b/Modules/Setup.bootstrap.in
index 35198091329d01..4dcc0f55176d0e 100644
--- a/Modules/Setup.bootstrap.in
+++ b/Modules/Setup.bootstrap.in
@@ -35,8 +35,5 @@ _operator _operator.c
 _stat _stat.c
 _symtable symtablemodule.c
 
-# miscellaneous accelerators
-_fnmatch _fnmatch/_fnmatchmodule.c _fnmatch/matcher.c _fnmatch/translate.c
-
 # for systems without $HOME env, used by site._getuserbase()
 @MODULE_PWD_TRUE@pwd pwdmodule.c

From b1568d462b63ff1e263136b47a16d40d142d1067 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?=
 <10796600+picnixz@users.noreply.github.com>
Date: Mon, 15 Jul 2024 12:31:14 +0200
Subject: [PATCH 58/97] remove un-necessary definitions

---
 Makefile.pre.in | 9 ---------
 1 file changed, 9 deletions(-)

diff --git a/Makefile.pre.in b/Makefile.pre.in
index bd5e471c50bd33..52e32297d3685c 100644
--- a/Makefile.pre.in
+++ b/Makefile.pre.in
@@ -351,13 +351,6 @@ IO_OBJS=	\
 		Modules/_io/bytesio.o \
 		Modules/_io/stringio.o
 
-FNMATCH_H=	Modules/_fnmatch/_fnmatchmodule.h
-
-FNMATCH_OBJS=	\
-		Modules/_fnmatch/_fnmatchmodule.o \
-		Modules/_fnmatch/matcher.o \
-		Modules/_fnmatch/translate.o
-
 ##########################################################################
 # mimalloc
 
@@ -1747,8 +1740,6 @@ Python/sysmodule.o: $(srcdir)/Python/sysmodule.c Makefile $(srcdir)/Include/pydt
 		$(MULTIARCH_CPPFLAGS) \
 		-o $@ $(srcdir)/Python/sysmodule.c
 
-$(FNMATCH_OBJS): $(FNMATCH_H)
-
 $(IO_OBJS): $(IO_H)
 
 .PHONY: regen-pegen-metaparser

From 84b0b82cf00545ea60ceade2a9c47be2c23bebf8 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?=
 <10796600+picnixz@users.noreply.github.com>
Date: Mon, 15 Jul 2024 12:32:17 +0200
Subject: [PATCH 59/97] update configuration

---
 Modules/Setup.stdlib.in           |  1 +
 Modules/_fnmatch/_fnmatchmodule.c |  3 ++
 Modules/_fnmatch/_fnmatchmodule.h |  4 +++
 configure                         | 56 +++++++++++++++----------------
 configure.ac                      |  2 +-
 5 files changed, 37 insertions(+), 29 deletions(-)

diff --git a/Modules/Setup.stdlib.in b/Modules/Setup.stdlib.in
index dfc75077650df8..e689d18b70b035 100644
--- a/Modules/Setup.stdlib.in
+++ b/Modules/Setup.stdlib.in
@@ -33,6 +33,7 @@
 @MODULE__BISECT_TRUE@_bisect _bisectmodule.c
 @MODULE__CONTEXTVARS_TRUE@_contextvars _contextvarsmodule.c
 @MODULE__CSV_TRUE@_csv _csv.c
+@MODULE__FNMATCH_TRUE@_fnmatch _fnmatch/_fnmatchmodule.c _fnmatch/matcher.c _fnmatch/translate.c
 @MODULE__HEAPQ_TRUE@_heapq _heapqmodule.c
 @MODULE__JSON_TRUE@_json _json.c
 @MODULE__LSPROF_TRUE@_lsprof _lsprof.c rotatingtree.c
diff --git a/Modules/_fnmatch/_fnmatchmodule.c b/Modules/_fnmatch/_fnmatchmodule.c
index 92a24a9681ab02..3c14745859c91b 100644
--- a/Modules/_fnmatch/_fnmatchmodule.c
+++ b/Modules/_fnmatch/_fnmatchmodule.c
@@ -1,4 +1,7 @@
 #include "_fnmatchmodule.h"
+
+#include "pycore_runtime.h" // _Py_ID()
+
 #include "clinic/_fnmatchmodule.c.h"
 
 #define COMPILED_CACHE_SIZE     32768
diff --git a/Modules/_fnmatch/_fnmatchmodule.h b/Modules/_fnmatch/_fnmatchmodule.h
index bae2908969c7d0..c4ce1d23201156 100644
--- a/Modules/_fnmatch/_fnmatchmodule.h
+++ b/Modules/_fnmatch/_fnmatchmodule.h
@@ -5,6 +5,10 @@
 #ifndef _FNMATCHMODULE_H
 #define _FNMATCHMODULE_H
 
+#ifndef Py_BUILD_CORE_BUILTIN
+#  define Py_BUILD_CORE_MODULE 1
+#endif
+
 #include "Python.h"
 
 typedef struct {
diff --git a/configure b/configure
index f1dcfa1fa1dbcd..09042aaf0507bd 100755
--- a/configure
+++ b/configure
@@ -801,6 +801,8 @@ MODULE__JSON_FALSE
 MODULE__JSON_TRUE
 MODULE__HEAPQ_FALSE
 MODULE__HEAPQ_TRUE
+MODULE__FNMATCH_FALSE
+MODULE__FNMATCH_TRUE
 MODULE__CSV_FALSE
 MODULE__CSV_TRUE
 MODULE__CONTEXTVARS_FALSE
@@ -815,8 +817,6 @@ MODULE_TIME_FALSE
 MODULE_TIME_TRUE
 MODULE__IO_FALSE
 MODULE__IO_TRUE
-MODULE__FNMATCH_FALSE
-MODULE__FNMATCH_TRUE
 MODULE_BUILDTYPE
 TEST_MODULES
 LIBB2_LIBS
@@ -29018,28 +29018,6 @@ MODULE_BLOCK=
 
 
 
-        if test "$py_cv_module__fnmatch" != "n/a"
-then :
-  py_cv_module__fnmatch=yes
-fi
-   if test "$py_cv_module__fnmatch" = yes; then
-  MODULE__FNMATCH_TRUE=
-  MODULE__FNMATCH_FALSE='#'
-else
-  MODULE__FNMATCH_TRUE='#'
-  MODULE__FNMATCH_FALSE=
-fi
-
-  as_fn_append MODULE_BLOCK "MODULE__FNMATCH_STATE=$py_cv_module__fnmatch$as_nl"
-  if test "x$py_cv_module__fnmatch" = xyes
-then :
-
-    as_fn_append MODULE_BLOCK "MODULE__FNMATCH_CFLAGS=-I\$(srcdir)/Modules/_fnmatch$as_nl"
-
-
-fi
-
-
         if test "$py_cv_module__io" != "n/a"
 then :
   py_cv_module__io=yes
@@ -29192,6 +29170,28 @@ then :
 
 
 
+fi
+
+
+        if test "$py_cv_module__fnmatch" != "n/a"
+then :
+  py_cv_module__fnmatch=yes
+fi
+   if test "$py_cv_module__fnmatch" = yes; then
+  MODULE__FNMATCH_TRUE=
+  MODULE__FNMATCH_FALSE='#'
+else
+  MODULE__FNMATCH_TRUE='#'
+  MODULE__FNMATCH_FALSE=
+fi
+
+  as_fn_append MODULE_BLOCK "MODULE__FNMATCH_STATE=$py_cv_module__fnmatch$as_nl"
+  if test "x$py_cv_module__fnmatch" = xyes
+then :
+
+    as_fn_append MODULE_BLOCK "MODULE__FNMATCH_CFLAGS=-I\$(srcdir)/Modules/_fnmatch$as_nl"
+
+
 fi
 
 
@@ -31749,10 +31749,6 @@ LTLIBOBJS=$ac_ltlibobjs
 
 
 
-if test -z "${MODULE__FNMATCH_TRUE}" && test -z "${MODULE__FNMATCH_FALSE}"; then
-  as_fn_error $? "conditional \"MODULE__FNMATCH\" was never defined.
-Usually this means the macro was only invoked conditionally." "$LINENO" 5
-fi
 if test -z "${MODULE__IO_TRUE}" && test -z "${MODULE__IO_FALSE}"; then
   as_fn_error $? "conditional \"MODULE__IO\" was never defined.
 Usually this means the macro was only invoked conditionally." "$LINENO" 5
@@ -31781,6 +31777,10 @@ if test -z "${MODULE__CSV_TRUE}" && test -z "${MODULE__CSV_FALSE}"; then
   as_fn_error $? "conditional \"MODULE__CSV\" was never defined.
 Usually this means the macro was only invoked conditionally." "$LINENO" 5
 fi
+if test -z "${MODULE__FNMATCH_TRUE}" && test -z "${MODULE__FNMATCH_FALSE}"; then
+  as_fn_error $? "conditional \"MODULE__FNMATCH\" was never defined.
+Usually this means the macro was only invoked conditionally." "$LINENO" 5
+fi
 if test -z "${MODULE__HEAPQ_TRUE}" && test -z "${MODULE__HEAPQ_FALSE}"; then
   as_fn_error $? "conditional \"MODULE__HEAPQ\" was never defined.
 Usually this means the macro was only invoked conditionally." "$LINENO" 5
diff --git a/configure.ac b/configure.ac
index 7b8fc287c56c35..a0e476437a827e 100644
--- a/configure.ac
+++ b/configure.ac
@@ -7681,7 +7681,6 @@ AC_DEFUN([PY_STDLIB_MOD_SIMPLE], [
 ])
 
 dnl static modules in Modules/Setup.bootstrap
-PY_STDLIB_MOD_SIMPLE([_fnmatch], [-I\$(srcdir)/Modules/_fnmatch], [])
 PY_STDLIB_MOD_SIMPLE([_io], [-I\$(srcdir)/Modules/_io], [])
 PY_STDLIB_MOD_SIMPLE([time], [], [$TIMEMODULE_LIB])
 
@@ -7691,6 +7690,7 @@ PY_STDLIB_MOD_SIMPLE([_asyncio])
 PY_STDLIB_MOD_SIMPLE([_bisect])
 PY_STDLIB_MOD_SIMPLE([_contextvars])
 PY_STDLIB_MOD_SIMPLE([_csv])
+PY_STDLIB_MOD_SIMPLE([_fnmatch], [-I\$(srcdir)/Modules/_fnmatch], [])
 PY_STDLIB_MOD_SIMPLE([_heapq])
 PY_STDLIB_MOD_SIMPLE([_json])
 PY_STDLIB_MOD_SIMPLE([_lsprof])

From 86960246867060d805d92fa8d11d9b43e3d14637 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?=
 <10796600+picnixz@users.noreply.github.com>
Date: Tue, 16 Jul 2024 16:34:09 +0200
Subject: [PATCH 60/97] intern strings & functions

---
 Modules/_fnmatch/_fnmatchmodule.c | 22 ++++++++++++++++++++++
 Modules/_fnmatch/_fnmatchmodule.h | 17 ++++++++++++-----
 2 files changed, 34 insertions(+), 5 deletions(-)

diff --git a/Modules/_fnmatch/_fnmatchmodule.c b/Modules/_fnmatch/_fnmatchmodule.c
index 3c14745859c91b..e51283a4af60c6 100644
--- a/Modules/_fnmatch/_fnmatchmodule.c
+++ b/Modules/_fnmatch/_fnmatchmodule.c
@@ -56,6 +56,10 @@ fnmatchmodule_load_translator(PyObject *module, fnmatchmodule_state *st)
         return -1;
     }
     PyObject *lru_cache = _PyImport_GetModuleAttrString("functools", "lru_cache");
+    if (lru_cache == NULL) {
+        Py_DECREF(maxsize);
+        return -1;
+    }
     PyObject *decorator = PyObject_CallFunctionObjArgs(lru_cache, maxsize, Py_True, NULL);
     Py_DECREF(lru_cache);
     Py_DECREF(maxsize);
@@ -133,6 +137,11 @@ fnmatchmodule_exec(PyObject *module)
         return -1;
     }
     INTERN_STRING(st, hyphen_str, "-");
+    INTERN_STRING(st, hyphen_esc_str, "\\-");
+    INTERN_STRING(st, backslash_str, "\\");
+    INTERN_STRING(st, backslash_esc_str, "\\\\");
+    INTERN_STRING(st, inactive_toks_str, "([&~|])");
+    INTERN_STRING(st, inactive_toks_repl_str, "\\\\\\1");
     return 0;
 }
 #undef INTERN_STRING
@@ -142,6 +151,11 @@ static int
 fnmatchmodule_traverse(PyObject *m, visitproc visit, void *arg)
 {
     fnmatchmodule_state *st = get_fnmatchmodule_state(m);
+    Py_VISIT(st->inactive_toks_repl_str);
+    Py_VISIT(st->inactive_toks_str);
+    Py_VISIT(st->backslash_esc_str);
+    Py_VISIT(st->backslash_str);
+    Py_VISIT(st->hyphen_esc_str);
     Py_VISIT(st->hyphen_str);
     Py_VISIT(st->translator);
     Py_VISIT(st->re_module);
@@ -154,6 +168,11 @@ static int
 fnmatchmodule_clear(PyObject *m)
 {
     fnmatchmodule_state *st = get_fnmatchmodule_state(m);
+    Py_CLEAR(st->inactive_toks_repl_str);
+    Py_CLEAR(st->inactive_toks_str);
+    Py_CLEAR(st->backslash_esc_str);
+    Py_CLEAR(st->backslash_str);
+    Py_CLEAR(st->hyphen_esc_str);
     Py_CLEAR(st->hyphen_str);
     Py_CLEAR(st->translator);
     Py_CLEAR(st->re_module);
@@ -376,3 +395,6 @@ PyInit__fnmatch(void)
 {
     return PyModuleDef_Init(&_fnmatchmodule);
 }
+
+#undef INVALID_PATTERN_TYPE
+#undef COMPILED_CACHE_SIZE
diff --git a/Modules/_fnmatch/_fnmatchmodule.h b/Modules/_fnmatch/_fnmatchmodule.h
index c4ce1d23201156..9c2dd217bb284b 100644
--- a/Modules/_fnmatch/_fnmatchmodule.h
+++ b/Modules/_fnmatch/_fnmatchmodule.h
@@ -12,14 +12,21 @@
 #include "Python.h"
 
 typedef struct {
-    PyObject *os_module;            // import os
-    PyObject *posixpath_module;     // import posixpath
-    PyObject *re_module;            // import re
+    PyObject *os_module;                // import os
+    PyObject *posixpath_module;         // import posixpath
+    PyObject *re_module;                // import re
 
-    PyObject *translator;           // LRU-cached translation unit
+    PyObject *translator;               // LRU-cached translation unit
 
     // strings used by translate.c
-    PyObject *hyphen_str;           // hyphen glyph '-'
+    PyObject *hyphen_str;               // hyphen '-'
+    PyObject *hyphen_esc_str;           // escaped hyphen '\\-'
+
+    PyObject *backslash_str;            // backslash '\\'
+    PyObject *backslash_esc_str;        // escaped backslash '\\\\'
+
+    PyObject *inactive_toks_str;        // inactive tokens '([&~|])'
+    PyObject *inactive_toks_repl_str;   // replacement pattern '\\\\\\1'
 } fnmatchmodule_state;
 
 static inline fnmatchmodule_state *

From b27b6d89c475d35d4eb5dc478af36261e94dc9df Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?=
 <10796600+picnixz@users.noreply.github.com>
Date: Tue, 16 Jul 2024 16:34:43 +0200
Subject: [PATCH 61/97] various improvements

- intern strings
- local cached functions
- split translate_expression() into helper functions
---
 Modules/_fnmatch/translate.c | 232 ++++++++++++++++++++++-------------
 1 file changed, 147 insertions(+), 85 deletions(-)

diff --git a/Modules/_fnmatch/translate.c b/Modules/_fnmatch/translate.c
index 00ad81c030a9e5..eec3c6c14a8e65 100644
--- a/Modules/_fnmatch/translate.c
+++ b/Modules/_fnmatch/translate.c
@@ -82,15 +82,6 @@ static PyObject *
 translate_expression(fnmatchmodule_state *state,
                      PyObject *pattern, Py_ssize_t start, Py_ssize_t stop);
 
-/*
- * Write an escaped string using re.escape().
- *
- * This returns the number of written characters, or -1 if an error occurred.
- */
-static Py_ssize_t
-write_literal(fnmatchmodule_state *state,
-              PyUnicodeWriter *writer, PyObject *literal);
-
 /*
  * Write the translated pattern obtained by translate_expression().
  *
@@ -114,8 +105,8 @@ _Py_fnmatch_translate(PyObject *module, PyObject *pattern)
 {
     assert(PyUnicode_Check(pattern));
     fnmatchmodule_state *state = get_fnmatchmodule_state(module);
-    PyObject *re = state->re_module;
     const Py_ssize_t n = PyUnicode_GET_LENGTH(pattern);
+
     // We would write less data if there are successive '*',
     // which should not be the case in general. Otherwise,
     // we write >= n characters since escaping them always
@@ -137,11 +128,25 @@ _Py_fnmatch_translate(PyObject *module, PyObject *pattern)
     if (writer == NULL) {
         return NULL;
     }
+
     // list containing the indices where '*' has a special meaning
-    PyObject *indices = PyList_New(0);
+    PyObject *indices = NULL;
+    // cached functions (cache is local to the call)
+    PyObject *re_escape_func = NULL, *re_sub_func = NULL;
+
+    indices = PyList_New(0);
     if (indices == NULL) {
         goto abort;
     }
+    re_escape_func = PyObject_GetAttr(state->re_module, &_Py_ID(escape));
+    if (re_escape_func == NULL) {
+        goto abort;
+    }
+    re_sub_func = PyObject_GetAttr(state->re_module, &_Py_ID(sub));
+    if (re_sub_func == NULL) {
+        goto abort;
+    }
+
     const int kind = PyUnicode_KIND(pattern);
     const void *data = PyUnicode_DATA(pattern);
     /* declaration of some local helping macros */
@@ -218,9 +223,11 @@ _Py_fnmatch_translate(PyObject *module, PyObject *pattern)
                         if (s0 == NULL) {
                             goto abort;
                         }
-                        // NOTE(picnixz): maybe cache the method and intern the arguments
-                        // NOTE(picnixz): to be able to use PyObject_CallFunctionObjArgs()
-                        s1 = _PyObject_CallMethod(s0, &_Py_ID(replace), "ss", "\\", "\\\\");
+                        s1 = PyObject_CallMethodObjArgs(
+                            s0, &_Py_ID(replace),
+                            state->backslash_str, state->backslash_esc_str,
+                            NULL
+                        );
                         Py_DECREF(s0);
                     }
                     else {
@@ -231,9 +238,13 @@ _Py_fnmatch_translate(PyObject *module, PyObject *pattern)
                     if (s1 == NULL) {
                         goto abort;
                     }
-                    // NOTE(picnixz): maybe cache the method and intern the arguments
-                    // NOTE(picnixz): to be able to use PyObject_CallFunctionObjArgs()
-                    s2 = _PyObject_CallMethod(re, &_Py_ID(sub), "ssO", "([&~|])", "\\\\\\1", s1);
+                    s2 = PyObject_CallFunctionObjArgs(
+                        re_sub_func,
+                        state->inactive_toks_str,
+                        state->inactive_toks_repl_str,
+                        s1,
+                        NULL
+                    );
                     Py_DECREF(s1);
                     if (s2 == NULL) {
                         goto abort;
@@ -253,12 +264,14 @@ _Py_fnmatch_translate(PyObject *module, PyObject *pattern)
                 if (str == NULL) {
                     goto abort;
                 }
-                Py_ssize_t difflen = write_literal(state, writer, str);
+                PyObject *escchr = PyObject_CallOneArg(re_escape_func, str);
                 Py_DECREF(str);
-                if (difflen < 0) {
+                if (escchr == NULL) {
                     goto abort;
                 }
-                wi += difflen;
+                _WRITE_STRING_OR(writer, escchr, Py_DECREF(escchr); goto abort);
+                wi += PyUnicode_GET_LENGTH(escchr);
+                Py_DECREF(escchr);
                 break;
             }
         }
@@ -268,6 +281,8 @@ _Py_fnmatch_translate(PyObject *module, PyObject *pattern)
 #undef _WHILE_READ_CMP
 #undef ADVANCE_IF_CHAR
 #undef READ
+    Py_DECREF(re_sub_func);
+    Py_DECREF(re_escape_func);
     PyObject *translated = PyUnicodeWriter_Finish(writer);
     if (translated == NULL) {
         Py_DECREF(indices);
@@ -278,8 +293,10 @@ _Py_fnmatch_translate(PyObject *module, PyObject *pattern)
     Py_DECREF(indices);
     return res;
 abort:
-    PyUnicodeWriter_Discard(writer);
+    Py_XDECREF(re_sub_func);
+    Py_XDECREF(re_escape_func);
     Py_XDECREF(indices);
+    PyUnicodeWriter_Discard(writer);
     return NULL;
 }
 
@@ -310,18 +327,36 @@ get_unicode_character(Py_UCS4 ch)
     return unicode;
 }
 
+/*
+ * Extract a list of chunks from the pattern group described by i and j.
+ *
+ * See translate_expression() for its usage.
+ */
 static PyObject *
-translate_expression(fnmatchmodule_state *state,
-                     PyObject *pattern, Py_ssize_t i, Py_ssize_t j)
+translate_expression_split(fnmatchmodule_state *state,
+                           PyObject *pattern, Py_ssize_t i, Py_ssize_t j)
 {
-    PyObject *chunks = PyList_New(0);
+    PyObject *chunks = NULL;
+    // local cache for some objects
+    PyObject *str_find_func = NULL, *max_find_index = NULL;
+
+    chunks = PyList_New(0);
     if (chunks == NULL) {
-        return NULL;
+        goto abort;
+    }
+    str_find_func = PyObject_GetAttr(pattern, &_Py_ID(find));
+    if (str_find_func == NULL) {
+        goto abort;
+    }
+    max_find_index = PyLong_FromSsize_t(j);
+    if (max_find_index == NULL) {
+        goto abort;
     }
+
     Py_ssize_t k = (PyUnicode_READ_CHAR(pattern, i) == '!') ? i + 2 : i + 1;
-    Py_ssize_t chunkscount = 0;
     while (k < j) {
-        PyObject *eobj = _PyObject_CallMethod(pattern, &_Py_ID(find), "sii", "-", k, j);
+        PyObject *eobj = PyObject_CallFunction(
+            str_find_func, "OnO", state->hyphen_str, k, max_find_index);
         if (eobj == NULL) {
             goto abort;
         }
@@ -344,11 +379,12 @@ translate_expression(fnmatchmodule_state *state,
         if (rc < 0) {
             goto abort;
         }
-        chunkscount += 1;
         i = t + 1;
         k = t + 3;
     }
+    // handle the last group
     if (i >= j) {
+        Py_ssize_t chunkscount = PyList_GET_SIZE(chunks);
         assert(chunkscount > 0);
         PyObject *chunk = PyList_GET_ITEM(chunks, chunkscount - 1);
         assert(chunk != NULL);
@@ -362,6 +398,7 @@ translate_expression(fnmatchmodule_state *state,
         }
     }
     else {
+        // add the remaining sub-pattern
         PyObject *sub = PyUnicode_Substring(pattern, i, j);
         if (sub == NULL) {
             goto abort;
@@ -371,10 +408,26 @@ translate_expression(fnmatchmodule_state *state,
         if (rc < 0) {
             goto abort;
         }
-        chunkscount += 1;
     }
-    // remove empty ranges (they are not valid in RE)
-    Py_ssize_t c = chunkscount;
+    Py_DECREF(max_find_index);
+    Py_DECREF(str_find_func);
+    return chunks;
+abort:
+    Py_XDECREF(max_find_index);
+    Py_XDECREF(str_find_func);
+    Py_XDECREF(chunks);
+    return NULL;
+}
+
+/*
+ * Remove empty ranges (they are invalid in RE).
+ *
+ * See translate_expression() for its usage.
+ */
+static int
+translate_expression_simplify(fnmatchmodule_state *st, PyObject *chunks)
+{
+    Py_ssize_t c = PyList_GET_SIZE(chunks);
     while (--c) {
         PyObject *c1 = PyList_GET_ITEM(chunks, c - 1);
         assert(c1 != NULL);
@@ -387,64 +440,94 @@ translate_expression(fnmatchmodule_state *state,
         assert(c2len > 0);
 
         if (PyUnicode_READ_CHAR(c1, c1len - 1) > PyUnicode_READ_CHAR(c2, 0)) {
-            // all but the last character in the chunk
-            PyObject *c1sub = PyUnicode_Substring(c1, 0, c1len - 1);
-            // all but the first character in the chunk
-            PyObject *c2sub = PyUnicode_Substring(c2, 1, c2len);
-            if (c1sub == NULL || c2sub == NULL) {
-                Py_XDECREF(c1sub);
-                Py_XDECREF(c2sub);
-                goto abort;
+            Py_ssize_t olen = c1len + c2len - 2;
+            assert(olen >= 0);
+            // see https://github.com/python/cpython/issues/114917 for
+            // why we need olen + 1 and not olen currently
+            PyUnicodeWriter *writer = PyUnicodeWriter_Create(olen + 1);
+            if (writer == NULL) {
+                return -1;
+            }
+            // all but the last character in the first chunk
+            if (_WRITE_BLOCK(writer, c1, 0, c1len - 1) < 0) {
+                PyUnicodeWriter_Discard(writer);
+                return -1;
+            }
+            // all but the first character in the second chunk
+            if (_WRITE_BLOCK(writer, c2, 1, c2len) < 0) {
+                PyUnicodeWriter_Discard(writer);
+                return -1;
             }
-            PyObject *merged = PyUnicode_Concat(c1sub, c2sub);
-            Py_DECREF(c1sub);
-            Py_DECREF(c2sub);
             // PyList_SetItem() does not create a new reference on 'merged'
             // so we should not decref 'merged' after the call, unless there
             // is an issue while setting the item.
+            PyObject *merged = PyUnicodeWriter_Finish(writer);
             if (merged == NULL || PyList_SetItem(chunks, c - 1, merged) < 0) {
                 Py_XDECREF(merged);
-                goto abort;
+                return -1;
             }
             if (PySequence_DelItem(chunks, c) < 0) {
-                goto abort;
+                return -1;
             }
-            chunkscount--;
         }
     }
-    assert(chunkscount == PyList_GET_SIZE(chunks));
-    // Escape backslashes and hyphens for set difference (--),
-    // but hyphens that create ranges should not be escaped.
-    for (c = 0; c < chunkscount; ++c) {
+    return 0;
+}
+
+/*
+ * Escape backslashes and hyphens for set difference (--),
+ * but hyphens that create ranges should not be escaped.
+ *
+ * See translate_expression() for its usage.
+ */
+static int
+translate_expression_escape(fnmatchmodule_state *st, PyObject *chunks)
+{
+    for (Py_ssize_t c = 0; c < PyList_GET_SIZE(chunks); ++c) {
         PyObject *s0 = PyList_GET_ITEM(chunks, c);
         assert(s0 != NULL);
-        // NOTE(picnixz): maybe cache the method and intern the arguments
-        // NOTE(picnixz): to be able to use PyObject_CallFunctionObjArgs()
-        PyObject *s1 = _PyObject_CallMethod(s0, &_Py_ID(replace), "ss", "\\", "\\\\");
+        PyObject *s1 = PyObject_CallMethodObjArgs(s0,
+                                                  &_Py_ID(replace),
+                                                  st->backslash_str,
+                                                  st->backslash_esc_str,
+                                                  NULL);
         if (s1 == NULL) {
-            goto abort;
+            return -1;
         }
-        // NOTE(picnixz): maybe cache the method and intern the arguments
-        // NOTE(picnixz): to be able to use PyObject_CallFunctionObjArgs()
-        PyObject *s2 = _PyObject_CallMethod(s1, &_Py_ID(replace), "ss", "-", "\\-");
+        PyObject *s2 = PyObject_CallMethodObjArgs(s1,
+                                                  &_Py_ID(replace),
+                                                  st->hyphen_str,
+                                                  st->hyphen_esc_str,
+                                                  NULL);
         Py_DECREF(s1);
         // PyList_SetItem() does not create a new reference on 's2'
         // so we should not decref 's2' after the call, unless there
         // is an issue while setting the item.
         if (s2 == NULL || PyList_SetItem(chunks, c, s2) < 0) {
             Py_XDECREF(s2);
-            goto abort;
+            return -1;
         }
     }
-    PyObject *hyphen = PyUnicode_FromOrdinal('-');
-    if (hyphen == NULL) {
+    return 0;
+}
+
+static PyObject *
+translate_expression(fnmatchmodule_state *state,
+                     PyObject *pattern, Py_ssize_t i, Py_ssize_t j)
+{
+    PyObject *chunks = translate_expression_split(state, pattern, i, j);
+    if (chunks == NULL) {
+        goto abort;
+    }
+    // remove empty ranges
+    if (translate_expression_simplify(state, chunks) < 0) {
         goto abort;
     }
-    PyObject *res = PyUnicode_Join(hyphen, chunks);
-    Py_DECREF(hyphen);
-    if (res == NULL) {
+    // escape backslashes and set differences
+    if (translate_expression_escape(state, chunks) < 0) {
         goto abort;
     }
+    PyObject *res = PyUnicode_Join(state->hyphen_str, chunks);
     Py_DECREF(chunks);
     return res;
 abort:
@@ -452,27 +535,6 @@ translate_expression(fnmatchmodule_state *state,
     return NULL;
 }
 
-static Py_ssize_t
-write_literal(fnmatchmodule_state *state,
-              PyUnicodeWriter *writer, PyObject *literal)
-{
-    PyObject *escaped = PyObject_CallMethodOneArg(state->re_module,
-                                                  &_Py_ID(escape),
-                                                  literal);
-    if (escaped == NULL) {
-        return -1;
-    }
-    Py_ssize_t written = PyUnicode_GET_LENGTH(escaped);
-    assert(written >= 0);
-    int rc = _WRITE_STRING(writer, escaped);
-    Py_DECREF(escaped);
-    if (rc < 0) {
-        return -1;
-    }
-    assert(written > 0);
-    return written;
-}
-
 static Py_ssize_t
 write_expression(PyUnicodeWriter *writer, PyObject *expression)
 {
@@ -522,7 +584,7 @@ process_wildcards(PyObject *pattern, PyObject *indices)
     const Py_ssize_t m = PyList_GET_SIZE(indices);
     if (m == 0) {
         // "(?s:" + pattern + ")\Z"
-        return PyUnicode_FromFormat("(?s:%S)\\Z", pattern);
+        return PyUnicode_FromFormat("(?s:%U)\\Z", pattern);
     }
     /*
      * Special cases: indices[0] == 0 or indices[-1] + 1 == n
@@ -583,7 +645,7 @@ process_wildcards(PyObject *pattern, PyObject *indices)
         return NULL;
     }
     // "(?s:" + processed + ")\Z"
-    PyObject *res = PyUnicode_FromFormat("(?s:%S)\\Z", processed);
+    PyObject *res = PyUnicode_FromFormat("(?s:%U)\\Z", processed);
     Py_DECREF(processed);
     return res;
 abort:

From b564b2210d3c9961b8cf0582d03c6a4c5df5c088 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?=
 <10796600+picnixz@users.noreply.github.com>
Date: Tue, 16 Jul 2024 17:21:26 +0200
Subject: [PATCH 62/97] PEP-7 fixes

---
 Modules/_fnmatch/_fnmatchmodule.c |   6 +-
 Modules/_fnmatch/_fnmatchmodule.h |   4 +-
 Modules/_fnmatch/matcher.c        |   4 +-
 Modules/_fnmatch/translate.c      | 161 +++++++++++++++---------------
 4 files changed, 91 insertions(+), 84 deletions(-)

diff --git a/Modules/_fnmatch/_fnmatchmodule.c b/Modules/_fnmatch/_fnmatchmodule.c
index e51283a4af60c6..3609a59e9bcd7b 100644
--- a/Modules/_fnmatch/_fnmatchmodule.c
+++ b/Modules/_fnmatch/_fnmatchmodule.c
@@ -55,12 +55,14 @@ fnmatchmodule_load_translator(PyObject *module, fnmatchmodule_state *st)
     if (maxsize == NULL) {
         return -1;
     }
-    PyObject *lru_cache = _PyImport_GetModuleAttrString("functools", "lru_cache");
+    PyObject *lru_cache = _PyImport_GetModuleAttrString("functools",
+                                                        "lru_cache");
     if (lru_cache == NULL) {
         Py_DECREF(maxsize);
         return -1;
     }
-    PyObject *decorator = PyObject_CallFunctionObjArgs(lru_cache, maxsize, Py_True, NULL);
+    PyObject *decorator = PyObject_CallFunctionObjArgs(
+        lru_cache, maxsize, Py_True, NULL);
     Py_DECREF(lru_cache);
     Py_DECREF(maxsize);
     if (decorator == NULL) {
diff --git a/Modules/_fnmatch/_fnmatchmodule.h b/Modules/_fnmatch/_fnmatchmodule.h
index 9c2dd217bb284b..46afd8e0dda630 100644
--- a/Modules/_fnmatch/_fnmatchmodule.h
+++ b/Modules/_fnmatch/_fnmatchmodule.h
@@ -74,7 +74,9 @@ _Py_fnmatch_filter(PyObject *matcher, PyObject *names);
  * The 'normcase' argument is a callable implementing os.path.normcase().
  */
 extern PyObject *
-_Py_fnmatch_filter_normalized(PyObject *matcher, PyObject *names, PyObject *normcase);
+_Py_fnmatch_filter_normalized(PyObject *matcher,
+                              PyObject *names,
+                              PyObject *normcase);
 
 /*
  * C accelerator for translating UNIX shell patterns into RE patterns.
diff --git a/Modules/_fnmatch/matcher.c b/Modules/_fnmatch/matcher.c
index 899fe56ee063d3..22fdc41d719b59 100644
--- a/Modules/_fnmatch/matcher.c
+++ b/Modules/_fnmatch/matcher.c
@@ -48,7 +48,9 @@ _Py_fnmatch_filter(PyObject *matcher, PyObject *names)
 }
 
 PyObject *
-_Py_fnmatch_filter_normalized(PyObject *matcher, PyObject *names, PyObject *normcase)
+_Py_fnmatch_filter_normalized(PyObject *matcher,
+                              PyObject *names,
+                              PyObject *normcase)
 {
     PyObject *iter = PyObject_GetIter(names);
     if (iter == NULL) {
diff --git a/Modules/_fnmatch/translate.c b/Modules/_fnmatch/translate.c
index eec3c6c14a8e65..6d841f10840c94 100644
--- a/Modules/_fnmatch/translate.c
+++ b/Modules/_fnmatch/translate.c
@@ -11,46 +11,55 @@
 //
 // The following _WRITE_* and _WRITE_*_OR macros do NOT check their inputs
 // since they directly delegate to the _PyUnicodeWriter_Write* underlying
-// function.
+// function. In particular, the caller is responsible for type safety.
 
-#define _WRITE_OR_FAIL(writeop, onerror) \
-    do { \
-        if ((writeop) < 0) { \
-            onerror; \
-        } \
+#define _WRITE_OR_FAIL(WRITE_OPERATION, ON_ERROR)   \
+    do {                                            \
+        if ((WRITE_OPERATION) < 0) {                \
+            ON_ERROR;                               \
+        }                                           \
     } while (0)
 
-/* write a character 'ch' */
-#define _WRITE_CHAR(writer, ch) \
-    _PyUnicodeWriter_WriteChar((_PyUnicodeWriter *)(writer), (ch))
-/* write a character 'ch', or execute 'onerror' if it fails */
-#define _WRITE_CHAR_OR(writer, ch, onerror) \
-    _WRITE_OR_FAIL(_WRITE_CHAR((writer), (ch)), onerror)
-
-/* write an ASCII 'string' of given 'length' */
-#define _WRITE_ASCII(writer, ascii, length) \
-    _PyUnicodeWriter_WriteASCIIString((_PyUnicodeWriter *)(writer), (ascii), (length))
-/* write an ASCII 'string' of given 'length', or execute 'onerror' if it fails */
-#define _WRITE_ASCII_OR(writer, ascii, length, onerror) \
-    _WRITE_OR_FAIL(_WRITE_ASCII((writer), (ascii), (length)), onerror)
-
-/* write a 'string' */
-#define _WRITE_STRING(writer, string) \
-    _PyUnicodeWriter_WriteStr((_PyUnicodeWriter *)(writer), (string))
-/* write a 'string', or execute 'onerror' if it fails */
-#define _WRITE_STRING_OR(writer, string, onerror) \
-    _WRITE_OR_FAIL(_WRITE_STRING((writer), (string)), onerror)
-
-/* write the substring string[i:j] */
-#define _WRITE_BLOCK(writer, string, i, j) \
-    _PyUnicodeWriter_WriteSubstring((_PyUnicodeWriter *)(writer), (string), (i), (j))
-/* write the substring string[i:j] if i < j, or execute 'onerror' if it fails */
-#define _WRITE_BLOCK_OR(writer, string, i, j, onerror) \
-    do { \
-        Py_ssize_t _i = (i), _j = (j); /* to allow in-place operators on i or j */ \
-        if (_i < _j && _WRITE_BLOCK((writer), (string), _i, _j) < 0) { \
-            onerror; \
-        } \
+/* write a character CHAR */
+#define _WRITE_CHAR(WRITER, CHAR) \
+    _PyUnicodeWriter_WriteChar((_PyUnicodeWriter *)(WRITER), (CHAR))
+/* write a character CHAR or execute the ON_ERROR statements if it fails */
+#define _WRITE_CHAR_OR(WRITER, CHAR, ON_ERROR) \
+    _WRITE_OR_FAIL(_WRITE_CHAR((WRITER), (CHAR)), ON_ERROR)
+
+/* write the ASCII string ASCII of given length LENGTH */
+#define _WRITE_ASCII(WRITER, ASCII, LENGTH)                         \
+    _PyUnicodeWriter_WriteASCIIString((_PyUnicodeWriter *)(WRITER), \
+                                      (ASCII), (LENGTH))
+/*
+ * Write the ASCII string ASCII of given length LENGTH,
+ * or execute the ON_ERROR statements if it fails.
+ */
+#define _WRITE_ASCII_OR(WRITER, ASCII, LENGTH, ON_ERROR) \
+    _WRITE_OR_FAIL(_WRITE_ASCII((WRITER), (ASCII), (LENGTH)), ON_ERROR)
+
+/* write the string STRING */
+#define _WRITE_STRING(WRITER, STRING) \
+    _PyUnicodeWriter_WriteStr((_PyUnicodeWriter *)(WRITER), (STRING))
+/* write the string STRING or execute the ON_ERROR statements if it fails */
+#define _WRITE_STRING_OR(WRITER, STRING, ON_ERROR) \
+    _WRITE_OR_FAIL(_WRITE_STRING((WRITER), (STRING)), ON_ERROR)
+
+/* write the substring STRING[START:STOP] */
+#define _WRITE_BLOCK(WRITER, STRING, START, STOP)                   \
+    _PyUnicodeWriter_WriteSubstring((_PyUnicodeWriter *)(WRITER),   \
+                                    (STRING), (START), (STOP))
+/*
+ * Write the substring STRING[START:STOP] if START < STOP,
+ * or execute the ON_ERROR statements if it fails.
+ */
+#define _WRITE_BLOCK_OR(WRITER, STRING, START, STOP, ON_ERROR)          \
+    do {                                                                \
+        /* intermediate variables to allow in-place operations */       \
+        Py_ssize_t _i = (START), _j = (STOP);                           \
+        if (_i < _j && _WRITE_BLOCK((WRITER), (STRING), _i, _j) < 0) {  \
+            ON_ERROR;                                                   \
+        }                                                               \
     } while (0)
 
 // ==== Helper declarations ===================================================
@@ -147,32 +156,20 @@ _Py_fnmatch_translate(PyObject *module, PyObject *pattern)
         goto abort;
     }
 
-    const int kind = PyUnicode_KIND(pattern);
-    const void *data = PyUnicode_DATA(pattern);
+    const int unicode_kind = PyUnicode_KIND(pattern);
+    const void *const unicode_data = PyUnicode_DATA(pattern);
     /* declaration of some local helping macros */
-#define READ(ind) PyUnicode_READ(kind, data, (ind))
-    /* advance 'ind' if the character is 'ch' */
-#define ADVANCE_IF_CHAR(ch, ind, maxind) \
-    do { \
-        /* the following forces ind to be a variable name */ \
-        Py_ssize_t *Py_UNUSED(_ind) = &ind; \
-        if ((ind) < (maxind) && READ(ind) == (ch)) { \
-            ++ind; \
-        } \
-    } while (0)
-    /* advance 'ind' until the character compares to 'READ[ind] CMPOP ch' */
-#define _WHILE_READ_CMP(ch, ind, maxind, CMPOP) \
-    do { \
-        /* the following forces ind to be a variable name */ \
-        Py_ssize_t *Py_UNUSED(_ind) = &ind; \
-        while ((ind) < (maxind) && READ(ind) CMPOP (ch)) { \
-            ++ind; \
-        } \
+#define READ(IND) PyUnicode_READ(unicode_kind, unicode_data, (IND))
+    /* advance IND if the character is CHAR */
+#define ADVANCE_IF_NEXT_CHAR_IS(CHAR, IND, MAXIND)              \
+    do {                                                        \
+        /* the following forces IND to be a variable name */    \
+        void *Py_UNUSED(_ind) = &IND;                           \
+        if ((IND) < (MAXIND) && READ(IND) == (CHAR)) {          \
+            ++IND;                                              \
+        }                                                       \
     } while (0)
-    /* advance 'from' as long as READ(from) != ch */
-#define ADVANCE_TO_NEXT(ch, from, maxind) _WHILE_READ_CMP((ch), (from), (maxind), !=)
-    /* advance 'from' as long as READ(from) == ch */
-#define SKIP_DUPLICATES(ch, from, maxind) _WHILE_READ_CMP((ch), (from), (maxind), ==)
+
     Py_ssize_t i = 0;   // current index
     Py_ssize_t wi = 0;  // number of characters written
     while (i < n) {
@@ -181,7 +178,8 @@ _Py_fnmatch_translate(PyObject *module, PyObject *pattern)
         switch (chr) {
             case '*': {
                 _WRITE_CHAR_OR(writer, '*', goto abort);
-                SKIP_DUPLICATES('*', i, n);
+                // skip duplicated '*'
+                for (; i < n && READ(i) == '*'; ++i);
                 PyObject *index = PyLong_FromSsize_t(wi++);
                 if (index == NULL) {
                     goto abort;
@@ -200,10 +198,10 @@ _Py_fnmatch_translate(PyObject *module, PyObject *pattern)
                 break;
             }
             case '[': {
-                Py_ssize_t j = i;           // 'i' is already at next char
-                ADVANCE_IF_CHAR('!', j, n); // [!
-                ADVANCE_IF_CHAR(']', j, n); // [!] or []
-                ADVANCE_TO_NEXT(']', j, n); // locate closing ']'
+                Py_ssize_t j = i;
+                ADVANCE_IF_NEXT_CHAR_IS('!', j, n);     // [!
+                ADVANCE_IF_NEXT_CHAR_IS(']', j, n);     // [!] or []
+                for (; j < n && READ(j) != ']'; ++j);   // locate closing ']'
                 if (j >= n) {
                     _WRITE_ASCII_OR(writer, "\\[", 2, goto abort);
                     wi += 2; // we just wrote 2 characters
@@ -224,8 +222,10 @@ _Py_fnmatch_translate(PyObject *module, PyObject *pattern)
                             goto abort;
                         }
                         s1 = PyObject_CallMethodObjArgs(
-                            s0, &_Py_ID(replace),
-                            state->backslash_str, state->backslash_esc_str,
+                            s0,
+                            &_Py_ID(replace),
+                            state->backslash_str,
+                            state->backslash_esc_str,
                             NULL
                         );
                         Py_DECREF(s0);
@@ -269,17 +269,18 @@ _Py_fnmatch_translate(PyObject *module, PyObject *pattern)
                 if (escchr == NULL) {
                     goto abort;
                 }
-                _WRITE_STRING_OR(writer, escchr, Py_DECREF(escchr); goto abort);
-                wi += PyUnicode_GET_LENGTH(escchr);
+                Py_ssize_t difflen = PyUnicode_GET_LENGTH(escchr);
+                int rc = _WRITE_STRING(writer, escchr);
                 Py_DECREF(escchr);
+                if (rc < 0) {
+                    goto abort;
+                }
+                wi += difflen;
                 break;
             }
         }
     }
-#undef SKIP_DUPLICATES
-#undef ADVANCE_TO_NEXT
-#undef _WHILE_READ_CMP
-#undef ADVANCE_IF_CHAR
+#undef ADVANCE_IF_NEXT_CHAR_IS
 #undef READ
     Py_DECREF(re_sub_func);
     Py_DECREF(re_escape_func);
@@ -388,12 +389,12 @@ translate_expression_split(fnmatchmodule_state *state,
         assert(chunkscount > 0);
         PyObject *chunk = PyList_GET_ITEM(chunks, chunkscount - 1);
         assert(chunk != NULL);
-        PyObject *repl = PyUnicode_Concat(chunk, state->hyphen_str);
-        // PyList_SetItem() does not create a new reference on 'repl'
-        // so we should not decref 'repl' after the call, unless there
+        PyObject *str = PyUnicode_Concat(chunk, state->hyphen_str);
+        // PyList_SetItem() does not create a new reference on 'str'
+        // so we should not decref 'str' after the call, unless there
         // is an issue while setting the item.
-        if (repl == NULL || PyList_SetItem(chunks, chunkscount - 1, repl) < 0) {
-            Py_XDECREF(repl);
+        if (str == NULL || PyList_SetItem(chunks, chunkscount - 1, str) < 0) {
+            Py_XDECREF(str);
             goto abort;
         }
     }
@@ -620,7 +621,7 @@ process_wildcards(PyObject *pattern, PyObject *indices)
         return NULL;
     }
     _WRITE_BLOCK_OR(writer, pattern, i, j, goto abort);
-    i = j + 1;              // jump after the '*'
+    i = j + 1; // jump after the '*'
     for (Py_ssize_t k = 1; k < m; ++k) {
         // process all but the last wildcard.
         PyObject *ind = PyList_GET_ITEM(indices, k);

From d73f66d5c2b00c78e816bd53d9575047674aa2b7 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?=
 <10796600+picnixz@users.noreply.github.com>
Date: Wed, 17 Jul 2024 15:18:11 +0200
Subject: [PATCH 63/97] update comments and names

---
 Modules/_fnmatch/_fnmatchmodule.c | 12 ++++++------
 Modules/_fnmatch/_fnmatchmodule.h | 21 +++++++++++----------
 2 files changed, 17 insertions(+), 16 deletions(-)

diff --git a/Modules/_fnmatch/_fnmatchmodule.c b/Modules/_fnmatch/_fnmatchmodule.c
index 3609a59e9bcd7b..acab53951d7b89 100644
--- a/Modules/_fnmatch/_fnmatchmodule.c
+++ b/Modules/_fnmatch/_fnmatchmodule.c
@@ -142,8 +142,8 @@ fnmatchmodule_exec(PyObject *module)
     INTERN_STRING(st, hyphen_esc_str, "\\-");
     INTERN_STRING(st, backslash_str, "\\");
     INTERN_STRING(st, backslash_esc_str, "\\\\");
-    INTERN_STRING(st, inactive_toks_str, "([&~|])");
-    INTERN_STRING(st, inactive_toks_repl_str, "\\\\\\1");
+    INTERN_STRING(st, setops_str, "([&~|])");
+    INTERN_STRING(st, setops_repl_str, "\\\\\\1");
     return 0;
 }
 #undef INTERN_STRING
@@ -153,8 +153,8 @@ static int
 fnmatchmodule_traverse(PyObject *m, visitproc visit, void *arg)
 {
     fnmatchmodule_state *st = get_fnmatchmodule_state(m);
-    Py_VISIT(st->inactive_toks_repl_str);
-    Py_VISIT(st->inactive_toks_str);
+    Py_VISIT(st->setops_repl_str);
+    Py_VISIT(st->setops_str);
     Py_VISIT(st->backslash_esc_str);
     Py_VISIT(st->backslash_str);
     Py_VISIT(st->hyphen_esc_str);
@@ -170,8 +170,8 @@ static int
 fnmatchmodule_clear(PyObject *m)
 {
     fnmatchmodule_state *st = get_fnmatchmodule_state(m);
-    Py_CLEAR(st->inactive_toks_repl_str);
-    Py_CLEAR(st->inactive_toks_str);
+    Py_CLEAR(st->setops_repl_str);
+    Py_CLEAR(st->setops_str);
     Py_CLEAR(st->backslash_esc_str);
     Py_CLEAR(st->backslash_str);
     Py_CLEAR(st->hyphen_esc_str);
diff --git a/Modules/_fnmatch/_fnmatchmodule.h b/Modules/_fnmatch/_fnmatchmodule.h
index 46afd8e0dda630..ae1c01c90f8d26 100644
--- a/Modules/_fnmatch/_fnmatchmodule.h
+++ b/Modules/_fnmatch/_fnmatchmodule.h
@@ -12,21 +12,22 @@
 #include "Python.h"
 
 typedef struct {
-    PyObject *os_module;                // import os
-    PyObject *posixpath_module;         // import posixpath
-    PyObject *re_module;                // import re
+    PyObject *os_module;            // import os
+    PyObject *posixpath_module;     // import posixpath
+    PyObject *re_module;            // import re
 
-    PyObject *translator;               // LRU-cached translation unit
+    PyObject *translator;           // LRU-cached translation unit
 
     // strings used by translate.c
-    PyObject *hyphen_str;               // hyphen '-'
-    PyObject *hyphen_esc_str;           // escaped hyphen '\\-'
+    PyObject *hyphen_str;           // hyphen '-'
+    PyObject *hyphen_esc_str;       // escaped hyphen '\\-'
 
-    PyObject *backslash_str;            // backslash '\\'
-    PyObject *backslash_esc_str;        // escaped backslash '\\\\'
+    PyObject *backslash_str;        // backslash '\\'
+    PyObject *backslash_esc_str;    // escaped backslash '\\\\'
 
-    PyObject *inactive_toks_str;        // inactive tokens '([&~|])'
-    PyObject *inactive_toks_repl_str;   // replacement pattern '\\\\\\1'
+    /* set operation tokens (&&, ~~ and ||) are not supported in regex */
+    PyObject *setops_str;           // set operation tokens '([&~|])'
+    PyObject *setops_repl_str;      // replacement pattern '\\\\\\1'
 } fnmatchmodule_state;
 
 static inline fnmatchmodule_state *

From 6258b71981aae59445c84920073063fe181c734e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?=
 <10796600+picnixz@users.noreply.github.com>
Date: Wed, 17 Jul 2024 15:21:01 +0200
Subject: [PATCH 64/97] refactorization:

- improve comments
- improve variable names
- mark local macro definition region
- fix and explain the result length formula
- use `/* */` for docs and `//` for comments
---
 Modules/_fnmatch/translate.c | 519 +++++++++++++++++++----------------
 1 file changed, 285 insertions(+), 234 deletions(-)

diff --git a/Modules/_fnmatch/translate.c b/Modules/_fnmatch/translate.c
index 6d841f10840c94..47fd72a2de69f2 100644
--- a/Modules/_fnmatch/translate.c
+++ b/Modules/_fnmatch/translate.c
@@ -8,24 +8,25 @@
 #include "pycore_call.h"
 
 // ==== Macro definitions =====================================================
-//
+
+/* Execute the ON_ERROR statements if "CALL < 0". */
+#define _INTERNAL_CALL_OR_FAIL(CALL, ON_ERROR)  \
+    do {                                        \
+        if ((CALL) < 0) {                       \
+            ON_ERROR;                           \
+        }                                       \
+    } while (0)
+
 // The following _WRITE_* and _WRITE_*_OR macros do NOT check their inputs
 // since they directly delegate to the _PyUnicodeWriter_Write* underlying
 // function. In particular, the caller is responsible for type safety.
 
-#define _WRITE_OR_FAIL(WRITE_OPERATION, ON_ERROR)   \
-    do {                                            \
-        if ((WRITE_OPERATION) < 0) {                \
-            ON_ERROR;                               \
-        }                                           \
-    } while (0)
-
 /* write a character CHAR */
 #define _WRITE_CHAR(WRITER, CHAR) \
     _PyUnicodeWriter_WriteChar((_PyUnicodeWriter *)(WRITER), (CHAR))
 /* write a character CHAR or execute the ON_ERROR statements if it fails */
 #define _WRITE_CHAR_OR(WRITER, CHAR, ON_ERROR) \
-    _WRITE_OR_FAIL(_WRITE_CHAR((WRITER), (CHAR)), ON_ERROR)
+    _INTERNAL_CALL_OR_FAIL(_WRITE_CHAR((WRITER), (CHAR)), ON_ERROR)
 
 /* write an ASCII string STRING of given length LENGTH */
 #define _WRITE_ASCII(WRITER, ASCII, LENGTH)                         \
@@ -36,14 +37,14 @@
  * or execute the ON_ERROR statements if it fails.
  */
 #define _WRITE_ASCII_OR(WRITER, ASCII, LENGTH, ON_ERROR) \
-    _WRITE_OR_FAIL(_WRITE_ASCII((WRITER), (ASCII), (LENGTH)), ON_ERROR)
+    _INTERNAL_CALL_OR_FAIL(_WRITE_ASCII((WRITER), (ASCII), (LENGTH)), ON_ERROR)
 
 /* write the string STRING */
 #define _WRITE_STRING(WRITER, STRING) \
     _PyUnicodeWriter_WriteStr((_PyUnicodeWriter *)(WRITER), (STRING))
 /* write the string STRING or execute the ON_ERROR statements if it fails */
 #define _WRITE_STRING_OR(WRITER, STRING, ON_ERROR) \
-    _WRITE_OR_FAIL(_WRITE_STRING((WRITER), (STRING)), ON_ERROR)
+    _INTERNAL_CALL_OR_FAIL(_WRITE_STRING((WRITER), (STRING)), ON_ERROR)
 
 /* write the substring STRING[START:STOP] */
 #define _WRITE_BLOCK(WRITER, STRING, START, STOP)                   \
@@ -62,6 +63,38 @@
         }                                                               \
     } while (0)
 
+// ==== Inline helpers ========================================================
+
+/* replace backslashes in STRING by escaped backslashes */
+#define BACKSLASH_REPLACE(STATE, STRING)    \
+    PyObject_CallMethodObjArgs(             \
+        (STRING),                           \
+        &_Py_ID(replace),                   \
+        (STATE)->backslash_str,             \
+        (STATE)->backslash_esc_str,         \
+        NULL                                \
+    )
+
+/* replace hyphens in STRING by escaped hyphens */
+#define HYPHEN_REPLACE(STATE, STRING)       \
+    PyObject_CallMethodObjArgs(             \
+        (STRING),                           \
+        &_Py_ID(replace),                   \
+        (STATE)->hyphen_str,                \
+        (STATE)->hyphen_esc_str,            \
+        NULL                                \
+    )
+
+/* escape set operations in STRING using re.sub() */
+#define SETOPS_REPLACE(STATE, STRING, RE_SUB_FUNC)  \
+    PyObject_CallFunctionObjArgs(                   \
+        (RE_SUB_FUNC),                              \
+        (STATE)->setops_str,                        \
+        (STATE)->setops_repl_str,                   \
+        (STRING),                                   \
+        NULL                                        \
+    )
+
 // ==== Helper declarations ===================================================
 
 /*
@@ -75,21 +108,25 @@ get_unicode_character(Py_UCS4 ch);
 /*
  * Construct a regular expression out of a UNIX-style expression.
  *
- * The expression to translate is the content of an '[(BLOCK)]' expression
- * or '[!(BLOCK)]' expression. The BLOCK contains single unicode characters
- * or character ranges (e.g., 'a-z').
+ * The expression to translate is the content of an '[(BLOCK)]' expression,
+ * which contains single unicode characters or character ranges (e.g., 'a-z').
+ *
+ * By convention, 'start' and 'stop' represent the INCLUSIVE start index
+ * and EXCLUSIVE stop index of BLOCK in 'pattern'. Stated otherwise:
  *
- * By convention 'start' and 'stop' represent the INCLUSIVE start index
- * and EXCLUSIVE stop index of BLOCK in the full 'pattern'. Note that
- * we always have pattern[stop] == ']' and pattern[start] == BLOCK[0].
+ *      pattern[start] == BLOCK[0]
+ *      pattern[stop] == ']'
  *
  * For instance, for "ab[c-f]g[!1-5]", the values of 'start' and 'stop'
- * for the sub-pattern '[c-f]' are 3 and 6 respectively, whereas their
- * values for '[!1-5]' are 10 (not 9) and 13 respectively.
+ * for the sub-pattern '[c-f]' are 3 and 6 respectively, while their
+ * values for '[!1-5]' are 9 and 13 respectively.
+ *
+ * The 'pattern_str_find_meth' argument is a reference to pattern.find().
  */
 static PyObject *
 translate_expression(fnmatchmodule_state *state,
-                     PyObject *pattern, Py_ssize_t start, Py_ssize_t stop);
+                     PyObject *pattern, Py_ssize_t start, Py_ssize_t stop,
+                     PyObject *pattern_str_find_meth);
 
 /*
  * Write the translated pattern obtained by translate_expression().
@@ -114,7 +151,7 @@ _Py_fnmatch_translate(PyObject *module, PyObject *pattern)
 {
     assert(PyUnicode_Check(pattern));
     fnmatchmodule_state *state = get_fnmatchmodule_state(module);
-    const Py_ssize_t n = PyUnicode_GET_LENGTH(pattern);
+    const Py_ssize_t maxind = PyUnicode_GET_LENGTH(pattern);
 
     // We would write less data if there are successive '*',
     // which should not be the case in general. Otherwise,
@@ -133,59 +170,61 @@ _Py_fnmatch_translate(PyObject *module, PyObject *pattern)
     // same as the number of characters in the pattern.
     //
     // TODO(picnixz): should we limit the estimation?
-    PyUnicodeWriter *writer = PyUnicodeWriter_Create(n);
+    PyUnicodeWriter *writer = PyUnicodeWriter_Create(maxind);
     if (writer == NULL) {
         return NULL;
     }
 
     // list containing the indices where '*' has a special meaning
-    PyObject *indices = NULL;
+    PyObject *wildcard_indices = NULL;
     // cached functions (cache is local to the call)
     PyObject *re_escape_func = NULL, *re_sub_func = NULL;
+    PyObject *pattern_str_find_meth = NULL; // bound method of pattern.find()
 
-    indices = PyList_New(0);
-    if (indices == NULL) {
-        goto abort;
-    }
-    re_escape_func = PyObject_GetAttr(state->re_module, &_Py_ID(escape));
-    if (re_escape_func == NULL) {
-        goto abort;
-    }
-    re_sub_func = PyObject_GetAttr(state->re_module, &_Py_ID(sub));
-    if (re_sub_func == NULL) {
+    wildcard_indices = PyList_New(0);
+    if (wildcard_indices == NULL) {
         goto abort;
     }
-
-    const int unicode_kind = PyUnicode_KIND(pattern);
-    const void *const unicode_data = PyUnicode_DATA(pattern);
-    /* declaration of some local helping macros */
-#define READ(IND) PyUnicode_READ(unicode_kind, unicode_data, (IND))
+#define CACHE_ATTRIBUTE(DEST, OBJECT, NAME)         \
+    do {                                            \
+        DEST = PyObject_GetAttr((OBJECT), (NAME));  \
+        if ((DEST) == NULL) {                       \
+            goto abort;                             \
+        }                                           \
+    } while (0);
+    CACHE_ATTRIBUTE(re_escape_func, state->re_module, &_Py_ID(escape));
+    CACHE_ATTRIBUTE(re_sub_func, state->re_module, &_Py_ID(sub));
+    CACHE_ATTRIBUTE(pattern_str_find_meth, pattern, &_Py_ID(find));
+#undef CACHE_ATTRIBUTE
+
+    const int _unicode_kind = PyUnicode_KIND(pattern);
+    const void *const _unicode_data = PyUnicode_DATA(pattern);
+    // ---- def local macros --------------------------------------------------
+#define READ_CHAR(IND)      PyUnicode_READ(_unicode_kind, _unicode_data, (IND))
+#define WRITE_CHAR(CHAR)    _WRITE_CHAR_OR(writer, (CHAR), goto abort)
     /* advance IND if the character is CHAR */
-#define ADVANCE_IF_NEXT_CHAR_IS(CHAR, IND, MAXIND)              \
-    do {                                                        \
-        /* the following forces IND to be a variable name */    \
-        void *Py_UNUSED(_ind) = &IND;                           \
-        if ((IND) < (MAXIND) && READ(IND) == (CHAR)) {          \
-            ++IND;                                              \
-        }                                                       \
+#define ADVANCE_IF_CHAR_IS(CHAR, IND, MAXIND)               \
+    do {                                                    \
+        if ((IND) < (MAXIND) && READ_CHAR(IND) == (CHAR)) { \
+            ++IND;                                          \
+        }                                                   \
     } while (0)
-
-    Py_ssize_t i = 0;   // current index
-    Py_ssize_t wi = 0;  // number of characters written
-    while (i < n) {
-        // read and advance to the next character
-        Py_UCS4 chr = READ(i++);
+    // ------------------------------------------------------------------------
+    Py_ssize_t i = 0;       // current index
+    Py_ssize_t written = 0; // number of characters written
+    while (i < maxind) {
+        Py_UCS4 chr = READ_CHAR(i++);
         switch (chr) {
             case '*': {
-                _WRITE_CHAR_OR(writer, '*', goto abort);
+                WRITE_CHAR('*');
                 // skip duplicated '*'
-                for (; i < n && READ(i) == '*'; ++i);
-                PyObject *index = PyLong_FromSsize_t(wi++);
-                if (index == NULL) {
+                for (; i < maxind && READ_CHAR(i) == '*'; ++i);
+                PyObject *wildcard_index = PyLong_FromSsize_t(written++);
+                if (wildcard_index == NULL) {
                     goto abort;
                 }
-                int rc = PyList_Append(indices, index);
-                Py_DECREF(index);
+                int rc = PyList_Append(wildcard_indices, wildcard_index);
+                Py_DECREF(wildcard_index);
                 if (rc < 0) {
                     goto abort;
                 }
@@ -193,68 +232,55 @@ _Py_fnmatch_translate(PyObject *module, PyObject *pattern)
             }
             case '?': {
                 // translate optional '?' (fnmatch) into optional '.' (regex)
-                _WRITE_CHAR_OR(writer, '.', goto abort);
-                ++wi; // increase the expected result's length
+                WRITE_CHAR('.');
+                ++written; // increase the expected result's length
                 break;
             }
             case '[': {
+                assert(i > 0);
+                assert(READ_CHAR(i - 1) == '[');
                 Py_ssize_t j = i;
-                ADVANCE_IF_NEXT_CHAR_IS('!', j, n);     // [!
-                ADVANCE_IF_NEXT_CHAR_IS(']', j, n);     // [!] or []
-                for (; j < n && READ(j) != ']'; ++j);   // locate closing ']'
-                if (j >= n) {
+                ADVANCE_IF_CHAR_IS('!', j, maxind);             // [!
+                ADVANCE_IF_CHAR_IS(']', j, maxind);             // [!] or []
+                for (; j < maxind && READ_CHAR(j) != ']'; ++j); // locate ']'
+                if (j >= maxind) {
                     _WRITE_ASCII_OR(writer, "\\[", 2, goto abort);
-                    wi += 2; // we just wrote 2 characters
+                    written += 2; // we just wrote 2 characters
                     break;  // early break for clarity
                 }
                 else {
-                    //              v--- pattern[j] (exclusive)
-                    // '[' * ... * ']'
-                    //     ^----- pattern[i] (inclusive)
+                    assert(READ_CHAR(j) == ']');
                     Py_ssize_t pos = PyUnicode_FindChar(pattern, '-', i, j, 1);
                     if (pos == -2) {
                         goto abort;
                     }
-                    PyObject *s1 = NULL, *s2 = NULL;
+                    PyObject *pre_expr = NULL, *expr = NULL;
                     if (pos == -1) {
-                        PyObject *s0 = PyUnicode_Substring(pattern, i, j);
-                        if (s0 == NULL) {
+                        PyObject *tmp = PyUnicode_Substring(pattern, i, j);
+                        if (tmp == NULL) {
                             goto abort;
                         }
-                        s1 = PyObject_CallMethodObjArgs(
-                            s0,
-                            &_Py_ID(replace),
-                            state->backslash_str,
-                            state->backslash_esc_str,
-                            NULL
-                        );
-                        Py_DECREF(s0);
+                        pre_expr = BACKSLASH_REPLACE(state, tmp);
+                        Py_DECREF(tmp);
                     }
                     else {
-                        assert(pos >= 0);
-                        assert(READ(j) == ']');
-                        s1 = translate_expression(state, pattern, i, j);
+                        pre_expr = translate_expression(state, pattern, i, j,
+                                                        pattern_str_find_meth);
                     }
-                    if (s1 == NULL) {
+                    if (pre_expr == NULL) {
                         goto abort;
                     }
-                    s2 = PyObject_CallFunctionObjArgs(
-                        re_sub_func,
-                        state->inactive_toks_str,
-                        state->inactive_toks_repl_str,
-                        s1,
-                        NULL
-                    );
-                    Py_DECREF(s1);
-                    if (s2 == NULL) {
+                    expr = SETOPS_REPLACE(state, pre_expr, re_sub_func);
+                    Py_DECREF(pre_expr);
+                    if (expr == NULL) {
                         goto abort;
                     }
-                    Py_ssize_t difflen = write_expression(writer, s2);
-                    Py_DECREF(s2);
-                    if (difflen < 0) {
+                    Py_ssize_t expr_len = write_expression(writer, expr);
+                    Py_DECREF(expr);
+                    if (expr_len < 0) {
                         goto abort;
                     }
-                    wi += difflen;
+                    written += expr_len;
                     i = j + 1;  // jump to the character after ']'
                     break;      // early break for clarity
                 }
@@ -264,39 +290,42 @@ _Py_fnmatch_translate(PyObject *module, PyObject *pattern)
                 if (str == NULL) {
                     goto abort;
                 }
-                PyObject *escchr = PyObject_CallOneArg(re_escape_func, str);
+                PyObject *escaped = PyObject_CallOneArg(re_escape_func, str);
                 Py_DECREF(str);
-                if (escchr == NULL) {
+                if (escaped == NULL) {
                     goto abort;
                 }
-                Py_ssize_t difflen = PyUnicode_GET_LENGTH(escchr);
-                int rc = _WRITE_STRING(writer, escchr);
-                Py_DECREF(escchr);
+                Py_ssize_t escaped_len = PyUnicode_GET_LENGTH(escaped);
+                int rc = _WRITE_STRING(writer, escaped);
+                Py_DECREF(escaped);
                 if (rc < 0) {
                     goto abort;
                 }
-                wi += difflen;
+                written += escaped_len;
                 break;
             }
         }
     }
-#undef ADVANCE_IF_NEXT_CHAR_IS
+#undef ADVANCE_IF_CHAR_IS
+#undef WRITE_CHAR
 #undef READ
+    Py_DECREF(pattern_str_find_meth);
     Py_DECREF(re_sub_func);
     Py_DECREF(re_escape_func);
     PyObject *translated = PyUnicodeWriter_Finish(writer);
     if (translated == NULL) {
-        Py_DECREF(indices);
+        Py_DECREF(wildcard_indices);
         return NULL;
     }
-    PyObject *res = process_wildcards(translated, indices);
+    PyObject *res = process_wildcards(translated, wildcard_indices);
     Py_DECREF(translated);
-    Py_DECREF(indices);
+    Py_DECREF(wildcard_indices);
     return res;
 abort:
+    Py_XDECREF(pattern_str_find_meth);
     Py_XDECREF(re_sub_func);
     Py_XDECREF(re_escape_func);
-    Py_XDECREF(indices);
+    Py_XDECREF(wildcard_indices);
     PyUnicodeWriter_Discard(writer);
     return NULL;
 }
@@ -329,67 +358,77 @@ get_unicode_character(Py_UCS4 ch)
 }
 
 /*
- * Extract a list of chunks from the pattern group described by i and j.
+ * Extract a list of chunks from the pattern group described by start and stop.
+ *
+ * E.g., [a-z0-9] -> ['a', 'z0', '9'] and [!a-z0-9] -> ['!a', 'z0', '9'].
  *
  * See translate_expression() for its usage.
  */
 static PyObject *
-translate_expression_split(fnmatchmodule_state *state,
-                           PyObject *pattern, Py_ssize_t i, Py_ssize_t j)
+split_expression(fnmatchmodule_state *state,
+                 PyObject *pattern, Py_ssize_t start, Py_ssize_t stop,
+                 PyObject *str_find_func)
 {
-    PyObject *chunks = NULL;
-    // local cache for some objects
-    PyObject *str_find_func = NULL, *max_find_index = NULL;
+    PyObject *chunks = NULL, *maxind = NULL;
+    PyObject *hyphen = state->hyphen_str;
 
     chunks = PyList_New(0);
     if (chunks == NULL) {
         goto abort;
     }
-    str_find_func = PyObject_GetAttr(pattern, &_Py_ID(find));
-    if (str_find_func == NULL) {
-        goto abort;
-    }
-    max_find_index = PyLong_FromSsize_t(j);
-    if (max_find_index == NULL) {
+    maxind = PyLong_FromSsize_t(stop);
+    if (maxind == NULL) {
         goto abort;
     }
 
-    Py_ssize_t k = (PyUnicode_READ_CHAR(pattern, i) == '!') ? i + 2 : i + 1;
-    while (k < j) {
-        PyObject *eobj = PyObject_CallFunction(
-            str_find_func, "OnO", state->hyphen_str, k, max_find_index);
-        if (eobj == NULL) {
+    // ---- def local macros --------------------------------------------------
+    /* add pattern[START:STOP] to the list of chunks */
+#define ADD_CHUNK(START, STOP)                                              \
+    do {                                                                    \
+        PyObject *chunk = PyUnicode_Substring(pattern, (START), (STOP));    \
+        if (chunk == NULL) {                                                \
+            goto abort;                                                     \
+        }                                                                   \
+        int rc = PyList_Append(chunks, chunk);                              \
+        Py_DECREF(chunk);                                                   \
+        if (rc < 0) {                                                       \
+            goto abort;                                                     \
+        }                                                                   \
+    } while (0)
+    // ------------------------------------------------------------------------
+    Py_ssize_t chunk_start = start;
+    bool is_complement = PyUnicode_READ_CHAR(pattern, start) == '!';
+    // skip '!' character (it is handled separately in write_expression())
+    Py_ssize_t ind = is_complement ? start + 2 : start + 1;
+    while (ind < stop) {
+        PyObject *p_chunk_stop = PyObject_CallFunction(str_find_func, "OnO",
+                                                       hyphen, ind, maxind);
+        if (p_chunk_stop == NULL) {
             goto abort;
         }
-        Py_ssize_t t = PyLong_AsSsize_t(eobj);
-        Py_DECREF(eobj);
-        if (t < 0) {
+        Py_ssize_t chunk_stop = PyLong_AsSsize_t(p_chunk_stop);
+        Py_DECREF(p_chunk_stop);
+        if (chunk_stop < 0) {
             if (PyErr_Occurred()) {
                 goto abort;
             }
             // -1 here means that '-' was not found
-            assert(t == -1);
+            assert(chunk_stop == -1);
             break;
         }
-        PyObject *sub = PyUnicode_Substring(pattern, i, t);
-        if (sub == NULL) {
-            goto abort;
-        }
-        int rc = PyList_Append(chunks, sub);
-        Py_DECREF(sub);
-        if (rc < 0) {
-            goto abort;
-        }
-        i = t + 1;
-        k = t + 3;
+        ADD_CHUNK(chunk_start, chunk_stop);
+        chunk_start = chunk_stop + 1;    // jump after '-'
+        ind = chunk_stop + 3;            // ensure a non-empty next chunk
+    }
+    if (chunk_start < stop) {
+        ADD_CHUNK(chunk_start, stop);
     }
-    // handle the last group
-    if (i >= j) {
+    else {
         Py_ssize_t chunkscount = PyList_GET_SIZE(chunks);
         assert(chunkscount > 0);
         PyObject *chunk = PyList_GET_ITEM(chunks, chunkscount - 1);
         assert(chunk != NULL);
-        PyObject *str = PyUnicode_Concat(chunk, state->hyphen_str);
+        PyObject *str = PyUnicode_Concat(chunk, hyphen);
         // PyList_SetItem() does not create a new reference on 'str'
         // so we should not decref 'str' after the call, unless there
         // is an issue while setting the item.
@@ -398,24 +437,11 @@ translate_expression_split(fnmatchmodule_state *state,
             goto abort;
         }
     }
-    else {
-        // add the remaining sub-pattern
-        PyObject *sub = PyUnicode_Substring(pattern, i, j);
-        if (sub == NULL) {
-            goto abort;
-        }
-        int rc = PyList_Append(chunks, sub);
-        Py_DECREF(sub);
-        if (rc < 0) {
-            goto abort;
-        }
-    }
-    Py_DECREF(max_find_index);
-    Py_DECREF(str_find_func);
+#undef ADD_CHUNK
+    Py_DECREF(maxind);
     return chunks;
 abort:
-    Py_XDECREF(max_find_index);
-    Py_XDECREF(str_find_func);
+    Py_XDECREF(maxind);
     Py_XDECREF(chunks);
     return NULL;
 }
@@ -426,16 +452,16 @@ translate_expression_split(fnmatchmodule_state *state,
  * See translate_expression() for its usage.
  */
 static int
-translate_expression_simplify(fnmatchmodule_state *st, PyObject *chunks)
+simplify_expression(PyObject *chunks)
 {
-    Py_ssize_t c = PyList_GET_SIZE(chunks);
-    while (--c) {
-        PyObject *c1 = PyList_GET_ITEM(chunks, c - 1);
+    // for k in range(len(chunks) - 1, 0, -1):
+    for (Py_ssize_t k = PyList_GET_SIZE(chunks) - 1; k > 0; --k) {
+        PyObject *c1 = PyList_GET_ITEM(chunks, k - 1);
         assert(c1 != NULL);
         Py_ssize_t c1len = PyUnicode_GET_LENGTH(c1);
         assert(c1len > 0);
 
-        PyObject *c2 = PyList_GET_ITEM(chunks, c);
+        PyObject *c2 = PyList_GET_ITEM(chunks, k);
         assert(c2 != NULL);
         Py_ssize_t c2len = PyUnicode_GET_LENGTH(c2);
         assert(c2len > 0);
@@ -443,31 +469,46 @@ translate_expression_simplify(fnmatchmodule_state *st, PyObject *chunks)
         if (PyUnicode_READ_CHAR(c1, c1len - 1) > PyUnicode_READ_CHAR(c2, 0)) {
             Py_ssize_t olen = c1len + c2len - 2;
             assert(olen >= 0);
-            // see https://github.com/python/cpython/issues/114917 for
-            // why we need olen + 1 and not olen currently
-            PyUnicodeWriter *writer = PyUnicodeWriter_Create(olen + 1);
-            if (writer == NULL) {
-                return -1;
+            PyObject *str = NULL;
+            if (olen == 0) {        // c1[:-1] + c2[1:] == ''
+                str = Py_GetConstant(Py_CONSTANT_EMPTY_STR);
+                assert(_Py_IsImmortal(str));
             }
-            // all but the last character in the first chunk
-            if (_WRITE_BLOCK(writer, c1, 0, c1len - 1) < 0) {
-                PyUnicodeWriter_Discard(writer);
-                return -1;
+            else if (c1len == 1) {  // c1[:-1] + c2[1:] == c2[1:]
+                assert(c2len > 1);
+                str = PyUnicode_Substring(c2, 1, c2len);
             }
-            // all but the first character in the second chunk
-            if (_WRITE_BLOCK(writer, c2, 1, c2len) < 0) {
-                PyUnicodeWriter_Discard(writer);
-                return -1;
+            else if (c2len == 1) {  // c1[:-1] + c2[1:] == c1[:-1]
+                assert(c1len > 1);
+                str = PyUnicode_Substring(c1, 0, c1len - 1);
+            }
+            else {
+                assert(c1len > 1);
+                assert(c2len > 1);
+                PyUnicodeWriter *writer = PyUnicodeWriter_Create(olen);
+                if (writer == NULL) {
+                    return -1;
+                }
+                // all but the last character in the first chunk
+                if (_WRITE_BLOCK(writer, c1, 0, c1len - 1) < 0) {
+                    PyUnicodeWriter_Discard(writer);
+                    return -1;
+                }
+                // all but the first character in the second chunk
+                if (_WRITE_BLOCK(writer, c2, 1, c2len) < 0) {
+                    PyUnicodeWriter_Discard(writer);
+                    return -1;
+                }
+                str = PyUnicodeWriter_Finish(writer);
             }
-            // PyList_SetItem() does not create a new reference on 'merged'
-            // so we should not decref 'merged' after the call, unless there
+            // PyList_SetItem() does not create a new reference on 'str'
+            // so we should not decref 'str' after the call, unless there
             // is an issue while setting the item.
-            PyObject *merged = PyUnicodeWriter_Finish(writer);
-            if (merged == NULL || PyList_SetItem(chunks, c - 1, merged) < 0) {
-                Py_XDECREF(merged);
+            if (str == NULL || PyList_SetItem(chunks, k - 1, str) < 0) {
+                Py_XDECREF(str);
                 return -1;
             }
-            if (PySequence_DelItem(chunks, c) < 0) {
+            if (PySequence_DelItem(chunks, k) < 0) {
                 return -1;
             }
         }
@@ -482,24 +523,16 @@ translate_expression_simplify(fnmatchmodule_state *st, PyObject *chunks)
  * See translate_expression() for its usage.
  */
 static int
-translate_expression_escape(fnmatchmodule_state *st, PyObject *chunks)
+escape_expression(fnmatchmodule_state *state, PyObject *chunks)
 {
     for (Py_ssize_t c = 0; c < PyList_GET_SIZE(chunks); ++c) {
         PyObject *s0 = PyList_GET_ITEM(chunks, c);
         assert(s0 != NULL);
-        PyObject *s1 = PyObject_CallMethodObjArgs(s0,
-                                                  &_Py_ID(replace),
-                                                  st->backslash_str,
-                                                  st->backslash_esc_str,
-                                                  NULL);
+        PyObject *s1 = BACKSLASH_REPLACE(state, s0);
         if (s1 == NULL) {
             return -1;
         }
-        PyObject *s2 = PyObject_CallMethodObjArgs(s1,
-                                                  &_Py_ID(replace),
-                                                  st->hyphen_str,
-                                                  st->hyphen_esc_str,
-                                                  NULL);
+        PyObject *s2 = HYPHEN_REPLACE(state, s1);
         Py_DECREF(s1);
         // PyList_SetItem() does not create a new reference on 's2'
         // so we should not decref 's2' after the call, unless there
@@ -514,18 +547,20 @@ translate_expression_escape(fnmatchmodule_state *st, PyObject *chunks)
 
 static PyObject *
 translate_expression(fnmatchmodule_state *state,
-                     PyObject *pattern, Py_ssize_t i, Py_ssize_t j)
+                     PyObject *pattern, Py_ssize_t start, Py_ssize_t stop,
+                     PyObject *pattern_str_find_meth)
 {
-    PyObject *chunks = translate_expression_split(state, pattern, i, j);
+    PyObject *chunks = split_expression(state, pattern, start, stop,
+                                        pattern_str_find_meth);
     if (chunks == NULL) {
         goto abort;
     }
     // remove empty ranges
-    if (translate_expression_simplify(state, chunks) < 0) {
+    if (simplify_expression(chunks) < 0) {
         goto abort;
     }
     // escape backslashes and set differences
-    if (translate_expression_escape(state, chunks) < 0) {
+    if (escape_expression(state, chunks) < 0) {
         goto abort;
     }
     PyObject *res = PyUnicode_Join(state->hyphen_str, chunks);
@@ -539,17 +574,19 @@ translate_expression(fnmatchmodule_state *state,
 static Py_ssize_t
 write_expression(PyUnicodeWriter *writer, PyObject *expression)
 {
-#define WRITE_CHAR(c)           _WRITE_CHAR_OR(writer, (c), return -1)
-#define WRITE_STRING(s)         _WRITE_STRING_OR(writer, (s), return -1)
+    // ---- def local macros --------------------------------------------------
+#define WRITE_CHAR(CHAR)    _WRITE_CHAR_OR(writer, (CHAR), return -1)
+#define WRITE_STRING(STR)   _WRITE_STRING_OR(writer, (STR), return -1)
+    // ------------------------------------------------------------------------
     Py_ssize_t grouplen = PyUnicode_GET_LENGTH(expression);
     if (grouplen == 0) {
-        /* empty range: never match */
+        // empty range: never match
         _WRITE_ASCII_OR(writer, "(?!)", 4, return -1);
         return 4;
     }
     Py_UCS4 token = PyUnicode_READ_CHAR(expression, 0);
     if (grouplen == 1 && token == '!') {
-        /* negated empty range: match any character */
+        // negated empty range: match any character
         WRITE_CHAR('.');
         return 1;
     }
@@ -582,48 +619,57 @@ write_expression(PyUnicodeWriter *writer, PyObject *expression)
 static PyObject *
 process_wildcards(PyObject *pattern, PyObject *indices)
 {
-    const Py_ssize_t m = PyList_GET_SIZE(indices);
-    if (m == 0) {
+    const Py_ssize_t M = PyList_GET_SIZE(indices);
+    if (M == 0) {
         // "(?s:" + pattern + ")\Z"
         return PyUnicode_FromFormat("(?s:%U)\\Z", pattern);
     }
-    /*
-     * Special cases: indices[0] == 0 or indices[-1] + 1 == n
-     *
-     * If indices[0] == 0       write (?>.*?abcdef) instead of abcdef
-     * If indices[-1] == n - 1  write '.*' instead of empty string
-     */
-    Py_ssize_t i = 0, j, n = PyUnicode_GET_LENGTH(pattern);
-    /*
-     * If the pattern starts with '*', we will write everything
-     * before it. So we will write at least indices[0] characters.
-     *
-     * For the inner groups 'STAR STRING ...' we always surround
-     * the STRING by "(?>.*?" and ")", and thus we will write at
-     * least 7 + len(STRING) characters.
-     *
-     * We write one additional '.*' if indices[-1] + 1 == n.
-     *
-     * Since the result is surrounded by "(?s:" and ")\Z", we
-     * write at least "indices[0] + 7*m + n + 6" characters,
-     * where 'm' is the number of stars and 'n' the length
-     * of the /translated) pattern.
-     */
-    PyObject *jobj = PyList_GET_ITEM(indices, 0);
-    assert(jobj != NULL);
-    j = PyLong_AsSsize_t(jobj);  // get the first position of '*'
+    // Special cases: indices[0] == 0 or indices[-1] + 1 == n
+    //
+    // If indices[0] == 0       write (?>.*?abcdef) instead of abcdef
+    // If indices[-1] == n - 1  write '.*' instead of empty string
+    Py_ssize_t i = 0, N = PyUnicode_GET_LENGTH(pattern);
+    // get the first position of '*'
+    Py_ssize_t j = PyLong_AsSsize_t(PyList_GET_ITEM(indices, 0));
     if (j < 0) {
         return NULL;
     }
-    Py_ssize_t estimate = j + 7 * m + n + 6;
-    PyUnicodeWriter *writer = PyUnicodeWriter_Create(estimate);
+    // By construction, we have
+    //
+    //      pattern = [PREFIX] [[(* INNER) ... (* INNER)] (* OUTER)] [*]
+    //
+    // where [...] is an optional group and () is required to exist.
+    //
+    // Case 1:  pattern ends with a wildcard:
+    //
+    //      - Write the PREFIX.
+    //      - Write any group (* GROUP) as "(?>.*?" + GROUP + ")".
+    //      - Write a final ".*" due to the final wildcard.
+    //      - Number of characters to write: N + 6 * (M - 1) + 1, where
+    //        the +1 is because the '*' in the final ".*" is counted by N.
+    //
+    // Case 2:  pattern does not end with a wildcard:
+    //
+    //      - Write the PREFIX.
+    //      - Write an INNER group (* INNER) as "(?>.*?" + INNER + ")".
+    //      - Write the OUTER group (* OUTER) as ".*" + OUTER.
+    //      - Number of characters to write: N + 6 * (M - 1) + 1, where
+    //        the +1 is because the '*' in ".*" + OUTER is counted by N.
+    //
+    // In both cases, we write N + 6(M - 1) + 1 characters. Since the final
+    // result is surrounded by "(?s:" and ")\\Z", we have:
+    //
+    //      Number of written characters: N + 6(M - 1) + 1 + 7 = N + 6M + 2.
+    Py_ssize_t output_size = 6 * M + N + 2;
+    PyUnicodeWriter *writer = PyUnicodeWriter_Create(output_size);
     if (writer == NULL) {
         return NULL;
     }
+    // write everything before the first wildcard normally
     _WRITE_BLOCK_OR(writer, pattern, i, j, goto abort);
     i = j + 1; // jump after the '*'
-    for (Py_ssize_t k = 1; k < m; ++k) {
-        // process all but the last wildcard.
+    for (Py_ssize_t k = 1; k < M; ++k) {
+        // process all but the last wildcard
         PyObject *ind = PyList_GET_ITEM(indices, k);
         assert(ind != NULL);
         j = PyLong_AsSsize_t(ind);
@@ -640,13 +686,14 @@ process_wildcards(PyObject *pattern, PyObject *indices)
     // handle the remaining wildcard
     _WRITE_ASCII_OR(writer, ".*", 2, goto abort);
     // write the remaining substring (if non-empty)
-    _WRITE_BLOCK_OR(writer, pattern, i, n, goto abort);
+    _WRITE_BLOCK_OR(writer, pattern, i, N, goto abort);
     PyObject *processed = PyUnicodeWriter_Finish(writer);
     if (processed == NULL) {
         return NULL;
     }
-    // "(?s:" + processed + ")\Z"
+    // "(?s:" + processed + ")\\Z"
     PyObject *res = PyUnicode_FromFormat("(?s:%U)\\Z", processed);
+    assert(PyUnicode_GET_LENGTH(res) == output_size);
     Py_DECREF(processed);
     return res;
 abort:
@@ -654,6 +701,10 @@ process_wildcards(PyObject *pattern, PyObject *indices)
     return NULL;
 }
 
+#undef SETOPS_REPLACE
+#undef HYPHEN_REPLACE
+#undef BACKSLASH_REPLACE
+
 #undef _WRITE_BLOCK_OR
 #undef _WRITE_BLOCK
 #undef _WRITE_STRING_OR
@@ -662,4 +713,4 @@ process_wildcards(PyObject *pattern, PyObject *indices)
 #undef _WRITE_ASCII
 #undef _WRITE_CHAR_OR
 #undef _WRITE_CHAR
-#undef _WRITE_OR_FAIL
+#undef _INTERNAL_CALL_OR_FAIL

From d595cb449e02ab5c68864e1eeb1ce7a3a707947b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?=
 <10796600+picnixz@users.noreply.github.com>
Date: Thu, 18 Jul 2024 10:01:13 +0200
Subject: [PATCH 65/97] improve coverage

---
 Lib/test/test_fnmatch.py | 135 +++++++++++++++++++++++++++------------
 1 file changed, 94 insertions(+), 41 deletions(-)

diff --git a/Lib/test/test_fnmatch.py b/Lib/test/test_fnmatch.py
index 19f12db4fa2160..4ddfce72ff7652 100644
--- a/Lib/test/test_fnmatch.py
+++ b/Lib/test/test_fnmatch.py
@@ -267,12 +267,106 @@ def test_translate(self):
         self.assertTrue(re.match(fatre, 'cbabcaxc'))
         self.assertFalse(re.match(fatre, 'dabccbad'))
 
+    def test_translate_wildcards(self):
+        for pattern, expect in [
+            ('ab*', r'(?s:ab.*)\Z'),
+            ('ab*cd', r'(?s:ab.*cd)\Z'),
+            ('ab*cd*', r'(?s:ab(?>.*?cd).*)\Z'),
+            ('ab*cd*12', r'(?s:ab(?>.*?cd).*12)\Z'),
+            ('ab*cd*12*', r'(?s:ab(?>.*?cd)(?>.*?12).*)\Z'),
+            ('ab*cd*12*34', r'(?s:ab(?>.*?cd)(?>.*?12).*34)\Z'),
+            ('ab*cd*12*34*', r'(?s:ab(?>.*?cd)(?>.*?12)(?>.*?34).*)\Z'),
+        ]:
+            translated = self.fnmatch.translate(pattern)
+            self.assertEqual(translated, expect, pattern)
+
+        for pattern, expect in [
+            ('*ab', r'(?s:.*ab)\Z'),
+            ('*ab*', r'(?s:(?>.*?ab).*)\Z'),
+            ('*ab*cd', r'(?s:(?>.*?ab).*cd)\Z'),
+            ('*ab*cd*', r'(?s:(?>.*?ab)(?>.*?cd).*)\Z'),
+            ('*ab*cd*12', r'(?s:(?>.*?ab)(?>.*?cd).*12)\Z'),
+            ('*ab*cd*12*', r'(?s:(?>.*?ab)(?>.*?cd)(?>.*?12).*)\Z'),
+            ('*ab*cd*12*34', r'(?s:(?>.*?ab)(?>.*?cd)(?>.*?12).*34)\Z'),
+            ('*ab*cd*12*34*', r'(?s:(?>.*?ab)(?>.*?cd)(?>.*?12)(?>.*?34).*)\Z'),
+        ]:
+            translated = self.fnmatch.translate(pattern)
+            self.assertEqual(translated, expect, pattern)
+
+    def test_translate_expressions(self):
+        '[', '[-abc]', '[[]b', '[[a]b', '[\\\\]', '[\\]', '[]-]', '[][!]',
+        '[]]b', '[]a[]b', '[^a-c]*', '[a-\\z]',
+        '[a-c]b*', '[a-y]*[^c]', '[abc-]', '\\*',
+        '[0-4-3-2]', '[b-ac-z9-1]', '[!b-ac-z9-1]', '[!]b-ac-z9-1]',
+        '[]b-ac-z9-1]', '[]b-ac-z9-1]*', '*[]b-ac-z9-1]',
+        for pattern, expect in [
+            ('[', r'(?s:\[)\Z'),
+            ('[!', r'(?s:\[!)\Z'),
+            ('[]', r'(?s:\[\])\Z'),
+            ('[abc', r'(?s:\[abc)\Z'),
+            ('[!abc', r'(?s:\[!abc)\Z'),
+            ('[abc]', r'(?s:[abc])\Z'),
+            ('[!abc]', r'(?s:[^abc])\Z'),
+            # with [[
+            ('[[', r'(?s:\[\[)\Z'),
+            ('[[a', r'(?s:\[\[a)\Z'),
+            ('[[]', r'(?s:[\[])\Z'),
+            ('[[]a', r'(?s:[\[]a)\Z'),
+            ('[[]]', r'(?s:[\[]\])\Z'),
+            ('[[]a]', r'(?s:[\[]a\])\Z'),
+            ('[[a]', r'(?s:[\[a])\Z'),
+            ('[[a]]', r'(?s:[\[a]\])\Z'),
+            ('[[a]b', r'(?s:[\[a]b)\Z'),
+            # backslashes
+            ('[\\', r'(?s:\[\\)\Z'),
+            (r'[\]', r'(?s:[\\])\Z'),
+            (r'[\\]', r'(?s:[\\\\])\Z'),
+        ]:
+            translated = self.fnmatch.translate(pattern)
+            self.assertEqual(translated, expect, pattern)
+
 class PurePythonTranslateTestCase(TranslateTestCaseMixin, unittest.TestCase):
     fnmatch = py_fnmatch
 
 class CPythonTranslateTestCase(TranslateTestCaseMixin, unittest.TestCase):
     fnmatch = c_fnmatch
 
+    @staticmethod
+    def translate_func(pattern):
+        # Pure Python implementation of translate()
+        STAR = object()
+        parts = py_fnmatch._translate(pattern, STAR, '.')
+        return py_fnmatch._join_translated_parts(parts, STAR)
+
+    def test_translate(self):
+        # We want to check that the C implementation is EXACTLY the same
+        # as the Python implementation. For that, we will need to cover
+        # a lot of cases.
+        translate = self.fnmatch.translate
+
+        for choice in itertools.combinations_with_replacement('*?.', 5):
+            for suffix in ['', '!']:
+                pat = suffix + ''.join(choice)
+                with self.subTest(pattern=pat):
+                    self.assertEqual(translate(pat), self.translate_func(pat))
+
+        for pat in [
+            '',
+            '!!a*', '!\\!a*', '!a*', '*', '**', '*******?', '*******c', '*****??', '**/',
+            '*.js', '*/man*/bash.*', '*???', '?', '?*****??', '?*****?c', '?***?****',
+            '?***?****?', '?***?****c', '?*?', '??', '???', '???*', '[!\\]',
+            '\\**', '\\*\\*', 'a*', 'a*****?c', 'a****c**?**??*****', 'a***c',
+            'a**?**cd**?**??***k', 'a**?**cd**?**??***k**', 'a**?**cd**?**??k',
+            'a**?**cd**?**??k***', 'a*[^c]',
+            'a*cd**?**??k', 'a/*', 'a/**', 'a/**/b',
+            'a/**/b/**/c', 'a/.*/c', 'a/?', 'a/??', 'a[X-]b', 'a[\\.]c',
+            'a[\\b]c', 'a[bc', 'a\\*?/*', 'a\\*b/*',
+            'ab[!de]', 'ab[cd]', 'ab[cd]ef', 'abc', 'b*/', 'foo*',
+            'man/man1/bash.1'
+        ]:
+            with self.subTest(pattern=pat):
+                self.assertEqual(translate(pat), self.translate_func(pat))
+
 class FilterTestCaseMixin:
     fnmatch = None
 
@@ -310,46 +404,5 @@ class PurePythonFilterTestCase(FilterTestCaseMixin, unittest.TestCase):
 class CPythonFilterTestCase(FilterTestCaseMixin, unittest.TestCase):
     fnmatch = c_fnmatch
 
-    @staticmethod
-    def translate_func(pattern):
-        # Pure Python implementation of translate()
-        STAR = object()
-        parts = py_fnmatch._translate(pattern, STAR, '.')
-        return py_fnmatch._join_translated_parts(parts, STAR)
-
-    def test_translate(self):
-        # We want to check that the C implementation is EXACTLY the same
-        # as the Python implementation. For that, we will need to cover
-        # a lot of cases.
-        translate = self.fnmatch.translate
-
-        for choice in itertools.combinations_with_replacement('*?.', 5):
-            for suffix in ['', '!']:
-                pat = suffix + ''.join(choice)
-                with self.subTest(pattern=pat):
-                    self.assertEqual(translate(pat), self.translate_func(pat))
-
-        for pat in [
-            '',
-            '!!a*', '!\\!a*', '!a*', '*', '**', '*******?', '*******c', '*****??', '**/',
-            '*.js', '*/man*/bash.*', '*???', '?', '?*****??', '?*****?c', '?***?****',
-            '?***?****?', '?***?****c', '?*?', '??', '???', '???*', '[!\\]',
-            '[*', '[-abc]', '[[]b', '[[a]b', '[\\\\]', '[\\]', '[]-]', '[][!]',
-            '[]]b', '[]a[]b', '[^a-c]*', '[a-\\z]',
-            '[a-c]b*', '[a-y]*[^c]', '[abc-]', '\\*',
-            '[0-4-3-2]', '[b-ac-z9-1]', '[!b-ac-z9-1]', '[!]b-ac-z9-1]',
-            '[]b-ac-z9-1]', '[]b-ac-z9-1]*', '*[]b-ac-z9-1]',
-            '\\**', '\\*\\*', 'a*', 'a*****?c', 'a****c**?**??*****', 'a***c',
-            'a**?**cd**?**??***k', 'a**?**cd**?**??***k**', 'a**?**cd**?**??k',
-            'a**?**cd**?**??k***', 'a*[^c]',
-            'a*cd**?**??k', 'a/*', 'a/**', 'a/**/b',
-            'a/**/b/**/c', 'a/.*/c', 'a/?', 'a/??', 'a[X-]b', 'a[\\.]c',
-            'a[\\b]c', 'a[bc', 'a\\*?/*', 'a\\*b/*',
-            'ab[!de]', 'ab[cd]', 'ab[cd]ef', 'abc', 'b*/', 'foo*',
-            'man/man1/bash.1'
-        ]:
-            with self.subTest(pattern=pat):
-                self.assertEqual(translate(pat), self.translate_func(pat))
-
 if __name__ == "__main__":
     unittest.main()

From e4296d8fb21e17c2ae0ec75aacad617a9101c89d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?=
 <10796600+picnixz@users.noreply.github.com>
Date: Thu, 18 Jul 2024 15:40:02 +0200
Subject: [PATCH 66/97] update benchmarks

---
 .../Library/2024-07-12-09-24-38.gh-issue-121445.KYtNOZ.rst    | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/Misc/NEWS.d/next/Library/2024-07-12-09-24-38.gh-issue-121445.KYtNOZ.rst b/Misc/NEWS.d/next/Library/2024-07-12-09-24-38.gh-issue-121445.KYtNOZ.rst
index 639af4fb31ff93..f374f28456d65d 100644
--- a/Misc/NEWS.d/next/Library/2024-07-12-09-24-38.gh-issue-121445.KYtNOZ.rst
+++ b/Misc/NEWS.d/next/Library/2024-07-12-09-24-38.gh-issue-121445.KYtNOZ.rst
@@ -1,2 +1,2 @@
-Improve the performances of :func:`fnmatch.translate` by 50% and of
-:func:`fnmatch.filter` by 10%.  Patch by Bénédikt Tran.
+Improve the performances of :func:`fnmatch.translate` by 2x and of
+:func:`fnmatch.filter` by 1.1x.  Patch by Bénédikt Tran.

From cc92c4be8cd61cd6bcae680ffee801a72ec9faf4 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?=
 <10796600+picnixz@users.noreply.github.com>
Date: Thu, 18 Jul 2024 15:41:01 +0200
Subject: [PATCH 67/97] fixup

---
 Lib/test/test_fnmatch.py | 10 +++++-----
 Makefile.pre.in          |  1 +
 2 files changed, 6 insertions(+), 5 deletions(-)

diff --git a/Lib/test/test_fnmatch.py b/Lib/test/test_fnmatch.py
index 4ddfce72ff7652..9e00054d6ab14c 100644
--- a/Lib/test/test_fnmatch.py
+++ b/Lib/test/test_fnmatch.py
@@ -377,6 +377,11 @@ def test_filter(self):
         self.assertEqual(filter([b'Python', b'Ruby', b'Perl', b'Tcl'], b'P*'),
                          [b'Python', b'Perl'])
 
+    def test_mix_bytes_str(self):
+        filter = self.fnmatch.filter
+        self.assertRaises(TypeError, filter, ['test'], b'*')
+        self.assertRaises(TypeError, filter, [b'test'], '*')
+
     def test_case(self):
         ignorecase = os.path.normcase('P') == os.path.normcase('p')
         filter = self.fnmatch.filter
@@ -393,11 +398,6 @@ def test_sep(self):
         self.assertEqual(filter(['usr/bin', 'usr', 'usr\\lib'], 'usr\\*'),
                          ['usr/bin', 'usr\\lib'] if normsep else ['usr\\lib'])
 
-    def test_mix_bytes_str(self):
-        filter = self.fnmatch.filter
-        self.assertRaises(TypeError, filter, ['test'], b'*')
-        self.assertRaises(TypeError, filter, [b'test'], '*')
-
 class PurePythonFilterTestCase(FilterTestCaseMixin, unittest.TestCase):
     fnmatch = py_fnmatch
 
diff --git a/Makefile.pre.in b/Makefile.pre.in
index 7722873a83ea57..07cbd0a7567233 100644
--- a/Makefile.pre.in
+++ b/Makefile.pre.in
@@ -348,6 +348,7 @@ IO_OBJS=	\
 		Modules/_io/bytesio.o \
 		Modules/_io/stringio.o
 
+
 ##########################################################################
 # mimalloc
 

From ee27297ea3b4f4abc411fb866bfb5ea627c3c6ca Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?=
 <10796600+picnixz@users.noreply.github.com>
Date: Sat, 20 Jul 2024 12:07:01 +0200
Subject: [PATCH 68/97] refactorization:

- split into more files
- reduce the number of exported symbols
- extract global macros into `macros.h`
- simplify `fnmatch.filter()`
- make the flow of `fnmatch.translate()` more readable
---
 Modules/_fnmatch/_fnmatchmodule.c             |  84 +++--
 Modules/_fnmatch/filter.c                     |  50 +++
 Modules/_fnmatch/macros.h                     | 119 +++++++
 Modules/_fnmatch/matcher.c                    |  85 -----
 Modules/_fnmatch/translate.c                  | 301 ++++++------------
 Modules/_fnmatch/{_fnmatchmodule.h => util.h} |  45 +--
 6 files changed, 342 insertions(+), 342 deletions(-)
 create mode 100644 Modules/_fnmatch/filter.c
 create mode 100644 Modules/_fnmatch/macros.h
 delete mode 100644 Modules/_fnmatch/matcher.c
 rename Modules/_fnmatch/{_fnmatchmodule.h => util.h} (60%)

diff --git a/Modules/_fnmatch/_fnmatchmodule.c b/Modules/_fnmatch/_fnmatchmodule.c
index acab53951d7b89..1f03f050f4c831 100644
--- a/Modules/_fnmatch/_fnmatchmodule.c
+++ b/Modules/_fnmatch/_fnmatchmodule.c
@@ -1,6 +1,35 @@
-#include "_fnmatchmodule.h"
+/*
+ * C accelerator for the 'fnmatch' module.
+ *
+ * Currently, the following inconsistencies in the Python implementation exist:
+ *
+ * - fnmatch.filter(NAMES, PATTERN) works with pathlib.Path() instances
+ *   in NAMES on Windows but raises a TypeError on POSIX platforms.
+ *
+ * The reason is that os.path.normcase() is called on each NAME in NAMES on
+ * Windows but not on POSIX platforms, where os.fspath() is never called:
+ *
+ *      POSIX       fnmatch.filter([Path("a")], "*") -> TypeError
+ *      Windows     fnmatch.filter([Path("a")], "*") -> [Path("a")]
+ *
+ * - Case normalization uses the runtime value of os.path.normcase(),
+ *   forcing us to query the attribute each time.
+ *
+ * The C implementation of fnmatch.filter() uses the same os.path.normcase()
+ * when iterating over NAMES, ignoring side-effects on os.path.normcase()
+ * that may occur when processing a NAME in NAMES.
+ *
+ * More generally, os.path.normcase() is retrieved at most once per call
+ * to fnmatch.filter() or fnmatch.fnmatch().
+ */
+
+#ifndef Py_BUILD_CORE_BUILTIN
+#  define Py_BUILD_CORE_MODULE 1
+#endif
+
+#include "util.h"                       // prototypes
 
-#include "pycore_runtime.h" // _Py_ID()
+#include "pycore_runtime.h"             // for _Py_ID()
 
 #include "clinic/_fnmatchmodule.c.h"
 
@@ -26,9 +55,13 @@ get_matcher_function_impl(PyObject *module, PyObject *pattern)
     }
     fnmatchmodule_state *st = get_fnmatchmodule_state(module);
     // compile the pattern
-    PyObject *compiled = PyObject_CallMethodOneArg(st->re_module,
-                                                   &_Py_ID(compile),
-                                                   translated);
+    PyObject *compile_func = PyObject_GetAttr(st->re_module, &_Py_ID(compile));
+    if (compile_func == NULL) {
+        Py_DECREF(translated);
+        return NULL;
+    }
+    PyObject *compiled = PyObject_CallOneArg(compile_func, translated);
+    Py_DECREF(compile_func);
     Py_DECREF(translated);
     if (compiled == NULL) {
         return NULL;
@@ -41,7 +74,7 @@ get_matcher_function_impl(PyObject *module, PyObject *pattern)
 
 static PyMethodDef get_matcher_function_def = {
     "get_matcher_function",
-    (PyCFunction)(get_matcher_function_impl),
+    get_matcher_function_impl,
     METH_O,
     NULL
 };
@@ -55,25 +88,25 @@ fnmatchmodule_load_translator(PyObject *module, fnmatchmodule_state *st)
     if (maxsize == NULL) {
         return -1;
     }
-    PyObject *lru_cache = _PyImport_GetModuleAttrString("functools",
-                                                        "lru_cache");
-    if (lru_cache == NULL) {
+    PyObject *cache = _PyImport_GetModuleAttrString("functools", "lru_cache");
+    if (cache == NULL) {
         Py_DECREF(maxsize);
         return -1;
     }
-    PyObject *decorator = PyObject_CallFunctionObjArgs(
-        lru_cache, maxsize, Py_True, NULL);
-    Py_DECREF(lru_cache);
+    PyObject *args[3] = {NULL, maxsize, Py_True};
+    size_t nargsf = 2 | PY_VECTORCALL_ARGUMENTS_OFFSET;
+    PyObject *wrapper = PyObject_Vectorcall(cache, &args[1], nargsf, NULL);
     Py_DECREF(maxsize);
-    if (decorator == NULL) {
+    Py_DECREF(cache);
+    if (wrapper == NULL) {
         return -1;
     }
     assert(module != NULL);
-    PyObject *decorated = PyCFunction_New(&get_matcher_function_def, module);
+    PyObject *wrapped = PyCFunction_New(&get_matcher_function_def, module);
     // reference on 'translator' will be removed upon module cleanup
-    st->translator = PyObject_CallOneArg(decorator, decorated);
-    Py_DECREF(decorated);
-    Py_DECREF(decorator);
+    st->translator = PyObject_CallOneArg(wrapper, wrapped);
+    Py_DECREF(wrapped);
+    Py_DECREF(wrapper);
     if (st->translator == NULL) {
         return -1;
     }
@@ -100,7 +133,7 @@ get_platform_normcase_function(PyObject *module, bool *isposix)
     }
     PyObject *normcase = PyObject_GetAttr(os_path, &_Py_ID(normcase));
     if (isposix != NULL) {
-        *isposix = (bool)Py_Is(os_path, st->posixpath_module);
+        *isposix = Py_Is(os_path, st->posixpath_module);
     }
     Py_DECREF(os_path);
     return normcase;
@@ -208,10 +241,6 @@ static PyObject *
 fnmatch_filter_impl(PyObject *module, PyObject *names, PyObject *pattern)
 /*[clinic end generated code: output=1a68530a2e3cf7d0 input=7ac729daad3b1404]*/
 {
-    // filter() always calls os.path.normcase() on the pattern,
-    // but not on the names being mathed if os.path is posixmodule
-    // XXX: maybe this should be changed in Python as well?
-    // Note: the Python implementation uses the *runtime* os.path.normcase.
     bool isposix = 0;
     PyObject *normcase = get_platform_normcase_function(module, &isposix);
     if (normcase == NULL) {
@@ -229,9 +258,8 @@ fnmatch_filter_impl(PyObject *module, PyObject *names, PyObject *pattern)
         Py_DECREF(normcase);
         return NULL;
     }
-    PyObject *filtered = isposix
-        ? _Py_fnmatch_filter(matcher, names)
-        : _Py_fnmatch_filter_normalized(matcher, names, normcase);
+    PyObject *normalizer = isposix ? NULL : normcase;
+    PyObject *filtered = _Py_fnmatch_filter(matcher, names, normalizer);
     Py_DECREF(matcher);
     Py_DECREF(normcase);
     return filtered;
@@ -308,8 +336,12 @@ fnmatch_fnmatchcase_impl(PyObject *module, PyObject *name, PyObject *pattern)
     if (matcher == NULL) {
         return -1;
     }
-    int matching = _Py_fnmatch_match(matcher, name);
+    // If 'name' is of incorrect type, it will be detected when calling
+    // the matcher function (we check 're.compile(pattern).match(name)').
+    PyObject *match = PyObject_CallOneArg(matcher, name);
     Py_DECREF(matcher);
+    int matching = match == NULL ? -1 : !Py_IsNone(match);
+    Py_XDECREF(match);
     return matching;
 }
 
diff --git a/Modules/_fnmatch/filter.c b/Modules/_fnmatch/filter.c
new file mode 100644
index 00000000000000..5b44f6accfc8df
--- /dev/null
+++ b/Modules/_fnmatch/filter.c
@@ -0,0 +1,50 @@
+/*
+ * Provide the implementation of the high-level matcher-based functions.
+ */
+
+#include "Python.h"
+
+PyObject *
+_Py_fnmatch_filter(PyObject *matcher, PyObject *names, PyObject *normalizer)
+{
+    PyObject *iter = PyObject_GetIter(names);
+    if (iter == NULL) {
+        return NULL;
+    }
+    PyObject *res = PyList_New(0);
+    if (res == NULL) {
+        Py_DECREF(iter);
+        return NULL;
+    }
+    PyObject *name = NULL;
+    while ((name = PyIter_Next(iter))) {
+        PyObject *match;
+        if (normalizer == NULL) {
+            match = PyObject_CallOneArg(matcher, name);
+        }
+        else {
+            PyObject *normalized = PyObject_CallOneArg(normalizer, name);
+            if (normalized == NULL) {
+                goto abort;
+            }
+            match = PyObject_CallOneArg(matcher, normalized);
+            Py_DECREF(normalized);
+        }
+        if (match == NULL) {
+            goto abort;
+        }
+        int matching = Py_IsNone(match) == 0;
+        Py_DECREF(match);
+        if (matching && PyList_Append(res, name) < 0) {
+            goto abort;
+        }
+        Py_DECREF(name);
+    }
+    Py_DECREF(iter);
+    return res;
+abort:
+    Py_DECREF(name);
+    Py_DECREF(iter);
+    Py_DECREF(res);
+    return NULL;
+}
diff --git a/Modules/_fnmatch/macros.h b/Modules/_fnmatch/macros.h
new file mode 100644
index 00000000000000..04bf4a684a3035
--- /dev/null
+++ b/Modules/_fnmatch/macros.h
@@ -0,0 +1,119 @@
+/*
+ * This file contains various macro definitions in order to reduce the
+ * number of lines in translate.c. Do not use them for something else.
+ */
+
+#ifndef _FNMATCH_MACROS_H
+#define _FNMATCH_MACROS_H
+
+// ==== Macro definitions =====================================================
+
+// The following _WRITE_* and WRITE_*_OR macros do NOT check their inputs
+// since they directly delegate to the _PyUnicodeWriter_Write* underlying
+// function. In particular, the caller is responsible for type safety.
+
+/* Write a character CHAR. */
+#define _WRITE_CHAR(WRITER, CHAR) \
+    _PyUnicodeWriter_WriteChar((_PyUnicodeWriter *)(WRITER), (CHAR))
+
+/* Write an ASCII string STRING of given length LENGTH. */
+#define _WRITE_ASCII(WRITER, STRING, LENGTH)                        \
+    _PyUnicodeWriter_WriteASCIIString((_PyUnicodeWriter *)(WRITER), \
+                                      (STRING), (LENGTH))
+/* Write the string STRING. */
+#define _WRITE_STRING(WRITER, STRING) \
+    _PyUnicodeWriter_WriteStr((_PyUnicodeWriter *)(WRITER), (STRING))
+
+/* Write the substring STRING[START:STOP]. */
+#define _WRITE_BLOCK(WRITER, STRING, START, STOP)                   \
+    _PyUnicodeWriter_WriteSubstring((_PyUnicodeWriter *)(WRITER),   \
+                                    (STRING), (START), (STOP))
+
+// ----------------------------------------------------------------------------
+
+/* Write a character CHAR or execute the ON_ERROR statements if it fails. */
+#define WRITE_CHAR_OR(WRITER, CHAR, ON_ERROR)       \
+    do {                                            \
+        if (_WRITE_CHAR((WRITER), (CHAR)) < 0) {    \
+            ON_ERROR;                               \
+        }                                           \
+    } while (0)
+
+/*
+ * Write an ASCII string STRING of given length LENGTH,
+ * or execute the ON_ERROR statements if it fails.
+ */
+#define WRITE_ASCII_OR(WRITER, ASCII, LENGTH, ON_ERROR)         \
+    do {                                                        \
+        if (_WRITE_ASCII((WRITER), (ASCII), (LENGTH)) < 0) {    \
+            ON_ERROR;                                           \
+        }                                                       \
+    } while (0)
+
+/* Write the string STRING or execute the ON_ERROR statements if it fails. */
+#define WRITE_STRING_OR(WRITER, STRING, ON_ERROR)       \
+    do {                                                \
+        if (_WRITE_STRING((WRITER), (STRING)) < 0) {    \
+            ON_ERROR;                                   \
+        }                                               \
+    } while (0)
+
+/*
+ * Write the substring STRING[START:STOP] if START < STOP,
+ * or execute the ON_ERROR statements if it fails.
+ */
+#define WRITE_BLOCK_OR(WRITER, STRING, START, STOP, ON_ERROR)           \
+    do {                                                                \
+        /* intermediate variables to allow in-place operations */       \
+        Py_ssize_t _i = (START), _j = (STOP);                           \
+        if (_i < _j && _WRITE_BLOCK((WRITER), (STRING), _i, _j) < 0) {  \
+            ON_ERROR;                                                   \
+        }                                                               \
+    } while (0)
+
+// ----------------------------------------------------------------------------
+
+// Macros which execute "goto abort" if an error occurs.
+
+#define WRITE_CHAR_OR_ABORT(WRITER, CHAR) \
+    WRITE_CHAR_OR((WRITER), (CHAR), goto abort)
+#define WRITE_ASCII_OR_ABORT(WRITER, STRING, LENGTH) \
+    WRITE_ASCII_OR((WRITER), (STRING), (LENGTH), goto abort)
+#define WRITE_STRING_OR_ABORT(WRITER, STRING) \
+    WRITE_STRING_OR((WRITER), (STRING), goto abort)
+#define WRITE_BLOCK_OR_ABORT(WRITER, STRING, START, STOP) \
+    WRITE_BLOCK_OR((WRITER), (STRING), (START), (STOP), goto abort)
+
+// ----------------------------------------------------------------------------
+
+/* Replace backslashes in STRING by escaped backslashes. */
+#define BACKSLASH_REPLACE(STATE, STRING)    \
+    PyObject_CallMethodObjArgs(             \
+        (STRING),                           \
+        &_Py_ID(replace),                   \
+        (STATE)->backslash_str,             \
+        (STATE)->backslash_esc_str,         \
+        NULL                                \
+    )
+
+/* Replace hyphens in STRING by escaped hyphens. */
+#define HYPHEN_REPLACE(STATE, STRING)       \
+    PyObject_CallMethodObjArgs(             \
+        (STRING),                           \
+        &_Py_ID(replace),                   \
+        (STATE)->hyphen_str,                \
+        (STATE)->hyphen_esc_str,            \
+        NULL                                \
+    )
+
+/* Escape set operations in STRING using re.sub(). */
+#define SETOPS_REPLACE(STATE, STRING, RE_SUB_FUNC)  \
+    PyObject_CallFunctionObjArgs(                   \
+        (RE_SUB_FUNC),                              \
+        (STATE)->setops_str,                        \
+        (STATE)->setops_repl_str,                   \
+        (STRING),                                   \
+        NULL                                        \
+    )
+
+#endif // _FNMATCH_MACROS_H
diff --git a/Modules/_fnmatch/matcher.c b/Modules/_fnmatch/matcher.c
deleted file mode 100644
index 22fdc41d719b59..00000000000000
--- a/Modules/_fnmatch/matcher.c
+++ /dev/null
@@ -1,85 +0,0 @@
-/*
- * Provide the implementation of the high-level matcher-based functions.
- */
-
-#include "_fnmatchmodule.h"
-
-inline int
-_Py_fnmatch_match(PyObject *matcher, PyObject *name)
-{
-    // If 'name' is of incorrect type, it will be detected when calling
-    // the matcher function (we emulate 're.compile(...).match(name)').
-    PyObject *match = PyObject_CallOneArg(matcher, name);
-    if (match == NULL) {
-        return -1;
-    }
-    int matching = Py_IsNone(match) ? 0 : 1;
-    Py_DECREF(match);
-    return matching;
-}
-
-PyObject *
-_Py_fnmatch_filter(PyObject *matcher, PyObject *names)
-{
-    PyObject *iter = PyObject_GetIter(names);
-    if (iter == NULL) {
-        return NULL;
-    }
-    PyObject *res = PyList_New(0);
-    if (res == NULL) {
-        Py_DECREF(iter);
-        return NULL;
-    }
-    PyObject *name = NULL;
-    while ((name = PyIter_Next(iter))) {
-        int matching = _Py_fnmatch_match(matcher, name);
-        if (matching < 0 || (matching == 1 && PyList_Append(res, name) < 0)) {
-            goto abort;
-        }
-        Py_DECREF(name);
-    }
-    Py_DECREF(iter);
-    return res;
-abort:
-    Py_DECREF(name);
-    Py_DECREF(iter);
-    Py_DECREF(res);
-    return NULL;
-}
-
-PyObject *
-_Py_fnmatch_filter_normalized(PyObject *matcher,
-                              PyObject *names,
-                              PyObject *normcase)
-{
-    PyObject *iter = PyObject_GetIter(names);
-    if (iter == NULL) {
-        return NULL;
-    }
-    PyObject *res = PyList_New(0);
-    if (res == NULL) {
-        Py_DECREF(iter);
-        return NULL;
-    }
-    PyObject *name = NULL;
-    while ((name = PyIter_Next(iter))) {
-        PyObject *normalized = PyObject_CallOneArg(normcase, name);
-        if (normalized == NULL) {
-            goto abort;
-        }
-        int matching = _Py_fnmatch_match(matcher, normalized);
-        Py_DECREF(normalized);
-        // add the non-normalized name if its normalization matches
-        if (matching < 0 || (matching == 1 && PyList_Append(res, name) < 0)) {
-            goto abort;
-        }
-        Py_DECREF(name);
-    }
-    Py_DECREF(iter);
-    return res;
-abort:
-    Py_DECREF(name);
-    Py_DECREF(iter);
-    Py_DECREF(res);
-    return NULL;
-}
diff --git a/Modules/_fnmatch/translate.c b/Modules/_fnmatch/translate.c
index 47fd72a2de69f2..10552434a7d616 100644
--- a/Modules/_fnmatch/translate.c
+++ b/Modules/_fnmatch/translate.c
@@ -3,97 +3,14 @@
  * to RE patterns.
  */
 
-#include "_fnmatchmodule.h" // for get_fnmatchmodulestate_state()
+#ifndef Py_BUILD_CORE_BUILTIN
+#  define Py_BUILD_CORE_MODULE 1
+#endif
 
-#include "pycore_call.h"
+#include "macros.h"
+#include "util.h"           // for get_fnmatchmodule_state()
 
-// ==== Macro definitions =====================================================
-
-/* Execute the ON_ERROR statements if "CALL < 0". */
-#define _INTERNAL_CALL_OR_FAIL(CALL, ON_ERROR)  \
-    do {                                        \
-        if ((CALL) < 0) {                       \
-            ON_ERROR;                           \
-        }                                       \
-    } while (0)
-
-// The following _WRITE_* and _WRITE_*_OR macros do NOT check their inputs
-// since they directly delegate to the _PyUnicodeWriter_Write* underlying
-// function. In particular, the caller is responsible for type safety.
-
-/* write a character CHAR */
-#define _WRITE_CHAR(WRITER, CHAR) \
-    _PyUnicodeWriter_WriteChar((_PyUnicodeWriter *)(WRITER), (CHAR))
-/* write a character CHAR or execute the ON_ERROR statements if it fails */
-#define _WRITE_CHAR_OR(WRITER, CHAR, ON_ERROR) \
-    _INTERNAL_CALL_OR_FAIL(_WRITE_CHAR((WRITER), (CHAR)), ON_ERROR)
-
-/* write an ASCII string STRING of given length LENGTH */
-#define _WRITE_ASCII(WRITER, ASCII, LENGTH)                         \
-    _PyUnicodeWriter_WriteASCIIString((_PyUnicodeWriter *)(WRITER), \
-                                      (ASCII), (LENGTH))
-/*
- * Write an ASCII string STRING of given length LENGTH,
- * or execute the ON_ERROR statements if it fails.
- */
-#define _WRITE_ASCII_OR(WRITER, ASCII, LENGTH, ON_ERROR) \
-    _INTERNAL_CALL_OR_FAIL(_WRITE_ASCII((WRITER), (ASCII), (LENGTH)), ON_ERROR)
-
-/* write the string STRING */
-#define _WRITE_STRING(WRITER, STRING) \
-    _PyUnicodeWriter_WriteStr((_PyUnicodeWriter *)(WRITER), (STRING))
-/* write the string STRING or execute the ON_ERROR statements if it fails */
-#define _WRITE_STRING_OR(WRITER, STRING, ON_ERROR) \
-    _INTERNAL_CALL_OR_FAIL(_WRITE_STRING((WRITER), (STRING)), ON_ERROR)
-
-/* write the substring STRING[START:STOP] */
-#define _WRITE_BLOCK(WRITER, STRING, START, STOP)                   \
-    _PyUnicodeWriter_WriteSubstring((_PyUnicodeWriter *)(WRITER),   \
-                                    (STRING), (START), (STOP))
-/*
- * Write the substring STRING[START:STOP] if START < STOP,
- * or execute the ON_ERROR statements if it fails.
- */
-#define _WRITE_BLOCK_OR(WRITER, STRING, START, STOP, ON_ERROR)          \
-    do {                                                                \
-        /* intermediate variables to allow in-place operations */       \
-        Py_ssize_t _i = (START), _j = (STOP);                           \
-        if (_i < _j && _WRITE_BLOCK((WRITER), (STRING), _i, _j) < 0) {  \
-            ON_ERROR;                                                   \
-        }                                                               \
-    } while (0)
-
-// ==== Inline helpers ========================================================
-
-/* replace backslashes in STRING by escaped backslashes */
-#define BACKSLASH_REPLACE(STATE, STRING)    \
-    PyObject_CallMethodObjArgs(             \
-        (STRING),                           \
-        &_Py_ID(replace),                   \
-        (STATE)->backslash_str,             \
-        (STATE)->backslash_esc_str,         \
-        NULL                                \
-    )
-
-/* replace hyphens in STRING by escaped hyphens */
-#define HYPHEN_REPLACE(STATE, STRING)       \
-    PyObject_CallMethodObjArgs(             \
-        (STRING),                           \
-        &_Py_ID(replace),                   \
-        (STATE)->hyphen_str,                \
-        (STATE)->hyphen_esc_str,            \
-        NULL                                \
-    )
-
-/* escape set operations in STRING using re.sub() */
-#define SETOPS_REPLACE(STATE, STRING, RE_SUB_FUNC)  \
-    PyObject_CallFunctionObjArgs(                   \
-        (RE_SUB_FUNC),                              \
-        (STATE)->setops_str,                        \
-        (STATE)->setops_repl_str,                   \
-        (STRING),                                   \
-        NULL                                        \
-    )
+#include "pycore_runtime.h" // for _Py_ID()
 
 // ==== Helper declarations ===================================================
 
@@ -154,11 +71,11 @@ _Py_fnmatch_translate(PyObject *module, PyObject *pattern)
     const Py_ssize_t maxind = PyUnicode_GET_LENGTH(pattern);
 
     // We would write less data if there are successive '*',
-    // which should not be the case in general. Otherwise,
-    // we write >= n characters since escaping them always
-    // add more characters.
+    // which usually happens once or twice in the pattern.
+    // Otherwise, we write >= maxind characters since escaping
+    // them always adds more characters.
     //
-    // Note that only b'()[]{}?*+-|^$\\.&~# \t\n\r\v\f' need to
+    // Note that only '()[]{}?*+-|^$\\.&~# \t\n\r\v\f' need to
     // be escaped when translated to RE patterns and '*' and '?'
     // are already handled without being escaped.
     //
@@ -168,8 +85,6 @@ _Py_fnmatch_translate(PyObject *module, PyObject *pattern)
     // and there is a sparse number of them. Therefore, we only
     // estimate the number of characters to be written to be the
     // same as the number of characters in the pattern.
-    //
-    // TODO(picnixz): should we limit the estimation?
     PyUnicodeWriter *writer = PyUnicodeWriter_Create(maxind);
     if (writer == NULL) {
         return NULL;
@@ -197,11 +112,12 @@ _Py_fnmatch_translate(PyObject *module, PyObject *pattern)
     CACHE_ATTRIBUTE(pattern_str_find_meth, pattern, &_Py_ID(find));
 #undef CACHE_ATTRIBUTE
 
-    const int _unicode_kind = PyUnicode_KIND(pattern);
-    const void *const _unicode_data = PyUnicode_DATA(pattern);
+    const int pattern_kind = PyUnicode_KIND(pattern);
+    const void *const pattern_data = PyUnicode_DATA(pattern);
     // ---- def local macros --------------------------------------------------
-#define READ_CHAR(IND)      PyUnicode_READ(_unicode_kind, _unicode_data, (IND))
-#define WRITE_CHAR(CHAR)    _WRITE_CHAR_OR(writer, (CHAR), goto abort)
+#define READ_CHAR(IND)          PyUnicode_READ(pattern_kind, pattern_data, IND)
+#define WRITE_CHAR(CHAR)        WRITE_CHAR_OR_ABORT(writer, CHAR)
+#define WRITE_ASCII(STR, LEN)   WRITE_ASCII_OR_ABORT(writer, STR, LEN)
     /* advance IND if the character is CHAR */
 #define ADVANCE_IF_CHAR_IS(CHAR, IND, MAXIND)               \
     do {                                                    \
@@ -219,6 +135,7 @@ _Py_fnmatch_translate(PyObject *module, PyObject *pattern)
                 WRITE_CHAR('*');
                 // skip duplicated '*'
                 for (; i < maxind && READ_CHAR(i) == '*'; ++i);
+                // store the position of the wildcard
                 PyObject *wildcard_index = PyLong_FromSsize_t(written++);
                 if (wildcard_index == NULL) {
                     goto abort;
@@ -244,9 +161,9 @@ _Py_fnmatch_translate(PyObject *module, PyObject *pattern)
                 ADVANCE_IF_CHAR_IS(']', j, maxind);             // [!] or []
                 for (; j < maxind && READ_CHAR(j) != ']'; ++j); // locate ']'
                 if (j >= maxind) {
-                    _WRITE_ASCII_OR(writer, "\\[", 2, goto abort);
-                    written += 2; // we just wrote 2 characters
-                    break;  // early break for clarity
+                    WRITE_ASCII("\\[", 2);
+                    written += 2;   // we just wrote 2 characters
+                    break;          // explicit early break for clarity
                 }
                 else {
                     assert(READ_CHAR(j) == ']');
@@ -282,7 +199,7 @@ _Py_fnmatch_translate(PyObject *module, PyObject *pattern)
                     }
                     written += expr_len;
                     i = j + 1;  // jump to the character after ']'
-                    break;      // early break for clarity
+                    break;      // explicit early break for clarity
                 }
             }
             default: {
@@ -296,6 +213,8 @@ _Py_fnmatch_translate(PyObject *module, PyObject *pattern)
                     goto abort;
                 }
                 Py_ssize_t escaped_len = PyUnicode_GET_LENGTH(escaped);
+                // Do NOT use WRITE_STRING_OR_ABORT() since 'escaped'
+                // must be first decref'ed in case of an error.
                 int rc = _WRITE_STRING(writer, escaped);
                 Py_DECREF(escaped);
                 if (rc < 0) {
@@ -307,8 +226,9 @@ _Py_fnmatch_translate(PyObject *module, PyObject *pattern)
         }
     }
 #undef ADVANCE_IF_CHAR_IS
+#undef WRITE_ASCII
 #undef WRITE_CHAR
-#undef READ
+#undef READ_CHAR
     Py_DECREF(pattern_str_find_meth);
     Py_DECREF(re_sub_func);
     Py_DECREF(re_escape_func);
@@ -487,17 +407,17 @@ simplify_expression(PyObject *chunks)
                 assert(c2len > 1);
                 PyUnicodeWriter *writer = PyUnicodeWriter_Create(olen);
                 if (writer == NULL) {
-                    return -1;
+                    goto abort;
                 }
                 // all but the last character in the first chunk
                 if (_WRITE_BLOCK(writer, c1, 0, c1len - 1) < 0) {
                     PyUnicodeWriter_Discard(writer);
-                    return -1;
+                    goto abort;
                 }
                 // all but the first character in the second chunk
                 if (_WRITE_BLOCK(writer, c2, 1, c2len) < 0) {
                     PyUnicodeWriter_Discard(writer);
-                    return -1;
+                    goto abort;
                 }
                 str = PyUnicodeWriter_Finish(writer);
             }
@@ -506,14 +426,16 @@ simplify_expression(PyObject *chunks)
             // is an issue while setting the item.
             if (str == NULL || PyList_SetItem(chunks, k - 1, str) < 0) {
                 Py_XDECREF(str);
-                return -1;
+                goto abort;
             }
             if (PySequence_DelItem(chunks, k) < 0) {
-                return -1;
+                goto abort;
             }
         }
     }
     return 0;
+abort:
+    return -1;
 }
 
 /*
@@ -530,7 +452,7 @@ escape_expression(fnmatchmodule_state *state, PyObject *chunks)
         assert(s0 != NULL);
         PyObject *s1 = BACKSLASH_REPLACE(state, s0);
         if (s1 == NULL) {
-            return -1;
+            goto abort;
         }
         PyObject *s2 = HYPHEN_REPLACE(state, s1);
         Py_DECREF(s1);
@@ -539,10 +461,12 @@ escape_expression(fnmatchmodule_state *state, PyObject *chunks)
         // is an issue while setting the item.
         if (s2 == NULL || PyList_SetItem(chunks, c, s2) < 0) {
             Py_XDECREF(s2);
-            return -1;
+            goto abort;
         }
     }
     return 0;
+abort:
+    return -1;
 }
 
 static PyObject *
@@ -575,13 +499,15 @@ static Py_ssize_t
 write_expression(PyUnicodeWriter *writer, PyObject *expression)
 {
     // ---- def local macros --------------------------------------------------
-#define WRITE_CHAR(CHAR)    _WRITE_CHAR_OR(writer, (CHAR), return -1)
-#define WRITE_STRING(STR)   _WRITE_STRING_OR(writer, (STR), return -1)
+#define WRITE_CHAR(CHAR)        WRITE_CHAR_OR_ABORT(writer, CHAR)
+#define WRITE_ASCII(STR, LEN)   WRITE_ASCII_OR_ABORT(writer, STR, LEN)
+#define WRITE_STRING(STR)       WRITE_STRING_OR_ABORT(writer, STR)
+#define WRITE_BLOCK(STR, I, J)  WRITE_BLOCK_OR_ABORT(writer, STR, I, J)
     // ------------------------------------------------------------------------
     Py_ssize_t grouplen = PyUnicode_GET_LENGTH(expression);
     if (grouplen == 0) {
         // empty range: never match
-        _WRITE_ASCII_OR(writer, "(?!)", 4, return -1);
+        WRITE_ASCII("(?!)", 4);
         return 4;
     }
     Py_UCS4 token = PyUnicode_READ_CHAR(expression, 0);
@@ -595,7 +521,7 @@ write_expression(PyUnicodeWriter *writer, PyObject *expression)
     switch (token) {
         case '!': {
             WRITE_CHAR('^'); // replace '!' by '^'
-            _WRITE_BLOCK_OR(writer, expression, 1, grouplen, return -1);
+            WRITE_BLOCK(expression, 1, grouplen);
             break;
         }
         case '^':
@@ -612,105 +538,90 @@ write_expression(PyUnicodeWriter *writer, PyObject *expression)
     }
     WRITE_CHAR(']');
     return grouplen + extra;
+abort:
+    return -1;
+#undef WRITE_BLOCK
 #undef WRITE_STRING
+#undef WRITE_ASCII
 #undef WRITE_CHAR
 }
 
 static PyObject *
 process_wildcards(PyObject *pattern, PyObject *indices)
 {
-    const Py_ssize_t M = PyList_GET_SIZE(indices);
-    if (M == 0) {
-        // "(?s:" + pattern + ")\Z"
-        return PyUnicode_FromFormat("(?s:%U)\\Z", pattern);
-    }
-    // Special cases: indices[0] == 0 or indices[-1] + 1 == n
-    //
-    // If indices[0] == 0       write (?>.*?abcdef) instead of abcdef
-    // If indices[-1] == n - 1  write '.*' instead of empty string
-    Py_ssize_t i = 0, N = PyUnicode_GET_LENGTH(pattern);
-    // get the first position of '*'
-    Py_ssize_t j = PyLong_AsSsize_t(PyList_GET_ITEM(indices, 0));
-    if (j < 0) {
-        return NULL;
-    }
-    // By construction, we have
-    //
-    //      pattern = [PREFIX] [[(* INNER) ... (* INNER)] (* OUTER)] [*]
-    //
-    // where [...] is an optional group and () is required to exist.
+    const Py_ssize_t n = PyUnicode_GET_LENGTH(pattern);
+    const Py_ssize_t m = PyList_GET_SIZE(indices);
+    // Let m = len(indices) and n = len(pattern). By construction,
     //
-    // Case 1:  pattern ends with a wildcard:
+    //      pattern = [PREFIX] [[(* INNER) ... (* INNER)] (*) [OUTER]]
     //
-    //      - Write the PREFIX.
-    //      - Write any group (* GROUP) as "(?>.*?" + GROUP + ")".
-    //      - Write a final ".*" due to the final wildcard.
-    //      - Number of characters to write: N + 6 * (M - 1) + 1, where
-    //        the +1 is because the '*' in the final ".*" is counted by N.
+    // where [...] is an optional group and (...) is a required group.
     //
-    // Case 2:  pattern does not end with a wildcard:
+    // The algorithm is as follows:
     //
-    //      - Write the PREFIX.
-    //      - Write an INNER group (* INNER) as "(?>.*?" + INNER + ")".
-    //      - Write the OUTER group (* OUTER) as ".*" + OUTER.
-    //      - Number of characters to write: N + 6 * (M - 1) + 1, where
-    //        the +1 is because the '*' in ".*" + OUTER is counted by N.
+    //  - Write "(?s:".
+    //  - Write the optional PREFIX.
+    //  - Write an INNER group (* INNER) as "(?>.*?" + INNER + ")".
+    //  - Write ".*" instead of the last wildcard.
+    //  - Write an optional OUTER string normally.
+    //  - Write ")\\Z".
     //
-    // In both cases, we write N + 6(M - 1) + 1 characters. Since the final
-    // result is surrounded by "(?s:" and ")\\Z", we have:
-    //
-    //      Number of written characters: N + 6(M - 1) + 1 + 7 = N + 6M + 2.
-    Py_ssize_t output_size = 6 * M + N + 2;
-    PyUnicodeWriter *writer = PyUnicodeWriter_Create(output_size);
+    // If m = 0, the writer needs n + 7 characters. Otherwise, it requires
+    // exactly n + 6(m-1) + 1 + 7 = n + 6m + 2 characters, where the "+1"
+    // is due to the fact that writing ".*" instead of "*" only increases
+    // the total length of the pattern by 1 (and not by 2).
+    const Py_ssize_t reslen = m == 0 ? n + 7 : n + 6 * m + 2;
+    PyUnicodeWriter *writer = PyUnicodeWriter_Create(reslen);
     if (writer == NULL) {
         return NULL;
     }
-    // write everything before the first wildcard normally
-    _WRITE_BLOCK_OR(writer, pattern, i, j, goto abort);
-    i = j + 1; // jump after the '*'
-    for (Py_ssize_t k = 1; k < M; ++k) {
-        // process all but the last wildcard
-        PyObject *ind = PyList_GET_ITEM(indices, k);
-        assert(ind != NULL);
-        j = PyLong_AsSsize_t(ind);
-        if (j < 0) {
-            goto abort;
-        }
-        assert(i < j);
-        // write the atomic RE group '(?>.*?' + BLOCK + ')'
-        _WRITE_ASCII_OR(writer, "(?>.*?", 6, goto abort);
-        _WRITE_BLOCK_OR(writer, pattern, i, j, goto abort);
-        _WRITE_CHAR_OR(writer, ')', goto abort);
-        i = j + 1;
+    // ---- def local macros --------------------------------------------------
+#define WRITE_CHAR(CHAR)        WRITE_CHAR_OR_ABORT(writer, CHAR)
+#define WRITE_ASCII(STR, LEN)   WRITE_ASCII_OR_ABORT(writer, STR, LEN)
+#define WRITE_STRING(STR)       WRITE_STRING_OR_ABORT(writer, STR)
+#define WRITE_BLOCK(STR, I, J)  WRITE_BLOCK_OR_ABORT(writer, STR, I, J)
+#define LOAD_WILDCARD_INDEX(VAR, IND)                           \
+    do {                                                        \
+        VAR = PyLong_AsSsize_t(PyList_GET_ITEM(indices, IND));  \
+        if (VAR < 0) {                                          \
+            goto abort;                                         \
+        }                                                       \
+    } while (0)
+    // ------------------------------------------------------------------------
+    WRITE_ASCII("(?s:", 4);
+    if (m == 0) {
+        WRITE_STRING(pattern);
     }
-    // handle the remaining wildcard
-    _WRITE_ASCII_OR(writer, ".*", 2, goto abort);
-    // write the remaining substring (if non-empty)
-    _WRITE_BLOCK_OR(writer, pattern, i, N, goto abort);
-    PyObject *processed = PyUnicodeWriter_Finish(writer);
-    if (processed == NULL) {
-        return NULL;
+    else {
+        Py_ssize_t i = 0, j = -1;
+        // process the optional PREFIX
+        LOAD_WILDCARD_INDEX(j, 0);
+        WRITE_BLOCK(pattern, i, j);
+        i = j + 1;
+        for (Py_ssize_t k = 1; k < m; ++k) {
+            // process the (* INNER) groups
+            LOAD_WILDCARD_INDEX(j, k);
+            assert(i < j);
+            // write the atomic RE group '(?>.*?' + INNER + ')'
+            WRITE_ASCII("(?>.*?", 6);
+            WRITE_BLOCK(pattern, i, j);
+            WRITE_CHAR(')');
+            i = j + 1;
+        }
+        // handle the (*) [OUTER] part
+        WRITE_ASCII(".*", 2);
+        WRITE_BLOCK(pattern, i, n);
     }
-    // "(?s:" + processed + ")\\Z"
-    PyObject *res = PyUnicode_FromFormat("(?s:%U)\\Z", processed);
-    assert(PyUnicode_GET_LENGTH(res) == output_size);
-    Py_DECREF(processed);
+    WRITE_ASCII(")\\Z", 3);
+    PyObject *res = PyUnicodeWriter_Finish(writer);
+    assert(res == NULL || PyUnicode_GET_LENGTH(res) == reslen);
     return res;
 abort:
     PyUnicodeWriter_Discard(writer);
     return NULL;
+#undef LOAD_WILDCARD_INDEX
+#undef WRITE_BLOCK
+#undef WRITE_STRING
+#undef WRITE_ASCII
+#undef WRITE_CHAR
 }
-
-#undef SETOPS_REPLACE
-#undef HYPHEN_REPLACE
-#undef BACKSLASH_REPLACE
-
-#undef _WRITE_BLOCK_OR
-#undef _WRITE_BLOCK
-#undef _WRITE_STRING_OR
-#undef _WRITE_STRING
-#undef _WRITE_ASCII_OR
-#undef _WRITE_ASCII
-#undef _WRITE_CHAR_OR
-#undef _WRITE_CHAR
-#undef _INTERNAL_CALL_OR_FAIL
diff --git a/Modules/_fnmatch/_fnmatchmodule.h b/Modules/_fnmatch/util.h
similarity index 60%
rename from Modules/_fnmatch/_fnmatchmodule.h
rename to Modules/_fnmatch/util.h
index ae1c01c90f8d26..371930f5c17262 100644
--- a/Modules/_fnmatch/_fnmatchmodule.h
+++ b/Modules/_fnmatch/util.h
@@ -1,13 +1,9 @@
 /*
- * C accelerator for the 'fnmatch' module.
+ * This file contains helper prototypes and structures.
  */
 
-#ifndef _FNMATCHMODULE_H
-#define _FNMATCHMODULE_H
-
-#ifndef Py_BUILD_CORE_BUILTIN
-#  define Py_BUILD_CORE_MODULE 1
-#endif
+#ifndef _FNMATCH_UTIL_H
+#define _FNMATCH_UTIL_H
 
 #include "Python.h"
 
@@ -40,23 +36,6 @@ get_fnmatchmodule_state(PyObject *module)
 
 // ==== Helper prototypes =====================================================
 
-/*
- * Test whether a name matches a compiled RE pattern.
- *
- * Parameters
- *
- *  matcher     A reference to the 'match()' method of a compiled pattern.
- *  string      The string to match (str or bytes object).
- *
- * Returns
- *
- *  -1  if the call 'matcher(string)' failed (e.g., invalid type),
- *   0  if the 'string' does NOT match the pattern,
- *   1  if the 'string' matches the pattern.
- */
-extern int
-_Py_fnmatch_match(PyObject *matcher, PyObject *string);
-
 /*
  * Returns a list of matched names, or NULL if an error occurred.
  *
@@ -64,20 +43,14 @@ _Py_fnmatch_match(PyObject *matcher, PyObject *string);
  *
  *  matcher     A reference to the 'match()' method of a compiled pattern.
  *  names       An iterable of strings (str or bytes objects) to match.
- */
-extern PyObject *
-_Py_fnmatch_filter(PyObject *matcher, PyObject *names);
-
-/*
- * Similar to _Py_fnmatch_filter() but matches os.path.normcase(name)
- * instead. The returned values are however a sub-sequence of 'names'.
+ *  normalizer  Optional normalization function.
+ *
+ *  This is equivalent to:
  *
- * The 'normcase' argument is a callable implementing os.path.normcase().
+ *      [name for name in names if matcher(normalizer(name))]
  */
 extern PyObject *
-_Py_fnmatch_filter_normalized(PyObject *matcher,
-                              PyObject *names,
-                              PyObject *normcase);
+_Py_fnmatch_filter(PyObject *matcher, PyObject *names, PyObject *normalizer);
 
 /*
  * C accelerator for translating UNIX shell patterns into RE patterns.
@@ -90,4 +63,4 @@ _Py_fnmatch_filter_normalized(PyObject *matcher,
 extern PyObject *
 _Py_fnmatch_translate(PyObject *module, PyObject *pattern);
 
-#endif // _FNMATCHMODULE_H
+#endif // _FNMATCH_UTIL_H

From 0622be6118734ccb89d8c175117f2169f1241ac3 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?=
 <10796600+picnixz@users.noreply.github.com>
Date: Sat, 20 Jul 2024 12:07:15 +0200
Subject: [PATCH 69/97] update dependencies

---
 Makefile.pre.in                    | 2 +-
 Modules/Setup.stdlib.in            | 2 +-
 PCbuild/pythoncore.vcxproj         | 4 ++--
 PCbuild/pythoncore.vcxproj.filters | 7 +++++--
 4 files changed, 9 insertions(+), 6 deletions(-)

diff --git a/Makefile.pre.in b/Makefile.pre.in
index 07cbd0a7567233..8bc19b0410fdfe 100644
--- a/Makefile.pre.in
+++ b/Makefile.pre.in
@@ -3117,7 +3117,7 @@ MODULE__CTYPES_TEST_DEPS=$(srcdir)/Modules/_ctypes/_ctypes_test_generated.c.h
 MODULE__CTYPES_MALLOC_CLOSURE=@MODULE__CTYPES_MALLOC_CLOSURE@
 MODULE__DECIMAL_DEPS=$(srcdir)/Modules/_decimal/docstrings.h @LIBMPDEC_INTERNAL@
 MODULE__ELEMENTTREE_DEPS=$(srcdir)/Modules/pyexpat.c @LIBEXPAT_INTERNAL@
-MODULE__FNMATCH_DEPS=$(srcdir)/Modules/_fnmatch/_fnmatchmodule.h
+MODULE__FNMATCH_DEPS=$(srcdir)/Modules/_fnmatch/macros.h $(srcdir)/Modules/_fnmatch/util.h
 MODULE__HASHLIB_DEPS=$(srcdir)/Modules/hashlib.h
 MODULE__IO_DEPS=$(srcdir)/Modules/_io/_iomodule.h
 MODULE__MD5_DEPS=$(srcdir)/Modules/hashlib.h $(LIBHACL_HEADERS) Modules/_hacl/Hacl_Hash_MD5.h Modules/_hacl/Hacl_Hash_MD5.c
diff --git a/Modules/Setup.stdlib.in b/Modules/Setup.stdlib.in
index e689d18b70b035..f33af67aa26499 100644
--- a/Modules/Setup.stdlib.in
+++ b/Modules/Setup.stdlib.in
@@ -33,7 +33,7 @@
 @MODULE__BISECT_TRUE@_bisect _bisectmodule.c
 @MODULE__CONTEXTVARS_TRUE@_contextvars _contextvarsmodule.c
 @MODULE__CSV_TRUE@_csv _csv.c
-@MODULE__FNMATCH_TRUE@_fnmatch _fnmatch/_fnmatchmodule.c _fnmatch/matcher.c _fnmatch/translate.c
+@MODULE__FNMATCH_TRUE@_fnmatch _fnmatch/_fnmatchmodule.c _fnmatch/filter.c _fnmatch/translate.c
 @MODULE__HEAPQ_TRUE@_heapq _heapqmodule.c
 @MODULE__JSON_TRUE@_json _json.c
 @MODULE__LSPROF_TRUE@_lsprof _lsprof.c rotatingtree.c
diff --git a/PCbuild/pythoncore.vcxproj b/PCbuild/pythoncore.vcxproj
index ea52c20cc66db1..0d83ac770348b1 100644
--- a/PCbuild/pythoncore.vcxproj
+++ b/PCbuild/pythoncore.vcxproj
@@ -368,7 +368,7 @@
     <ClInclude Include="..\Modules\_math.h" />
     <ClInclude Include="..\Modules\hashtable.h" />
     <ClInclude Include="..\Modules\rotatingtree.h" />
-    <ClInclude Include="..\Modules\_fnmatch\_fnmatchmodule.h" />
+    <ClInclude Include="..\Modules\_fnmatch\util.h" />
     <ClInclude Include="..\Modules\_io\_iomodule.h" />
     <ClInclude Include="..\Modules\cjkcodecs\alg_jisx0201.h" />
     <ClInclude Include="..\Modules\cjkcodecs\cjkcodecs.h" />
@@ -475,7 +475,7 @@
     <ClCompile Include="..\Modules\timemodule.c" />
     <ClCompile Include="..\Modules\xxsubtype.c" />
     <ClCompile Include="..\Modules\_fnmatch\_fnmatchmodule.c" />
-    <ClCompile Include="..\Modules\_fnmatch\matcher.c" />
+    <ClCompile Include="..\Modules\_fnmatch\filter.c" />
     <ClCompile Include="..\Modules\_fnmatch\translate.c" />
     <ClCompile Include="..\Modules\_interpretersmodule.c" />
     <ClCompile Include="..\Modules\_interpchannelsmodule.c" />
diff --git a/PCbuild/pythoncore.vcxproj.filters b/PCbuild/pythoncore.vcxproj.filters
index 912407b56ed783..d68d8df7aa3ba7 100644
--- a/PCbuild/pythoncore.vcxproj.filters
+++ b/PCbuild/pythoncore.vcxproj.filters
@@ -252,7 +252,10 @@
     <ClInclude Include="..\Modules\rotatingtree.h">
       <Filter>Modules</Filter>
     </ClInclude>
-    <ClInclude Include="..\Modules\_fnmatch\_fnmatchmodule.h">
+    <ClInclude Include="..\Modules\_fnmatch\macros.h">
+      <Filter>Modules\_fnmatch</Filter>
+    </ClInclude>
+    <ClInclude Include="..\Modules\_fnmatch\util.h">
       <Filter>Modules\_fnmatch</Filter>
     </ClInclude>
     <ClInclude Include="..\Modules\_io\_iomodule.h">
@@ -1064,7 +1067,7 @@
     <ClCompile Include="..\Modules\_fnmatch\_fnmatchmodule.c">
       <Filter>Modules\_fnmatch</Filter>
     </ClCompile>
-    <ClCompile Include="..\Modules\_fnmatch\matcher.c">
+    <ClCompile Include="..\Modules\_fnmatch\filter.c">
       <Filter>Modules\_fnmatch</Filter>
     </ClCompile>
     <ClCompile Include="..\Modules\_fnmatch\translate.c">

From 481fae0ecc42a13cc53b95e0106103dba6477cb8 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?=
 <10796600+picnixz@users.noreply.github.com>
Date: Sat, 20 Jul 2024 12:07:20 +0200
Subject: [PATCH 70/97] merge commit

---
 Tools/cases_generator/tier1_generator.py | 2 +-
 Tools/cases_generator/tier2_generator.py | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/Tools/cases_generator/tier1_generator.py b/Tools/cases_generator/tier1_generator.py
index 5dec66e8e0af15..a5a771cbf25392 100644
--- a/Tools/cases_generator/tier1_generator.py
+++ b/Tools/cases_generator/tier1_generator.py
@@ -51,7 +51,7 @@ def declare_variables(inst: Instruction, out: CWriter) -> None:
             for var in reversed(part.stack.inputs):
                 stack.pop(var)
             for var in part.stack.outputs:
-                 stack.push(var)
+                stack.push(var)
         except StackError as ex:
             raise analysis_error(ex.args[0], part.body[0]) from None
     required = set(stack.defined)
diff --git a/Tools/cases_generator/tier2_generator.py b/Tools/cases_generator/tier2_generator.py
index 88ad0fd797f0cc..6e4b92f4c28c6b 100644
--- a/Tools/cases_generator/tier2_generator.py
+++ b/Tools/cases_generator/tier2_generator.py
@@ -53,7 +53,7 @@ def declare_variables(uop: Uop, out: CWriter) -> None:
     for var in reversed(uop.stack.inputs):
         stack.pop(var)
     for var in uop.stack.outputs:
-            stack.push(var)
+        stack.push(var)
     required = set(stack.defined)
     for var in reversed(uop.stack.inputs):
         declare_variable(var, uop, required, out)

From ff60eeb068c1611b38f7cef52444c9c97d85e292 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?=
 <10796600+picnixz@users.noreply.github.com>
Date: Sat, 20 Jul 2024 12:08:12 +0200
Subject: [PATCH 71/97] Revert "merge commit"

This reverts commit 481fae0ecc42a13cc53b95e0106103dba6477cb8.
---
 Tools/cases_generator/tier1_generator.py | 2 +-
 Tools/cases_generator/tier2_generator.py | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/Tools/cases_generator/tier1_generator.py b/Tools/cases_generator/tier1_generator.py
index a5a771cbf25392..5dec66e8e0af15 100644
--- a/Tools/cases_generator/tier1_generator.py
+++ b/Tools/cases_generator/tier1_generator.py
@@ -51,7 +51,7 @@ def declare_variables(inst: Instruction, out: CWriter) -> None:
             for var in reversed(part.stack.inputs):
                 stack.pop(var)
             for var in part.stack.outputs:
-                stack.push(var)
+                 stack.push(var)
         except StackError as ex:
             raise analysis_error(ex.args[0], part.body[0]) from None
     required = set(stack.defined)
diff --git a/Tools/cases_generator/tier2_generator.py b/Tools/cases_generator/tier2_generator.py
index 6e4b92f4c28c6b..88ad0fd797f0cc 100644
--- a/Tools/cases_generator/tier2_generator.py
+++ b/Tools/cases_generator/tier2_generator.py
@@ -53,7 +53,7 @@ def declare_variables(uop: Uop, out: CWriter) -> None:
     for var in reversed(uop.stack.inputs):
         stack.pop(var)
     for var in uop.stack.outputs:
-        stack.push(var)
+            stack.push(var)
     required = set(stack.defined)
     for var in reversed(uop.stack.inputs):
         declare_variable(var, uop, required, out)

From 372758283504bb4db652ccf3675664a807633c49 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?=
 <10796600+picnixz@users.noreply.github.com>
Date: Sat, 20 Jul 2024 12:29:53 +0200
Subject: [PATCH 72/97] update dependencies

---
 PCbuild/pythoncore.vcxproj | 1 +
 1 file changed, 1 insertion(+)

diff --git a/PCbuild/pythoncore.vcxproj b/PCbuild/pythoncore.vcxproj
index 0d83ac770348b1..cef1d0691be9f3 100644
--- a/PCbuild/pythoncore.vcxproj
+++ b/PCbuild/pythoncore.vcxproj
@@ -368,6 +368,7 @@
     <ClInclude Include="..\Modules\_math.h" />
     <ClInclude Include="..\Modules\hashtable.h" />
     <ClInclude Include="..\Modules\rotatingtree.h" />
+    <ClInclude Include="..\Modules\_fnmatch\macros.h" />
     <ClInclude Include="..\Modules\_fnmatch\util.h" />
     <ClInclude Include="..\Modules\_io\_iomodule.h" />
     <ClInclude Include="..\Modules\cjkcodecs\alg_jisx0201.h" />

From c89cf47ab8b71e3219f0303f5029348c33dc6f58 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?=
 <10796600+picnixz@users.noreply.github.com>
Date: Thu, 25 Jul 2024 14:41:01 +0200
Subject: [PATCH 73/97] Fix missing exception handler

---
 Lib/test/test_fnmatch.py  | 10 ++++++++++
 Modules/_fnmatch/filter.c |  3 +++
 2 files changed, 13 insertions(+)

diff --git a/Lib/test/test_fnmatch.py b/Lib/test/test_fnmatch.py
index 9e00054d6ab14c..11b734f266012d 100644
--- a/Lib/test/test_fnmatch.py
+++ b/Lib/test/test_fnmatch.py
@@ -377,6 +377,16 @@ def test_filter(self):
         self.assertEqual(filter([b'Python', b'Ruby', b'Perl', b'Tcl'], b'P*'),
                          [b'Python', b'Perl'])
 
+    def test_filter_iter_errors(self):
+        class BadList:
+            def __iter__(self):
+                yield 'abc'
+                raise ValueError("nope")
+
+        with self.assertRaisesRegex(ValueError, r'^nope$'):
+            self.fnmatch.filter(BadList(), '*')
+
+
     def test_mix_bytes_str(self):
         filter = self.fnmatch.filter
         self.assertRaises(TypeError, filter, ['test'], b'*')
diff --git a/Modules/_fnmatch/filter.c b/Modules/_fnmatch/filter.c
index 5b44f6accfc8df..d3611b7f5f883e 100644
--- a/Modules/_fnmatch/filter.c
+++ b/Modules/_fnmatch/filter.c
@@ -41,6 +41,9 @@ _Py_fnmatch_filter(PyObject *matcher, PyObject *names, PyObject *normalizer)
         Py_DECREF(name);
     }
     Py_DECREF(iter);
+    if (PyErr_Occurred()) {
+        Py_CLEAR(res);
+    }
     return res;
 abort:
     Py_DECREF(name);

From 4fbd06b90da09acf01f5b391240e8c26bc98d156 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?=
 <10796600+picnixz@users.noreply.github.com>
Date: Thu, 25 Jul 2024 14:41:18 +0200
Subject: [PATCH 74/97] cosmetic changes

---
 Modules/_fnmatch/_fnmatchmodule.c |  1 +
 Modules/_fnmatch/macros.h         | 11 ++++++-----
 Modules/_fnmatch/translate.c      |  6 +++---
 3 files changed, 10 insertions(+), 8 deletions(-)

diff --git a/Modules/_fnmatch/_fnmatchmodule.c b/Modules/_fnmatch/_fnmatchmodule.c
index 1f03f050f4c831..a771b406fccc0e 100644
--- a/Modules/_fnmatch/_fnmatchmodule.c
+++ b/Modules/_fnmatch/_fnmatchmodule.c
@@ -179,6 +179,7 @@ fnmatchmodule_exec(PyObject *module)
     INTERN_STRING(st, setops_repl_str, "\\\\\\1");
     return 0;
 }
+
 #undef INTERN_STRING
 #undef IMPORT_MODULE
 
diff --git a/Modules/_fnmatch/macros.h b/Modules/_fnmatch/macros.h
index 04bf4a684a3035..6c0a9ea72b45c0 100644
--- a/Modules/_fnmatch/macros.h
+++ b/Modules/_fnmatch/macros.h
@@ -12,19 +12,20 @@
 // since they directly delegate to the _PyUnicodeWriter_Write* underlying
 // function. In particular, the caller is responsible for type safety.
 
-/* Write a character CHAR. */
+/* Cast WRITER and call _PyUnicodeWriter_WriteChar(). */
 #define _WRITE_CHAR(WRITER, CHAR) \
     _PyUnicodeWriter_WriteChar((_PyUnicodeWriter *)(WRITER), (CHAR))
 
-/* Write an ASCII string STRING of given length LENGTH. */
+/* Cast WRITER and call _PyUnicodeWriter_WriteASCIIString(). */
 #define _WRITE_ASCII(WRITER, STRING, LENGTH)                        \
     _PyUnicodeWriter_WriteASCIIString((_PyUnicodeWriter *)(WRITER), \
                                       (STRING), (LENGTH))
-/* Write the string STRING. */
+
+/* Cast WRITER and call _PyUnicodeWriter_WriteStr(). */
 #define _WRITE_STRING(WRITER, STRING) \
     _PyUnicodeWriter_WriteStr((_PyUnicodeWriter *)(WRITER), (STRING))
 
-/* Write the substring STRING[START:STOP]. */
+/* Cast WRITER and call _PyUnicodeWriter_WriteSubstring(). */
 #define _WRITE_BLOCK(WRITER, STRING, START, STOP)                   \
     _PyUnicodeWriter_WriteSubstring((_PyUnicodeWriter *)(WRITER),   \
                                     (STRING), (START), (STOP))
@@ -59,7 +60,7 @@
     } while (0)
 
 /*
- * Write the substring STRING[START:STOP] if START < STOP,
+ * Write the substring STRING[START:STOP] (no-op if the substring is empty)
  * or execute the ON_ERROR statements if it fails.
  */
 #define WRITE_BLOCK_OR(WRITER, STRING, START, STOP, ON_ERROR)           \
diff --git a/Modules/_fnmatch/translate.c b/Modules/_fnmatch/translate.c
index 10552434a7d616..eec2367934e56c 100644
--- a/Modules/_fnmatch/translate.c
+++ b/Modules/_fnmatch/translate.c
@@ -477,7 +477,7 @@ translate_expression(fnmatchmodule_state *state,
     PyObject *chunks = split_expression(state, pattern, start, stop,
                                         pattern_str_find_meth);
     if (chunks == NULL) {
-        goto abort;
+        return NULL;
     }
     // remove empty ranges
     if (simplify_expression(chunks) < 0) {
@@ -491,7 +491,7 @@ translate_expression(fnmatchmodule_state *state,
     Py_DECREF(chunks);
     return res;
 abort:
-    Py_XDECREF(chunks);
+    Py_DECREF(chunks);
     return NULL;
 }
 
@@ -570,7 +570,7 @@ process_wildcards(PyObject *pattern, PyObject *indices)
     // exactly n + 6(m-1) + 1 + 7 = n + 6m + 2 characters, where the "+1"
     // is due to the fact that writing ".*" instead of "*" only increases
     // the total length of the pattern by 1 (and not by 2).
-    const Py_ssize_t reslen = m == 0 ? n + 7 : n + 6 * m + 2;
+    const Py_ssize_t reslen = m == 0 ? (n + 7) : (n + 6 * m + 2);
     PyUnicodeWriter *writer = PyUnicodeWriter_Create(reslen);
     if (writer == NULL) {
         return NULL;

From 3b348f528df9d6c305808989e8564899d340f115 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?=
 <10796600+picnixz@users.noreply.github.com>
Date: Sun, 28 Jul 2024 12:14:40 +0200
Subject: [PATCH 75/97] update comments

---
 Modules/_fnmatch/util.h | 13 ++++++++++---
 1 file changed, 10 insertions(+), 3 deletions(-)

diff --git a/Modules/_fnmatch/util.h b/Modules/_fnmatch/util.h
index 371930f5c17262..4e5228659af4b4 100644
--- a/Modules/_fnmatch/util.h
+++ b/Modules/_fnmatch/util.h
@@ -7,6 +7,9 @@
 
 #include "Python.h"
 
+typedef struct {
+} translatemodule_state;
+
 typedef struct {
     PyObject *os_module;            // import os
     PyObject *posixpath_module;     // import posixpath
@@ -55,10 +58,14 @@ _Py_fnmatch_filter(PyObject *matcher, PyObject *names, PyObject *normalizer);
 /*
  * C accelerator for translating UNIX shell patterns into RE patterns.
  *
- * The 'pattern' must be a Unicode object (not a bytes) object,
- * and the translated pattern will be a Unicode object as well.
+ * Parameters
+ *
+ *  module          A module with a state given by get_fnmatchmodule_state().
+ *  pattern         A Unicode object to translate.
+ *
+ * Returns
  *
- * Note: this is the C implementation of fnmatch.translate().
+ *  A translated unicode RE pattern.
  */
 extern PyObject *
 _Py_fnmatch_translate(PyObject *module, PyObject *pattern);

From da42703ac99b4f8fb47f165b7c7b124c9536f1a7 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?=
 <10796600+picnixz@users.noreply.github.com>
Date: Sun, 28 Jul 2024 12:14:45 +0200
Subject: [PATCH 76/97] remove some macros

---
 Modules/_fnmatch/translate.c | 79 +++++++++++++-----------------------
 1 file changed, 29 insertions(+), 50 deletions(-)

diff --git a/Modules/_fnmatch/translate.c b/Modules/_fnmatch/translate.c
index eec2367934e56c..eb69aba35964cc 100644
--- a/Modules/_fnmatch/translate.c
+++ b/Modules/_fnmatch/translate.c
@@ -115,9 +115,7 @@ _Py_fnmatch_translate(PyObject *module, PyObject *pattern)
     const int pattern_kind = PyUnicode_KIND(pattern);
     const void *const pattern_data = PyUnicode_DATA(pattern);
     // ---- def local macros --------------------------------------------------
-#define READ_CHAR(IND)          PyUnicode_READ(pattern_kind, pattern_data, IND)
-#define WRITE_CHAR(CHAR)        WRITE_CHAR_OR_ABORT(writer, CHAR)
-#define WRITE_ASCII(STR, LEN)   WRITE_ASCII_OR_ABORT(writer, STR, LEN)
+#define READ_CHAR(IND)  PyUnicode_READ(pattern_kind, pattern_data, IND)
     /* advance IND if the character is CHAR */
 #define ADVANCE_IF_CHAR_IS(CHAR, IND, MAXIND)               \
     do {                                                    \
@@ -132,7 +130,8 @@ _Py_fnmatch_translate(PyObject *module, PyObject *pattern)
         Py_UCS4 chr = READ_CHAR(i++);
         switch (chr) {
             case '*': {
-                WRITE_CHAR('*');
+                // translate wildcard '*' (fnmatch) into optional '.' (regex)
+                WRITE_CHAR_OR_ABORT(writer, '*');
                 // skip duplicated '*'
                 for (; i < maxind && READ_CHAR(i) == '*'; ++i);
                 // store the position of the wildcard
@@ -149,7 +148,7 @@ _Py_fnmatch_translate(PyObject *module, PyObject *pattern)
             }
             case '?': {
                 // translate optional '?' (fnmatch) into optional '.' (regex)
-                WRITE_CHAR('.');
+                WRITE_CHAR_OR_ABORT(writer, '.');
                 ++written; // increase the expected result's length
                 break;
             }
@@ -161,7 +160,7 @@ _Py_fnmatch_translate(PyObject *module, PyObject *pattern)
                 ADVANCE_IF_CHAR_IS(']', j, maxind);             // [!] or []
                 for (; j < maxind && READ_CHAR(j) != ']'; ++j); // locate ']'
                 if (j >= maxind) {
-                    WRITE_ASCII("\\[", 2);
+                    WRITE_ASCII_OR_ABORT(writer, "\\[", 2);
                     written += 2;   // we just wrote 2 characters
                     break;          // explicit early break for clarity
                 }
@@ -226,8 +225,6 @@ _Py_fnmatch_translate(PyObject *module, PyObject *pattern)
         }
     }
 #undef ADVANCE_IF_CHAR_IS
-#undef WRITE_ASCII
-#undef WRITE_CHAR
 #undef READ_CHAR
     Py_DECREF(pattern_str_find_meth);
     Py_DECREF(re_sub_func);
@@ -498,52 +495,42 @@ translate_expression(fnmatchmodule_state *state,
 static Py_ssize_t
 write_expression(PyUnicodeWriter *writer, PyObject *expression)
 {
-    // ---- def local macros --------------------------------------------------
-#define WRITE_CHAR(CHAR)        WRITE_CHAR_OR_ABORT(writer, CHAR)
-#define WRITE_ASCII(STR, LEN)   WRITE_ASCII_OR_ABORT(writer, STR, LEN)
-#define WRITE_STRING(STR)       WRITE_STRING_OR_ABORT(writer, STR)
-#define WRITE_BLOCK(STR, I, J)  WRITE_BLOCK_OR_ABORT(writer, STR, I, J)
-    // ------------------------------------------------------------------------
     Py_ssize_t grouplen = PyUnicode_GET_LENGTH(expression);
     if (grouplen == 0) {
         // empty range: never match
-        WRITE_ASCII("(?!)", 4);
+        WRITE_ASCII_OR_ABORT(writer, "(?!)", 4);
         return 4;
     }
     Py_UCS4 token = PyUnicode_READ_CHAR(expression, 0);
     if (grouplen == 1 && token == '!') {
         // negated empty range: match any character
-        WRITE_CHAR('.');
+        WRITE_CHAR_OR_ABORT(writer, '.');
         return 1;
     }
     Py_ssize_t extra = 2; // '[' and ']'
-    WRITE_CHAR('[');
+    WRITE_CHAR_OR_ABORT(writer, '[');
     switch (token) {
         case '!': {
-            WRITE_CHAR('^'); // replace '!' by '^'
-            WRITE_BLOCK(expression, 1, grouplen);
+            WRITE_CHAR_OR_ABORT(writer, '^'); // replace '!' by '^'
+            WRITE_BLOCK_OR_ABORT(writer, expression, 1, grouplen);
             break;
         }
         case '^':
         case '[': {
-            WRITE_CHAR('\\');
+            WRITE_CHAR_OR_ABORT(writer, '\\');
             ++extra; // because we wrote '\\'
-            WRITE_STRING(expression);
+            WRITE_STRING_OR_ABORT(writer, expression);
             break;
         }
         default: {
-            WRITE_STRING(expression);
+            WRITE_STRING_OR_ABORT(writer, expression);
             break;
         }
     }
-    WRITE_CHAR(']');
+    WRITE_CHAR_OR_ABORT(writer, ']');
     return grouplen + extra;
 abort:
     return -1;
-#undef WRITE_BLOCK
-#undef WRITE_STRING
-#undef WRITE_ASCII
-#undef WRITE_CHAR
 }
 
 static PyObject *
@@ -576,43 +563,39 @@ process_wildcards(PyObject *pattern, PyObject *indices)
         return NULL;
     }
     // ---- def local macros --------------------------------------------------
-#define WRITE_CHAR(CHAR)        WRITE_CHAR_OR_ABORT(writer, CHAR)
-#define WRITE_ASCII(STR, LEN)   WRITE_ASCII_OR_ABORT(writer, STR, LEN)
-#define WRITE_STRING(STR)       WRITE_STRING_OR_ABORT(writer, STR)
-#define WRITE_BLOCK(STR, I, J)  WRITE_BLOCK_OR_ABORT(writer, STR, I, J)
-#define LOAD_WILDCARD_INDEX(VAR, IND)                           \
-    do {                                                        \
-        VAR = PyLong_AsSsize_t(PyList_GET_ITEM(indices, IND));  \
-        if (VAR < 0) {                                          \
-            goto abort;                                         \
-        }                                                       \
+#define LOAD_WILDCARD_INDEX(VAR, IND)                               \
+    do {                                                            \
+        VAR = PyLong_AsSsize_t(PyList_GET_ITEM(indices, (IND)));    \
+        if (VAR < 0) {                                              \
+            goto abort;                                             \
+        }                                                           \
     } while (0)
     // ------------------------------------------------------------------------
-    WRITE_ASCII("(?s:", 4);
+    WRITE_ASCII_OR_ABORT(writer, "(?s:", 4);
     if (m == 0) {
-        WRITE_STRING(pattern);
+        WRITE_STRING_OR_ABORT(writer, pattern);
     }
     else {
         Py_ssize_t i = 0, j = -1;
         // process the optional PREFIX
         LOAD_WILDCARD_INDEX(j, 0);
-        WRITE_BLOCK(pattern, i, j);
+        WRITE_BLOCK_OR_ABORT(writer, pattern, 0, j);
         i = j + 1;
         for (Py_ssize_t k = 1; k < m; ++k) {
             // process the (* INNER) groups
             LOAD_WILDCARD_INDEX(j, k);
             assert(i < j);
             // write the atomic RE group '(?>.*?' + INNER + ')'
-            WRITE_ASCII("(?>.*?", 6);
-            WRITE_BLOCK(pattern, i, j);
-            WRITE_CHAR(')');
+            WRITE_ASCII_OR_ABORT(writer, "(?>.*?", 6);
+            WRITE_BLOCK_OR_ABORT(writer, pattern, i, j);
+            WRITE_CHAR_OR_ABORT(writer, ')');
             i = j + 1;
         }
         // handle the (*) [OUTER] part
-        WRITE_ASCII(".*", 2);
-        WRITE_BLOCK(pattern, i, n);
+        WRITE_ASCII_OR_ABORT(writer, ".*", 2);
+        WRITE_BLOCK_OR_ABORT(writer, pattern, i, n);
     }
-    WRITE_ASCII(")\\Z", 3);
+    WRITE_ASCII_OR_ABORT(writer, ")\\Z", 3);
     PyObject *res = PyUnicodeWriter_Finish(writer);
     assert(res == NULL || PyUnicode_GET_LENGTH(res) == reslen);
     return res;
@@ -620,8 +603,4 @@ process_wildcards(PyObject *pattern, PyObject *indices)
     PyUnicodeWriter_Discard(writer);
     return NULL;
 #undef LOAD_WILDCARD_INDEX
-#undef WRITE_BLOCK
-#undef WRITE_STRING
-#undef WRITE_ASCII
-#undef WRITE_CHAR
 }

From 97ed24d2eeaf64192ec850e1bb3a75e7676f181d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?=
 <10796600+picnixz@users.noreply.github.com>
Date: Sun, 28 Jul 2024 14:14:19 +0200
Subject: [PATCH 77/97] cleanup

---
 Lib/test/test_fnmatch.py | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/Lib/test/test_fnmatch.py b/Lib/test/test_fnmatch.py
index 11b734f266012d..ea4f86743506db 100644
--- a/Lib/test/test_fnmatch.py
+++ b/Lib/test/test_fnmatch.py
@@ -294,11 +294,6 @@ def test_translate_wildcards(self):
             self.assertEqual(translated, expect, pattern)
 
     def test_translate_expressions(self):
-        '[', '[-abc]', '[[]b', '[[a]b', '[\\\\]', '[\\]', '[]-]', '[][!]',
-        '[]]b', '[]a[]b', '[^a-c]*', '[a-\\z]',
-        '[a-c]b*', '[a-y]*[^c]', '[abc-]', '\\*',
-        '[0-4-3-2]', '[b-ac-z9-1]', '[!b-ac-z9-1]', '[!]b-ac-z9-1]',
-        '[]b-ac-z9-1]', '[]b-ac-z9-1]*', '*[]b-ac-z9-1]',
         for pattern, expect in [
             ('[', r'(?s:\[)\Z'),
             ('[!', r'(?s:\[!)\Z'),

From d38a0cb664f469df5f3a17d7d293c10f1d7ce948 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?=
 <10796600+picnixz@users.noreply.github.com>
Date: Sun, 28 Jul 2024 14:14:22 +0200
Subject: [PATCH 78/97] test empty range

---
 Lib/test/test_fnmatch.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/Lib/test/test_fnmatch.py b/Lib/test/test_fnmatch.py
index ea4f86743506db..6ab244021ea20d 100644
--- a/Lib/test/test_fnmatch.py
+++ b/Lib/test/test_fnmatch.py
@@ -269,6 +269,7 @@ def test_translate(self):
 
     def test_translate_wildcards(self):
         for pattern, expect in [
+            ('', r'(?s:)\Z'),
             ('ab*', r'(?s:ab.*)\Z'),
             ('ab*cd', r'(?s:ab.*cd)\Z'),
             ('ab*cd*', r'(?s:ab(?>.*?cd).*)\Z'),

From db88ff521f04a9bc8b4da8799456523104672666 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?=
 <10796600+picnixz@users.noreply.github.com>
Date: Sun, 28 Jul 2024 14:55:27 +0200
Subject: [PATCH 79/97] update macros.h

- add convenience macros
- make some macros private
---
 Modules/_fnmatch/macros.h | 63 +++++++++++++++++++++++++++++----------
 1 file changed, 47 insertions(+), 16 deletions(-)

diff --git a/Modules/_fnmatch/macros.h b/Modules/_fnmatch/macros.h
index 6c0a9ea72b45c0..14ef4f142e9b98 100644
--- a/Modules/_fnmatch/macros.h
+++ b/Modules/_fnmatch/macros.h
@@ -8,6 +8,34 @@
 
 // ==== Macro definitions =====================================================
 
+/*
+ * Check that INTVAL is >= 0 or execute 'goto abort'.
+ *
+ * This macro is provided for convenience and should be
+ * carefully used if more resources should be released
+ * before jumping to the 'abort' label.
+ */
+#define CHECK_INTVAL_OR_ABORT(INTVAL)   \
+    do {                                \
+        if ((INTVAL) < 0) {             \
+            goto abort;                 \
+        }                               \
+    } while (0)
+
+/*
+ * Check that OBJ is not NULL or execute 'goto abort'.
+ *
+ * This macro is provided for convenience and should be
+ * carefully used if more resources should be released
+ * before jumping to the 'abort' label.
+ */
+#define CHECK_NON_NULL_OR_ABORT(OBJ)  \
+    do {                        \
+        if ((OBJ) == NULL) {    \
+            goto abort;         \
+        }                       \
+    } while (0)
+
 // The following _WRITE_* and _WRITE_*_OR macros do NOT check their inputs
 // since they directly delegate to the _PyUnicodeWriter_Write* underlying
 // function. In particular, the caller is responsible for type safety.
@@ -33,7 +61,7 @@
 // ----------------------------------------------------------------------------
 
 /* Write a character CHAR or execute the ON_ERROR statements if it fails. */
-#define WRITE_CHAR_OR(WRITER, CHAR, ON_ERROR)       \
+#define _WRITE_CHAR_OR(WRITER, CHAR, ON_ERROR)      \
     do {                                            \
         if (_WRITE_CHAR((WRITER), (CHAR)) < 0) {    \
             ON_ERROR;                               \
@@ -44,7 +72,7 @@
  * Write an ASCII string STRING of given length LENGTH,
  * or execute the ON_ERROR statements if it fails.
  */
-#define WRITE_ASCII_OR(WRITER, ASCII, LENGTH, ON_ERROR)         \
+#define _WRITE_ASCII_OR(WRITER, ASCII, LENGTH, ON_ERROR)        \
     do {                                                        \
         if (_WRITE_ASCII((WRITER), (ASCII), (LENGTH)) < 0) {    \
             ON_ERROR;                                           \
@@ -52,7 +80,7 @@
     } while (0)
 
 /* Write the string STRING or execute the ON_ERROR statements if it fails. */
-#define WRITE_STRING_OR(WRITER, STRING, ON_ERROR)       \
+#define _WRITE_STRING_OR(WRITER, STRING, ON_ERROR)      \
     do {                                                \
         if (_WRITE_STRING((WRITER), (STRING)) < 0) {    \
             ON_ERROR;                                   \
@@ -63,7 +91,7 @@
  * Write the substring STRING[START:STOP] (no-op if the substring is empty)
  * or execute the ON_ERROR statements if it fails.
  */
-#define WRITE_BLOCK_OR(WRITER, STRING, START, STOP, ON_ERROR)           \
+#define _WRITE_SUBSTRING_OR(WRITER, STRING, START, STOP, ON_ERROR)      \
     do {                                                                \
         /* intermediate variables to allow in-place operations */       \
         Py_ssize_t _i = (START), _j = (STOP);                           \
@@ -77,13 +105,13 @@
 // Macros which execute "goto abort" if an error occurs.
 
 #define WRITE_CHAR_OR_ABORT(WRITER, CHAR) \
-    WRITE_CHAR_OR((WRITER), (CHAR), goto abort)
+    _WRITE_CHAR_OR((WRITER), (CHAR), goto abort)
 #define WRITE_ASCII_OR_ABORT(WRITER, STRING, LENGTH) \
-    WRITE_ASCII_OR((WRITER), (STRING), (LENGTH), goto abort)
+    _WRITE_ASCII_OR((WRITER), (STRING), (LENGTH), goto abort)
 #define WRITE_STRING_OR_ABORT(WRITER, STRING) \
-    WRITE_STRING_OR((WRITER), (STRING), goto abort)
+    _WRITE_STRING_OR((WRITER), (STRING), goto abort)
 #define WRITE_BLOCK_OR_ABORT(WRITER, STRING, START, STOP) \
-    WRITE_BLOCK_OR((WRITER), (STRING), (START), (STOP), goto abort)
+    _WRITE_SUBSTRING_OR((WRITER), (STRING), (START), (STOP), goto abort)
 
 // ----------------------------------------------------------------------------
 
@@ -107,14 +135,17 @@
         NULL                                \
     )
 
-/* Escape set operations in STRING using re.sub(). */
-#define SETOPS_REPLACE(STATE, STRING, RE_SUB_FUNC)  \
-    PyObject_CallFunctionObjArgs(                   \
-        (RE_SUB_FUNC),                              \
-        (STATE)->setops_str,                        \
-        (STATE)->setops_repl_str,                   \
-        (STRING),                                   \
-        NULL                                        \
+/*
+ * Escape set operations in STRING using re.sub().
+ *
+ * SETOPS_RE_SUB_METH is a reference to re.compile('([&~|])').sub().
+ */
+#define SETOPS_REPLACE(STATE, STRING, SETOPS_RE_SUB_METH)   \
+    PyObject_CallFunctionObjArgs(                           \
+        (SETOPS_RE_SUB_METH),                               \
+        (STATE)->setops_repl_str,                           \
+        (STRING),                                           \
+        NULL                                                \
     )
 
 #endif // _FNMATCH_MACROS_H

From 2caa5b89fbe0dac0233df0eae3f3e10c7d4ea175 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?=
 <10796600+picnixz@users.noreply.github.com>
Date: Sun, 28 Jul 2024 14:56:46 +0200
Subject: [PATCH 80/97] update algorithm

Previous improvement: 2.05x.
This improvement: 2.8x.
---
 Modules/_fnmatch/translate.c | 188 +++++++++++++++++++++--------------
 1 file changed, 114 insertions(+), 74 deletions(-)

diff --git a/Modules/_fnmatch/translate.c b/Modules/_fnmatch/translate.c
index eb69aba35964cc..798a2e89dfd64b 100644
--- a/Modules/_fnmatch/translate.c
+++ b/Modules/_fnmatch/translate.c
@@ -15,12 +15,16 @@
 // ==== Helper declarations ===================================================
 
 /*
- * Creates a new Unicode object from a Py_UCS4 character.
+ * Write re.escape(pattern[start:stop]).
  *
- * Note: this is 'unicode_char' taken from Objects/unicodeobject.c.
+ * This returns the number of written characters, or -1 if an error occurred.
+ *
+ * @pre     0 <= start < stop <= len(pattern)
  */
-static PyObject *
-get_unicode_character(Py_UCS4 ch);
+static inline Py_ssize_t
+escape_block(PyUnicodeWriter *writer,
+             PyObject *pattern, Py_ssize_t start, Py_ssize_t stop,
+             PyObject *re_escape_func);
 
 /*
  * Construct a regular expression out of a UNIX-style expression.
@@ -51,7 +55,9 @@ translate_expression(fnmatchmodule_state *state,
  * This returns the number of written characters, or -1 if an error occurred.
  */
 static Py_ssize_t
-write_expression(PyUnicodeWriter *writer, PyObject *expression);
+write_expression(fnmatchmodule_state *state,
+                 PyUnicodeWriter *writer, PyObject *expression,
+                 PyObject *setops_re_sub_meth);
 
 /*
  * Build the final regular expression by processing the wildcards.
@@ -63,6 +69,17 @@ process_wildcards(PyObject *pattern, PyObject *indices);
 
 // ==== API implementation ====================================================
 
+static inline PyObject *
+get_setops_re_sub_method(fnmatchmodule_state *state)
+{
+    PyObject *compiled = PyObject_CallMethodOneArg(state->re_module,
+                                                   &_Py_ID(compile),
+                                                   state->setops_str);
+    PyObject *method = PyObject_GetAttr(compiled, &_Py_ID(sub));
+    Py_DECREF(compiled);
+    return method;
+}
+
 PyObject *
 _Py_fnmatch_translate(PyObject *module, PyObject *pattern)
 {
@@ -90,28 +107,34 @@ _Py_fnmatch_translate(PyObject *module, PyObject *pattern)
         return NULL;
     }
 
+    // ---- decl local objects ------------------------------------------------
     // list containing the indices where '*' has a special meaning
     PyObject *wildcard_indices = NULL;
     // cached functions (cache is local to the call)
-    PyObject *re_escape_func = NULL, *re_sub_func = NULL;
-    PyObject *pattern_str_find_meth = NULL; // bound method of pattern.find()
-
+    PyObject *re_escape_func = NULL;        // re.escape()
+    PyObject *setops_re_subfn = NULL;       // re.compile('([&~|])').sub()
+    PyObject *pattern_str_find_meth = NULL; // pattern.find()
+    // ---- def local objects -------------------------------------------------
     wildcard_indices = PyList_New(0);
     if (wildcard_indices == NULL) {
         goto abort;
     }
-#define CACHE_ATTRIBUTE(DEST, OBJECT, NAME)         \
-    do {                                            \
-        DEST = PyObject_GetAttr((OBJECT), (NAME));  \
-        if ((DEST) == NULL) {                       \
-            goto abort;                             \
-        }                                           \
-    } while (0);
-    CACHE_ATTRIBUTE(re_escape_func, state->re_module, &_Py_ID(escape));
-    CACHE_ATTRIBUTE(re_sub_func, state->re_module, &_Py_ID(sub));
-    CACHE_ATTRIBUTE(pattern_str_find_meth, pattern, &_Py_ID(find));
-#undef CACHE_ATTRIBUTE
-
+    // The Python implementation always takes queries re.escape() and re.sub()
+    // inside translate() and thus we should at least allow external users to
+    // mock those functions (thus, we cannot cache them in the module's state).
+    re_escape_func = PyObject_GetAttr(state->re_module, &_Py_ID(escape));
+    if (re_escape_func == NULL) {
+        goto abort;
+    }
+    setops_re_subfn = get_setops_re_sub_method(state);
+    if (setops_re_subfn == NULL) {
+        goto abort;
+    }
+    pattern_str_find_meth = PyObject_GetAttr(pattern, &_Py_ID(find));
+    if (pattern_str_find_meth == NULL) {
+        goto abort;
+    }
+    // ------------------------------------------------------------------------
     const int pattern_kind = PyUnicode_KIND(pattern);
     const void *const pattern_data = PyUnicode_DATA(pattern);
     // ---- def local macros --------------------------------------------------
@@ -123,13 +146,28 @@ _Py_fnmatch_translate(PyObject *module, PyObject *pattern)
             ++IND;                                          \
         }                                                   \
     } while (0)
+#define WRITE_PENDING(ESCSTOP)                                  \
+    do {                                                        \
+        if (escstart != -1) {                                   \
+            Py_ssize_t t = escape_block(writer, pattern,        \
+                                        escstart, (ESCSTOP),    \
+                                        re_escape_func);        \
+            if (t < 0) {                                        \
+                goto abort;                                     \
+            }                                                   \
+            written += t;                                       \
+            escstart = -1;                                      \
+        }                                                       \
+    } while (0)
     // ------------------------------------------------------------------------
-    Py_ssize_t i = 0;       // current index
-    Py_ssize_t written = 0; // number of characters written
-    while (i < maxind) {
+    Py_ssize_t i = 0;                       // current index
+    Py_ssize_t written = 0;                 // number of characters written
+    Py_ssize_t escstart = -1, escstop = -1; // start/stop escaping indices
+    while ((escstop = i) < maxind) {
         Py_UCS4 chr = READ_CHAR(i++);
         switch (chr) {
             case '*': {
+                WRITE_PENDING(escstop);
                 // translate wildcard '*' (fnmatch) into optional '.' (regex)
                 WRITE_CHAR_OR_ABORT(writer, '*');
                 // skip duplicated '*'
@@ -147,12 +185,14 @@ _Py_fnmatch_translate(PyObject *module, PyObject *pattern)
                 break;
             }
             case '?': {
+                WRITE_PENDING(escstop);
                 // translate optional '?' (fnmatch) into optional '.' (regex)
                 WRITE_CHAR_OR_ABORT(writer, '.');
                 ++written; // increase the expected result's length
                 break;
             }
             case '[': {
+                WRITE_PENDING(escstop);
                 assert(i > 0);
                 assert(READ_CHAR(i - 1) == '[');
                 Py_ssize_t j = i;
@@ -170,28 +210,24 @@ _Py_fnmatch_translate(PyObject *module, PyObject *pattern)
                     if (pos == -2) {
                         goto abort;
                     }
-                    PyObject *pre_expr = NULL, *expr = NULL;
+                    PyObject *expr = NULL;
                     if (pos == -1) {
                         PyObject *tmp = PyUnicode_Substring(pattern, i, j);
                         if (tmp == NULL) {
                             goto abort;
                         }
-                        pre_expr = BACKSLASH_REPLACE(state, tmp);
+                        expr = BACKSLASH_REPLACE(state, tmp);
                         Py_DECREF(tmp);
                     }
                     else {
-                        pre_expr = translate_expression(state, pattern, i, j,
-                                                        pattern_str_find_meth);
-                    }
-                    if (pre_expr == NULL) {
-                        goto abort;
+                        expr = translate_expression(state, pattern, i, j,
+                                                    pattern_str_find_meth);
                     }
-                    expr = SETOPS_REPLACE(state, pre_expr, re_sub_func);
-                    Py_DECREF(pre_expr);
                     if (expr == NULL) {
                         goto abort;
                     }
-                    Py_ssize_t expr_len = write_expression(writer, expr);
+                    Py_ssize_t expr_len = write_expression(state, writer, expr,
+                                                           setops_re_subfn);
                     Py_DECREF(expr);
                     if (expr_len < 0) {
                         goto abort;
@@ -202,32 +238,20 @@ _Py_fnmatch_translate(PyObject *module, PyObject *pattern)
                 }
             }
             default: {
-                PyObject *str = get_unicode_character(chr);
-                if (str == NULL) {
-                    goto abort;
-                }
-                PyObject *escaped = PyObject_CallOneArg(re_escape_func, str);
-                Py_DECREF(str);
-                if (escaped == NULL) {
-                    goto abort;
+                if (escstart == -1) {
+                    assert(i >= 1);
+                    escstart = i - 1;
                 }
-                Py_ssize_t escaped_len = PyUnicode_GET_LENGTH(escaped);
-                // Do NOT use WRITE_STRING_OR_ABORT() since 'escaped'
-                // must be first decref'ed in case of an error.
-                int rc = _WRITE_STRING(writer, escaped);
-                Py_DECREF(escaped);
-                if (rc < 0) {
-                    goto abort;
-                }
-                written += escaped_len;
                 break;
             }
         }
     }
+    WRITE_PENDING(maxind);
+#undef WRITE_PENDING
 #undef ADVANCE_IF_CHAR_IS
 #undef READ_CHAR
     Py_DECREF(pattern_str_find_meth);
-    Py_DECREF(re_sub_func);
+    Py_DECREF(setops_re_subfn);
     Py_DECREF(re_escape_func);
     PyObject *translated = PyUnicodeWriter_Finish(writer);
     if (translated == NULL) {
@@ -240,7 +264,7 @@ _Py_fnmatch_translate(PyObject *module, PyObject *pattern)
     return res;
 abort:
     Py_XDECREF(pattern_str_find_meth);
-    Py_XDECREF(re_sub_func);
+    Py_XDECREF(setops_re_subfn);
     Py_XDECREF(re_escape_func);
     Py_XDECREF(wildcard_indices);
     PyUnicodeWriter_Discard(writer);
@@ -249,29 +273,35 @@ _Py_fnmatch_translate(PyObject *module, PyObject *pattern)
 
 // ==== Helper implementations ================================================
 
-static PyObject *
-get_unicode_character(Py_UCS4 ch)
+static inline Py_ssize_t
+escape_block(PyUnicodeWriter *writer,
+             PyObject *pattern, Py_ssize_t start, Py_ssize_t stop,
+             PyObject *re_escape_func)
 {
-    assert(ch <= 0x10ffff);
-    if (ch < 256) {
-        PyObject *o = _Py_LATIN1_CHR(ch);
-        assert(_Py_IsImmortal(o));
-        return o;
+#ifdef Py_DEBUG
+    if (start < 0 || start >= stop || stop > PyUnicode_GET_LENGTH(pattern)) {
+        PyErr_BadInternalCall();
+        return -1;
     }
-    PyObject *unicode = PyUnicode_New(1, ch);
-    if (unicode == NULL) {
-        return NULL;
+#endif
+    PyObject *str = PyUnicode_Substring(pattern, start, stop);
+    if (str == NULL) {
+        goto abort;
     }
-    assert(PyUnicode_KIND(unicode) != PyUnicode_1BYTE_KIND);
-    if (PyUnicode_KIND(unicode) == PyUnicode_2BYTE_KIND) {
-        PyUnicode_2BYTE_DATA(unicode)[0] = (Py_UCS2)ch;
+    PyObject *escaped = PyObject_CallOneArg(re_escape_func, str);
+    Py_DECREF(str);
+    if (escaped == NULL) {
+        goto abort;
     }
-    else {
-        assert(PyUnicode_KIND(unicode) == PyUnicode_4BYTE_KIND);
-        PyUnicode_4BYTE_DATA(unicode)[0] = ch;
+    Py_ssize_t written = PyUnicode_GET_LENGTH(escaped);
+    int rc = _WRITE_STRING(writer, escaped);
+    Py_DECREF(escaped);
+    if (rc < 0) {
+        goto abort;
     }
-    assert(_PyUnicode_CheckConsistency(unicode, 1));
-    return unicode;
+    return written;
+abort:
+    return -1;
 }
 
 /*
@@ -493,8 +523,11 @@ translate_expression(fnmatchmodule_state *state,
 }
 
 static Py_ssize_t
-write_expression(PyUnicodeWriter *writer, PyObject *expression)
+write_expression(fnmatchmodule_state *state,
+                 PyUnicodeWriter *writer, PyObject *expression,
+                 PyObject *setops_re_sub_meth)
 {
+    PyObject *safe_expression = NULL;  // for the 'goto abort' statements
     Py_ssize_t grouplen = PyUnicode_GET_LENGTH(expression);
     if (grouplen == 0) {
         // empty range: never match
@@ -509,27 +542,34 @@ write_expression(PyUnicodeWriter *writer, PyObject *expression)
     }
     Py_ssize_t extra = 2; // '[' and ']'
     WRITE_CHAR_OR_ABORT(writer, '[');
+    // escape set operations as late as possible
+    safe_expression = SETOPS_REPLACE(state, expression, setops_re_sub_meth);
+    if (safe_expression == NULL) {
+        goto abort;
+    }
     switch (token) {
         case '!': {
             WRITE_CHAR_OR_ABORT(writer, '^'); // replace '!' by '^'
-            WRITE_BLOCK_OR_ABORT(writer, expression, 1, grouplen);
+            WRITE_BLOCK_OR_ABORT(writer, safe_expression, 1, grouplen);
             break;
         }
         case '^':
         case '[': {
             WRITE_CHAR_OR_ABORT(writer, '\\');
             ++extra; // because we wrote '\\'
-            WRITE_STRING_OR_ABORT(writer, expression);
+            WRITE_STRING_OR_ABORT(writer, safe_expression);
             break;
         }
         default: {
-            WRITE_STRING_OR_ABORT(writer, expression);
+            WRITE_STRING_OR_ABORT(writer, safe_expression);
             break;
         }
     }
+    Py_DECREF(safe_expression);
     WRITE_CHAR_OR_ABORT(writer, ']');
     return grouplen + extra;
 abort:
+    Py_XDECREF(safe_expression);
     return -1;
 }
 

From ef2de2a737059a7afe3ff30e67eb8b967ffe30cb Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?=
 <10796600+picnixz@users.noreply.github.com>
Date: Sun, 28 Jul 2024 15:09:49 +0200
Subject: [PATCH 81/97] PEP 7

---
 Modules/_fnmatch/_fnmatchmodule.c | 34 ++++++++++++++++---------------
 1 file changed, 18 insertions(+), 16 deletions(-)

diff --git a/Modules/_fnmatch/_fnmatchmodule.c b/Modules/_fnmatch/_fnmatchmodule.c
index a771b406fccc0e..a050616217c71e 100644
--- a/Modules/_fnmatch/_fnmatchmodule.c
+++ b/Modules/_fnmatch/_fnmatchmodule.c
@@ -141,24 +141,26 @@ get_platform_normcase_function(PyObject *module, bool *isposix)
 
 // ==== Module state functions ================================================
 
-#define IMPORT_MODULE(state, attribute, name) \
-    do { \
-        /* make sure that the attribute is initialized once */ \
-        assert(state->attribute == NULL); \
-        state->attribute = PyImport_ImportModule((name)); \
-        if (state->attribute == NULL) { \
-            return -1; \
-        } \
+/* Import a named module and store it in 'STATE->ATTRIBUTE'. */
+#define IMPORT_MODULE(STATE, ATTRIBUTE, MODULE_NAME)                \
+    do {                                                            \
+        /* make sure that the attribute is initialized once */      \
+        assert(STATE->ATTRIBUTE == NULL);                           \
+        STATE->ATTRIBUTE = PyImport_ImportModule((MODULE_NAME));    \
+        if (STATE->ATTRIBUTE == NULL) {                             \
+            return -1;                                              \
+        }                                                           \
     } while (0)
 
-#define INTERN_STRING(state, attribute, literal) \
-    do { \
-        /* make sure that the attribute is initialized once */ \
-        assert(state->attribute == NULL); \
-        state->attribute = PyUnicode_InternFromString((literal)); \
-        if (state->attribute == NULL) { \
-            return -1; \
-        } \
+/* Intern a literal STRING and store it in 'STATE->ATTRIBUTE'. */
+#define INTERN_STRING(STATE, ATTRIBUTE, STRING)                     \
+    do {                                                            \
+        /* make sure that the attribute is initialized once */      \
+        assert(STATE->ATTRIBUTE == NULL);                           \
+        STATE->ATTRIBUTE = PyUnicode_InternFromString((STRING));    \
+        if (STATE->ATTRIBUTE == NULL) {                             \
+            return -1;                                              \
+        }                                                           \
     } while (0)
 
 static int

From c00c8f91c0089cda964a27de1557db8a327fb344 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?=
 <10796600+picnixz@users.noreply.github.com>
Date: Sun, 28 Jul 2024 15:16:46 +0200
Subject: [PATCH 82/97] remove unnecessary code

---
 Modules/_fnmatch/util.h | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/Modules/_fnmatch/util.h b/Modules/_fnmatch/util.h
index 4e5228659af4b4..379fed066de3d8 100644
--- a/Modules/_fnmatch/util.h
+++ b/Modules/_fnmatch/util.h
@@ -7,9 +7,6 @@
 
 #include "Python.h"
 
-typedef struct {
-} translatemodule_state;
-
 typedef struct {
     PyObject *os_module;            // import os
     PyObject *posixpath_module;     // import posixpath

From e93cd878031c11bedd44dd057257f64353fe66df Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?=
 <10796600+picnixz@users.noreply.github.com>
Date: Sun, 28 Jul 2024 15:17:33 +0200
Subject: [PATCH 83/97] use convenience macros for aborting flow

---
 Modules/_fnmatch/translate.c | 111 ++++++++++-------------------------
 1 file changed, 30 insertions(+), 81 deletions(-)

diff --git a/Modules/_fnmatch/translate.c b/Modules/_fnmatch/translate.c
index 798a2e89dfd64b..175b04eef5f1c6 100644
--- a/Modules/_fnmatch/translate.c
+++ b/Modules/_fnmatch/translate.c
@@ -116,24 +116,16 @@ _Py_fnmatch_translate(PyObject *module, PyObject *pattern)
     PyObject *pattern_str_find_meth = NULL; // pattern.find()
     // ---- def local objects -------------------------------------------------
     wildcard_indices = PyList_New(0);
-    if (wildcard_indices == NULL) {
-        goto abort;
-    }
+    CHECK_NON_NULL_OR_ABORT(wildcard_indices);
     // The Python implementation always takes queries re.escape() and re.sub()
     // inside translate() and thus we should at least allow external users to
     // mock those functions (thus, we cannot cache them in the module's state).
     re_escape_func = PyObject_GetAttr(state->re_module, &_Py_ID(escape));
-    if (re_escape_func == NULL) {
-        goto abort;
-    }
+    CHECK_NON_NULL_OR_ABORT(re_escape_func);
     setops_re_subfn = get_setops_re_sub_method(state);
-    if (setops_re_subfn == NULL) {
-        goto abort;
-    }
+    CHECK_NON_NULL_OR_ABORT(setops_re_subfn);
     pattern_str_find_meth = PyObject_GetAttr(pattern, &_Py_ID(find));
-    if (pattern_str_find_meth == NULL) {
-        goto abort;
-    }
+    CHECK_NON_NULL_OR_ABORT(pattern_str_find_meth);
     // ------------------------------------------------------------------------
     const int pattern_kind = PyUnicode_KIND(pattern);
     const void *const pattern_data = PyUnicode_DATA(pattern);
@@ -152,9 +144,7 @@ _Py_fnmatch_translate(PyObject *module, PyObject *pattern)
             Py_ssize_t t = escape_block(writer, pattern,        \
                                         escstart, (ESCSTOP),    \
                                         re_escape_func);        \
-            if (t < 0) {                                        \
-                goto abort;                                     \
-            }                                                   \
+            CHECK_INTVAL_OR_ABORT(t);                           \
             written += t;                                       \
             escstart = -1;                                      \
         }                                                       \
@@ -174,14 +164,10 @@ _Py_fnmatch_translate(PyObject *module, PyObject *pattern)
                 for (; i < maxind && READ_CHAR(i) == '*'; ++i);
                 // store the position of the wildcard
                 PyObject *wildcard_index = PyLong_FromSsize_t(written++);
-                if (wildcard_index == NULL) {
-                    goto abort;
-                }
+                CHECK_NON_NULL_OR_ABORT(wildcard_index);
                 int rc = PyList_Append(wildcard_indices, wildcard_index);
                 Py_DECREF(wildcard_index);
-                if (rc < 0) {
-                    goto abort;
-                }
+                CHECK_INTVAL_OR_ABORT(rc);
                 break;
             }
             case '?': {
@@ -193,8 +179,7 @@ _Py_fnmatch_translate(PyObject *module, PyObject *pattern)
             }
             case '[': {
                 WRITE_PENDING(escstop);
-                assert(i > 0);
-                assert(READ_CHAR(i - 1) == '[');
+                assert(READ_CHAR(escstop) == '[');
                 Py_ssize_t j = i;
                 ADVANCE_IF_CHAR_IS('!', j, maxind);             // [!
                 ADVANCE_IF_CHAR_IS(']', j, maxind);             // [!] or []
@@ -213,9 +198,7 @@ _Py_fnmatch_translate(PyObject *module, PyObject *pattern)
                     PyObject *expr = NULL;
                     if (pos == -1) {
                         PyObject *tmp = PyUnicode_Substring(pattern, i, j);
-                        if (tmp == NULL) {
-                            goto abort;
-                        }
+                        CHECK_NON_NULL_OR_ABORT(tmp);
                         expr = BACKSLASH_REPLACE(state, tmp);
                         Py_DECREF(tmp);
                     }
@@ -223,15 +206,11 @@ _Py_fnmatch_translate(PyObject *module, PyObject *pattern)
                         expr = translate_expression(state, pattern, i, j,
                                                     pattern_str_find_meth);
                     }
-                    if (expr == NULL) {
-                        goto abort;
-                    }
+                    CHECK_NON_NULL_OR_ABORT(expr);
                     Py_ssize_t expr_len = write_expression(state, writer, expr,
                                                            setops_re_subfn);
                     Py_DECREF(expr);
-                    if (expr_len < 0) {
-                        goto abort;
-                    }
+                    CHECK_INTVAL_OR_ABORT(expr_len);
                     written += expr_len;
                     i = j + 1;  // jump to the character after ']'
                     break;      // explicit early break for clarity
@@ -285,20 +264,14 @@ escape_block(PyUnicodeWriter *writer,
     }
 #endif
     PyObject *str = PyUnicode_Substring(pattern, start, stop);
-    if (str == NULL) {
-        goto abort;
-    }
+    CHECK_NON_NULL_OR_ABORT(str);
     PyObject *escaped = PyObject_CallOneArg(re_escape_func, str);
     Py_DECREF(str);
-    if (escaped == NULL) {
-        goto abort;
-    }
+    CHECK_NON_NULL_OR_ABORT(escaped);
     Py_ssize_t written = PyUnicode_GET_LENGTH(escaped);
     int rc = _WRITE_STRING(writer, escaped);
     Py_DECREF(escaped);
-    if (rc < 0) {
-        goto abort;
-    }
+    CHECK_INTVAL_OR_ABORT(rc);
     return written;
 abort:
     return -1;
@@ -316,31 +289,23 @@ split_expression(fnmatchmodule_state *state,
                  PyObject *pattern, Py_ssize_t start, Py_ssize_t stop,
                  PyObject *str_find_func)
 {
+    // ---- decl local objects ------------------------------------------------
     PyObject *chunks = NULL, *maxind = NULL;
     PyObject *hyphen = state->hyphen_str;
-
+    // ---- def local objects -------------------------------------------------
     chunks = PyList_New(0);
-    if (chunks == NULL) {
-        goto abort;
-    }
+    CHECK_NON_NULL_OR_ABORT(chunks);
     maxind = PyLong_FromSsize_t(stop);
-    if (maxind == NULL) {
-        goto abort;
-    }
-
+    CHECK_NON_NULL_OR_ABORT(maxind);
     // ---- def local macros --------------------------------------------------
     /* add pattern[START:STOP] to the list of chunks */
 #define ADD_CHUNK(START, STOP)                                              \
     do {                                                                    \
         PyObject *chunk = PyUnicode_Substring(pattern, (START), (STOP));    \
-        if (chunk == NULL) {                                                \
-            goto abort;                                                     \
-        }                                                                   \
+        CHECK_NON_NULL_OR_ABORT(chunk);                                     \
         int rc = PyList_Append(chunks, chunk);                              \
         Py_DECREF(chunk);                                                   \
-        if (rc < 0) {                                                       \
-            goto abort;                                                     \
-        }                                                                   \
+        CHECK_INTVAL_OR_ABORT(rc);                                          \
     } while (0)
     // ------------------------------------------------------------------------
     Py_ssize_t chunk_start = start;
@@ -350,9 +315,7 @@ split_expression(fnmatchmodule_state *state,
     while (ind < stop) {
         PyObject *p_chunk_stop = PyObject_CallFunction(str_find_func, "OnO",
                                                        hyphen, ind, maxind);
-        if (p_chunk_stop == NULL) {
-            goto abort;
-        }
+        CHECK_NON_NULL_OR_ABORT(p_chunk_stop);
         Py_ssize_t chunk_stop = PyLong_AsSsize_t(p_chunk_stop);
         Py_DECREF(p_chunk_stop);
         if (chunk_stop < 0) {
@@ -433,9 +396,7 @@ simplify_expression(PyObject *chunks)
                 assert(c1len > 1);
                 assert(c2len > 1);
                 PyUnicodeWriter *writer = PyUnicodeWriter_Create(olen);
-                if (writer == NULL) {
-                    goto abort;
-                }
+                CHECK_NON_NULL_OR_ABORT(writer);
                 // all but the last character in the first chunk
                 if (_WRITE_BLOCK(writer, c1, 0, c1len - 1) < 0) {
                     PyUnicodeWriter_Discard(writer);
@@ -455,9 +416,7 @@ simplify_expression(PyObject *chunks)
                 Py_XDECREF(str);
                 goto abort;
             }
-            if (PySequence_DelItem(chunks, k) < 0) {
-                goto abort;
-            }
+            CHECK_INTVAL_OR_ABORT(PySequence_DelItem(chunks, k));
         }
     }
     return 0;
@@ -478,9 +437,7 @@ escape_expression(fnmatchmodule_state *state, PyObject *chunks)
         PyObject *s0 = PyList_GET_ITEM(chunks, c);
         assert(s0 != NULL);
         PyObject *s1 = BACKSLASH_REPLACE(state, s0);
-        if (s1 == NULL) {
-            goto abort;
-        }
+        CHECK_NON_NULL_OR_ABORT(s1);
         PyObject *s2 = HYPHEN_REPLACE(state, s1);
         Py_DECREF(s1);
         // PyList_SetItem() does not create a new reference on 's2'
@@ -503,22 +460,16 @@ translate_expression(fnmatchmodule_state *state,
 {
     PyObject *chunks = split_expression(state, pattern, start, stop,
                                         pattern_str_find_meth);
-    if (chunks == NULL) {
-        return NULL;
-    }
+    CHECK_NON_NULL_OR_ABORT(chunks);
     // remove empty ranges
-    if (simplify_expression(chunks) < 0) {
-        goto abort;
-    }
+    CHECK_INTVAL_OR_ABORT(simplify_expression(chunks));
     // escape backslashes and set differences
-    if (escape_expression(state, chunks) < 0) {
-        goto abort;
-    }
+    CHECK_INTVAL_OR_ABORT(escape_expression(state, chunks));
     PyObject *res = PyUnicode_Join(state->hyphen_str, chunks);
     Py_DECREF(chunks);
     return res;
 abort:
-    Py_DECREF(chunks);
+    Py_XDECREF(chunks);
     return NULL;
 }
 
@@ -544,9 +495,7 @@ write_expression(fnmatchmodule_state *state,
     WRITE_CHAR_OR_ABORT(writer, '[');
     // escape set operations as late as possible
     safe_expression = SETOPS_REPLACE(state, expression, setops_re_sub_meth);
-    if (safe_expression == NULL) {
-        goto abort;
-    }
+    CHECK_NON_NULL_OR_ABORT(safe_expression);
     switch (token) {
         case '!': {
             WRITE_CHAR_OR_ABORT(writer, '^'); // replace '!' by '^'
@@ -606,7 +555,7 @@ process_wildcards(PyObject *pattern, PyObject *indices)
 #define LOAD_WILDCARD_INDEX(VAR, IND)                               \
     do {                                                            \
         VAR = PyLong_AsSsize_t(PyList_GET_ITEM(indices, (IND)));    \
-        if (VAR < 0) {                                              \
+        if ((VAR) < 0 && PyErr_Occurred())  {                       \
             goto abort;                                             \
         }                                                           \
     } while (0)

From 3a0567670225b993aea7e029d6b436ae94405faa Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?=
 <10796600+picnixz@users.noreply.github.com>
Date: Sun, 28 Jul 2024 16:06:22 +0200
Subject: [PATCH 84/97] refactor macros

---
 Modules/_fnmatch/macros.h    | 77 ++++++++++++------------------------
 Modules/_fnmatch/translate.c | 66 ++++++++++++++++---------------
 2 files changed, 61 insertions(+), 82 deletions(-)

diff --git a/Modules/_fnmatch/macros.h b/Modules/_fnmatch/macros.h
index 14ef4f142e9b98..d78a75b52d1dc0 100644
--- a/Modules/_fnmatch/macros.h
+++ b/Modules/_fnmatch/macros.h
@@ -9,15 +9,16 @@
 // ==== Macro definitions =====================================================
 
 /*
- * Check that INTVAL is >= 0 or execute 'goto abort'.
+ * Check that STATUS is >= 0 or execute 'goto abort'.
  *
  * This macro is provided for convenience and should be
  * carefully used if more resources should be released
  * before jumping to the 'abort' label.
  */
-#define CHECK_INTVAL_OR_ABORT(INTVAL)   \
+#define CHECK_RET_CODE_OR_ABORT(STATUS) \
     do {                                \
-        if ((INTVAL) < 0) {             \
+        if ((STATUS) < 0) {             \
+            assert(PyErr_Occurred());   \
             goto abort;                 \
         }                               \
     } while (0)
@@ -29,11 +30,11 @@
  * carefully used if more resources should be released
  * before jumping to the 'abort' label.
  */
-#define CHECK_NON_NULL_OR_ABORT(OBJ)  \
-    do {                        \
-        if ((OBJ) == NULL) {    \
-            goto abort;         \
-        }                       \
+#define CHECK_NOT_NULL_OR_ABORT(OBJ)    \
+    do {                                \
+        if ((OBJ) == NULL) {            \
+            goto abort;                 \
+        }                               \
     } while (0)
 
 // The following _WRITE_* and _WRITE_*_OR macros do NOT check their inputs
@@ -54,67 +55,41 @@
     _PyUnicodeWriter_WriteStr((_PyUnicodeWriter *)(WRITER), (STRING))
 
 /* Cast WRITER and call _PyUnicodeWriter_WriteSubstring(). */
-#define _WRITE_BLOCK(WRITER, STRING, START, STOP)                   \
+#define _WRITE_SUBSTRING(WRITER, STRING, START, STOP)               \
     _PyUnicodeWriter_WriteSubstring((_PyUnicodeWriter *)(WRITER),   \
                                     (STRING), (START), (STOP))
 
 // ----------------------------------------------------------------------------
 
-/* Write a character CHAR or execute the ON_ERROR statements if it fails. */
-#define _WRITE_CHAR_OR(WRITER, CHAR, ON_ERROR)      \
-    do {                                            \
-        if (_WRITE_CHAR((WRITER), (CHAR)) < 0) {    \
-            ON_ERROR;                               \
-        }                                           \
-    } while (0)
+/* Write the character CHAR or jump to the 'abort' label on failure. */
+#define WRITE_CHAR_OR_ABORT(WRITER, CHAR) \
+    CHECK_RET_CODE_OR_ABORT(_WRITE_CHAR((WRITER), (CHAR)))
 
 /*
  * Write an ASCII string STRING of given length LENGTH,
- * or execute the ON_ERROR statements if it fails.
+ * or jump to the 'abort' label on failure.
  */
-#define _WRITE_ASCII_OR(WRITER, ASCII, LENGTH, ON_ERROR)        \
-    do {                                                        \
-        if (_WRITE_ASCII((WRITER), (ASCII), (LENGTH)) < 0) {    \
-            ON_ERROR;                                           \
-        }                                                       \
-    } while (0)
+#define WRITE_ASCII_OR_ABORT(WRITER, ASCII, LENGTH)  \
+    CHECK_RET_CODE_OR_ABORT(_WRITE_ASCII((WRITER), (ASCII), (LENGTH)))
 
-/* Write the string STRING or execute the ON_ERROR statements if it fails. */
-#define _WRITE_STRING_OR(WRITER, STRING, ON_ERROR)      \
-    do {                                                \
-        if (_WRITE_STRING((WRITER), (STRING)) < 0) {    \
-            ON_ERROR;                                   \
-        }                                               \
-    } while (0)
+/* Write the string STRING or jump to the 'abort' label on failure. */
+#define WRITE_STRING_OR_ABORT(WRITER, STRING)  \
+    CHECK_RET_CODE_OR_ABORT(_WRITE_STRING((WRITER), (STRING)))
 
 /*
- * Write the substring STRING[START:STOP] (no-op if the substring is empty)
- * or execute the ON_ERROR statements if it fails.
+ * Write the substring STRING[START:STOP] (no-op if empty)
+ * or jump to the 'abort' label on failure.
  */
-#define _WRITE_SUBSTRING_OR(WRITER, STRING, START, STOP, ON_ERROR)      \
+#define WRITE_SUBSTRING_OR_ABORT(WRITER, STRING, START, STOP)           \
     do {                                                                \
-        /* intermediate variables to allow in-place operations */       \
-        Py_ssize_t _i = (START), _j = (STOP);                           \
-        if (_i < _j && _WRITE_BLOCK((WRITER), (STRING), _i, _j) < 0) {  \
-            ON_ERROR;                                                   \
-        }                                                               \
+        const Py_ssize_t _START = (START);                              \
+        const Py_ssize_t _STOP = (STOP);                                \
+        int _RC = _WRITE_SUBSTRING((WRITER), (STRING), _START, _STOP);  \
+        CHECK_RET_CODE_OR_ABORT(_RC);                                   \
     } while (0)
 
 // ----------------------------------------------------------------------------
 
-// Macros which execute "goto abort" if an error occurs.
-
-#define WRITE_CHAR_OR_ABORT(WRITER, CHAR) \
-    _WRITE_CHAR_OR((WRITER), (CHAR), goto abort)
-#define WRITE_ASCII_OR_ABORT(WRITER, STRING, LENGTH) \
-    _WRITE_ASCII_OR((WRITER), (STRING), (LENGTH), goto abort)
-#define WRITE_STRING_OR_ABORT(WRITER, STRING) \
-    _WRITE_STRING_OR((WRITER), (STRING), goto abort)
-#define WRITE_BLOCK_OR_ABORT(WRITER, STRING, START, STOP) \
-    _WRITE_SUBSTRING_OR((WRITER), (STRING), (START), (STOP), goto abort)
-
-// ----------------------------------------------------------------------------
-
 /* Replace backslashes in STRING by escaped backslashes. */
 #define BACKSLASH_REPLACE(STATE, STRING)    \
     PyObject_CallMethodObjArgs(             \
diff --git a/Modules/_fnmatch/translate.c b/Modules/_fnmatch/translate.c
index 175b04eef5f1c6..e0ddce101338ad 100644
--- a/Modules/_fnmatch/translate.c
+++ b/Modules/_fnmatch/translate.c
@@ -116,16 +116,16 @@ _Py_fnmatch_translate(PyObject *module, PyObject *pattern)
     PyObject *pattern_str_find_meth = NULL; // pattern.find()
     // ---- def local objects -------------------------------------------------
     wildcard_indices = PyList_New(0);
-    CHECK_NON_NULL_OR_ABORT(wildcard_indices);
+    CHECK_NOT_NULL_OR_ABORT(wildcard_indices);
     // The Python implementation always takes queries re.escape() and re.sub()
     // inside translate() and thus we should at least allow external users to
     // mock those functions (thus, we cannot cache them in the module's state).
     re_escape_func = PyObject_GetAttr(state->re_module, &_Py_ID(escape));
-    CHECK_NON_NULL_OR_ABORT(re_escape_func);
+    CHECK_NOT_NULL_OR_ABORT(re_escape_func);
     setops_re_subfn = get_setops_re_sub_method(state);
-    CHECK_NON_NULL_OR_ABORT(setops_re_subfn);
+    CHECK_NOT_NULL_OR_ABORT(setops_re_subfn);
     pattern_str_find_meth = PyObject_GetAttr(pattern, &_Py_ID(find));
-    CHECK_NON_NULL_OR_ABORT(pattern_str_find_meth);
+    CHECK_NOT_NULL_OR_ABORT(pattern_str_find_meth);
     // ------------------------------------------------------------------------
     const int pattern_kind = PyUnicode_KIND(pattern);
     const void *const pattern_data = PyUnicode_DATA(pattern);
@@ -144,7 +144,9 @@ _Py_fnmatch_translate(PyObject *module, PyObject *pattern)
             Py_ssize_t t = escape_block(writer, pattern,        \
                                         escstart, (ESCSTOP),    \
                                         re_escape_func);        \
-            CHECK_INTVAL_OR_ABORT(t);                           \
+            if (t < 0) {                                        \
+                goto abort;                                     \
+            }                                                   \
             written += t;                                       \
             escstart = -1;                                      \
         }                                                       \
@@ -164,10 +166,10 @@ _Py_fnmatch_translate(PyObject *module, PyObject *pattern)
                 for (; i < maxind && READ_CHAR(i) == '*'; ++i);
                 // store the position of the wildcard
                 PyObject *wildcard_index = PyLong_FromSsize_t(written++);
-                CHECK_NON_NULL_OR_ABORT(wildcard_index);
+                CHECK_NOT_NULL_OR_ABORT(wildcard_index);
                 int rc = PyList_Append(wildcard_indices, wildcard_index);
                 Py_DECREF(wildcard_index);
-                CHECK_INTVAL_OR_ABORT(rc);
+                CHECK_RET_CODE_OR_ABORT(rc);
                 break;
             }
             case '?': {
@@ -198,7 +200,7 @@ _Py_fnmatch_translate(PyObject *module, PyObject *pattern)
                     PyObject *expr = NULL;
                     if (pos == -1) {
                         PyObject *tmp = PyUnicode_Substring(pattern, i, j);
-                        CHECK_NON_NULL_OR_ABORT(tmp);
+                        CHECK_NOT_NULL_OR_ABORT(tmp);
                         expr = BACKSLASH_REPLACE(state, tmp);
                         Py_DECREF(tmp);
                     }
@@ -206,11 +208,13 @@ _Py_fnmatch_translate(PyObject *module, PyObject *pattern)
                         expr = translate_expression(state, pattern, i, j,
                                                     pattern_str_find_meth);
                     }
-                    CHECK_NON_NULL_OR_ABORT(expr);
+                    CHECK_NOT_NULL_OR_ABORT(expr);
                     Py_ssize_t expr_len = write_expression(state, writer, expr,
                                                            setops_re_subfn);
                     Py_DECREF(expr);
-                    CHECK_INTVAL_OR_ABORT(expr_len);
+                    if (expr_len < 0) {
+                        goto abort;
+                    }
                     written += expr_len;
                     i = j + 1;  // jump to the character after ']'
                     break;      // explicit early break for clarity
@@ -264,14 +268,14 @@ escape_block(PyUnicodeWriter *writer,
     }
 #endif
     PyObject *str = PyUnicode_Substring(pattern, start, stop);
-    CHECK_NON_NULL_OR_ABORT(str);
+    CHECK_NOT_NULL_OR_ABORT(str);
     PyObject *escaped = PyObject_CallOneArg(re_escape_func, str);
     Py_DECREF(str);
-    CHECK_NON_NULL_OR_ABORT(escaped);
+    CHECK_NOT_NULL_OR_ABORT(escaped);
     Py_ssize_t written = PyUnicode_GET_LENGTH(escaped);
     int rc = _WRITE_STRING(writer, escaped);
     Py_DECREF(escaped);
-    CHECK_INTVAL_OR_ABORT(rc);
+    CHECK_RET_CODE_OR_ABORT(rc);
     return written;
 abort:
     return -1;
@@ -294,18 +298,18 @@ split_expression(fnmatchmodule_state *state,
     PyObject *hyphen = state->hyphen_str;
     // ---- def local objects -------------------------------------------------
     chunks = PyList_New(0);
-    CHECK_NON_NULL_OR_ABORT(chunks);
+    CHECK_NOT_NULL_OR_ABORT(chunks);
     maxind = PyLong_FromSsize_t(stop);
-    CHECK_NON_NULL_OR_ABORT(maxind);
+    CHECK_NOT_NULL_OR_ABORT(maxind);
     // ---- def local macros --------------------------------------------------
     /* add pattern[START:STOP] to the list of chunks */
 #define ADD_CHUNK(START, STOP)                                              \
     do {                                                                    \
         PyObject *chunk = PyUnicode_Substring(pattern, (START), (STOP));    \
-        CHECK_NON_NULL_OR_ABORT(chunk);                                     \
+        CHECK_NOT_NULL_OR_ABORT(chunk);                                     \
         int rc = PyList_Append(chunks, chunk);                              \
         Py_DECREF(chunk);                                                   \
-        CHECK_INTVAL_OR_ABORT(rc);                                          \
+        CHECK_RET_CODE_OR_ABORT(rc);                                        \
     } while (0)
     // ------------------------------------------------------------------------
     Py_ssize_t chunk_start = start;
@@ -315,7 +319,7 @@ split_expression(fnmatchmodule_state *state,
     while (ind < stop) {
         PyObject *p_chunk_stop = PyObject_CallFunction(str_find_func, "OnO",
                                                        hyphen, ind, maxind);
-        CHECK_NON_NULL_OR_ABORT(p_chunk_stop);
+        CHECK_NOT_NULL_OR_ABORT(p_chunk_stop);
         Py_ssize_t chunk_stop = PyLong_AsSsize_t(p_chunk_stop);
         Py_DECREF(p_chunk_stop);
         if (chunk_stop < 0) {
@@ -396,14 +400,14 @@ simplify_expression(PyObject *chunks)
                 assert(c1len > 1);
                 assert(c2len > 1);
                 PyUnicodeWriter *writer = PyUnicodeWriter_Create(olen);
-                CHECK_NON_NULL_OR_ABORT(writer);
+                CHECK_NOT_NULL_OR_ABORT(writer);
                 // all but the last character in the first chunk
-                if (_WRITE_BLOCK(writer, c1, 0, c1len - 1) < 0) {
+                if (_WRITE_SUBSTRING(writer, c1, 0, c1len - 1) < 0) {
                     PyUnicodeWriter_Discard(writer);
                     goto abort;
                 }
                 // all but the first character in the second chunk
-                if (_WRITE_BLOCK(writer, c2, 1, c2len) < 0) {
+                if (_WRITE_SUBSTRING(writer, c2, 1, c2len) < 0) {
                     PyUnicodeWriter_Discard(writer);
                     goto abort;
                 }
@@ -416,7 +420,7 @@ simplify_expression(PyObject *chunks)
                 Py_XDECREF(str);
                 goto abort;
             }
-            CHECK_INTVAL_OR_ABORT(PySequence_DelItem(chunks, k));
+            CHECK_RET_CODE_OR_ABORT(PySequence_DelItem(chunks, k));
         }
     }
     return 0;
@@ -437,7 +441,7 @@ escape_expression(fnmatchmodule_state *state, PyObject *chunks)
         PyObject *s0 = PyList_GET_ITEM(chunks, c);
         assert(s0 != NULL);
         PyObject *s1 = BACKSLASH_REPLACE(state, s0);
-        CHECK_NON_NULL_OR_ABORT(s1);
+        CHECK_NOT_NULL_OR_ABORT(s1);
         PyObject *s2 = HYPHEN_REPLACE(state, s1);
         Py_DECREF(s1);
         // PyList_SetItem() does not create a new reference on 's2'
@@ -460,11 +464,11 @@ translate_expression(fnmatchmodule_state *state,
 {
     PyObject *chunks = split_expression(state, pattern, start, stop,
                                         pattern_str_find_meth);
-    CHECK_NON_NULL_OR_ABORT(chunks);
+    CHECK_NOT_NULL_OR_ABORT(chunks);
     // remove empty ranges
-    CHECK_INTVAL_OR_ABORT(simplify_expression(chunks));
+    CHECK_RET_CODE_OR_ABORT(simplify_expression(chunks));
     // escape backslashes and set differences
-    CHECK_INTVAL_OR_ABORT(escape_expression(state, chunks));
+    CHECK_RET_CODE_OR_ABORT(escape_expression(state, chunks));
     PyObject *res = PyUnicode_Join(state->hyphen_str, chunks);
     Py_DECREF(chunks);
     return res;
@@ -495,11 +499,11 @@ write_expression(fnmatchmodule_state *state,
     WRITE_CHAR_OR_ABORT(writer, '[');
     // escape set operations as late as possible
     safe_expression = SETOPS_REPLACE(state, expression, setops_re_sub_meth);
-    CHECK_NON_NULL_OR_ABORT(safe_expression);
+    CHECK_NOT_NULL_OR_ABORT(safe_expression);
     switch (token) {
         case '!': {
             WRITE_CHAR_OR_ABORT(writer, '^'); // replace '!' by '^'
-            WRITE_BLOCK_OR_ABORT(writer, safe_expression, 1, grouplen);
+            WRITE_SUBSTRING_OR_ABORT(writer, safe_expression, 1, grouplen);
             break;
         }
         case '^':
@@ -568,7 +572,7 @@ process_wildcards(PyObject *pattern, PyObject *indices)
         Py_ssize_t i = 0, j = -1;
         // process the optional PREFIX
         LOAD_WILDCARD_INDEX(j, 0);
-        WRITE_BLOCK_OR_ABORT(writer, pattern, 0, j);
+        WRITE_SUBSTRING_OR_ABORT(writer, pattern, i, j);
         i = j + 1;
         for (Py_ssize_t k = 1; k < m; ++k) {
             // process the (* INNER) groups
@@ -576,13 +580,13 @@ process_wildcards(PyObject *pattern, PyObject *indices)
             assert(i < j);
             // write the atomic RE group '(?>.*?' + INNER + ')'
             WRITE_ASCII_OR_ABORT(writer, "(?>.*?", 6);
-            WRITE_BLOCK_OR_ABORT(writer, pattern, i, j);
+            WRITE_SUBSTRING_OR_ABORT(writer, pattern, i, j);
             WRITE_CHAR_OR_ABORT(writer, ')');
             i = j + 1;
         }
         // handle the (*) [OUTER] part
         WRITE_ASCII_OR_ABORT(writer, ".*", 2);
-        WRITE_BLOCK_OR_ABORT(writer, pattern, i, n);
+        WRITE_SUBSTRING_OR_ABORT(writer, pattern, i, n);
     }
     WRITE_ASCII_OR_ABORT(writer, ")\\Z", 3);
     PyObject *res = PyUnicodeWriter_Finish(writer);

From db756262f75e0b7a4ab0d853850f6078677f442f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?=
 <10796600+picnixz@users.noreply.github.com>
Date: Sun, 18 Aug 2024 12:42:01 +0200
Subject: [PATCH 85/97] rename `COMPILED_CACHE_SIZE` to `LRU_CACHE_SIZE`

---
 Modules/_fnmatch/_fnmatchmodule.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/Modules/_fnmatch/_fnmatchmodule.c b/Modules/_fnmatch/_fnmatchmodule.c
index a050616217c71e..7aa4cae265e77f 100644
--- a/Modules/_fnmatch/_fnmatchmodule.c
+++ b/Modules/_fnmatch/_fnmatchmodule.c
@@ -33,7 +33,7 @@
 
 #include "clinic/_fnmatchmodule.c.h"
 
-#define COMPILED_CACHE_SIZE     32768
+#define LRU_CACHE_SIZE          32768
 #define INVALID_PATTERN_TYPE    "pattern must be a string or a bytes object"
 
 // ==== Cached translation unit ===============================================
@@ -84,7 +84,7 @@ fnmatchmodule_load_translator(PyObject *module, fnmatchmodule_state *st)
 {
     // make sure that this function is called once
     assert(st->translator == NULL);
-    PyObject *maxsize = PyLong_FromLong(COMPILED_CACHE_SIZE);
+    PyObject *maxsize = PyLong_FromLong(LRU_CACHE_SIZE);
     if (maxsize == NULL) {
         return -1;
     }

From df76ba3dcd5cb6d84ba4f06f45fb663ea1e33bf8 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?=
 <10796600+picnixz@users.noreply.github.com>
Date: Sun, 18 Aug 2024 12:58:18 +0200
Subject: [PATCH 86/97] add LRU cache for `re.escape`

---
 Modules/_fnmatch/_fnmatchmodule.c | 41 +++++++++++++++++++++++++++++++
 Modules/_fnmatch/util.h           |  1 +
 2 files changed, 42 insertions(+)

diff --git a/Modules/_fnmatch/_fnmatchmodule.c b/Modules/_fnmatch/_fnmatchmodule.c
index 7aa4cae265e77f..b6170c8719c277 100644
--- a/Modules/_fnmatch/_fnmatchmodule.c
+++ b/Modules/_fnmatch/_fnmatchmodule.c
@@ -113,6 +113,42 @@ fnmatchmodule_load_translator(PyObject *module, fnmatchmodule_state *st)
     return 0;
 }
 
+// ==== Cached re.escape() unit ===============================================
+
+/* Create an LRU-cached function for re.escape(). */
+static int
+fnmatchmodule_load_escapefunc(PyObject *Py_UNUSED(module),
+                              fnmatchmodule_state *st)
+{
+    // make sure that this function is called once
+    assert(st->re_escape == NULL);
+    PyObject *maxsize = PyLong_FromLong(LRU_CACHE_SIZE);
+    if (maxsize == NULL) {
+        return -1;
+    }
+    PyObject *cache = _PyImport_GetModuleAttrString("functools", "lru_cache");
+    if (cache == NULL) {
+        Py_DECREF(maxsize);
+        return -1;
+    }
+    PyObject *wrapper = PyObject_CallOneArg(cache, maxsize);
+    Py_DECREF(maxsize);
+    Py_DECREF(cache);
+    if (wrapper == NULL) {
+        return -1;
+    }
+    assert(st->re_module != NULL);
+    PyObject *wrapped = PyObject_GetAttr(st->re_module, &_Py_ID(escape));
+    // the reference held by 'st->re_escape' is released upon module cleanup
+    st->re_escape = PyObject_CallOneArg(wrapper, wrapped);
+    Py_DECREF(wrapped);
+    Py_DECREF(wrapper);
+    if (st->re_escape == NULL) {
+        return -1;
+    }
+    return 0;
+}
+
 // ==== Module data getters ===================================================
 
 static inline PyObject * /* reference to re.compile(pattern).match() */
@@ -173,6 +209,9 @@ fnmatchmodule_exec(PyObject *module)
     if (fnmatchmodule_load_translator(module, st) < 0) {
         return -1;
     }
+    if (fnmatchmodule_load_escapefunc(module, st) < 0) {
+        return -1;
+    }
     INTERN_STRING(st, hyphen_str, "-");
     INTERN_STRING(st, hyphen_esc_str, "\\-");
     INTERN_STRING(st, backslash_str, "\\");
@@ -195,6 +234,7 @@ fnmatchmodule_traverse(PyObject *m, visitproc visit, void *arg)
     Py_VISIT(st->backslash_str);
     Py_VISIT(st->hyphen_esc_str);
     Py_VISIT(st->hyphen_str);
+    Py_VISIT(st->re_escape);
     Py_VISIT(st->translator);
     Py_VISIT(st->re_module);
     Py_VISIT(st->posixpath_module);
@@ -212,6 +252,7 @@ fnmatchmodule_clear(PyObject *m)
     Py_CLEAR(st->backslash_str);
     Py_CLEAR(st->hyphen_esc_str);
     Py_CLEAR(st->hyphen_str);
+    Py_CLEAR(st->re_escape);
     Py_CLEAR(st->translator);
     Py_CLEAR(st->re_module);
     Py_CLEAR(st->posixpath_module);
diff --git a/Modules/_fnmatch/util.h b/Modules/_fnmatch/util.h
index 379fed066de3d8..36e21bc6f5f09e 100644
--- a/Modules/_fnmatch/util.h
+++ b/Modules/_fnmatch/util.h
@@ -13,6 +13,7 @@ typedef struct {
     PyObject *re_module;            // import re
 
     PyObject *translator;           // LRU-cached translation unit
+    PyObject *re_escape;            // LRU-cached re.escape() function
 
     // strings used by translate.c
     PyObject *hyphen_str;           // hyphen '-'

From ac46e2cbb656440c31461da4d063c05bd041856a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?=
 <10796600+picnixz@users.noreply.github.com>
Date: Mon, 19 Aug 2024 15:36:28 +0200
Subject: [PATCH 87/97] cache `re.compile(...).sub` for set operations tokens

---
 Modules/_fnmatch/_fnmatchmodule.c | 36 ++++++++++++++++++++++++++++---
 Modules/_fnmatch/util.h           |  2 +-
 2 files changed, 34 insertions(+), 4 deletions(-)

diff --git a/Modules/_fnmatch/_fnmatchmodule.c b/Modules/_fnmatch/_fnmatchmodule.c
index b6170c8719c277..bceca5e73cff6d 100644
--- a/Modules/_fnmatch/_fnmatchmodule.c
+++ b/Modules/_fnmatch/_fnmatchmodule.c
@@ -149,6 +149,34 @@ fnmatchmodule_load_escapefunc(PyObject *Py_UNUSED(module),
     return 0;
 }
 
+// ==== Cached re.sub() unit for set operation tokens =========================
+
+/* Store the bound method re.compile('([&~|])').sub in the module state. */
+static int
+fnmatchmodule_load_setops_re_sub(PyObject *Py_UNUSED(module),
+                                 fnmatchmodule_state *st)
+{
+    // make sure that this function is called once
+    assert(st->setops_re_subfn == NULL);
+    PyObject *pattern = PyUnicode_FromString("([&~|])");
+    if (pattern == NULL) {
+        return -1;
+    }
+    PyObject *compiled = PyObject_CallMethodOneArg(st->re_module,
+                                                   &_Py_ID(compile),
+                                                   pattern);
+    Py_DECREF(pattern);
+    if (compiled == NULL) {
+        return -1;
+    }
+    st->setops_re_subfn = PyObject_GetAttr(compiled, &_Py_ID(sub));
+    Py_DECREF(compiled);
+    if (st->setops_re_subfn == NULL) {
+        return -1;
+    }
+    return 0;
+}
+
 // ==== Module data getters ===================================================
 
 static inline PyObject * /* reference to re.compile(pattern).match() */
@@ -216,7 +244,9 @@ fnmatchmodule_exec(PyObject *module)
     INTERN_STRING(st, hyphen_esc_str, "\\-");
     INTERN_STRING(st, backslash_str, "\\");
     INTERN_STRING(st, backslash_esc_str, "\\\\");
-    INTERN_STRING(st, setops_str, "([&~|])");
+    if (fnmatchmodule_load_setops_re_sub(module, st) < 0) {
+        return -1;
+    }
     INTERN_STRING(st, setops_repl_str, "\\\\\\1");
     return 0;
 }
@@ -229,7 +259,7 @@ fnmatchmodule_traverse(PyObject *m, visitproc visit, void *arg)
 {
     fnmatchmodule_state *st = get_fnmatchmodule_state(m);
     Py_VISIT(st->setops_repl_str);
-    Py_VISIT(st->setops_str);
+    Py_VISIT(st->setops_re_subfn);
     Py_VISIT(st->backslash_esc_str);
     Py_VISIT(st->backslash_str);
     Py_VISIT(st->hyphen_esc_str);
@@ -247,7 +277,7 @@ fnmatchmodule_clear(PyObject *m)
 {
     fnmatchmodule_state *st = get_fnmatchmodule_state(m);
     Py_CLEAR(st->setops_repl_str);
-    Py_CLEAR(st->setops_str);
+    Py_CLEAR(st->setops_re_subfn);
     Py_CLEAR(st->backslash_esc_str);
     Py_CLEAR(st->backslash_str);
     Py_CLEAR(st->hyphen_esc_str);
diff --git a/Modules/_fnmatch/util.h b/Modules/_fnmatch/util.h
index 36e21bc6f5f09e..ac5c4362d78a4e 100644
--- a/Modules/_fnmatch/util.h
+++ b/Modules/_fnmatch/util.h
@@ -23,7 +23,7 @@ typedef struct {
     PyObject *backslash_esc_str;    // escaped backslash '\\\\'
 
     /* set operation tokens (&&, ~~ and ||) are not supported in regex */
-    PyObject *setops_str;           // set operation tokens '([&~|])'
+    PyObject *setops_re_subfn;      // cached re.compile('([&~|])').sub()
     PyObject *setops_repl_str;      // replacement pattern '\\\\\\1'
 } fnmatchmodule_state;
 

From fa04ea1be6127e57877da84ae38b1f803dd69c43 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?=
 <10796600+picnixz@users.noreply.github.com>
Date: Sun, 18 Aug 2024 13:30:27 +0200
Subject: [PATCH 88/97] use module-state cached `re.escape` and set-operations `sub` in translate.c

---
 Modules/_fnmatch/translate.c | 117 +++++++++++++----------------------
 1 file changed, 42 insertions(+), 75 deletions(-)

diff --git a/Modules/_fnmatch/translate.c b/Modules/_fnmatch/translate.c
index e0ddce101338ad..b6f0304664c610 100644
--- a/Modules/_fnmatch/translate.c
+++ b/Modules/_fnmatch/translate.c
@@ -15,16 +15,12 @@
 // ==== Helper declarations ===================================================
 
 /*
- * Write re.escape(pattern[start:stop]).
+ * Write re.escape(ch).
  *
  * This returns the number of written characters, or -1 if an error occurred.
- *
- * @pre     0 <= start < stop <= len(pattern)
  */
-static inline Py_ssize_t
-escape_block(PyUnicodeWriter *writer,
-             PyObject *pattern, Py_ssize_t start, Py_ssize_t stop,
-             PyObject *re_escape_func);
+static Py_ssize_t
+escape_char(fnmatchmodule_state *state, PyUnicodeWriter *writer, Py_UCS4 ch);
 
 /*
  * Construct a regular expression out of a UNIX-style expression.
@@ -56,8 +52,7 @@ translate_expression(fnmatchmodule_state *state,
  */
 static Py_ssize_t
 write_expression(fnmatchmodule_state *state,
-                 PyUnicodeWriter *writer, PyObject *expression,
-                 PyObject *setops_re_sub_meth);
+                 PyUnicodeWriter *writer, PyObject *expression);
 
 /*
  * Build the final regular expression by processing the wildcards.
@@ -69,17 +64,6 @@ process_wildcards(PyObject *pattern, PyObject *indices);
 
 // ==== API implementation ====================================================
 
-static inline PyObject *
-get_setops_re_sub_method(fnmatchmodule_state *state)
-{
-    PyObject *compiled = PyObject_CallMethodOneArg(state->re_module,
-                                                   &_Py_ID(compile),
-                                                   state->setops_str);
-    PyObject *method = PyObject_GetAttr(compiled, &_Py_ID(sub));
-    Py_DECREF(compiled);
-    return method;
-}
-
 PyObject *
 _Py_fnmatch_translate(PyObject *module, PyObject *pattern)
 {
@@ -110,20 +94,11 @@ _Py_fnmatch_translate(PyObject *module, PyObject *pattern)
     // ---- decl local objects ------------------------------------------------
     // list containing the indices where '*' has a special meaning
     PyObject *wildcard_indices = NULL;
-    // cached functions (cache is local to the call)
-    PyObject *re_escape_func = NULL;        // re.escape()
-    PyObject *setops_re_subfn = NULL;       // re.compile('([&~|])').sub()
+    // call-level cached functions
     PyObject *pattern_str_find_meth = NULL; // pattern.find()
     // ---- def local objects -------------------------------------------------
     wildcard_indices = PyList_New(0);
     CHECK_NOT_NULL_OR_ABORT(wildcard_indices);
-    // The Python implementation always takes queries re.escape() and re.sub()
-    // inside translate() and thus we should at least allow external users to
-    // mock those functions (thus, we cannot cache them in the module's state).
-    re_escape_func = PyObject_GetAttr(state->re_module, &_Py_ID(escape));
-    CHECK_NOT_NULL_OR_ABORT(re_escape_func);
-    setops_re_subfn = get_setops_re_sub_method(state);
-    CHECK_NOT_NULL_OR_ABORT(setops_re_subfn);
     pattern_str_find_meth = PyObject_GetAttr(pattern, &_Py_ID(find));
     CHECK_NOT_NULL_OR_ABORT(pattern_str_find_meth);
     // ------------------------------------------------------------------------
@@ -138,28 +113,13 @@ _Py_fnmatch_translate(PyObject *module, PyObject *pattern)
             ++IND;                                          \
         }                                                   \
     } while (0)
-#define WRITE_PENDING(ESCSTOP)                                  \
-    do {                                                        \
-        if (escstart != -1) {                                   \
-            Py_ssize_t t = escape_block(writer, pattern,        \
-                                        escstart, (ESCSTOP),    \
-                                        re_escape_func);        \
-            if (t < 0) {                                        \
-                goto abort;                                     \
-            }                                                   \
-            written += t;                                       \
-            escstart = -1;                                      \
-        }                                                       \
-    } while (0)
     // ------------------------------------------------------------------------
     Py_ssize_t i = 0;                       // current index
     Py_ssize_t written = 0;                 // number of characters written
-    Py_ssize_t escstart = -1, escstop = -1; // start/stop escaping indices
-    while ((escstop = i) < maxind) {
+    while (i < maxind) {
         Py_UCS4 chr = READ_CHAR(i++);
         switch (chr) {
             case '*': {
-                WRITE_PENDING(escstop);
                 // translate wildcard '*' (fnmatch) into optional '.' (regex)
                 WRITE_CHAR_OR_ABORT(writer, '*');
                 // skip duplicated '*'
@@ -173,15 +133,13 @@ _Py_fnmatch_translate(PyObject *module, PyObject *pattern)
                 break;
             }
             case '?': {
-                WRITE_PENDING(escstop);
                 // translate optional '?' (fnmatch) into optional '.' (regex)
                 WRITE_CHAR_OR_ABORT(writer, '.');
                 ++written; // increase the expected result's length
                 break;
             }
             case '[': {
-                WRITE_PENDING(escstop);
-                assert(READ_CHAR(escstop) == '[');
+                assert(READ_CHAR(i - 1) == '[');
                 Py_ssize_t j = i;
                 ADVANCE_IF_CHAR_IS('!', j, maxind);             // [!
                 ADVANCE_IF_CHAR_IS(']', j, maxind);             // [!] or []
@@ -209,8 +167,7 @@ _Py_fnmatch_translate(PyObject *module, PyObject *pattern)
                                                     pattern_str_find_meth);
                     }
                     CHECK_NOT_NULL_OR_ABORT(expr);
-                    Py_ssize_t expr_len = write_expression(state, writer, expr,
-                                                           setops_re_subfn);
+                    Py_ssize_t expr_len = write_expression(state, writer, expr);
                     Py_DECREF(expr);
                     if (expr_len < 0) {
                         goto abort;
@@ -221,21 +178,16 @@ _Py_fnmatch_translate(PyObject *module, PyObject *pattern)
                 }
             }
             default: {
-                if (escstart == -1) {
-                    assert(i >= 1);
-                    escstart = i - 1;
-                }
+                Py_ssize_t t = escape_char(state, writer, chr);
+                CHECK_RET_CODE_OR_ABORT(t);
+                written += t;
                 break;
             }
         }
     }
-    WRITE_PENDING(maxind);
-#undef WRITE_PENDING
 #undef ADVANCE_IF_CHAR_IS
 #undef READ_CHAR
     Py_DECREF(pattern_str_find_meth);
-    Py_DECREF(setops_re_subfn);
-    Py_DECREF(re_escape_func);
     PyObject *translated = PyUnicodeWriter_Finish(writer);
     if (translated == NULL) {
         Py_DECREF(wildcard_indices);
@@ -247,8 +199,6 @@ _Py_fnmatch_translate(PyObject *module, PyObject *pattern)
     return res;
 abort:
     Py_XDECREF(pattern_str_find_meth);
-    Py_XDECREF(setops_re_subfn);
-    Py_XDECREF(re_escape_func);
     Py_XDECREF(wildcard_indices);
     PyUnicodeWriter_Discard(writer);
     return NULL;
@@ -256,20 +206,38 @@ _Py_fnmatch_translate(PyObject *module, PyObject *pattern)
 
 // ==== Helper implementations ================================================
 
-static inline Py_ssize_t
-escape_block(PyUnicodeWriter *writer,
-             PyObject *pattern, Py_ssize_t start, Py_ssize_t stop,
-             PyObject *re_escape_func)
+/* taken from unicodeobject.c */
+static inline PyObject *
+unicode_char(Py_UCS4 ch)
 {
-#ifdef Py_DEBUG
-    if (start < 0 || start >= stop || stop > PyUnicode_GET_LENGTH(pattern)) {
-        PyErr_BadInternalCall();
-        return -1;
+#define MAX_UNICODE 0x10ffff
+    assert(ch <= MAX_UNICODE);
+#undef MAX_UNICODE
+    if (ch < 256) {
+        return _Py_LATIN1_CHR(ch);
     }
-#endif
-    PyObject *str = PyUnicode_Substring(pattern, start, stop);
+    PyObject *unicode = PyUnicode_New(1, ch);
+    if (unicode == NULL) {
+        return NULL;
+    }
+    assert(PyUnicode_KIND(unicode) != PyUnicode_1BYTE_KIND);
+    if (PyUnicode_KIND(unicode) == PyUnicode_2BYTE_KIND) {
+        PyUnicode_2BYTE_DATA(unicode)[0] = (Py_UCS2)ch;
+    }
+    else {
+        assert(PyUnicode_KIND(unicode) == PyUnicode_4BYTE_KIND);
+        PyUnicode_4BYTE_DATA(unicode)[0] = ch;
+    }
+    assert(_PyUnicode_CheckConsistency(unicode, 1));
+    return unicode;
+}
+
+static Py_ssize_t
+escape_char(fnmatchmodule_state *state, PyUnicodeWriter *writer, Py_UCS4 ch)
+{
+    PyObject *str = unicode_char(ch);
     CHECK_NOT_NULL_OR_ABORT(str);
-    PyObject *escaped = PyObject_CallOneArg(re_escape_func, str);
+    PyObject *escaped = PyObject_CallOneArg(state->re_escape, str);
     Py_DECREF(str);
     CHECK_NOT_NULL_OR_ABORT(escaped);
     Py_ssize_t written = PyUnicode_GET_LENGTH(escaped);
@@ -479,8 +447,7 @@ translate_expression(fnmatchmodule_state *state,
 
 static Py_ssize_t
 write_expression(fnmatchmodule_state *state,
-                 PyUnicodeWriter *writer, PyObject *expression,
-                 PyObject *setops_re_sub_meth)
+                 PyUnicodeWriter *writer, PyObject *expression)
 {
     PyObject *safe_expression = NULL;  // for the 'goto abort' statements
     Py_ssize_t grouplen = PyUnicode_GET_LENGTH(expression);
@@ -498,7 +465,7 @@ write_expression(fnmatchmodule_state *state,
     Py_ssize_t extra = 2; // '[' and ']'
     WRITE_CHAR_OR_ABORT(writer, '[');
     // escape set operations as late as possible
-    safe_expression = SETOPS_REPLACE(state, expression, setops_re_sub_meth);
+    safe_expression = SETOPS_REPLACE(state, expression, state->setops_re_subfn);
     CHECK_NOT_NULL_OR_ABORT(safe_expression);
     switch (token) {
         case '!': {

From 0b4ccede1f7b9ddf5328090113785cd9a98507b9 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?=
 <10796600+picnixz@users.noreply.github.com>
Date: Mon, 19 Aug 2024 16:20:19 +0200
Subject: [PATCH 89/97] use macros for abort-flow

---
 Modules/_fnmatch/_fnmatchmodule.c | 87 ++++++++++++-------------------
 1 file changed, 34 insertions(+), 53 deletions(-)

diff --git a/Modules/_fnmatch/_fnmatchmodule.c b/Modules/_fnmatch/_fnmatchmodule.c
index bceca5e73cff6d..48330d78249ea2 100644
--- a/Modules/_fnmatch/_fnmatchmodule.c
+++ b/Modules/_fnmatch/_fnmatchmodule.c
@@ -27,6 +27,7 @@
 #  define Py_BUILD_CORE_MODULE 1
 #endif
 
+#include "macros.h"
 #include "util.h"                       // prototypes
 
 #include "pycore_runtime.h"             // for _Py_ID()
@@ -159,22 +160,18 @@ fnmatchmodule_load_setops_re_sub(PyObject *Py_UNUSED(module),
     // make sure that this function is called once
     assert(st->setops_re_subfn == NULL);
     PyObject *pattern = PyUnicode_FromString("([&~|])");
-    if (pattern == NULL) {
-        return -1;
-    }
+    CHECK_NOT_NULL_OR_ABORT(pattern);
     PyObject *compiled = PyObject_CallMethodOneArg(st->re_module,
                                                    &_Py_ID(compile),
                                                    pattern);
     Py_DECREF(pattern);
-    if (compiled == NULL) {
-        return -1;
-    }
+    CHECK_NOT_NULL_OR_ABORT(compiled);
     st->setops_re_subfn = PyObject_GetAttr(compiled, &_Py_ID(sub));
     Py_DECREF(compiled);
-    if (st->setops_re_subfn == NULL) {
-        return -1;
-    }
+    CHECK_NOT_NULL_OR_ABORT(st->setops_re_subfn);
     return 0;
+abort:
+    return -1;
 }
 
 // ==== Module data getters ===================================================
@@ -205,54 +202,45 @@ get_platform_normcase_function(PyObject *module, bool *isposix)
 
 // ==== Module state functions ================================================
 
-/* Import a named module and store it in 'STATE->ATTRIBUTE'. */
+static int
+fnmatchmodule_exec(PyObject *module)
+{
+    // ---- def local macros --------------------------------------------------
+    /* Import a named module and store it in 'STATE->ATTRIBUTE'. */
 #define IMPORT_MODULE(STATE, ATTRIBUTE, MODULE_NAME)                \
     do {                                                            \
         /* make sure that the attribute is initialized once */      \
         assert(STATE->ATTRIBUTE == NULL);                           \
         STATE->ATTRIBUTE = PyImport_ImportModule((MODULE_NAME));    \
-        if (STATE->ATTRIBUTE == NULL) {                             \
-            return -1;                                              \
-        }                                                           \
+        CHECK_NOT_NULL_OR_ABORT(STATE->ATTRIBUTE);                  \
     } while (0)
-
-/* Intern a literal STRING and store it in 'STATE->ATTRIBUTE'. */
+    /* Intern a literal STRING and store it in 'STATE->ATTRIBUTE'. */
 #define INTERN_STRING(STATE, ATTRIBUTE, STRING)                     \
     do {                                                            \
         /* make sure that the attribute is initialized once */      \
         assert(STATE->ATTRIBUTE == NULL);                           \
         STATE->ATTRIBUTE = PyUnicode_InternFromString((STRING));    \
-        if (STATE->ATTRIBUTE == NULL) {                             \
-            return -1;                                              \
-        }                                                           \
+        CHECK_NOT_NULL_OR_ABORT(STATE->ATTRIBUTE);                  \
     } while (0)
-
-static int
-fnmatchmodule_exec(PyObject *module)
-{
+    // ------------------------------------------------------------------------
     fnmatchmodule_state *st = get_fnmatchmodule_state(module);
     IMPORT_MODULE(st, os_module, "os");
     IMPORT_MODULE(st, posixpath_module, "posixpath");
     IMPORT_MODULE(st, re_module, "re");
-    if (fnmatchmodule_load_translator(module, st) < 0) {
-        return -1;
-    }
-    if (fnmatchmodule_load_escapefunc(module, st) < 0) {
-        return -1;
-    }
+    CHECK_RET_CODE_OR_ABORT(fnmatchmodule_load_translator(module, st));
+    CHECK_RET_CODE_OR_ABORT(fnmatchmodule_load_escapefunc(module, st));
     INTERN_STRING(st, hyphen_str, "-");
     INTERN_STRING(st, hyphen_esc_str, "\\-");
     INTERN_STRING(st, backslash_str, "\\");
     INTERN_STRING(st, backslash_esc_str, "\\\\");
-    if (fnmatchmodule_load_setops_re_sub(module, st) < 0) {
-        return -1;
-    }
+    CHECK_RET_CODE_OR_ABORT(fnmatchmodule_load_setops_re_sub(module, st));
     INTERN_STRING(st, setops_repl_str, "\\\\\\1");
     return 0;
-}
-
+abort:
+    return -1;
 #undef INTERN_STRING
 #undef IMPORT_MODULE
+}
 
 static int
 fnmatchmodule_traverse(PyObject *m, visitproc visit, void *arg)
@@ -316,27 +304,22 @@ fnmatch_filter_impl(PyObject *module, PyObject *names, PyObject *pattern)
 /*[clinic end generated code: output=1a68530a2e3cf7d0 input=7ac729daad3b1404]*/
 {
     bool isposix = 0;
-    PyObject *normcase = get_platform_normcase_function(module, &isposix);
-    if (normcase == NULL) {
-        return NULL;
-    }
+    PyObject *normcase = NULL;  // for the 'goto abort' statements
+    normcase = get_platform_normcase_function(module, &isposix);
+    CHECK_NOT_NULL_OR_ABORT(normcase);
     PyObject *normalized_pattern = PyObject_CallOneArg(normcase, pattern);
-    if (normalized_pattern == NULL) {
-        Py_DECREF(normcase);
-        return NULL;
-    }
+    CHECK_NOT_NULL_OR_ABORT(normalized_pattern);
     // the matcher is cached with respect to the *normalized* pattern
     PyObject *matcher = get_matcher_function(module, normalized_pattern);
     Py_DECREF(normalized_pattern);
-    if (matcher == NULL) {
-        Py_DECREF(normcase);
-        return NULL;
-    }
-    PyObject *normalizer = isposix ? NULL : normcase;
-    PyObject *filtered = _Py_fnmatch_filter(matcher, names, normalizer);
+    CHECK_NOT_NULL_OR_ABORT(matcher);
+    PyObject *filtered = _Py_fnmatch_filter(matcher, names, normcase);
     Py_DECREF(matcher);
     Py_DECREF(normcase);
     return filtered;
+abort:
+    Py_XDECREF(normcase);
+    return NULL;
 }
 
 /*[clinic input]
@@ -437,14 +420,10 @@ fnmatch_translate_impl(PyObject *module, PyObject *pattern)
         PyObject *decoded = PyUnicode_DecodeLatin1(PyBytes_AS_STRING(pattern),
                                                    PyBytes_GET_SIZE(pattern),
                                                    "strict");
-        if (decoded == NULL) {
-            return NULL;
-        }
+        CHECK_NOT_NULL_OR_ABORT(decoded);
         PyObject *translated = _Py_fnmatch_translate(module, decoded);
         Py_DECREF(decoded);
-        if (translated == NULL) {
-            return NULL;
-        }
+        CHECK_NOT_NULL_OR_ABORT(translated);
         PyObject *res = PyUnicode_AsLatin1String(translated);
         Py_DECREF(translated);
         return res;
@@ -456,6 +435,8 @@ fnmatch_translate_impl(PyObject *module, PyObject *pattern)
         PyErr_SetString(PyExc_TypeError, INVALID_PATTERN_TYPE);
         return NULL;
     }
+abort:
+    return NULL;
 }
 
 // ==== Module specs ==========================================================

From 5039bcedb37dd504e8ecde58f671a08ee29b3810 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?=
 <10796600+picnixz@users.noreply.github.com>
Date: Mon, 19 Aug 2024 16:37:51 +0200
Subject: [PATCH 90/97] allow path-like objects in `fnmatch.filter`

See gh-123122 for the rationale.
---
 Modules/_fnmatch/_fnmatchmodule.c | 14 +++-----------
 Modules/_fnmatch/filter.c         | 19 +++++++------------
 Modules/_fnmatch/util.h           |  7 +++----
 3 files changed, 13 insertions(+), 27 deletions(-)

diff --git a/Modules/_fnmatch/_fnmatchmodule.c b/Modules/_fnmatch/_fnmatchmodule.c
index 48330d78249ea2..cd963103f05723 100644
--- a/Modules/_fnmatch/_fnmatchmodule.c
+++ b/Modules/_fnmatch/_fnmatchmodule.c
@@ -185,7 +185,7 @@ get_matcher_function(PyObject *module, PyObject *pattern)
 }
 
 static inline PyObject * /* reference to os.path.normcase() */
-get_platform_normcase_function(PyObject *module, bool *isposix)
+get_platform_normcase_function(PyObject *module)
 {
     fnmatchmodule_state *st = get_fnmatchmodule_state(module);
     PyObject *os_path = PyObject_GetAttr(st->os_module, &_Py_ID(path));
@@ -193,9 +193,6 @@ get_platform_normcase_function(PyObject *module, bool *isposix)
         return NULL;
     }
     PyObject *normcase = PyObject_GetAttr(os_path, &_Py_ID(normcase));
-    if (isposix != NULL) {
-        *isposix = Py_Is(os_path, st->posixpath_module);
-    }
     Py_DECREF(os_path);
     return normcase;
 }
@@ -225,7 +222,6 @@ fnmatchmodule_exec(PyObject *module)
     // ------------------------------------------------------------------------
     fnmatchmodule_state *st = get_fnmatchmodule_state(module);
     IMPORT_MODULE(st, os_module, "os");
-    IMPORT_MODULE(st, posixpath_module, "posixpath");
     IMPORT_MODULE(st, re_module, "re");
     CHECK_RET_CODE_OR_ABORT(fnmatchmodule_load_translator(module, st));
     CHECK_RET_CODE_OR_ABORT(fnmatchmodule_load_escapefunc(module, st));
@@ -255,7 +251,6 @@ fnmatchmodule_traverse(PyObject *m, visitproc visit, void *arg)
     Py_VISIT(st->re_escape);
     Py_VISIT(st->translator);
     Py_VISIT(st->re_module);
-    Py_VISIT(st->posixpath_module);
     Py_VISIT(st->os_module);
     return 0;
 }
@@ -273,7 +268,6 @@ fnmatchmodule_clear(PyObject *m)
     Py_CLEAR(st->re_escape);
     Py_CLEAR(st->translator);
     Py_CLEAR(st->re_module);
-    Py_CLEAR(st->posixpath_module);
     Py_CLEAR(st->os_module);
     return 0;
 }
@@ -303,9 +297,8 @@ static PyObject *
 fnmatch_filter_impl(PyObject *module, PyObject *names, PyObject *pattern)
 /*[clinic end generated code: output=1a68530a2e3cf7d0 input=7ac729daad3b1404]*/
 {
-    bool isposix = 0;
     PyObject *normcase = NULL;  // for the 'goto abort' statements
-    normcase = get_platform_normcase_function(module, &isposix);
+    normcase = get_platform_normcase_function(module);
     CHECK_NOT_NULL_OR_ABORT(normcase);
     PyObject *normalized_pattern = PyObject_CallOneArg(normcase, pattern);
     CHECK_NOT_NULL_OR_ABORT(normalized_pattern);
@@ -349,8 +342,7 @@ static int
 fnmatch_fnmatch_impl(PyObject *module, PyObject *name, PyObject *pattern)
 /*[clinic end generated code: output=c9dc542e8d6933b6 input=279a4a4f2ddea6a2]*/
 {
-    // use the runtime 'os.path' value and not a cached one
-    PyObject *normcase = get_platform_normcase_function(module, NULL);
+    PyObject *normcase = get_platform_normcase_function(module);
     if (normcase == NULL) {
         return -1;
     }
diff --git a/Modules/_fnmatch/filter.c b/Modules/_fnmatch/filter.c
index d3611b7f5f883e..bd1d6c8ec85073 100644
--- a/Modules/_fnmatch/filter.c
+++ b/Modules/_fnmatch/filter.c
@@ -5,8 +5,9 @@
 #include "Python.h"
 
 PyObject *
-_Py_fnmatch_filter(PyObject *matcher, PyObject *names, PyObject *normalizer)
+_Py_fnmatch_filter(PyObject *matcher, PyObject *names, PyObject *normcase)
 {
+    assert(normcase != NULL);
     PyObject *iter = PyObject_GetIter(names);
     if (iter == NULL) {
         return NULL;
@@ -18,18 +19,12 @@ _Py_fnmatch_filter(PyObject *matcher, PyObject *names, PyObject *normalizer)
     }
     PyObject *name = NULL;
     while ((name = PyIter_Next(iter))) {
-        PyObject *match;
-        if (normalizer == NULL) {
-            match = PyObject_CallOneArg(matcher, name);
-        }
-        else {
-            PyObject *normalized = PyObject_CallOneArg(normalizer, name);
-            if (normalized == NULL) {
-                goto abort;
-            }
-            match = PyObject_CallOneArg(matcher, normalized);
-            Py_DECREF(normalized);
+        PyObject *normalized = PyObject_CallOneArg(normcase, name);
+        if (normalized == NULL) {
+            goto abort;
         }
+        PyObject *match = PyObject_CallOneArg(matcher, normalized);
+        Py_DECREF(normalized);
         if (match == NULL) {
             goto abort;
         }
diff --git a/Modules/_fnmatch/util.h b/Modules/_fnmatch/util.h
index ac5c4362d78a4e..276921328dd868 100644
--- a/Modules/_fnmatch/util.h
+++ b/Modules/_fnmatch/util.h
@@ -9,7 +9,6 @@
 
 typedef struct {
     PyObject *os_module;            // import os
-    PyObject *posixpath_module;     // import posixpath
     PyObject *re_module;            // import re
 
     PyObject *translator;           // LRU-cached translation unit
@@ -44,14 +43,14 @@ get_fnmatchmodule_state(PyObject *module)
  *
  *  matcher     A reference to the 'match()' method of a compiled pattern.
  *  names       An iterable of strings (str or bytes objects) to match.
- *  normalizer  Optional normalization function.
+ *  normcase    A reference to os.path.normcase().
  *
  *  This is equivalent to:
  *
- *      [name for name in names if matcher(normalizer(name))]
+ *      [name for name in names if matcher(normcase(name))]
  */
 extern PyObject *
-_Py_fnmatch_filter(PyObject *matcher, PyObject *names, PyObject *normalizer);
+_Py_fnmatch_filter(PyObject *matcher, PyObject *names, PyObject *normcase);
 
 /*
  * C accelerator for translating UNIX shell patterns into RE patterns.

From afc22b2101cac0b66a41dee595f48152cf1552a8 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?=
 <10796600+picnixz@users.noreply.github.com>
Date: Mon, 19 Aug 2024 16:43:22 +0200
Subject: [PATCH 91/97] macros bike-shedding

---
 Modules/_fnmatch/macros.h    |  8 +++++++-
 Modules/_fnmatch/translate.c | 11 ++++-------
 2 files changed, 11 insertions(+), 8 deletions(-)

diff --git a/Modules/_fnmatch/macros.h b/Modules/_fnmatch/macros.h
index d78a75b52d1dc0..a39586338ea62a 100644
--- a/Modules/_fnmatch/macros.h
+++ b/Modules/_fnmatch/macros.h
@@ -1,6 +1,6 @@
 /*
  * This file contains various macro definitions in order to reduce the
- * number of lines in translate.c. Do not use them for something else.
+ * number of lines in '_fnmatch'. Do not use them for something else.
  */
 
 #ifndef _FNMATCH_MACROS_H
@@ -23,6 +23,12 @@
         }                               \
     } while (0)
 
+/*
+ * Identical to CHECK_RET_CODE_OR_ABORT but where the
+ * argument is semantically used as a positive integer.
+ */
+#define CHECK_UNSIGNED_INT_OR_ABORT     CHECK_RET_CODE_OR_ABORT
+
 /*
  * Check that OBJ is not NULL or execute 'goto abort'.
  *
diff --git a/Modules/_fnmatch/translate.c b/Modules/_fnmatch/translate.c
index b6f0304664c610..cc22fd5e7ef7af 100644
--- a/Modules/_fnmatch/translate.c
+++ b/Modules/_fnmatch/translate.c
@@ -169,9 +169,7 @@ _Py_fnmatch_translate(PyObject *module, PyObject *pattern)
                     CHECK_NOT_NULL_OR_ABORT(expr);
                     Py_ssize_t expr_len = write_expression(state, writer, expr);
                     Py_DECREF(expr);
-                    if (expr_len < 0) {
-                        goto abort;
-                    }
+                    CHECK_UNSIGNED_INT_OR_ABORT(expr_len);
                     written += expr_len;
                     i = j + 1;  // jump to the character after ']'
                     break;      // explicit early break for clarity
@@ -179,7 +177,7 @@ _Py_fnmatch_translate(PyObject *module, PyObject *pattern)
             }
             default: {
                 Py_ssize_t t = escape_char(state, writer, chr);
-                CHECK_RET_CODE_OR_ABORT(t);
+                CHECK_UNSIGNED_INT_OR_ABORT(t);
                 written += t;
                 break;
             }
@@ -526,9 +524,8 @@ process_wildcards(PyObject *pattern, PyObject *indices)
 #define LOAD_WILDCARD_INDEX(VAR, IND)                               \
     do {                                                            \
         VAR = PyLong_AsSsize_t(PyList_GET_ITEM(indices, (IND)));    \
-        if ((VAR) < 0 && PyErr_Occurred())  {                       \
-            goto abort;                                             \
-        }                                                           \
+        /* wildcard indices must be >= 0 */                         \
+        CHECK_UNSIGNED_INT_OR_ABORT(VAR);                           \
     } while (0)
     // ------------------------------------------------------------------------
     WRITE_ASCII_OR_ABORT(writer, "(?s:", 4);

From e66a602843251cd4eb6936770c2dfffcb7369b49 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?=
 <10796600+picnixz@users.noreply.github.com>
Date: Mon, 19 Aug 2024 16:43:35 +0200
Subject: [PATCH 92/97] type bike-shedding

---
 Modules/_fnmatch/translate.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Modules/_fnmatch/translate.c b/Modules/_fnmatch/translate.c
index cc22fd5e7ef7af..3dbd0d59d094d2 100644
--- a/Modules/_fnmatch/translate.c
+++ b/Modules/_fnmatch/translate.c
@@ -102,7 +102,7 @@ _Py_fnmatch_translate(PyObject *module, PyObject *pattern)
     pattern_str_find_meth = PyObject_GetAttr(pattern, &_Py_ID(find));
     CHECK_NOT_NULL_OR_ABORT(pattern_str_find_meth);
     // ------------------------------------------------------------------------
-    const int pattern_kind = PyUnicode_KIND(pattern);
+    const unsigned int pattern_kind = PyUnicode_KIND(pattern);
     const void *const pattern_data = PyUnicode_DATA(pattern);
     // ---- def local macros --------------------------------------------------
 #define READ_CHAR(IND)  PyUnicode_READ(pattern_kind, pattern_data, IND)

From 2b59064103393da0d3a30fe564ed13190005c628 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?=
 <10796600+picnixz@users.noreply.github.com>
Date: Mon, 19 Aug 2024 16:58:58 +0200
Subject: [PATCH 93/97] update outdated comment

---
 Modules/_fnmatch/_fnmatchmodule.c | 11 -----------
 1 file changed, 11 deletions(-)

diff --git a/Modules/_fnmatch/_fnmatchmodule.c b/Modules/_fnmatch/_fnmatchmodule.c
index cd963103f05723..4c05fa2d5d1b68 100644
--- a/Modules/_fnmatch/_fnmatchmodule.c
+++ b/Modules/_fnmatch/_fnmatchmodule.c
@@ -1,17 +1,6 @@
 /*
  * C accelerator for the 'fnmatch' module.
  *
- * Currently, the following inconsistencies in the Python implementation exist:
- *
- * - fnmatch.filter(NAMES, PATTERN) works with pathlib.Path() instances
- *   in NAMES on Windows but raises a TypeError on POSIX platforms.
- *
- * The reason is that os.path.normcase() is called on each NAME in NAMES
- * but not on POSIX platforms. In particular, os.fspath() is never called:
- *
- *      POSIX       fnmatch.filter([Path("a")], "*") -> TypeError
- *      Windows     fnmatch.filter([Path("a")], "*") -> [Path("a")]
- *
  * - Case normalization uses the runtime value of os.path.normcase(),
  *   forcing us to query the attribute each time.
  *

From 8efbe9a358d9675b336a6a3767cb906ab2f2774b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?=
 <10796600+picnixz@users.noreply.github.com>
Date: Tue, 20 Aug 2024 16:40:53 +0200
Subject: [PATCH 94/97] only keep `fnmatch.translate` in C

---
 .../pycore_global_objects_fini_generated.h    |   1 -
 Include/internal/pycore_global_strings.h      |   1 -
 .../internal/pycore_runtime_init_generated.h  |   1 -
 .../internal/pycore_unicodeobject_generated.h |   4 -
 Lib/fnmatch.py                                | 103 +++----
 Lib/test/test_fnmatch.py                      |   7 -
 Modules/Setup.stdlib.in                       |   2 +-
 Modules/_fnmatch/_fnmatchmodule.c             | 291 ++----------------
 Modules/_fnmatch/clinic/_fnmatchmodule.c.h    | 199 +-----------
 Modules/_fnmatch/filter.c                     |  48 ---
 Modules/_fnmatch/macros.h                     |  20 +-
 Modules/_fnmatch/translate.c                  |  87 +-----
 Modules/_fnmatch/util.h                       |  20 --
 PCbuild/pythoncore.vcxproj                    |   1 -
 PCbuild/pythoncore.vcxproj.filters            |   3 -
 15 files changed, 89 insertions(+), 699 deletions(-)
 delete mode 100644 Modules/_fnmatch/filter.c

diff --git a/Include/internal/pycore_global_objects_fini_generated.h b/Include/internal/pycore_global_objects_fini_generated.h
index 661490cd73c00d..209410d716a07d 100644
--- a/Include/internal/pycore_global_objects_fini_generated.h
+++ b/Include/internal/pycore_global_objects_fini_generated.h
@@ -1103,7 +1103,6 @@ _PyStaticObjects_CheckRefcnt(PyInterpreterState *interp) {
     _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(nlocals));
     _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(node_depth));
     _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(node_offset));
-    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(normcase));
     _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(ns));
     _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(nstype));
     _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(nt));
diff --git a/Include/internal/pycore_global_strings.h b/Include/internal/pycore_global_strings.h
index 2882390f9780c2..5431ba18bf4b24 100644
--- a/Include/internal/pycore_global_strings.h
+++ b/Include/internal/pycore_global_strings.h
@@ -592,7 +592,6 @@ struct _Py_global_strings {
         STRUCT_FOR_ID(nlocals)
         STRUCT_FOR_ID(node_depth)
         STRUCT_FOR_ID(node_offset)
-        STRUCT_FOR_ID(normcase)
         STRUCT_FOR_ID(ns)
         STRUCT_FOR_ID(nstype)
         STRUCT_FOR_ID(nt)
diff --git a/Include/internal/pycore_runtime_init_generated.h b/Include/internal/pycore_runtime_init_generated.h
index ecd624e4bca02a..f3e8d4c5fab26d 100644
--- a/Include/internal/pycore_runtime_init_generated.h
+++ b/Include/internal/pycore_runtime_init_generated.h
@@ -1101,7 +1101,6 @@ extern "C" {
     INIT_ID(nlocals), \
     INIT_ID(node_depth), \
     INIT_ID(node_offset), \
-    INIT_ID(normcase), \
     INIT_ID(ns), \
     INIT_ID(nstype), \
     INIT_ID(nt), \
diff --git a/Include/internal/pycore_unicodeobject_generated.h b/Include/internal/pycore_unicodeobject_generated.h
index e114fca09aefe8..2a494149e6143a 100644
--- a/Include/internal/pycore_unicodeobject_generated.h
+++ b/Include/internal/pycore_unicodeobject_generated.h
@@ -2168,10 +2168,6 @@ _PyUnicode_InitStaticStrings(PyInterpreterState *interp) {
     _PyUnicode_InternStatic(interp, &string);
     assert(_PyUnicode_CheckConsistency(string, 1));
     assert(PyUnicode_GET_LENGTH(string) != 1);
-    string = &_Py_ID(normcase);
-    _PyUnicode_InternStatic(interp, &string);
-    assert(_PyUnicode_CheckConsistency(string, 1));
-    assert(PyUnicode_GET_LENGTH(string) != 1);
     string = &_Py_ID(ns);
     _PyUnicode_InternStatic(interp, &string);
     assert(_PyUnicode_CheckConsistency(string, 1));
diff --git a/Lib/fnmatch.py b/Lib/fnmatch.py
index 1dc52f2575ae6c..0a1dc7c5196597 100644
--- a/Lib/fnmatch.py
+++ b/Lib/fnmatch.py
@@ -16,65 +16,65 @@
 
 __all__ = ["filter", "fnmatch", "fnmatchcase", "translate"]
 
-try:
-    from _fnmatch import filter
-except ImportError:
-    def filter(names, pat):
-        """Construct a list from the names in *names* matching *pat*."""
-        result = []
-        pat = os.path.normcase(pat)
-        match = _compile_pattern(pat)
-        if os.path is posixpath:
-            # normcase on posix is NOP. Optimize it away from the loop.
-            for name in names:
-                if match(name):
-                    result.append(name)
-        else:
-            for name in names:
-                if match(os.path.normcase(name)):
-                    result.append(name)
-        return result
+def fnmatch(name, pat):
+    """Test whether FILENAME matches PATTERN.
 
-try:
-    from _fnmatch import fnmatch
-except ImportError:
-    def fnmatch(name, pat):
-        """Test whether *name* matches *pat*.
+    Patterns are Unix shell style:
 
-        Patterns are Unix shell style:
+    *       matches everything
+    ?       matches any single character
+    [seq]   matches any character in seq
+    [!seq]  matches any char not in seq
 
-        *       matches everything
-        ?       matches any single character
-        [seq]   matches any character in seq
-        [!seq]  matches any char not in seq
+    An initial period in FILENAME is not special.
+    Both FILENAME and PATTERN are first case-normalized
+    if the operating system requires it.
+    If you don't want this, use fnmatchcase(FILENAME, PATTERN).
+    """
+    name = os.path.normcase(name)
+    pat = os.path.normcase(pat)
+    return fnmatchcase(name, pat)
 
-        An initial period in *name* is not special.
-        Both *name* and *pat* are first case-normalized
-        if the operating system requires it.
+@functools.lru_cache(maxsize=32768, typed=True)
+def _compile_pattern(pat):
+    if isinstance(pat, bytes):
+        pat_str = str(pat, 'ISO-8859-1')
+        res_str = translate(pat_str)
+        res = bytes(res_str, 'ISO-8859-1')
+    else:
+        res = translate(pat)
+    return re.compile(res).match
 
-        If you don't want this, use fnmatchcase(name, pat).
-        """
-        name = os.path.normcase(name)
-        pat = os.path.normcase(pat)
-        return fnmatchcase(name, pat)
+def filter(names, pat):
+    """Construct a list from those elements of the iterable NAMES that match PAT."""
+    result = []
+    pat = os.path.normcase(pat)
+    match = _compile_pattern(pat)
+    if os.path is posixpath:
+        # normcase on posix is NOP. Optimize it away from the loop.
+        for name in names:
+            if match(name):
+                result.append(name)
+    else:
+        for name in names:
+            if match(os.path.normcase(name)):
+                result.append(name)
+    return result
 
-try:
-    from _fnmatch import fnmatchcase
-except ImportError:
-    def fnmatchcase(name, pat):
-        """Test whether *name* matches *pat*, including case.
+def fnmatchcase(name, pat):
+    """Test whether FILENAME matches PATTERN, including case.
 
-        This is a version of fnmatch() which doesn't case-normalize
-        its arguments.
-        """
-        match = _compile_pattern(pat)
-        return match(name) is not None
+    This is a version of fnmatch() which doesn't case-normalize
+    its arguments.
+    """
+    match = _compile_pattern(pat)
+    return match(name) is not None
 
 try:
     from _fnmatch import translate
 except ImportError:
     def translate(pat):
-        """Translate a shell pattern *pat* to a regular expression.
+        """Translate a shell PATTERN to a regular expression.
 
         There is no way to quote meta-characters.
         """
@@ -83,15 +83,6 @@ def translate(pat):
         parts = _translate(pat, STAR, '.')
         return _join_translated_parts(parts, STAR)
 
-@functools.lru_cache(maxsize=32768, typed=True)
-def _compile_pattern(pat):
-    if isinstance(pat, bytes):
-        pat_str = str(pat, 'ISO-8859-1')
-        res_str = translate(pat_str)
-        res = bytes(res_str, 'ISO-8859-1')
-    else:
-        res = translate(pat)
-    return re.compile(res).match
 
 def _translate(pat, STAR, QUESTION_MARK):
     res = []
diff --git a/Lib/test/test_fnmatch.py b/Lib/test/test_fnmatch.py
index 6ab244021ea20d..034324139511bb 100644
--- a/Lib/test/test_fnmatch.py
+++ b/Lib/test/test_fnmatch.py
@@ -229,9 +229,6 @@ def test_warnings(self):
 class PurePythonFnmatchTestCase(FnmatchTestCaseMixin, unittest.TestCase):
     fnmatch = py_fnmatch
 
-class CPythonFnmatchTestCase(FnmatchTestCaseMixin, unittest.TestCase):
-    fnmatch = c_fnmatch
-
 class TranslateTestCaseMixin:
     fnmatch = None
 
@@ -382,7 +379,6 @@ def __iter__(self):
         with self.assertRaisesRegex(ValueError, r'^nope$'):
             self.fnmatch.filter(BadList(), '*')
 
-
     def test_mix_bytes_str(self):
         filter = self.fnmatch.filter
         self.assertRaises(TypeError, filter, ['test'], b'*')
@@ -407,8 +403,5 @@ def test_sep(self):
 class PurePythonFilterTestCase(FilterTestCaseMixin, unittest.TestCase):
     fnmatch = py_fnmatch
 
-class CPythonFilterTestCase(FilterTestCaseMixin, unittest.TestCase):
-    fnmatch = c_fnmatch
-
 if __name__ == "__main__":
     unittest.main()
diff --git a/Modules/Setup.stdlib.in b/Modules/Setup.stdlib.in
index f33af67aa26499..8195b7c75c2aa8 100644
--- a/Modules/Setup.stdlib.in
+++ b/Modules/Setup.stdlib.in
@@ -33,7 +33,7 @@
 @MODULE__BISECT_TRUE@_bisect _bisectmodule.c
 @MODULE__CONTEXTVARS_TRUE@_contextvars _contextvarsmodule.c
 @MODULE__CSV_TRUE@_csv _csv.c
-@MODULE__FNMATCH_TRUE@_fnmatch _fnmatch/_fnmatchmodule.c _fnmatch/filter.c _fnmatch/translate.c
+@MODULE__FNMATCH_TRUE@_fnmatch _fnmatch/_fnmatchmodule.c _fnmatch/translate.c
 @MODULE__HEAPQ_TRUE@_heapq _heapqmodule.c
 @MODULE__JSON_TRUE@_json _json.c
 @MODULE__LSPROF_TRUE@_lsprof _lsprof.c rotatingtree.c
diff --git a/Modules/_fnmatch/_fnmatchmodule.c b/Modules/_fnmatch/_fnmatchmodule.c
index 4c05fa2d5d1b68..1ae44424a7ffcd 100644
--- a/Modules/_fnmatch/_fnmatchmodule.c
+++ b/Modules/_fnmatch/_fnmatchmodule.c
@@ -1,15 +1,5 @@
 /*
  * C accelerator for the 'fnmatch' module.
- *
- * - Case normalization uses the runtime value of os.path.normcase(),
- *   forcing us to query the attribute each time.
- *
- * The C implementation of fnmatch.filter() uses the same os.path.normcase()
- * when iterating over NAMES, ignoring side-effects on os.path.normcase()
- * that may occur when processing a NAME in NAMES.
- *
- * More generally, os.path.normcase() is retrieved at most once per call
- * to fnmatch.filter() or fnmatch.fnmatch().
  */
 
 #ifndef Py_BUILD_CORE_BUILTIN
@@ -26,83 +16,6 @@
 #define LRU_CACHE_SIZE          32768
 #define INVALID_PATTERN_TYPE    "pattern must be a string or a bytes object"
 
-// ==== Cached translation unit ===============================================
-
-/*
- * Compile a UNIX shell pattern into a RE pattern
- * and returns the corresponding 'match()' method.
- *
- * This function is LRU-cached by the module itself.
- */
-static PyObject *
-get_matcher_function_impl(PyObject *module, PyObject *pattern)
-{
-    // translate the pattern into a RE pattern
-    assert(module != NULL);
-    PyObject *translated = fnmatch_translate_impl(module, pattern);
-    if (translated == NULL) {
-        return NULL;
-    }
-    fnmatchmodule_state *st = get_fnmatchmodule_state(module);
-    // compile the pattern
-    PyObject *compile_func = PyObject_GetAttr(st->re_module, &_Py_ID(compile));
-    if (compile_func == NULL) {
-        Py_DECREF(translated);
-        return NULL;
-    }
-    PyObject *compiled = PyObject_CallOneArg(compile_func, translated);
-    Py_DECREF(compile_func);
-    Py_DECREF(translated);
-    if (compiled == NULL) {
-        return NULL;
-    }
-    // get the compiled pattern matcher function
-    PyObject *matcher = PyObject_GetAttr(compiled, &_Py_ID(match));
-    Py_DECREF(compiled);
-    return matcher;
-}
-
-static PyMethodDef get_matcher_function_def = {
-    "get_matcher_function",
-    get_matcher_function_impl,
-    METH_O,
-    NULL
-};
-
-static int
-fnmatchmodule_load_translator(PyObject *module, fnmatchmodule_state *st)
-{
-    // make sure that this function is called once
-    assert(st->translator == NULL);
-    PyObject *maxsize = PyLong_FromLong(LRU_CACHE_SIZE);
-    if (maxsize == NULL) {
-        return -1;
-    }
-    PyObject *cache = _PyImport_GetModuleAttrString("functools", "lru_cache");
-    if (cache == NULL) {
-        Py_DECREF(maxsize);
-        return -1;
-    }
-    PyObject *args[3] = {NULL, maxsize, Py_True};
-    size_t nargsf = 2 | PY_VECTORCALL_ARGUMENTS_OFFSET;
-    PyObject *wrapper = PyObject_Vectorcall(cache, &args[1], nargsf, NULL);
-    Py_DECREF(maxsize);
-    Py_DECREF(cache);
-    if (wrapper == NULL) {
-        return -1;
-    }
-    assert(module != NULL);
-    PyObject *wrapped = PyCFunction_New(&get_matcher_function_def, module);
-    // reference on 'translator' will be removed upon module cleanup
-    st->translator = PyObject_CallOneArg(wrapper, wrapped);
-    Py_DECREF(wrapped);
-    Py_DECREF(wrapper);
-    if (st->translator == NULL) {
-        return -1;
-    }
-    return 0;
-}
-
 // ==== Cached re.escape() unit ===============================================
 
 /* Create an LRU-cached function for re.escape(). */
@@ -113,9 +26,7 @@ fnmatchmodule_load_escapefunc(PyObject *Py_UNUSED(module),
     // make sure that this function is called once
     assert(st->re_escape == NULL);
     PyObject *maxsize = PyLong_FromLong(LRU_CACHE_SIZE);
-    if (maxsize == NULL) {
-        return -1;
-    }
+    CHECK_NOT_NULL_OR_ABORT(maxsize);
     PyObject *cache = _PyImport_GetModuleAttrString("functools", "lru_cache");
     if (cache == NULL) {
         Py_DECREF(maxsize);
@@ -124,35 +35,39 @@ fnmatchmodule_load_escapefunc(PyObject *Py_UNUSED(module),
     PyObject *wrapper = PyObject_CallOneArg(cache, maxsize);
     Py_DECREF(maxsize);
     Py_DECREF(cache);
-    if (wrapper == NULL) {
+    CHECK_NOT_NULL_OR_ABORT(wrapper);
+    PyObject *wrapped = _PyImport_GetModuleAttrString("re", "escape");
+    if (wrapped == NULL) {
+        Py_DECREF(wrapper);
         return -1;
     }
-    assert(st->re_module != NULL);
-    PyObject *wrapped = PyObject_GetAttr(st->re_module, &_Py_ID(escape));
-    // reference on 'escapechar' will be removed upon module cleanup
     st->re_escape = PyObject_CallOneArg(wrapper, wrapped);
     Py_DECREF(wrapped);
     Py_DECREF(wrapper);
-    if (st->re_escape == NULL) {
-        return -1;
-    }
+    CHECK_NOT_NULL_OR_ABORT(st->re_escape);
     return 0;
+abort:
+    return -1;
 }
 
 // ==== Cached re.sub() unit for set operation tokens =========================
 
-/* Create an LRU-cached function for re.compile('([&~|])').sub(). */
+/* Store a reference to re.compile('([&~|])').sub(). */
 static int
 fnmatchmodule_load_setops_re_sub(PyObject *Py_UNUSED(module),
                                  fnmatchmodule_state *st)
 {
     // make sure that this function is called once
     assert(st->setops_re_subfn == NULL);
-    PyObject *pattern = PyUnicode_FromString("([&~|])");
+    PyObject *pattern = PyUnicode_FromStringAndSize("([&~|])", 7);
     CHECK_NOT_NULL_OR_ABORT(pattern);
-    PyObject *compiled = PyObject_CallMethodOneArg(st->re_module,
-                                                   &_Py_ID(compile),
-                                                   pattern);
+    PyObject *re_compile = _PyImport_GetModuleAttrString("re", "compile");
+    if (re_compile == NULL) {
+        Py_DECREF(pattern);
+        return -1;
+    }
+    PyObject *compiled = PyObject_CallOneArg(re_compile, pattern);
+    Py_DECREF(re_compile);
     Py_DECREF(pattern);
     CHECK_NOT_NULL_OR_ABORT(compiled);
     st->setops_re_subfn = PyObject_GetAttr(compiled, &_Py_ID(sub));
@@ -163,56 +78,20 @@ fnmatchmodule_load_setops_re_sub(PyObject *Py_UNUSED(module),
     return -1;
 }
 
-// ==== Module data getters ===================================================
-
-static inline PyObject * /* reference to re.compile(pattern).match() */
-get_matcher_function(PyObject *module, PyObject *pattern)
-{
-    fnmatchmodule_state *st = get_fnmatchmodule_state(module);
-    assert(st->translator != NULL);
-    return PyObject_CallOneArg(st->translator, pattern);
-}
-
-static inline PyObject * /* reference to os.path.normcase() */
-get_platform_normcase_function(PyObject *module)
-{
-    fnmatchmodule_state *st = get_fnmatchmodule_state(module);
-    PyObject *os_path = PyObject_GetAttr(st->os_module, &_Py_ID(path));
-    if (os_path == NULL) {
-        return NULL;
-    }
-    PyObject *normcase = PyObject_GetAttr(os_path, &_Py_ID(normcase));
-    Py_DECREF(os_path);
-    return normcase;
-}
-
 // ==== Module state functions ================================================
 
 static int
 fnmatchmodule_exec(PyObject *module)
 {
     // ---- def local macros --------------------------------------------------
-    /* Import a named module and store it in 'STATE->ATTRIBUTE'. */
-#define IMPORT_MODULE(STATE, ATTRIBUTE, MODULE_NAME)                \
-    do {                                                            \
-        /* make sure that the attribute is initialized once */      \
-        assert(STATE->ATTRIBUTE == NULL);                           \
-        STATE->ATTRIBUTE = PyImport_ImportModule((MODULE_NAME));    \
-        CHECK_NOT_NULL_OR_ABORT(STATE->ATTRIBUTE);                  \
-    } while (0)
     /* Intern a literal STRING and store it in 'STATE->ATTRIBUTE'. */
 #define INTERN_STRING(STATE, ATTRIBUTE, STRING)                     \
     do {                                                            \
-        /* make sure that the attribute is initialized once */      \
-        assert(STATE->ATTRIBUTE == NULL);                           \
         STATE->ATTRIBUTE = PyUnicode_InternFromString((STRING));    \
         CHECK_NOT_NULL_OR_ABORT(STATE->ATTRIBUTE);                  \
     } while (0)
     // ------------------------------------------------------------------------
     fnmatchmodule_state *st = get_fnmatchmodule_state(module);
-    IMPORT_MODULE(st, os_module, "os");
-    IMPORT_MODULE(st, re_module, "re");
-    CHECK_RET_CODE_OR_ABORT(fnmatchmodule_load_translator(module, st));
     CHECK_RET_CODE_OR_ABORT(fnmatchmodule_load_escapefunc(module, st));
     INTERN_STRING(st, hyphen_str, "-");
     INTERN_STRING(st, hyphen_esc_str, "\\-");
@@ -224,7 +103,6 @@ fnmatchmodule_exec(PyObject *module)
 abort:
     return -1;
 #undef INTERN_STRING
-#undef IMPORT_MODULE
 }
 
 static int
@@ -238,9 +116,6 @@ fnmatchmodule_traverse(PyObject *m, visitproc visit, void *arg)
     Py_VISIT(st->hyphen_esc_str);
     Py_VISIT(st->hyphen_str);
     Py_VISIT(st->re_escape);
-    Py_VISIT(st->translator);
-    Py_VISIT(st->re_module);
-    Py_VISIT(st->os_module);
     return 0;
 }
 
@@ -255,9 +130,6 @@ fnmatchmodule_clear(PyObject *m)
     Py_CLEAR(st->hyphen_esc_str);
     Py_CLEAR(st->hyphen_str);
     Py_CLEAR(st->re_escape);
-    Py_CLEAR(st->translator);
-    Py_CLEAR(st->re_module);
-    Py_CLEAR(st->os_module);
     return 0;
 }
 
@@ -272,117 +144,6 @@ module fnmatch
 [clinic start generated code]*/
 /*[clinic end generated code: output=da39a3ee5e6b4b0d input=797aa965370a9ef2]*/
 
-/*[clinic input]
-fnmatch.filter -> object
-
-    names: object
-    pat as pattern: object
-
-Construct a list from the names in *names* matching *pat*.
-
-[clinic start generated code]*/
-
-static PyObject *
-fnmatch_filter_impl(PyObject *module, PyObject *names, PyObject *pattern)
-/*[clinic end generated code: output=1a68530a2e3cf7d0 input=7ac729daad3b1404]*/
-{
-    PyObject *normcase = NULL;  // for the 'goto abort' statements
-    normcase = get_platform_normcase_function(module);
-    CHECK_NOT_NULL_OR_ABORT(normcase);
-    PyObject *normalized_pattern = PyObject_CallOneArg(normcase, pattern);
-    CHECK_NOT_NULL_OR_ABORT(normalized_pattern);
-    // the matcher is cached with respect to the *normalized* pattern
-    PyObject *matcher = get_matcher_function(module, normalized_pattern);
-    Py_DECREF(normalized_pattern);
-    CHECK_NOT_NULL_OR_ABORT(matcher);
-    PyObject *filtered = _Py_fnmatch_filter(matcher, names, normcase);
-    Py_DECREF(matcher);
-    Py_DECREF(normcase);
-    return filtered;
-abort:
-    Py_XDECREF(normcase);
-    return NULL;
-}
-
-/*[clinic input]
-fnmatch.fnmatch -> bool
-
-    name: object
-    pat as pattern: object
-
-Test whether *name* matches *pat*.
-
-Patterns are Unix shell style:
-
-*       matches everything
-?       matches any single character
-[seq]   matches any character in seq
-[!seq]  matches any char not in seq
-
-An initial period in *name* is not special.
-Both *name* and *pat* are first case-normalized
-if the operating system requires it.
-
-If you don't want this, use fnmatchcase(name, pat).
-
-[clinic start generated code]*/
-
-static int
-fnmatch_fnmatch_impl(PyObject *module, PyObject *name, PyObject *pattern)
-/*[clinic end generated code: output=c9dc542e8d6933b6 input=279a4a4f2ddea6a2]*/
-{
-    PyObject *normcase = get_platform_normcase_function(module);
-    if (normcase == NULL) {
-        return -1;
-    }
-    // apply case normalization on both arguments
-    PyObject *norm_name = PyObject_CallOneArg(normcase, name);
-    if (norm_name == NULL) {
-        Py_DECREF(normcase);
-        return -1;
-    }
-    PyObject *norm_pattern = PyObject_CallOneArg(normcase, pattern);
-    Py_DECREF(normcase);
-    if (norm_pattern == NULL) {
-        Py_DECREF(norm_name);
-        return -1;
-    }
-    int matching = fnmatch_fnmatchcase_impl(module, norm_name, norm_pattern);
-    Py_DECREF(norm_pattern);
-    Py_DECREF(norm_name);
-    return matching;
-}
-
-/*[clinic input]
-fnmatch.fnmatchcase -> bool
-
-    name: object
-    pat as pattern: object
-
-Test whether *name* matches *pat*, including case.
-
-This is a version of fnmatch() which doesn't case-normalize
-its arguments.
-[clinic start generated code]*/
-
-static int
-fnmatch_fnmatchcase_impl(PyObject *module, PyObject *name, PyObject *pattern)
-/*[clinic end generated code: output=4d6b268169001876 input=91d62999c08fd55e]*/
-{
-    // fnmatchcase() does not apply any case normalization on the inputs
-    PyObject *matcher = get_matcher_function(module, pattern);
-    if (matcher == NULL) {
-        return -1;
-    }
-    // If 'name' is of incorrect type, it will be detected when calling
-    // the matcher function (we check 're.compile(pattern).match(name)').
-    PyObject *match = PyObject_CallOneArg(matcher, name);
-    Py_DECREF(matcher);
-    int matching = match == NULL ? -1 : !Py_IsNone(match);
-    Py_XDECREF(match);
-    return matching;
-}
-
 /*[clinic input]
 fnmatch.translate -> object
 
@@ -422,21 +183,7 @@ fnmatch_translate_impl(PyObject *module, PyObject *pattern)
 
 // ==== Module specs ==========================================================
 
-// fmt: off
-PyDoc_STRVAR(fnmatchmodule_doc,
-"Filename matching with shell patterns.\n"
-"fnmatch(FILENAME, PATTERN) matches according to the local convention.\n"
-"fnmatchcase(FILENAME, PATTERN) always takes case in account.\n\n"
-"The functions operate by translating the pattern into a regular\n"
-"expression.  They cache the compiled regular expressions for speed.\n\n"
-"The function translate(PATTERN) returns a regular expression\n"
-"corresponding to PATTERN.  (It does not compile it.)");
-// fmt: on
-
 static PyMethodDef fnmatchmodule_methods[] = {
-    FNMATCH_FILTER_METHODDEF
-    FNMATCH_FNMATCH_METHODDEF
-    FNMATCH_FNMATCHCASE_METHODDEF
     FNMATCH_TRANSLATE_METHODDEF
     {NULL, NULL}
 };
@@ -451,7 +198,7 @@ static struct PyModuleDef_Slot fnmatchmodule_slots[] = {
 static struct PyModuleDef _fnmatchmodule = {
     PyModuleDef_HEAD_INIT,
     .m_name = "_fnmatch",
-    .m_doc = fnmatchmodule_doc,
+    .m_doc = NULL,
     .m_size = sizeof(fnmatchmodule_state),
     .m_methods = fnmatchmodule_methods,
     .m_slots = fnmatchmodule_slots,
@@ -467,4 +214,4 @@ PyInit__fnmatch(void)
 }
 
 #undef INVALID_PATTERN_TYPE
-#undef COMPILED_CACHE_SIZE
+#undef LRU_CACHE_SIZE
diff --git a/Modules/_fnmatch/clinic/_fnmatchmodule.c.h b/Modules/_fnmatch/clinic/_fnmatchmodule.c.h
index c611f01673b326..38129540d37433 100644
--- a/Modules/_fnmatch/clinic/_fnmatchmodule.c.h
+++ b/Modules/_fnmatch/clinic/_fnmatchmodule.c.h
@@ -8,203 +8,6 @@ preserve
 #endif
 #include "pycore_modsupport.h"    // _PyArg_UnpackKeywords()
 
-PyDoc_STRVAR(fnmatch_filter__doc__,
-"filter($module, /, names, pat)\n"
-"--\n"
-"\n"
-"Construct a list from the names in *names* matching *pat*.");
-
-#define FNMATCH_FILTER_METHODDEF    \
-    {"filter", _PyCFunction_CAST(fnmatch_filter), METH_FASTCALL|METH_KEYWORDS, fnmatch_filter__doc__},
-
-static PyObject *
-fnmatch_filter_impl(PyObject *module, PyObject *names, PyObject *pattern);
-
-static PyObject *
-fnmatch_filter(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames)
-{
-    PyObject *return_value = NULL;
-    #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE)
-
-    #define NUM_KEYWORDS 2
-    static struct {
-        PyGC_Head _this_is_not_used;
-        PyObject_VAR_HEAD
-        PyObject *ob_item[NUM_KEYWORDS];
-    } _kwtuple = {
-        .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS)
-        .ob_item = { &_Py_ID(names), &_Py_ID(pat), },
-    };
-    #undef NUM_KEYWORDS
-    #define KWTUPLE (&_kwtuple.ob_base.ob_base)
-
-    #else  // !Py_BUILD_CORE
-    #  define KWTUPLE NULL
-    #endif  // !Py_BUILD_CORE
-
-    static const char * const _keywords[] = {"names", "pat", NULL};
-    static _PyArg_Parser _parser = {
-        .keywords = _keywords,
-        .fname = "filter",
-        .kwtuple = KWTUPLE,
-    };
-    #undef KWTUPLE
-    PyObject *argsbuf[2];
-    PyObject *names;
-    PyObject *pattern;
-
-    args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, 2, 2, 0, argsbuf);
-    if (!args) {
-        goto exit;
-    }
-    names = args[0];
-    pattern = args[1];
-    return_value = fnmatch_filter_impl(module, names, pattern);
-
-exit:
-    return return_value;
-}
-
-PyDoc_STRVAR(fnmatch_fnmatch__doc__,
-"fnmatch($module, /, name, pat)\n"
-"--\n"
-"\n"
-"Test whether *name* matches *pat*.\n"
-"\n"
-"Patterns are Unix shell style:\n"
-"\n"
-"*       matches everything\n"
-"?       matches any single character\n"
-"[seq]   matches any character in seq\n"
-"[!seq]  matches any char not in seq\n"
-"\n"
-"An initial period in *name* is not special.\n"
-"Both *name* and *pat* are first case-normalized\n"
-"if the operating system requires it.\n"
-"\n"
-"If you don\'t want this, use fnmatchcase(name, pat).");
-
-#define FNMATCH_FNMATCH_METHODDEF    \
-    {"fnmatch", _PyCFunction_CAST(fnmatch_fnmatch), METH_FASTCALL|METH_KEYWORDS, fnmatch_fnmatch__doc__},
-
-static int
-fnmatch_fnmatch_impl(PyObject *module, PyObject *name, PyObject *pattern);
-
-static PyObject *
-fnmatch_fnmatch(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames)
-{
-    PyObject *return_value = NULL;
-    #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE)
-
-    #define NUM_KEYWORDS 2
-    static struct {
-        PyGC_Head _this_is_not_used;
-        PyObject_VAR_HEAD
-        PyObject *ob_item[NUM_KEYWORDS];
-    } _kwtuple = {
-        .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS)
-        .ob_item = { &_Py_ID(name), &_Py_ID(pat), },
-    };
-    #undef NUM_KEYWORDS
-    #define KWTUPLE (&_kwtuple.ob_base.ob_base)
-
-    #else  // !Py_BUILD_CORE
-    #  define KWTUPLE NULL
-    #endif  // !Py_BUILD_CORE
-
-    static const char * const _keywords[] = {"name", "pat", NULL};
-    static _PyArg_Parser _parser = {
-        .keywords = _keywords,
-        .fname = "fnmatch",
-        .kwtuple = KWTUPLE,
-    };
-    #undef KWTUPLE
-    PyObject *argsbuf[2];
-    PyObject *name;
-    PyObject *pattern;
-    int _return_value;
-
-    args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, 2, 2, 0, argsbuf);
-    if (!args) {
-        goto exit;
-    }
-    name = args[0];
-    pattern = args[1];
-    _return_value = fnmatch_fnmatch_impl(module, name, pattern);
-    if ((_return_value == -1) && PyErr_Occurred()) {
-        goto exit;
-    }
-    return_value = PyBool_FromLong((long)_return_value);
-
-exit:
-    return return_value;
-}
-
-PyDoc_STRVAR(fnmatch_fnmatchcase__doc__,
-"fnmatchcase($module, /, name, pat)\n"
-"--\n"
-"\n"
-"Test whether *name* matches *pat*, including case.\n"
-"\n"
-"This is a version of fnmatch() which doesn\'t case-normalize\n"
-"its arguments.");
-
-#define FNMATCH_FNMATCHCASE_METHODDEF    \
-    {"fnmatchcase", _PyCFunction_CAST(fnmatch_fnmatchcase), METH_FASTCALL|METH_KEYWORDS, fnmatch_fnmatchcase__doc__},
-
-static int
-fnmatch_fnmatchcase_impl(PyObject *module, PyObject *name, PyObject *pattern);
-
-static PyObject *
-fnmatch_fnmatchcase(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames)
-{
-    PyObject *return_value = NULL;
-    #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE)
-
-    #define NUM_KEYWORDS 2
-    static struct {
-        PyGC_Head _this_is_not_used;
-        PyObject_VAR_HEAD
-        PyObject *ob_item[NUM_KEYWORDS];
-    } _kwtuple = {
-        .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS)
-        .ob_item = { &_Py_ID(name), &_Py_ID(pat), },
-    };
-    #undef NUM_KEYWORDS
-    #define KWTUPLE (&_kwtuple.ob_base.ob_base)
-
-    #else  // !Py_BUILD_CORE
-    #  define KWTUPLE NULL
-    #endif  // !Py_BUILD_CORE
-
-    static const char * const _keywords[] = {"name", "pat", NULL};
-    static _PyArg_Parser _parser = {
-        .keywords = _keywords,
-        .fname = "fnmatchcase",
-        .kwtuple = KWTUPLE,
-    };
-    #undef KWTUPLE
-    PyObject *argsbuf[2];
-    PyObject *name;
-    PyObject *pattern;
-    int _return_value;
-
-    args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, 2, 2, 0, argsbuf);
-    if (!args) {
-        goto exit;
-    }
-    name = args[0];
-    pattern = args[1];
-    _return_value = fnmatch_fnmatchcase_impl(module, name, pattern);
-    if ((_return_value == -1) && PyErr_Occurred()) {
-        goto exit;
-    }
-    return_value = PyBool_FromLong((long)_return_value);
-
-exit:
-    return return_value;
-}
-
 PyDoc_STRVAR(fnmatch_translate__doc__,
 "translate($module, /, pat)\n"
 "--\n"
@@ -261,4 +64,4 @@ fnmatch_translate(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyO
 exit:
     return return_value;
 }
-/*[clinic end generated code: output=50f858ef4bfb569a input=a9049054013a1b77]*/
+/*[clinic end generated code: output=eab39d3bb9f3a13d input=a9049054013a1b77]*/
diff --git a/Modules/_fnmatch/filter.c b/Modules/_fnmatch/filter.c
deleted file mode 100644
index bd1d6c8ec85073..00000000000000
--- a/Modules/_fnmatch/filter.c
+++ /dev/null
@@ -1,48 +0,0 @@
-/*
- * Provide the implementation of the high-level matcher-based functions.
- */
-
-#include "Python.h"
-
-PyObject *
-_Py_fnmatch_filter(PyObject *matcher, PyObject *names, PyObject *normcase)
-{
-    assert(normcase != NULL);
-    PyObject *iter = PyObject_GetIter(names);
-    if (iter == NULL) {
-        return NULL;
-    }
-    PyObject *res = PyList_New(0);
-    if (res == NULL) {
-        Py_DECREF(iter);
-        return NULL;
-    }
-    PyObject *name = NULL;
-    while ((name = PyIter_Next(iter))) {
-        PyObject *normalized = PyObject_CallOneArg(normcase, name);
-        if (normalized == NULL) {
-            goto abort;
-        }
-        PyObject *match = PyObject_CallOneArg(matcher, normalized);
-        Py_DECREF(normalized);
-        if (match == NULL) {
-            goto abort;
-        }
-        int matching = Py_IsNone(match) == 0;
-        Py_DECREF(match);
-        if (matching && PyList_Append(res, name) < 0) {
-            goto abort;
-        }
-        Py_DECREF(name);
-    }
-    Py_DECREF(iter);
-    if (PyErr_Occurred()) {
-        Py_CLEAR(res);
-    }
-    return res;
-abort:
-    Py_DECREF(name);
-    Py_DECREF(iter);
-    Py_DECREF(res);
-    return NULL;
-}
diff --git a/Modules/_fnmatch/macros.h b/Modules/_fnmatch/macros.h
index a39586338ea62a..2363e1b8051ff8 100644
--- a/Modules/_fnmatch/macros.h
+++ b/Modules/_fnmatch/macros.h
@@ -6,8 +6,6 @@
 #ifndef _FNMATCH_MACROS_H
 #define _FNMATCH_MACROS_H
 
-// ==== Macro definitions =====================================================
-
 /*
  * Check that STATUS is >= 0 or execute 'goto abort'.
  *
@@ -116,17 +114,13 @@
         NULL                                \
     )
 
-/*
- * Escape set operations in STRING using re.sub().
- *
- * SETOPS_RE_SUB_METH is a reference to re.compile('([&~|])').sub().
- */
-#define SETOPS_REPLACE(STATE, STRING, SETOPS_RE_SUB_METH)   \
-    PyObject_CallFunctionObjArgs(                           \
-        (SETOPS_RE_SUB_METH),                               \
-        (STATE)->setops_repl_str,                           \
-        (STRING),                                           \
-        NULL                                                \
+/* Escape set operations in STRING using re.sub(). */
+#define SETOPS_REPLACE(STATE, STRING)       \
+    PyObject_CallFunctionObjArgs(           \
+        (STATE)->setops_re_subfn,           \
+        (STATE)->setops_repl_str,           \
+        (STRING),                           \
+        NULL                                \
     )
 
 #endif // _FNMATCH_MACROS_H
diff --git a/Modules/_fnmatch/translate.c b/Modules/_fnmatch/translate.c
index 3dbd0d59d094d2..ef2d2e43f4b3b1 100644
--- a/Modules/_fnmatch/translate.c
+++ b/Modules/_fnmatch/translate.c
@@ -70,32 +70,13 @@ _Py_fnmatch_translate(PyObject *module, PyObject *pattern)
     assert(PyUnicode_Check(pattern));
     fnmatchmodule_state *state = get_fnmatchmodule_state(module);
     const Py_ssize_t maxind = PyUnicode_GET_LENGTH(pattern);
-
-    // We would write less data if there are successive '*',
-    // which usually happens once or twice in the pattern.
-    // Otherwise, we write >= maxind characters since escaping
-    // them always add more characters.
-    //
-    // Note that only '()[]{}?*+-|^$\\.&~# \t\n\r\v\f' need to
-    // be escaped when translated to RE patterns and '*' and '?'
-    // are already handled without being escaped.
-    //
-    // In general, UNIX style patterns are more likely to contain
-    // wildcards than characters to be escaped, with the exception
-    // of '-', '\' and '~' (we usually want to match filenmaes),
-    // and there is a sparse number of them. Therefore, we only
-    // estimate the number of characters to be written to be the
-    // same as the number of characters in the pattern.
     PyUnicodeWriter *writer = PyUnicodeWriter_Create(maxind);
     if (writer == NULL) {
         return NULL;
     }
-
     // ---- decl local objects ------------------------------------------------
-    // list containing the indices where '*' has a special meaning
-    PyObject *wildcard_indices = NULL;
-    // call-level cached functions
-    PyObject *pattern_str_find_meth = NULL; // pattern.find()
+    PyObject *wildcard_indices = NULL;      // positions of stars
+    PyObject *pattern_str_find_meth = NULL; // cached pattern.find()
     // ---- def local objects -------------------------------------------------
     wildcard_indices = PyList_New(0);
     CHECK_NOT_NULL_OR_ABORT(wildcard_indices);
@@ -251,8 +232,6 @@ escape_char(fnmatchmodule_state *state, PyUnicodeWriter *writer, Py_UCS4 ch)
  * Extract a list of chunks from the pattern group described by start and stop.
  *
  * For instance, the chunks for [a-z0-9] or [!a-z0-9] are ['a', 'z0', '9'].
- *
- * See translate_expression() for its usage.
  */
 static PyObject *
 split_expression(fnmatchmodule_state *state,
@@ -307,11 +286,7 @@ split_expression(fnmatchmodule_state *state,
         Py_ssize_t chunkscount = PyList_GET_SIZE(chunks);
         assert(chunkscount > 0);
         PyObject *chunk = PyList_GET_ITEM(chunks, chunkscount - 1);
-        assert(chunk != NULL);
         PyObject *str = PyUnicode_Concat(chunk, hyphen);
-        // PyList_SetItem() does not create a new reference on 'str'
-        // so we should not decref 'str' after the call, unless there
-        // is an issue while setting the item.
         if (str == NULL || PyList_SetItem(chunks, chunkscount - 1, str) < 0) {
             Py_XDECREF(str);
             goto abort;
@@ -326,25 +301,17 @@ split_expression(fnmatchmodule_state *state,
     return NULL;
 }
 
-/*
- * Remove empty ranges (they are invalid in RE).
- *
- * See translate_expression() for its usage.
- */
+/* Remove empty ranges (they are invalid in RE). */
 static int
 simplify_expression(PyObject *chunks)
 {
     // for k in range(len(chunks) - 1, 0, -1):
     for (Py_ssize_t k = PyList_GET_SIZE(chunks) - 1; k > 0; --k) {
         PyObject *c1 = PyList_GET_ITEM(chunks, k - 1);
-        assert(c1 != NULL);
         Py_ssize_t c1len = PyUnicode_GET_LENGTH(c1);
-        assert(c1len > 0);
 
         PyObject *c2 = PyList_GET_ITEM(chunks, k);
-        assert(c2 != NULL);
         Py_ssize_t c2len = PyUnicode_GET_LENGTH(c2);
-        assert(c2len > 0);
 
         if (PyUnicode_READ_CHAR(c1, c1len - 1) > PyUnicode_READ_CHAR(c2, 0)) {
             Py_ssize_t olen = c1len + c2len - 2;
@@ -352,19 +319,14 @@ simplify_expression(PyObject *chunks)
             PyObject *str = NULL;
             if (olen == 0) {        // c1[:1] + c2[1:] == ''
                 str = Py_GetConstant(Py_CONSTANT_EMPTY_STR);
-                assert(_Py_IsImmortal(str));
             }
             else if (c1len == 1) {  // c1[:1] + c2[1:] == c2[1:]
-                assert(c2len > 1);
                 str = PyUnicode_Substring(c2, 1, c2len);
             }
             else if (c2len == 1) {  // c1[:1] + c2[1:] == c1[:1]
-                assert(c1len > 1);
                 str = PyUnicode_Substring(c1, 0, c1len - 1);
             }
             else {
-                assert(c1len > 1);
-                assert(c2len > 1);
                 PyUnicodeWriter *writer = PyUnicodeWriter_Create(olen);
                 CHECK_NOT_NULL_OR_ABORT(writer);
                 // all but the last character in the first chunk
@@ -379,9 +341,6 @@ simplify_expression(PyObject *chunks)
                 }
                 str = PyUnicodeWriter_Finish(writer);
             }
-            // PyList_SetItem() does not create a new reference on 'str'
-            // so we should not decref 'str' after the call, unless there
-            // is an issue while setting the item.
             if (str == NULL || PyList_SetItem(chunks, k - 1, str) < 0) {
                 Py_XDECREF(str);
                 goto abort;
@@ -394,26 +353,17 @@ simplify_expression(PyObject *chunks)
     return -1;
 }
 
-/*
- * Escape backslashes and hyphens for set difference (--),
- * but hyphens that create ranges should not be escaped.
- *
- * See translate_expression() for its usage.
- */
+/* Escape backslashes and hyphens for set difference (--). */
 static int
 escape_expression(fnmatchmodule_state *state, PyObject *chunks)
 {
-    for (Py_ssize_t c = 0; c < PyList_GET_SIZE(chunks); ++c) {
-        PyObject *s0 = PyList_GET_ITEM(chunks, c);
-        assert(s0 != NULL);
-        PyObject *s1 = BACKSLASH_REPLACE(state, s0);
+    for (Py_ssize_t i = 0; i < PyList_GET_SIZE(chunks); ++i) {
+        PyObject *chunk = PyList_GET_ITEM(chunks, i);
+        PyObject *s1 = BACKSLASH_REPLACE(state, chunk);
         CHECK_NOT_NULL_OR_ABORT(s1);
         PyObject *s2 = HYPHEN_REPLACE(state, s1);
         Py_DECREF(s1);
-        // PyList_SetItem() does not create a new reference on 's2'
-        // so we should not decref 's2' after the call, unless there
-        // is an issue while setting the item.
-        if (s2 == NULL || PyList_SetItem(chunks, c, s2) < 0) {
+        if (s2 == NULL || PyList_SetItem(chunks, i, s2) < 0) {
             Py_XDECREF(s2);
             goto abort;
         }
@@ -431,9 +381,7 @@ translate_expression(fnmatchmodule_state *state,
     PyObject *chunks = split_expression(state, pattern, start, stop,
                                         pattern_str_find_meth);
     CHECK_NOT_NULL_OR_ABORT(chunks);
-    // remove empty ranges
     CHECK_RET_CODE_OR_ABORT(simplify_expression(chunks));
-    // escape backslashes and set differences
     CHECK_RET_CODE_OR_ABORT(escape_expression(state, chunks));
     PyObject *res = PyUnicode_Join(state->hyphen_str, chunks);
     Py_DECREF(chunks);
@@ -463,7 +411,7 @@ write_expression(fnmatchmodule_state *state,
     Py_ssize_t extra = 2; // '[' and ']'
     WRITE_CHAR_OR_ABORT(writer, '[');
     // escape set operations as late as possible
-    safe_expression = SETOPS_REPLACE(state, expression, state->setops_re_subfn);
+    safe_expression = SETOPS_REPLACE(state, expression);
     CHECK_NOT_NULL_OR_ABORT(safe_expression);
     switch (token) {
         case '!': {
@@ -520,27 +468,21 @@ process_wildcards(PyObject *pattern, PyObject *indices)
     if (writer == NULL) {
         return NULL;
     }
-    // ---- def local macros --------------------------------------------------
-#define LOAD_WILDCARD_INDEX(VAR, IND)                               \
-    do {                                                            \
-        VAR = PyLong_AsSsize_t(PyList_GET_ITEM(indices, (IND)));    \
-        /* wildcard indices must be >= 0 */                         \
-        CHECK_UNSIGNED_INT_OR_ABORT(VAR);                           \
-    } while (0)
-    // ------------------------------------------------------------------------
     WRITE_ASCII_OR_ABORT(writer, "(?s:", 4);
     if (m == 0) {
         WRITE_STRING_OR_ABORT(writer, pattern);
     }
     else {
-        Py_ssize_t i = 0, j = -1;
+        Py_ssize_t i = 0;
         // process the optional PREFIX
-        LOAD_WILDCARD_INDEX(j, 0);
+        Py_ssize_t j = PyLong_AsSsize_t(PyList_GET_ITEM(indices, 0));
+        CHECK_UNSIGNED_INT_OR_ABORT(j);
         WRITE_SUBSTRING_OR_ABORT(writer, pattern, i, j);
         i = j + 1;
         for (Py_ssize_t k = 1; k < m; ++k) {
             // process the (* INNER) groups
-            LOAD_WILDCARD_INDEX(j, k);
+            j = PyLong_AsSsize_t(PyList_GET_ITEM(indices, k));
+            CHECK_UNSIGNED_INT_OR_ABORT(j);
             assert(i < j);
             // write the atomic RE group '(?>.*?' + INNER + ')'
             WRITE_ASCII_OR_ABORT(writer, "(?>.*?", 6);
@@ -559,5 +501,4 @@ process_wildcards(PyObject *pattern, PyObject *indices)
 abort:
     PyUnicodeWriter_Discard(writer);
     return NULL;
-#undef LOAD_WILDCARD_INDEX
 }
diff --git a/Modules/_fnmatch/util.h b/Modules/_fnmatch/util.h
index 276921328dd868..8f598fa66f7c1d 100644
--- a/Modules/_fnmatch/util.h
+++ b/Modules/_fnmatch/util.h
@@ -8,10 +8,6 @@
 #include "Python.h"
 
 typedef struct {
-    PyObject *os_module;            // import os
-    PyObject *re_module;            // import re
-
-    PyObject *translator;           // LRU-cached translation unit
     PyObject *re_escape;            // LRU-cached re.escape() function
 
     // strings used by translate.c
@@ -36,22 +32,6 @@ get_fnmatchmodule_state(PyObject *module)
 
 // ==== Helper prototypes =====================================================
 
-/*
- * Returns a list of matched names, or NULL if an error occurred.
- *
- * Parameters
- *
- *  matcher     A reference to the 'match()' method of a compiled pattern.
- *  names       An iterable of strings (str or bytes objects) to match.
- *  normcase    A reference to os.path.normcase().
- *
- *  This is equivalent to:
- *
- *      [name for name in names if matcher(normcase(name))]
- */
-extern PyObject *
-_Py_fnmatch_filter(PyObject *matcher, PyObject *names, PyObject *normcase);
-
 /*
  * C accelerator for translating UNIX shell patterns into RE patterns.
  *
diff --git a/PCbuild/pythoncore.vcxproj b/PCbuild/pythoncore.vcxproj
index 2083072f6cf8cf..20141f370bc7a4 100644
--- a/PCbuild/pythoncore.vcxproj
+++ b/PCbuild/pythoncore.vcxproj
@@ -477,7 +477,6 @@
     <ClCompile Include="..\Modules\timemodule.c" />
     <ClCompile Include="..\Modules\xxsubtype.c" />
     <ClCompile Include="..\Modules\_fnmatch\_fnmatchmodule.c" />
-    <ClCompile Include="..\Modules\_fnmatch\filter.c" />
     <ClCompile Include="..\Modules\_fnmatch\translate.c" />
     <ClCompile Include="..\Modules\_interpretersmodule.c" />
     <ClCompile Include="..\Modules\_interpchannelsmodule.c" />
diff --git a/PCbuild/pythoncore.vcxproj.filters b/PCbuild/pythoncore.vcxproj.filters
index 301030d50b5733..94de5f38778401 100644
--- a/PCbuild/pythoncore.vcxproj.filters
+++ b/PCbuild/pythoncore.vcxproj.filters
@@ -1070,9 +1070,6 @@
     <ClCompile Include="..\Modules\_fnmatch\_fnmatchmodule.c">
       <Filter>Modules\_fnmatch</Filter>
     </ClCompile>
-    <ClCompile Include="..\Modules\_fnmatch\filter.c">
-      <Filter>Modules\_fnmatch</Filter>
-    </ClCompile>
     <ClCompile Include="..\Modules\_fnmatch\translate.c">
       <Filter>Modules\_fnmatch</Filter>
     </ClCompile>

From 5c37da720f06ea70974223437e42a0657b7fb533 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?=
 <10796600+picnixz@users.noreply.github.com>
Date: Tue, 20 Aug 2024 17:28:30 +0200
Subject: [PATCH 95/97] remove legacy tests

---
 Lib/test/test_fnmatch.py | 9 ---------
 1 file changed, 9 deletions(-)

diff --git a/Lib/test/test_fnmatch.py b/Lib/test/test_fnmatch.py
index 034324139511bb..186f4eb81dee83 100644
--- a/Lib/test/test_fnmatch.py
+++ b/Lib/test/test_fnmatch.py
@@ -370,15 +370,6 @@ def test_filter(self):
         self.assertEqual(filter([b'Python', b'Ruby', b'Perl', b'Tcl'], b'P*'),
                          [b'Python', b'Perl'])
 
-    def test_filter_iter_errors(self):
-        class BadList:
-            def __iter__(self):
-                yield 'abc'
-                raise ValueError("nope")
-
-        with self.assertRaisesRegex(ValueError, r'^nope$'):
-            self.fnmatch.filter(BadList(), '*')
-
     def test_mix_bytes_str(self):
         filter = self.fnmatch.filter
         self.assertRaises(TypeError, filter, ['test'], b'*')

From 79fb2f2df098bd11f44186f7bd60db38e73fabee Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?=
 <10796600+picnixz@users.noreply.github.com>
Date: Tue, 20 Aug 2024 17:28:33 +0200
Subject: [PATCH 96/97] update NEWS

---
 .../Library/2024-07-12-09-24-38.gh-issue-121445.KYtNOZ.rst    | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/Misc/NEWS.d/next/Library/2024-07-12-09-24-38.gh-issue-121445.KYtNOZ.rst b/Misc/NEWS.d/next/Library/2024-07-12-09-24-38.gh-issue-121445.KYtNOZ.rst
index f374f28456d65d..e310ca0a76bc0d 100644
--- a/Misc/NEWS.d/next/Library/2024-07-12-09-24-38.gh-issue-121445.KYtNOZ.rst
+++ b/Misc/NEWS.d/next/Library/2024-07-12-09-24-38.gh-issue-121445.KYtNOZ.rst
@@ -1,2 +1,2 @@
-Improve the performances of :func:`fnmatch.translate` by 2x and of
-:func:`fnmatch.filter` by 1.1x.  Patch by Bénédikt Tran.
+Improve the performance of :func:`fnmatch.translate` by a factor of 7.
+Patch by Bénédikt Tran.

From 6e9879f5bce6718138a067dcffada80022919aaa Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?=
 <10796600+picnixz@users.noreply.github.com>
Date: Tue, 20 Aug 2024 17:41:17 +0200
Subject: [PATCH 97/97] cleanup

---
 Modules/_fnmatch/_fnmatchmodule.c | 12 +++++-------
 1 file changed, 5 insertions(+), 7 deletions(-)

diff --git a/Modules/_fnmatch/_fnmatchmodule.c b/Modules/_fnmatch/_fnmatchmodule.c
index 1ae44424a7ffcd..9b3413cf3f233a 100644
--- a/Modules/_fnmatch/_fnmatchmodule.c
+++ b/Modules/_fnmatch/_fnmatchmodule.c
@@ -20,8 +20,7 @@
 
 /* Create an LRU-cached function for re.escape(). */
 static int
-fnmatchmodule_load_escapefunc(PyObject *Py_UNUSED(module),
-                              fnmatchmodule_state *st)
+fnmatchmodule_load_escapefunc(fnmatchmodule_state *st)
 {
     // make sure that this function is called once
     assert(st->re_escape == NULL);
@@ -54,8 +53,7 @@ fnmatchmodule_load_escapefunc(PyObject *Py_UNUSED(module),
 
 /* Store a reference to re.compile('([&~|])').sub(). */
 static int
-fnmatchmodule_load_setops_re_sub(PyObject *Py_UNUSED(module),
-                                 fnmatchmodule_state *st)
+fnmatchmodule_load_setops_re_sub(fnmatchmodule_state *st)
 {
     // make sure that this function is called once
     assert(st->setops_re_subfn == NULL);
@@ -92,17 +90,17 @@ fnmatchmodule_exec(PyObject *module)
     } while (0)
     // ------------------------------------------------------------------------
     fnmatchmodule_state *st = get_fnmatchmodule_state(module);
-    CHECK_RET_CODE_OR_ABORT(fnmatchmodule_load_escapefunc(module, st));
+    CHECK_RET_CODE_OR_ABORT(fnmatchmodule_load_escapefunc(st));
     INTERN_STRING(st, hyphen_str, "-");
     INTERN_STRING(st, hyphen_esc_str, "\\-");
     INTERN_STRING(st, backslash_str, "\\");
     INTERN_STRING(st, backslash_esc_str, "\\\\");
-    CHECK_RET_CODE_OR_ABORT(fnmatchmodule_load_setops_re_sub(module, st));
+    CHECK_RET_CODE_OR_ABORT(fnmatchmodule_load_setops_re_sub(st));
     INTERN_STRING(st, setops_repl_str, "\\\\\\1");
+#undef INTERN_STRING
     return 0;
 abort:
     return -1;
-#undef INTERN_STRING
 }
 
 static int