From 631c91282fd3850388cd37f5f5b466363a229b8c Mon Sep 17 00:00:00 2001 From: "Erlend E. Aasland" Date: Sun, 16 Apr 2023 18:56:15 +0200 Subject: [PATCH 1/3] Add capsule wrapper for codecs --- Modules/cjkcodecs/cjkcodecs.h | 39 ++++++++++++++++++++++++------ Modules/cjkcodecs/multibytecodec.c | 6 ++--- Modules/cjkcodecs/multibytecodec.h | 7 +++++- 3 files changed, 41 insertions(+), 11 deletions(-) diff --git a/Modules/cjkcodecs/cjkcodecs.h b/Modules/cjkcodecs/cjkcodecs.h index 646a9fd255ce20..5bad6080361dfb 100644 --- a/Modules/cjkcodecs/cjkcodecs.h +++ b/Modules/cjkcodecs/cjkcodecs.h @@ -284,6 +284,27 @@ getmultibytecodec(void) return _PyImport_GetModuleAttrString("_multibytecodec", "__create_codec"); } +static void +destroy_codec_capsule(PyObject *capsule) +{ + void *data = PyCapsule_GetPointer(capsule, CODEC_CAPSULE); + fprintf(stderr, "uncapsulating %s\n", ((codec_capsule *)data)->codec->encoding); + PyMem_Free(data); +} + +static codec_capsule * +capsulate_codec(const MultibyteCodec *codec) +{ + fprintf(stderr, "capsulating %s\n", codec->encoding); + codec_capsule *data = PyMem_Malloc(sizeof(codec_capsule)); + if (data == NULL) { + PyErr_NoMemory(); + return NULL; + } + data->codec = codec; + return data; +} + static PyObject * _getcodec(const MultibyteCodec *codec) { @@ -292,10 +313,15 @@ _getcodec(const MultibyteCodec *codec) return NULL; } - PyObject *codecobj = PyCapsule_New((void *)codec, - PyMultibyteCodec_CAPSULE_NAME, - NULL); + codec_capsule *data = capsulate_codec(codec); + if (data == NULL) { + Py_DECREF(cofunc); + return NULL; + } + PyObject *codecobj = PyCapsule_New(data, CODEC_CAPSULE, + destroy_codec_capsule); if (codecobj == NULL) { + PyMem_Free(data); Py_DECREF(cofunc); return NULL; } @@ -352,8 +378,7 @@ register_maps(PyObject *module) char mhname[256] = "__map_"; strcpy(mhname + sizeof("__map_") - 1, h->charset); - PyObject *capsule = PyCapsule_New((void *)h, - PyMultibyteCodec_CAPSULE_NAME, NULL); + PyObject *capsule = PyCapsule_New((void *)h, MAP_CAPSULE, NULL); if (capsule == NULL) { return -1; } @@ -417,14 +442,14 @@ importmap(const char *modname, const char *symbol, o = PyObject_GetAttrString(mod, symbol); if (o == NULL) goto errorexit; - else if (!PyCapsule_IsValid(o, PyMultibyteCodec_CAPSULE_NAME)) { + else if (!PyCapsule_IsValid(o, MAP_CAPSULE)) { PyErr_SetString(PyExc_ValueError, "map data must be a Capsule."); goto errorexit; } else { struct dbcs_map *map; - map = PyCapsule_GetPointer(o, PyMultibyteCodec_CAPSULE_NAME); + map = PyCapsule_GetPointer(o, MAP_CAPSULE); if (encmap != NULL) *encmap = map->encmap; if (decmap != NULL) diff --git a/Modules/cjkcodecs/multibytecodec.c b/Modules/cjkcodecs/multibytecodec.c index 55778cdb59e4dc..5930512f179003 100644 --- a/Modules/cjkcodecs/multibytecodec.c +++ b/Modules/cjkcodecs/multibytecodec.c @@ -1953,14 +1953,14 @@ _multibytecodec___create_codec(PyObject *module, PyObject *arg) /*[clinic end generated code: output=cfa3dce8260e809d input=6840b2a6b183fcfa]*/ { MultibyteCodecObject *self; - MultibyteCodec *codec; - if (!PyCapsule_IsValid(arg, PyMultibyteCodec_CAPSULE_NAME)) { + if (!PyCapsule_IsValid(arg, CODEC_CAPSULE)) { PyErr_SetString(PyExc_ValueError, "argument type invalid"); return NULL; } - codec = PyCapsule_GetPointer(arg, PyMultibyteCodec_CAPSULE_NAME); + codec_capsule *data = PyCapsule_GetPointer(arg, CODEC_CAPSULE); + MultibyteCodec *codec = data->codec; if (codec->codecinit != NULL && codec->codecinit(codec->config) != 0) return NULL; diff --git a/Modules/cjkcodecs/multibytecodec.h b/Modules/cjkcodecs/multibytecodec.h index 69404ba96aa1f0..fe003f7200dda8 100644 --- a/Modules/cjkcodecs/multibytecodec.h +++ b/Modules/cjkcodecs/multibytecodec.h @@ -130,7 +130,12 @@ typedef struct { #define MBENC_FLUSH 0x0001 /* encode all characters encodable */ #define MBENC_MAX MBENC_FLUSH -#define PyMultibyteCodec_CAPSULE_NAME "multibytecodec.__map_*" +typedef struct { + const MultibyteCodec *codec; +} codec_capsule; + +#define MAP_CAPSULE "multibytecodec.map" +#define CODEC_CAPSULE "multibytecodec.codec" #ifdef __cplusplus From bdcfe4475f5f5396e7b88c58fcee6bdc74f1c16a Mon Sep 17 00:00:00 2001 From: "Erlend E. Aasland" Date: Sun, 16 Apr 2023 19:02:44 +0200 Subject: [PATCH 2/3] Put a reference to the cjk module into the codec capsule --- Modules/cjkcodecs/cjkcodecs.h | 17 ++++++++++------- Modules/cjkcodecs/multibytecodec.h | 1 + 2 files changed, 11 insertions(+), 7 deletions(-) diff --git a/Modules/cjkcodecs/cjkcodecs.h b/Modules/cjkcodecs/cjkcodecs.h index 5bad6080361dfb..95a31b506dad20 100644 --- a/Modules/cjkcodecs/cjkcodecs.h +++ b/Modules/cjkcodecs/cjkcodecs.h @@ -287,13 +287,15 @@ getmultibytecodec(void) static void destroy_codec_capsule(PyObject *capsule) { - void *data = PyCapsule_GetPointer(capsule, CODEC_CAPSULE); - fprintf(stderr, "uncapsulating %s\n", ((codec_capsule *)data)->codec->encoding); - PyMem_Free(data); + void *ptr = PyCapsule_GetPointer(capsule, CODEC_CAPSULE); + codec_capsule *data = (codec_capsule *)ptr; + fprintf(stderr, "uncapsulating %s\n", data->codec->encoding); + Py_DECREF(data->cjk_module); + PyMem_Free(ptr); } static codec_capsule * -capsulate_codec(const MultibyteCodec *codec) +capsulate_codec(PyObject *mod, const MultibyteCodec *codec) { fprintf(stderr, "capsulating %s\n", codec->encoding); codec_capsule *data = PyMem_Malloc(sizeof(codec_capsule)); @@ -302,18 +304,19 @@ capsulate_codec(const MultibyteCodec *codec) return NULL; } data->codec = codec; + data->cjk_module = Py_NewRef(mod); return data; } static PyObject * -_getcodec(const MultibyteCodec *codec) +_getcodec(PyObject *self, const MultibyteCodec *codec) { PyObject *cofunc = getmultibytecodec(); if (cofunc == NULL) { return NULL; } - codec_capsule *data = capsulate_codec(codec); + codec_capsule *data = capsulate_codec(self, codec); if (data == NULL) { Py_DECREF(cofunc); return NULL; @@ -349,7 +352,7 @@ getcodec(PyObject *self, PyObject *encoding) for (int i = 0; i < st->num_codecs; i++) { const MultibyteCodec *codec = &st->codec_list[i]; if (strcmp(codec->encoding, enc) == 0) { - return _getcodec(codec); + return _getcodec(self, codec); } } diff --git a/Modules/cjkcodecs/multibytecodec.h b/Modules/cjkcodecs/multibytecodec.h index fe003f7200dda8..cc409ade4e0540 100644 --- a/Modules/cjkcodecs/multibytecodec.h +++ b/Modules/cjkcodecs/multibytecodec.h @@ -132,6 +132,7 @@ typedef struct { typedef struct { const MultibyteCodec *codec; + PyObject *cjk_module; } codec_capsule; #define MAP_CAPSULE "multibytecodec.map" From e376c475fb53dac3c56bd58df6750df26cd86e31 Mon Sep 17 00:00:00 2001 From: "Erlend E. Aasland" Date: Sun, 16 Apr 2023 19:09:43 +0200 Subject: [PATCH 3/3] Make MultibyteCodecObject own a reference to the codec module that owns its codec memory --- Modules/cjkcodecs/cjkcodecs.h | 2 -- Modules/cjkcodecs/multibytecodec.c | 13 ++++++++++++- Modules/cjkcodecs/multibytecodec.h | 1 + 3 files changed, 13 insertions(+), 3 deletions(-) diff --git a/Modules/cjkcodecs/cjkcodecs.h b/Modules/cjkcodecs/cjkcodecs.h index 95a31b506dad20..1b0355310eddab 100644 --- a/Modules/cjkcodecs/cjkcodecs.h +++ b/Modules/cjkcodecs/cjkcodecs.h @@ -289,7 +289,6 @@ destroy_codec_capsule(PyObject *capsule) { void *ptr = PyCapsule_GetPointer(capsule, CODEC_CAPSULE); codec_capsule *data = (codec_capsule *)ptr; - fprintf(stderr, "uncapsulating %s\n", data->codec->encoding); Py_DECREF(data->cjk_module); PyMem_Free(ptr); } @@ -297,7 +296,6 @@ destroy_codec_capsule(PyObject *capsule) static codec_capsule * capsulate_codec(PyObject *mod, const MultibyteCodec *codec) { - fprintf(stderr, "capsulating %s\n", codec->encoding); codec_capsule *data = PyMem_Malloc(sizeof(codec_capsule)); if (data == NULL) { PyErr_NoMemory(); diff --git a/Modules/cjkcodecs/multibytecodec.c b/Modules/cjkcodecs/multibytecodec.c index 5930512f179003..651981f462d10a 100644 --- a/Modules/cjkcodecs/multibytecodec.c +++ b/Modules/cjkcodecs/multibytecodec.c @@ -720,9 +720,17 @@ static struct PyMethodDef multibytecodec_methods[] = { }; static int -multibytecodec_traverse(PyObject *self, visitproc visit, void *arg) +multibytecodec_clear(MultibyteCodecObject *self) +{ + Py_CLEAR(self->cjk_module); + return 0; +} + +static int +multibytecodec_traverse(MultibyteCodecObject *self, visitproc visit, void *arg) { Py_VISIT(Py_TYPE(self)); + Py_VISIT(self->cjk_module); return 0; } @@ -731,6 +739,7 @@ multibytecodec_dealloc(MultibyteCodecObject *self) { PyObject_GC_UnTrack(self); PyTypeObject *tp = Py_TYPE(self); + (void)multibytecodec_clear(self); tp->tp_free(self); Py_DECREF(tp); } @@ -740,6 +749,7 @@ static PyType_Slot multibytecodec_slots[] = { {Py_tp_getattro, PyObject_GenericGetAttr}, {Py_tp_methods, multibytecodec_methods}, {Py_tp_traverse, multibytecodec_traverse}, + {Py_tp_clear, multibytecodec_clear}, {0, NULL}, }; @@ -1969,6 +1979,7 @@ _multibytecodec___create_codec(PyObject *module, PyObject *arg) if (self == NULL) return NULL; self->codec = codec; + self->cjk_module = Py_NewRef(data->cjk_module); PyObject_GC_Track(self); return (PyObject *)self; diff --git a/Modules/cjkcodecs/multibytecodec.h b/Modules/cjkcodecs/multibytecodec.h index cc409ade4e0540..e8e4a08886b1e4 100644 --- a/Modules/cjkcodecs/multibytecodec.h +++ b/Modules/cjkcodecs/multibytecodec.h @@ -63,6 +63,7 @@ typedef struct { typedef struct { PyObject_HEAD MultibyteCodec *codec; + PyObject *cjk_module; } MultibyteCodecObject; #define MultibyteCodec_Check(state, op) Py_IS_TYPE((op), state->multibytecodec_type)