Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Appearance settings

Commit f8290df

Browse filesBrowse files
authored
gh-116738: Make _codecs module thread-safe (#117530)
The module itself is a thin wrapper around calls to functions in `Python/codecs.c`, so that's where the meaningful changes happened: - Move codecs-related state that lives on `PyInterpreterState` to a struct declared in `pycore_codecs.h`. - In free-threaded builds, add a mutex to `codecs_state` to synchronize operations on `search_path`. Because `search_path_mutex` is used as a normal mutex and not a critical section, we must be extremely careful with operations called while holding it. - The codec registry is explicitly initialized as part of `_PyUnicode_InitEncodings` to simplify thread-safety.
1 parent 4e2caf2 commit f8290df
Copy full SHA for f8290df

File tree

Expand file treeCollapse file tree

6 files changed

+120
-79
lines changed
Filter options
Expand file treeCollapse file tree

6 files changed

+120
-79
lines changed

‎Include/internal/pycore_codecs.h

Copy file name to clipboardExpand all lines: Include/internal/pycore_codecs.h
+31Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,17 @@ extern "C" {
88
# error "this header requires Py_BUILD_CORE define"
99
#endif
1010

11+
#include "pycore_lock.h" // PyMutex
12+
13+
/* Initialize codecs-related state for the given interpreter, including
14+
registering the first codec search function. Must be called before any other
15+
PyCodec-related functions, and while only one thread is active. */
16+
extern PyStatus _PyCodec_InitRegistry(PyInterpreterState *interp);
17+
18+
/* Finalize codecs-related state for the given interpreter. No PyCodec-related
19+
functions other than PyCodec_Unregister() may be called after this. */
20+
extern void _PyCodec_Fini(PyInterpreterState *interp);
21+
1122
extern PyObject* _PyCodec_Lookup(const char *encoding);
1223

1324
/* Text codec specific encoding and decoding API.
@@ -48,6 +59,26 @@ extern PyObject* _PyCodecInfo_GetIncrementalEncoder(
4859
PyObject *codec_info,
4960
const char *errors);
5061

62+
// Per-interpreter state used by codecs.c.
63+
struct codecs_state {
64+
// A list of callable objects used to search for codecs.
65+
PyObject *search_path;
66+
67+
// A dict mapping codec names to codecs returned from a callable in
68+
// search_path.
69+
PyObject *search_cache;
70+
71+
// A dict mapping error handling strategies to functions to implement them.
72+
PyObject *error_registry;
73+
74+
#ifdef Py_GIL_DISABLED
75+
// Used to safely delete a specific item from search_path.
76+
PyMutex search_path_mutex;
77+
#endif
78+
79+
// Whether or not the rest of the state is initialized.
80+
int initialized;
81+
};
5182

5283
#ifdef __cplusplus
5384
}

‎Include/internal/pycore_interp.h

Copy file name to clipboardExpand all lines: Include/internal/pycore_interp.h
+2-4Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ extern "C" {
1414
#include "pycore_atexit.h" // struct atexit_state
1515
#include "pycore_ceval_state.h" // struct _ceval_state
1616
#include "pycore_code.h" // struct callable_cache
17+
#include "pycore_codecs.h" // struct codecs_state
1718
#include "pycore_context.h" // struct _Py_context_state
1819
#include "pycore_crossinterp.h" // struct _xidregistry
1920
#include "pycore_dict_state.h" // struct _Py_dict_state
@@ -182,10 +183,7 @@ struct _is {
182183
possible to facilitate out-of-process observability
183184
tools. */
184185

185-
PyObject *codec_search_path;
186-
PyObject *codec_search_cache;
187-
PyObject *codec_error_registry;
188-
int codecs_initialized;
186+
struct codecs_state codecs;
189187

190188
PyConfig config;
191189
unsigned long feature_flags;

‎Objects/unicodeobject.c

Copy file name to clipboardExpand all lines: Objects/unicodeobject.c
+5-1Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15441,7 +15441,11 @@ init_fs_encoding(PyThreadState *tstate)
1544115441
PyStatus
1544215442
_PyUnicode_InitEncodings(PyThreadState *tstate)
1544315443
{
15444-
PyStatus status = init_fs_encoding(tstate);
15444+
PyStatus status = _PyCodec_InitRegistry(tstate->interp);
15445+
if (_PyStatus_EXCEPTION(status)) {
15446+
return status;
15447+
}
15448+
status = init_fs_encoding(tstate);
1544515449
if (_PyStatus_EXCEPTION(status)) {
1544615450
return status;
1544715451
}

‎Python/codecs.c

Copy file name to clipboardExpand all lines: Python/codecs.c
+80-70Lines changed: 80 additions & 70 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ Copyright (c) Corporation for National Research Initiatives.
1111
#include "Python.h"
1212
#include "pycore_call.h" // _PyObject_CallNoArgs()
1313
#include "pycore_interp.h" // PyInterpreterState.codec_search_path
14+
#include "pycore_lock.h" // PyMutex
1415
#include "pycore_pyerrors.h" // _PyErr_FormatNote()
1516
#include "pycore_pystate.h" // _PyInterpreterState_GET()
1617
#include "pycore_ucnhash.h" // _PyUnicode_Name_CAPI
@@ -19,24 +20,10 @@ const char *Py_hexdigits = "0123456789abcdef";
1920

2021
/* --- Codec Registry ----------------------------------------------------- */
2122

22-
/* Import the standard encodings package which will register the first
23-
codec search function.
24-
25-
This is done in a lazy way so that the Unicode implementation does
26-
not downgrade startup time of scripts not needing it.
27-
28-
ImportErrors are silently ignored by this function. Only one try is
29-
made.
30-
31-
*/
32-
33-
static int _PyCodecRegistry_Init(void); /* Forward */
34-
3523
int PyCodec_Register(PyObject *search_function)
3624
{
3725
PyInterpreterState *interp = _PyInterpreterState_GET();
38-
if (interp->codec_search_path == NULL && _PyCodecRegistry_Init())
39-
goto onError;
26+
assert(interp->codecs.initialized);
4027
if (search_function == NULL) {
4128
PyErr_BadArgument();
4229
goto onError;
@@ -45,7 +32,14 @@ int PyCodec_Register(PyObject *search_function)
4532
PyErr_SetString(PyExc_TypeError, "argument must be callable");
4633
goto onError;
4734
}
48-
return PyList_Append(interp->codec_search_path, search_function);
35+
#ifdef Py_GIL_DISABLED
36+
PyMutex_Lock(&interp->codecs.search_path_mutex);
37+
#endif
38+
int ret = PyList_Append(interp->codecs.search_path, search_function);
39+
#ifdef Py_GIL_DISABLED
40+
PyMutex_Unlock(&interp->codecs.search_path_mutex);
41+
#endif
42+
return ret;
4943

5044
onError:
5145
return -1;
@@ -55,22 +49,34 @@ int
5549
PyCodec_Unregister(PyObject *search_function)
5650
{
5751
PyInterpreterState *interp = _PyInterpreterState_GET();
58-
PyObject *codec_search_path = interp->codec_search_path;
59-
/* Do nothing if codec_search_path is not created yet or was cleared. */
60-
if (codec_search_path == NULL) {
52+
if (interp->codecs.initialized != 1) {
53+
/* Do nothing if codecs state was cleared (only possible during
54+
interpreter shutdown). */
6155
return 0;
6256
}
6357

58+
PyObject *codec_search_path = interp->codecs.search_path;
6459
assert(PyList_CheckExact(codec_search_path));
65-
Py_ssize_t n = PyList_GET_SIZE(codec_search_path);
66-
for (Py_ssize_t i = 0; i < n; i++) {
67-
PyObject *item = PyList_GET_ITEM(codec_search_path, i);
60+
for (Py_ssize_t i = 0; i < PyList_GET_SIZE(codec_search_path); i++) {
61+
#ifdef Py_GIL_DISABLED
62+
PyMutex_Lock(&interp->codecs.search_path_mutex);
63+
#endif
64+
PyObject *item = PyList_GetItemRef(codec_search_path, i);
65+
int ret = 1;
6866
if (item == search_function) {
69-
if (interp->codec_search_cache != NULL) {
70-
assert(PyDict_CheckExact(interp->codec_search_cache));
71-
PyDict_Clear(interp->codec_search_cache);
72-
}
73-
return PyList_SetSlice(codec_search_path, i, i+1, NULL);
67+
// We hold a reference to the item, so its destructor can't run
68+
// while we hold search_path_mutex.
69+
ret = PyList_SetSlice(codec_search_path, i, i+1, NULL);
70+
}
71+
#ifdef Py_GIL_DISABLED
72+
PyMutex_Unlock(&interp->codecs.search_path_mutex);
73+
#endif
74+
Py_DECREF(item);
75+
if (ret != 1) {
76+
assert(interp->codecs.search_cache != NULL);
77+
assert(PyDict_CheckExact(interp->codecs.search_cache));
78+
PyDict_Clear(interp->codecs.search_cache);
79+
return ret;
7480
}
7581
}
7682
return 0;
@@ -132,9 +138,7 @@ PyObject *_PyCodec_Lookup(const char *encoding)
132138
}
133139

134140
PyInterpreterState *interp = _PyInterpreterState_GET();
135-
if (interp->codec_search_path == NULL && _PyCodecRegistry_Init()) {
136-
return NULL;
137-
}
141+
assert(interp->codecs.initialized);
138142

139143
/* Convert the encoding to a normalized Python string: all
140144
characters are converted to lower case, spaces and hyphens are
@@ -147,7 +151,7 @@ PyObject *_PyCodec_Lookup(const char *encoding)
147151

148152
/* First, try to lookup the name in the registry dictionary */
149153
PyObject *result;
150-
if (PyDict_GetItemRef(interp->codec_search_cache, v, &result) < 0) {
154+
if (PyDict_GetItemRef(interp->codecs.search_cache, v, &result) < 0) {
151155
goto onError;
152156
}
153157
if (result != NULL) {
@@ -156,7 +160,7 @@ PyObject *_PyCodec_Lookup(const char *encoding)
156160
}
157161

158162
/* Next, scan the search functions in order of registration */
159-
const Py_ssize_t len = PyList_Size(interp->codec_search_path);
163+
const Py_ssize_t len = PyList_Size(interp->codecs.search_path);
160164
if (len < 0)
161165
goto onError;
162166
if (len == 0) {
@@ -170,14 +174,15 @@ PyObject *_PyCodec_Lookup(const char *encoding)
170174
for (i = 0; i < len; i++) {
171175
PyObject *func;
172176

173-
func = PyList_GetItem(interp->codec_search_path, i);
177+
func = PyList_GetItemRef(interp->codecs.search_path, i);
174178
if (func == NULL)
175179
goto onError;
176180
result = PyObject_CallOneArg(func, v);
181+
Py_DECREF(func);
177182
if (result == NULL)
178183
goto onError;
179184
if (result == Py_None) {
180-
Py_DECREF(result);
185+
Py_CLEAR(result);
181186
continue;
182187
}
183188
if (!PyTuple_Check(result) || PyTuple_GET_SIZE(result) != 4) {
@@ -188,15 +193,15 @@ PyObject *_PyCodec_Lookup(const char *encoding)
188193
}
189194
break;
190195
}
191-
if (i == len) {
196+
if (result == NULL) {
192197
/* XXX Perhaps we should cache misses too ? */
193198
PyErr_Format(PyExc_LookupError,
194199
"unknown encoding: %s", encoding);
195200
goto onError;
196201
}
197202

198203
/* Cache and return the result */
199-
if (PyDict_SetItem(interp->codec_search_cache, v, result) < 0) {
204+
if (PyDict_SetItem(interp->codecs.search_cache, v, result) < 0) {
200205
Py_DECREF(result);
201206
goto onError;
202207
}
@@ -600,13 +605,12 @@ PyObject *_PyCodec_DecodeText(PyObject *object,
600605
int PyCodec_RegisterError(const char *name, PyObject *error)
601606
{
602607
PyInterpreterState *interp = _PyInterpreterState_GET();
603-
if (interp->codec_search_path == NULL && _PyCodecRegistry_Init())
604-
return -1;
608+
assert(interp->codecs.initialized);
605609
if (!PyCallable_Check(error)) {
606610
PyErr_SetString(PyExc_TypeError, "handler must be callable");
607611
return -1;
608612
}
609-
return PyDict_SetItemString(interp->codec_error_registry,
613+
return PyDict_SetItemString(interp->codecs.error_registry,
610614
name, error);
611615
}
612616

@@ -616,13 +620,12 @@ int PyCodec_RegisterError(const char *name, PyObject *error)
616620
PyObject *PyCodec_LookupError(const char *name)
617621
{
618622
PyInterpreterState *interp = _PyInterpreterState_GET();
619-
if (interp->codec_search_path == NULL && _PyCodecRegistry_Init())
620-
return NULL;
623+
assert(interp->codecs.initialized);
621624

622625
if (name==NULL)
623626
name = "strict";
624627
PyObject *handler;
625-
if (PyDict_GetItemStringRef(interp->codec_error_registry, name, &handler) < 0) {
628+
if (PyDict_GetItemStringRef(interp->codecs.error_registry, name, &handler) < 0) {
626629
return NULL;
627630
}
628631
if (handler == NULL) {
@@ -1375,7 +1378,8 @@ static PyObject *surrogateescape_errors(PyObject *self, PyObject *exc)
13751378
return PyCodec_SurrogateEscapeErrors(exc);
13761379
}
13771380

1378-
static int _PyCodecRegistry_Init(void)
1381+
PyStatus
1382+
_PyCodec_InitRegistry(PyInterpreterState *interp)
13791383
{
13801384
static struct {
13811385
const char *name;
@@ -1463,45 +1467,51 @@ static int _PyCodecRegistry_Init(void)
14631467
}
14641468
};
14651469

1466-
PyInterpreterState *interp = _PyInterpreterState_GET();
1467-
PyObject *mod;
1468-
1469-
if (interp->codec_search_path != NULL)
1470-
return 0;
1471-
1472-
interp->codec_search_path = PyList_New(0);
1473-
if (interp->codec_search_path == NULL) {
1474-
return -1;
1470+
assert(interp->codecs.initialized == 0);
1471+
interp->codecs.search_path = PyList_New(0);
1472+
if (interp->codecs.search_path == NULL) {
1473+
return PyStatus_NoMemory();
14751474
}
1476-
1477-
interp->codec_search_cache = PyDict_New();
1478-
if (interp->codec_search_cache == NULL) {
1479-
return -1;
1475+
interp->codecs.search_cache = PyDict_New();
1476+
if (interp->codecs.search_cache == NULL) {
1477+
return PyStatus_NoMemory();
14801478
}
1481-
1482-
interp->codec_error_registry = PyDict_New();
1483-
if (interp->codec_error_registry == NULL) {
1484-
return -1;
1479+
interp->codecs.error_registry = PyDict_New();
1480+
if (interp->codecs.error_registry == NULL) {
1481+
return PyStatus_NoMemory();
14851482
}
1486-
14871483
for (size_t i = 0; i < Py_ARRAY_LENGTH(methods); ++i) {
14881484
PyObject *func = PyCFunction_NewEx(&methods[i].def, NULL, NULL);
1489-
if (!func) {
1490-
return -1;
1485+
if (func == NULL) {
1486+
return PyStatus_NoMemory();
14911487
}
14921488

1493-
int res = PyCodec_RegisterError(methods[i].name, func);
1489+
int res = PyDict_SetItemString(interp->codecs.error_registry,
1490+
methods[i].name, func);
14941491
Py_DECREF(func);
1495-
if (res) {
1496-
return -1;
1492+
if (res < 0) {
1493+
return PyStatus_Error("Failed to insert into codec error registry");
14971494
}
14981495
}
14991496

1500-
mod = PyImport_ImportModule("encodings");
1497+
interp->codecs.initialized = 1;
1498+
1499+
// Importing `encodings' will call back into this module to register codec
1500+
// search functions, so this is done after everything else is initialized.
1501+
PyObject *mod = PyImport_ImportModule("encodings");
15011502
if (mod == NULL) {
1502-
return -1;
1503+
return PyStatus_Error("Failed to import encodings module");
15031504
}
15041505
Py_DECREF(mod);
1505-
interp->codecs_initialized = 1;
1506-
return 0;
1506+
1507+
return PyStatus_Ok();
1508+
}
1509+
1510+
void
1511+
_PyCodec_Fini(PyInterpreterState *interp)
1512+
{
1513+
Py_CLEAR(interp->codecs.search_path);
1514+
Py_CLEAR(interp->codecs.search_cache);
1515+
Py_CLEAR(interp->codecs.error_registry);
1516+
interp->codecs.initialized = 0;
15071517
}

‎Python/pystate.c

Copy file name to clipboardExpand all lines: Python/pystate.c
+1-3Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -843,9 +843,7 @@ interpreter_clear(PyInterpreterState *interp, PyThreadState *tstate)
843843
}
844844

845845
PyConfig_Clear(&interp->config);
846-
Py_CLEAR(interp->codec_search_path);
847-
Py_CLEAR(interp->codec_search_cache);
848-
Py_CLEAR(interp->codec_error_registry);
846+
_PyCodec_Fini(interp);
849847

850848
assert(interp->imports.modules == NULL);
851849
assert(interp->imports.modules_by_index == NULL);

‎Tools/c-analyzer/cpython/ignored.tsv

Copy file name to clipboardExpand all lines: Tools/c-analyzer/cpython/ignored.tsv
+1-1Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -344,7 +344,7 @@ Python/ceval.c - _PyEval_BinaryOps -
344344
Python/ceval.c - _Py_INTERPRETER_TRAMPOLINE_INSTRUCTIONS -
345345
Python/codecs.c - Py_hexdigits -
346346
Python/codecs.c - ucnhash_capi -
347-
Python/codecs.c _PyCodecRegistry_Init methods -
347+
Python/codecs.c _PyCodec_InitRegistry methods -
348348
Python/compile.c - NO_LABEL -
349349
Python/compile.c - NO_LOCATION -
350350
Python/dynload_shlib.c - _PyImport_DynLoadFiletab -

0 commit comments

Comments
0 (0)
Morty Proxy This is a proxified and sanitized view of the page, visit original site.