Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Appearance settings

Commit ea25180

Browse files
authored
bpo-40521: Per-interpreter interned strings (GH-20085)
Make the Unicode dictionary of interned strings compatible with subinterpreters. Remove the INTERN_NAME_STRINGS macro in typeobject.c: names are now always interned (even if the EXPERIMENTAL_ISOLATED_SUBINTERPRETERS macro is defined). _PyUnicode_ClearInterned() now uses PyDict_Next() instead of PyDict_Keys(), so it no longer allocates memory and cannot fail before the interned dictionary is cleared.
1 parent 993e88c commit ea25180
Copy full SHA for ea25180

File tree

5 files changed

+43
-83
lines changed
Filter options

5 files changed

+43
-83
lines changed

‎Include/internal/pycore_interp.h

Copy file name to clipboardExpand all lines: Include/internal/pycore_interp.h
+11Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,17 @@ struct _Py_unicode_state {
7676
shared as well. */
7777
PyObject *latin1[256];
7878
struct _Py_unicode_fs_codec fs_codec;
79+
80+
/* This dictionary holds all interned unicode strings. Note that references
81+
to strings in this dictionary are *not* counted in the string's ob_refcnt.
82+
When the interned string reaches a refcnt of 0 the string deallocation
83+
function will delete the reference from this dictionary.
84+
85+
Another way to look at this is to say that the actual reference
86+
count of a string is: s->ob_refcnt + (s->state ? 2 : 0)
87+
*/
88+
PyObject *interned;
89+
7990
// Unicode identifiers (_Py_Identifier): see _PyUnicode_FromId()
8091
struct _Py_unicode_ids ids;
8192
};
+2Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
Make the Unicode dictionary of interned strings compatible with
2+
subinterpreters. Patch by Victor Stinner.

‎Objects/typeobject.c

Copy file name to clipboardExpand all lines: Objects/typeobject.c
-22Lines changed: 0 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -48,11 +48,6 @@ typedef struct PySlot_Offset {
4848
} PySlot_Offset;
4949

5050

51-
/* bpo-40521: Interned strings are shared by all subinterpreters */
52-
#ifndef EXPERIMENTAL_ISOLATED_SUBINTERPRETERS
53-
# define INTERN_NAME_STRINGS
54-
#endif
55-
5651
/* alphabetical order */
5752
_Py_IDENTIFIER(__abstractmethods__);
5853
_Py_IDENTIFIER(__class__);
@@ -3527,7 +3522,6 @@ type_setattro(PyTypeObject *type, PyObject *name, PyObject *value)
35273522
if (name == NULL)
35283523
return -1;
35293524
}
3530-
#ifdef INTERN_NAME_STRINGS
35313525
if (!PyUnicode_CHECK_INTERNED(name)) {
35323526
PyUnicode_InternInPlace(&name);
35333527
if (!PyUnicode_CHECK_INTERNED(name)) {
@@ -3537,7 +3531,6 @@ type_setattro(PyTypeObject *type, PyObject *name, PyObject *value)
35373531
return -1;
35383532
}
35393533
}
3540-
#endif
35413534
}
35423535
else {
35433536
/* Will fail in _PyObject_GenericSetAttrWithDict. */
@@ -7683,17 +7676,10 @@ _PyTypes_InitSlotDefs(void)
76837676
for (slotdef *p = slotdefs; p->name; p++) {
76847677
/* Slots must be ordered by their offset in the PyHeapTypeObject. */
76857678
assert(!p[1].name || p->offset <= p[1].offset);
7686-
#ifdef INTERN_NAME_STRINGS
76877679
p->name_strobj = PyUnicode_InternFromString(p->name);
76887680
if (!p->name_strobj || !PyUnicode_CHECK_INTERNED(p->name_strobj)) {
76897681
return _PyStatus_NO_MEMORY();
76907682
}
7691-
#else
7692-
p->name_strobj = PyUnicode_FromString(p->name);
7693-
if (!p->name_strobj) {
7694-
return _PyStatus_NO_MEMORY();
7695-
}
7696-
#endif
76977683
}
76987684
slotdefs_initialized = 1;
76997685
return _PyStatus_OK();
@@ -7718,24 +7704,16 @@ update_slot(PyTypeObject *type, PyObject *name)
77187704
int offset;
77197705

77207706
assert(PyUnicode_CheckExact(name));
7721-
#ifdef INTERN_NAME_STRINGS
77227707
assert(PyUnicode_CHECK_INTERNED(name));
7723-
#endif
77247708

77257709
assert(slotdefs_initialized);
77267710
pp = ptrs;
77277711
for (p = slotdefs; p->name; p++) {
77287712
assert(PyUnicode_CheckExact(p->name_strobj));
77297713
assert(PyUnicode_CheckExact(name));
7730-
#ifdef INTERN_NAME_STRINGS
77317714
if (p->name_strobj == name) {
77327715
*pp++ = p;
77337716
}
7734-
#else
7735-
if (p->name_strobj == name || _PyUnicode_EQ(p->name_strobj, name)) {
7736-
*pp++ = p;
7737-
}
7738-
#endif
77397717
}
77407718
*pp = NULL;
77417719
for (pp = ptrs; *pp; pp++) {

‎Objects/unicodeobject.c

Copy file name to clipboardExpand all lines: Objects/unicodeobject.c
+28-61Lines changed: 28 additions & 61 deletions
Original file line numberDiff line numberDiff line change
@@ -206,22 +206,6 @@ extern "C" {
206206
# define OVERALLOCATE_FACTOR 4
207207
#endif
208208

209-
/* bpo-40521: Interned strings are shared by all interpreters. */
210-
#ifndef EXPERIMENTAL_ISOLATED_SUBINTERPRETERS
211-
# define INTERNED_STRINGS
212-
#endif
213-
214-
/* This dictionary holds all interned unicode strings. Note that references
215-
to strings in this dictionary are *not* counted in the string's ob_refcnt.
216-
When the interned string reaches a refcnt of 0 the string deallocation
217-
function will delete the reference from this dictionary.
218-
219-
Another way to look at this is that to say that the actual reference
220-
count of a string is: s->ob_refcnt + (s->state ? 2 : 0)
221-
*/
222-
#ifdef INTERNED_STRINGS
223-
static PyObject *interned = NULL;
224-
#endif
225209

226210
static struct _Py_unicode_state*
227211
get_unicode_state(void)
@@ -1946,22 +1930,23 @@ unicode_dealloc(PyObject *unicode)
19461930
break;
19471931

19481932
case SSTATE_INTERNED_MORTAL:
1949-
#ifdef INTERNED_STRINGS
1933+
{
1934+
struct _Py_unicode_state *state = get_unicode_state();
19501935
/* Revive the dead object temporarily. PyDict_DelItem() removes two
19511936
references (key and value) which were ignored by
19521937
PyUnicode_InternInPlace(). Use refcnt=3 rather than refcnt=2
19531938
to prevent calling unicode_dealloc() again. Adjust refcnt after
19541939
PyDict_DelItem(). */
19551940
assert(Py_REFCNT(unicode) == 0);
19561941
Py_SET_REFCNT(unicode, 3);
1957-
if (PyDict_DelItem(interned, unicode) != 0) {
1942+
if (PyDict_DelItem(state->interned, unicode) != 0) {
19581943
_PyErr_WriteUnraisableMsg("deletion of interned string failed",
19591944
NULL);
19601945
}
19611946
assert(Py_REFCNT(unicode) == 1);
19621947
Py_SET_REFCNT(unicode, 0);
1963-
#endif
19641948
break;
1949+
}
19651950

19661951
case SSTATE_INTERNED_IMMORTAL:
19671952
_PyObject_ASSERT_FAILED_MSG(unicode, "Immortal interned string died");
@@ -11536,12 +11521,11 @@ _PyUnicode_EqualToASCIIId(PyObject *left, _Py_Identifier *right)
1153611521
if (PyUnicode_CHECK_INTERNED(left))
1153711522
return 0;
1153811523

11539-
#ifdef INTERNED_STRINGS
1154011524
assert(_PyUnicode_HASH(right_uni) != -1);
1154111525
Py_hash_t hash = _PyUnicode_HASH(left);
11542-
if (hash != -1 && hash != _PyUnicode_HASH(right_uni))
11526+
if (hash != -1 && hash != _PyUnicode_HASH(right_uni)) {
1154311527
return 0;
11544-
#endif
11528+
}
1154511529

1154611530
return unicode_compare_eq(left, right_uni);
1154711531
}
@@ -15765,23 +15749,21 @@ PyUnicode_InternInPlace(PyObject **p)
1576515749
return;
1576615750
}
1576715751

15768-
#ifdef INTERNED_STRINGS
1576915752
if (PyUnicode_READY(s) == -1) {
1577015753
PyErr_Clear();
1577115754
return;
1577215755
}
1577315756

15774-
if (interned == NULL) {
15775-
interned = PyDict_New();
15776-
if (interned == NULL) {
15757+
struct _Py_unicode_state *state = get_unicode_state();
15758+
if (state->interned == NULL) {
15759+
state->interned = PyDict_New();
15760+
if (state->interned == NULL) {
1577715761
PyErr_Clear(); /* Don't leave an exception */
1577815762
return;
1577915763
}
1578015764
}
1578115765

15782-
PyObject *t;
15783-
t = PyDict_SetDefault(interned, s, s);
15784-
15766+
PyObject *t = PyDict_SetDefault(state->interned, s, s);
1578515767
if (t == NULL) {
1578615768
PyErr_Clear();
1578715769
return;
@@ -15798,13 +15780,9 @@ PyUnicode_InternInPlace(PyObject **p)
1579815780
this. */
1579915781
Py_SET_REFCNT(s, Py_REFCNT(s) - 2);
1580015782
_PyUnicode_STATE(s).interned = SSTATE_INTERNED_MORTAL;
15801-
#else
15802-
// PyDict expects that interned strings have their hash
15803-
// (PyASCIIObject.hash) already computed.
15804-
(void)unicode_hash(s);
15805-
#endif
1580615783
}
1580715784

15785+
1580815786
void
1580915787
PyUnicode_InternImmortal(PyObject **p)
1581015788
{
@@ -15838,35 +15816,25 @@ PyUnicode_InternFromString(const char *cp)
1583815816
void
1583915817
_PyUnicode_ClearInterned(PyThreadState *tstate)
1584015818
{
15841-
if (!_Py_IsMainInterpreter(tstate)) {
15842-
// interned dict is shared by all interpreters
15843-
return;
15844-
}
15845-
15846-
if (interned == NULL) {
15847-
return;
15848-
}
15849-
assert(PyDict_CheckExact(interned));
15850-
15851-
PyObject *keys = PyDict_Keys(interned);
15852-
if (keys == NULL) {
15853-
PyErr_Clear();
15819+
struct _Py_unicode_state *state = &tstate->interp->unicode;
15820+
if (state->interned == NULL) {
1585415821
return;
1585515822
}
15856-
assert(PyList_CheckExact(keys));
15823+
assert(PyDict_CheckExact(state->interned));
1585715824

1585815825
/* Interned unicode strings are not forcibly deallocated; rather, we give
1585915826
them their stolen references back, and then clear and DECREF the
1586015827
interned dict. */
1586115828

15862-
Py_ssize_t n = PyList_GET_SIZE(keys);
1586315829
#ifdef INTERNED_STATS
15864-
fprintf(stderr, "releasing %zd interned strings\n", n);
15830+
fprintf(stderr, "releasing %zd interned strings\n",
15831+
PyDict_GET_SIZE(state->interned));
1586515832

1586615833
Py_ssize_t immortal_size = 0, mortal_size = 0;
1586715834
#endif
15868-
for (Py_ssize_t i = 0; i < n; i++) {
15869-
PyObject *s = PyList_GET_ITEM(keys, i);
15835+
Py_ssize_t pos = 0;
15836+
PyObject *s, *ignored_value;
15837+
while (PyDict_Next(state->interned, &pos, &s, &ignored_value)) {
1587015838
assert(PyUnicode_IS_READY(s));
1587115839

1587215840
switch (PyUnicode_CHECK_INTERNED(s)) {
@@ -15896,10 +15864,9 @@ _PyUnicode_ClearInterned(PyThreadState *tstate)
1589615864
"total size of all interned strings: %zd/%zd mortal/immortal\n",
1589715865
mortal_size, immortal_size);
1589815866
#endif
15899-
Py_DECREF(keys);
1590015867

15901-
PyDict_Clear(interned);
15902-
Py_CLEAR(interned);
15868+
PyDict_Clear(state->interned);
15869+
Py_CLEAR(state->interned);
1590315870
}
1590415871

1590515872

@@ -16269,19 +16236,19 @@ _PyUnicode_EnableLegacyWindowsFSEncoding(void)
1626916236
void
1627016237
_PyUnicode_Fini(PyThreadState *tstate)
1627116238
{
16239+
struct _Py_unicode_state *state = &tstate->interp->unicode;
16240+
1627216241
// _PyUnicode_ClearInterned() must be called before
16242+
assert(state->interned == NULL);
1627316243

16274-
struct _Py_unicode_state *state = &tstate->interp->unicode;
16244+
_PyUnicode_FiniEncodings(&state->fs_codec);
1627516245

16276-
Py_CLEAR(state->empty_string);
16246+
unicode_clear_identifiers(tstate);
1627716247

1627816248
for (Py_ssize_t i = 0; i < 256; i++) {
1627916249
Py_CLEAR(state->latin1[i]);
1628016250
}
16281-
16282-
unicode_clear_identifiers(tstate);
16283-
16284-
_PyUnicode_FiniEncodings(&tstate->interp->unicode.fs_codec);
16251+
Py_CLEAR(state->empty_string);
1628516252
}
1628616253

1628716254

‎Python/pylifecycle.c

Copy file name to clipboardExpand all lines: Python/pylifecycle.c
+2Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1573,6 +1573,8 @@ finalize_interp_types(PyThreadState *tstate)
15731573
_PyFrame_Fini(tstate);
15741574
_PyAsyncGen_Fini(tstate);
15751575
_PyContext_Fini(tstate);
1576+
// Call _PyUnicode_ClearInterned() before _PyDict_Fini() since it uses
1577+
// a dict internally.
15761578
_PyUnicode_ClearInterned(tstate);
15771579

15781580
_PyDict_Fini(tstate);

0 commit comments

Comments
0 (0)
Morty Proxy: This is a proxified and sanitized view of the page; visit the original site.